1;; Copyright (C) 2002-2015 Free Software Foundation, Inc.
2;;
3;; This file is part of GCC.
4;;
5;; GCC is free software; you can redistribute it and/or modify
6;; it under the terms of the GNU General Public License as published by
7;; the Free Software Foundation; either version 3, or (at your option)
8;; any later version.
9;;
10;; GCC is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13;; GNU General Public License for more details.
14;;
15;; You should have received a copy of the GNU General Public License
16;; along with GCC; see the file COPYING3.  If not see
17;; <http://www.gnu.org/licenses/>.
18;;
19;; AMD Athlon Scheduling
20;;
21;; The Athlon does contain three pipelined FP units, three integer units and
22;; three address generation units.
23;;
24;; The predecode logic determines the boundaries of instructions in the 64
25;; byte cache line.  So the cache line straddling problem of the K6 might be
26;; an issue here as well, but it is not noted in the documentation.
27;;
28;; Three DirectPath instruction decoders and only one VectorPath decoder
29;; are available.  They can decode three DirectPath instructions or one
30;; VectorPath instruction per cycle.
31;; Decoded macro instructions are then passed to the 72 entry instruction
32;; control unit, which passes them on to the specialized integer (18 entry)
33;; and fp (36 entry) schedulers.
34;;
35;; The load/store queue unit is not attached to the schedulers but
36;; communicates with all the execution units separately instead.
37
;; Decode path of an insn for the Athlon/K8 reservations below.  The value is
;; "vector" for the listed insn types, for a push whose operand 1 is a memory
;; operand, and for XFmode fmov loads/stores; everything else defaults to
;; "direct".  The default expression here never yields "double".
38(define_attr "athlon_decode" "direct,vector,double"
39  (cond [(eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,leave")
40	   (const_string "vector")
41         (and (eq_attr "type" "push")
42              (match_operand 1 "memory_operand"))
43	   (const_string "vector")
44         (and (eq_attr "type" "fmov")
45	      (and (eq_attr "memory" "load,store")
46		   (eq_attr "mode" "XF")))
47	   (const_string "vector")]
48	(const_string "direct")))
49
;; Decode path tested by the *_amdfam10 reservations; defaults to "direct".
50(define_attr "amdfam10_decode" "direct,vector,double"
51  (const_string "direct"))
52;;
53;;           decode0 decode1 decode2
54;;                 \    |   /
55;;    instruction control unit (72 entry scheduler)
56;;                |                        |
57;;      integer scheduler (18)         stack map
58;;     /  |    |    |    |   \        stack rename
59;;  ieu0 agu0 ieu1 agu1 ieu2 agu2      scheduler
60;;    |  agu0  |   agu1      agu2    register file
61;;    |      \ |    |       /         |     |     |
62;;     \      /\    |     /         fadd  fmul  fstore
63;;       \  /    \  |   /           fadd  fmul  fstore
64;;       imul  load/store (2x)      fadd  fmul  fstore
65
;; Four automata are declared.  The three direct-path decoders and the vector
;; decoder all belong to the "athlon" automaton.
66(define_automaton "athlon,athlon_load,athlon_mult,athlon_fp")
67(define_cpu_unit "athlon-decode0" "athlon")
68(define_cpu_unit "athlon-decode1" "athlon")
69(define_cpu_unit "athlon-decode2" "athlon")
70(define_cpu_unit "athlon-decodev" "athlon")
71;; Model the fact that a double decoded instruction may take 2 cycles
72;; to decode when decoder2 and, in the next cycle, decoder0
73;; are used (this is needed to allow a throughput of 1.5 double decoded
74;; instructions per cycle).
75;;
76;; In order to avoid dependence between reservation of decoder
77;; and other units, we model the decoder as a two stage fully pipelined unit
78;; and only a double decoded instruction may occupy a unit in the first cycle.
79;; With this scheme however two double instructions can be issued in cycle 0.
80;;
81;; Avoid this by using a presence set requiring decoder0 to be allocated
82;; too. Vector decoded instructions then can't be issued when
83;; modeled as consuming decoder0+decoder1+decoder2.
84;; We solve that with a specialized vector decoder unit and an exclusion set.
85(presence_set "athlon-decode2" "athlon-decode0")
86(exclusion_set "athlon-decodev" "athlon-decode0,athlon-decode1,athlon-decode2")
;; Each of the following reservations starts with an empty ("nothing") cycle
;; and reserves its decoder unit in the second cycle.
87(define_reservation "athlon-vector" "nothing,athlon-decodev")
88(define_reservation "athlon-direct0" "nothing,athlon-decode0")
89(define_reservation "athlon-direct" "nothing,
90				     (athlon-decode0 | athlon-decode1
91				     | athlon-decode2)")
92;; Double instructions behave like two direct instructions: either decode2
;; followed by decode0 in the next cycle, or an empty cycle followed by two
;; adjacent decoders taken together.
93(define_reservation "athlon-double" "((athlon-decode2, athlon-decode0)
94				     | (nothing,(athlon-decode0 + athlon-decode1))
95				     | (nothing,(athlon-decode1 + athlon-decode2)))")
96
97;; Agu and ieu unit results in extremely large automatons and
98;; in our approximation they are hardly filled in.  Only ieu
99;; unit can, as issue rate is 3 and agu unit is always used
100;; first in the insn reservations.  Skip the models.
;;
;; Consequently "athlon-ieu" and "athlon-agu" are defined as empty
;; reservations ("nothing"); the detailed per-unit models are kept only as
;; commented-out text.  "athlon-ieu0" is the one integer unit that remains a
;; real cpu unit (in the "athlon" automaton); it is named explicitly by the
;; imul, idiv and string reservations in this file.
101
102;(define_cpu_unit "athlon-ieu0" "athlon_ieu")
103;(define_cpu_unit "athlon-ieu1" "athlon_ieu")
104;(define_cpu_unit "athlon-ieu2" "athlon_ieu")
105;(define_reservation "athlon-ieu" "(athlon-ieu0 | athlon-ieu1 | athlon-ieu2)")
106(define_reservation "athlon-ieu" "nothing")
107(define_cpu_unit "athlon-ieu0" "athlon")
108;(define_cpu_unit "athlon-agu0" "athlon_agu")
109;(define_cpu_unit "athlon-agu1" "athlon_agu")
110;(define_cpu_unit "athlon-agu2" "athlon_agu")
111;(define_reservation "athlon-agu" "(athlon-agu0 | athlon-agu1 | athlon-agu2)")
112(define_reservation "athlon-agu" "nothing")
113
;; The integer multiplier is the only unit of the "athlon_mult" automaton.
114(define_cpu_unit "athlon-mult" "athlon_mult")
115
;; Two load units in the "athlon_load" automaton.  A load is the (empty) agu
;; reservation, then either load unit, then one empty cycle.
116(define_cpu_unit "athlon-load0" "athlon_load")
117(define_cpu_unit "athlon-load1" "athlon_load")
118(define_reservation "athlon-load" "athlon-agu,
119				   (athlon-load0 | athlon-load1),nothing")
120;; 128bit SSE instructions issue two loads at once
121(define_reservation "athlon-load2" "athlon-agu,
122				   (athlon-load0 + athlon-load1),nothing")
123
;; Stores are modelled on the same two units as loads: a store takes either
;; unit for one cycle.
124(define_reservation "athlon-store" "(athlon-load0 | athlon-load1)")
125;; 128bit SSE instructions issue two stores at once
126(define_reservation "athlon-store2" "(athlon-load0 + athlon-load1)")
127
128
129;; The FP operations start to execute at stage 12 in the pipeline, while
130;; integer operations start to execute at stage 9 for Athlon and 11 for K8
131;; Compensate the difference for Athlon because it results in significantly
132;; smaller automata.
;; The compensation is expressed as three empty cycles.
133(define_reservation "athlon-fpsched" "nothing,nothing,nothing")
134;; The floating point loads: the FP scheduling delay overlapped with a load.
;; The "k8" variants are currently defined identically to the plain ones.
135(define_reservation "athlon-fpload" "(athlon-fpsched + athlon-load)")
136(define_reservation "athlon-fpload2" "(athlon-fpsched + athlon-load2)")
137(define_reservation "athlon-fploadk8" "(athlon-fpsched + athlon-load)")
138(define_reservation "athlon-fpload2k8" "(athlon-fpsched + athlon-load2)")
139
140
141;; The three fp units are fully pipelined with latency of 3
;; All three belong to the "athlon_fp" automaton.
142(define_cpu_unit "athlon-fadd" "athlon_fp")
143(define_cpu_unit "athlon-fmul" "athlon_fp")
144(define_cpu_unit "athlon-fstore" "athlon_fp")
;; "athlon-fany" picks any one of the three units; "athlon-faddmul" picks
;; either the adder or the multiplier.
145(define_reservation "athlon-fany" "(athlon-fstore | athlon-fmul | athlon-fadd)")
146(define_reservation "athlon-faddmul" "(athlon-fadd | athlon-fmul)")
147
148;; Vector operations usually consume many of the pipes; model them as
;; occupying all three FP units in the same cycle.
149(define_reservation "athlon-fvector" "(athlon-fadd + athlon-fmul + athlon-fstore)")
150
151
152;; Jump instructions are executed in the branch unit completely transparent to us
;; Branches ("ibr") are direct-path decoded and get latency 0 on all four CPUs.
153(define_insn_reservation "athlon_branch" 0
154			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
155			      (eq_attr "type" "ibr"))
156			 "athlon-direct,athlon-ieu")
;; Calls also get latency 0.  They use the vector decoder on
;; athlon/k8/generic ...
157(define_insn_reservation "athlon_call" 0
158			 (and (eq_attr "cpu" "athlon,k8,generic")
159			      (eq_attr "type" "call,callv"))
160			 "athlon-vector,athlon-ieu")
;; ... and the double-decode reservation on amdfam10.
161(define_insn_reservation "athlon_call_amdfam10" 0
162			 (and (eq_attr "cpu" "amdfam10")
163			      (eq_attr "type" "call,callv"))
164			 "athlon-double,athlon-ieu")
165
166;; Latency of push operation is 3 cycles, but ESP value is available
167;; earlier
168(define_insn_reservation "athlon_push" 2
169			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
170			      (eq_attr "type" "push"))
171			 "athlon-direct,athlon-agu,athlon-store")
;; Pop is modelled per CPU family: vector decoded with latency 4 on Athlon,
;; double decoded with latency 3 on K8/generic, direct decoded with latency 3
;; on amdfam10.  When several reservations match an insn the earliest one in
;; the file is used, so the Athlon entry must list only "athlon"; listing
;; k8/generic here as well would make athlon_pop_k8 unreachable.
172(define_insn_reservation "athlon_pop" 4
173			 (and (eq_attr "cpu" "athlon")
174			      (eq_attr "type" "pop"))
175			 "athlon-vector,athlon-load,athlon-ieu")
176(define_insn_reservation "athlon_pop_k8" 3
177			 (and (eq_attr "cpu" "k8,generic")
178			      (eq_attr "type" "pop"))
179			 "athlon-double,(athlon-ieu+athlon-load)")
180(define_insn_reservation "athlon_pop_amdfam10" 3
181			 (and (eq_attr "cpu" "amdfam10")
182			      (eq_attr "type" "pop"))
183			 "athlon-direct,(athlon-ieu+athlon-load)")
;; Leave has latency 3 everywhere; it is vector decoded on Athlon and double
;; decoded on k8/generic/amdfam10.
184(define_insn_reservation "athlon_leave" 3
185			 (and (eq_attr "cpu" "athlon")
186			      (eq_attr "type" "leave"))
187			 "athlon-vector,(athlon-ieu+athlon-load)")
188(define_insn_reservation "athlon_leave_k8" 3
189			 (and (eq_attr "cpu" "k8,generic,amdfam10")
190			      (eq_attr "type" "leave"))
191			 "athlon-double,(athlon-ieu+athlon-load)")
192
193;; Lea executes in AGU unit with 2 cycles latency.
;; Reservation: direct decode, the (empty) agu reservation, one empty cycle.
194(define_insn_reservation "athlon_lea" 2
195			 (and (eq_attr "cpu" "athlon,k8,generic")
196			      (eq_attr "type" "lea"))
197			 "athlon-direct,athlon-agu,nothing")
198;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
;; Only the latency differs from athlon_lea; the reservation is the same.
199(define_insn_reservation "athlon_lea_amdfam10" 1
200			 (and (eq_attr "cpu" "amdfam10")
201			      (eq_attr "type" "lea"))
202			 "athlon-direct,athlon-agu,nothing")
203
204;; Mul executes in special multiplier unit attached to IEU0
;; Register-operand multiply on Athlon: vector decoded, latency 5; it holds
;; ieu0, then the multiplier, and ieu0 again at the end.
205(define_insn_reservation "athlon_imul" 5
206			 (and (eq_attr "cpu" "athlon")
207			      (and (eq_attr "type" "imul")
208				   (eq_attr "memory" "none,unknown")))
209			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
210;; ??? Widening multiply is vector or double.
;; Register-operand multiplies on k8/generic/amdfam10.  The mode-specific
;; entries (DI, and HI on amdfam10) must precede the generic athlon_imul_k8
;; entry: when several reservations match an insn the earliest one in the
;; file is used, so a mode-specific entry placed after the generic one would
;; never be selected.
211(define_insn_reservation "athlon_imul_k8_DI" 4
212			 (and (eq_attr "cpu" "k8,generic,amdfam10")
213			      (and (eq_attr "type" "imul")
214				   (and (eq_attr "mode" "DI")
215					(eq_attr "memory" "none,unknown"))))
216			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; HImode multiply on amdfam10: vector decoded, latency 4.
217(define_insn_reservation "athlon_imul_amdfam10_HI" 4
218			 (and (eq_attr "cpu" "amdfam10")
219			      (and (eq_attr "type" "imul")
220				   (and (eq_attr "mode" "HI")
221					(eq_attr "memory" "none,unknown"))))
222			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
;; All remaining register-operand multiplies: decoder 0 only, latency 3.
223(define_insn_reservation "athlon_imul_k8" 3
224			 (and (eq_attr "cpu" "k8,generic,amdfam10")
225			      (and (eq_attr "type" "imul")
226				   (eq_attr "memory" "none,unknown")))
227			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
;; Multiplies with a memory source are vector decoded on every CPU; the
;; latency is that of the corresponding register form plus 3 for the load.
228(define_insn_reservation "athlon_imul_mem" 8
229			 (and (eq_attr "cpu" "athlon")
230			      (and (eq_attr "type" "imul")
231				   (eq_attr "memory" "load,both")))
232			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
233(define_insn_reservation "athlon_imul_mem_k8_DI" 7
234			 (and (eq_attr "cpu" "k8,generic,amdfam10")
235			      (and (eq_attr "type" "imul")
236				   (and (eq_attr "mode" "DI")
237					(eq_attr "memory" "load,both"))))
238			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
239(define_insn_reservation "athlon_imul_mem_k8" 6
240			 (and (eq_attr "cpu" "k8,generic,amdfam10")
241			      (and (eq_attr "type" "imul")
242				   (eq_attr "memory" "load,both")))
243			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
244
245;; Idiv cannot execute in parallel with other instructions.  Dealing with it
246;; as with a short latency vector instruction is a good approximation that
247;; keeps the scheduler from trying too hard to hide its latency by overlap
248;; with other instructions.
249;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
250;; of the other code
251;; Using the same heuristics for amdfam10 as K8 with idiv
252
;; Register divide: vector decoded, latency 6; holds ieu0 for six cycles and,
;; in parallel, all three FP units after the FP scheduling delay.
253(define_insn_reservation "athlon_idiv" 6
254			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
255			      (and (eq_attr "type" "idiv")
256				   (eq_attr "memory" "none,unknown")))
257			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
;; Divide with a memory operand: latency 9; the load precedes the six ieu0
;; cycles.
258(define_insn_reservation "athlon_idiv_mem" 9
259			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
260			      (and (eq_attr "type" "idiv")
261				   (eq_attr "memory" "load,both")))
262			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
263;; The parallelism of string instructions is not documented.  Model it same way
264;; as idiv to create smaller automata.  This probably does not matter much.
265;; Using the same heuristics for amdfam10 as K8 with idiv
266(define_insn_reservation "athlon_str" 6
267			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
268			      (and (eq_attr "type" "str")
269				   (eq_attr "memory" "load,both,store")))
270			 "athlon-vector,athlon-load,athlon-ieu0*6")
271
;; Generic integer insns without a memory operand (unit "integer,unknown",
;; memory "none,unknown"), selected by decode path.  Direct-path insns get
;; latency 1; vector-path insns get latency 2 and two athlon-ieu steps.
;; The amdfam10 variants test "amdfam10_decode" instead of "athlon_decode"
;; but use the same latencies and reservations.
272(define_insn_reservation "athlon_idirect" 1
273			 (and (eq_attr "cpu" "athlon,k8,generic")
274			      (and (eq_attr "athlon_decode" "direct")
275				   (and (eq_attr "unit" "integer,unknown")
276					(eq_attr "memory" "none,unknown"))))
277			 "athlon-direct,athlon-ieu")
278(define_insn_reservation "athlon_idirect_amdfam10" 1
279			 (and (eq_attr "cpu" "amdfam10")
280			      (and (eq_attr "amdfam10_decode" "direct")
281				   (and (eq_attr "unit" "integer,unknown")
282					(eq_attr "memory" "none,unknown"))))
283			 "athlon-direct,athlon-ieu")
284(define_insn_reservation "athlon_ivector" 2
285			 (and (eq_attr "cpu" "athlon,k8,generic")
286			      (and (eq_attr "athlon_decode" "vector")
287				   (and (eq_attr "unit" "integer,unknown")
288					(eq_attr "memory" "none,unknown"))))
289			 "athlon-vector,athlon-ieu,athlon-ieu")
290(define_insn_reservation "athlon_ivector_amdfam10" 2
291			 (and (eq_attr "cpu" "amdfam10")
292			      (and (eq_attr "amdfam10_decode" "vector")
293				   (and (eq_attr "unit" "integer,unknown")
294					(eq_attr "memory" "none,unknown"))))
295			 "athlon-vector,athlon-ieu,athlon-ieu")
296
;; A plain integer move from memory: latency 3, no ieu step.  It is listed
;; ahead of the generic load entries below, whose conditions do not exclude
;; type "imov".
297(define_insn_reservation "athlon_idirect_loadmov" 3
298			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
299			      (and (eq_attr "type" "imov")
300				   (eq_attr "memory" "load")))
301			 "athlon-direct,athlon-load")
302
;; Generic integer insns with a memory source (memory "load"): the load is
;; followed by the ieu step(s).  Direct path: latency 4; vector path:
;; latency 6 with two athlon-ieu steps.  The amdfam10 variants differ only
;; in testing "amdfam10_decode".
303(define_insn_reservation "athlon_idirect_load" 4
304			 (and (eq_attr "cpu" "athlon,k8,generic")
305			      (and (eq_attr "athlon_decode" "direct")
306				   (and (eq_attr "unit" "integer,unknown")
307					(eq_attr "memory" "load"))))
308			 "athlon-direct,athlon-load,athlon-ieu")
309(define_insn_reservation "athlon_idirect_load_amdfam10" 4
310			 (and (eq_attr "cpu" "amdfam10")
311			      (and (eq_attr "amdfam10_decode" "direct")
312				   (and (eq_attr "unit" "integer,unknown")
313					(eq_attr "memory" "load"))))
314			 "athlon-direct,athlon-load,athlon-ieu")
315(define_insn_reservation "athlon_ivector_load" 6
316			 (and (eq_attr "cpu" "athlon,k8,generic")
317			      (and (eq_attr "athlon_decode" "vector")
318				   (and (eq_attr "unit" "integer,unknown")
319					(eq_attr "memory" "load"))))
320			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
321(define_insn_reservation "athlon_ivector_load_amdfam10" 6
322			 (and (eq_attr "cpu" "amdfam10")
323			      (and (eq_attr "amdfam10_decode" "vector")
324				   (and (eq_attr "unit" "integer,unknown")
325					(eq_attr "memory" "load"))))
326			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
327
;; A plain integer move to memory: latency 1; agu step followed by the store.
;; It is listed ahead of the generic store entries further down.
328(define_insn_reservation "athlon_idirect_movstore" 1
329			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
330			      (and (eq_attr "type" "imov")
331				   (eq_attr "memory" "store")))
332			 "athlon-direct,athlon-agu,athlon-store")
333
;; Read-modify-write integer insns (memory "both"), direct path: latency 4;
;; load, ieu, then two store cycles.
334(define_insn_reservation "athlon_idirect_both" 4
335			 (and (eq_attr "cpu" "athlon,k8,generic")
336			      (and (eq_attr "athlon_decode" "direct")
337				   (and (eq_attr "unit" "integer,unknown")
338					(eq_attr "memory" "both"))))
339			 "athlon-direct,athlon-load,
340			  athlon-ieu,athlon-store,
341			  athlon-store")
342(define_insn_reservation "athlon_idirect_both_amdfam10" 4
343			 (and (eq_attr "cpu" "amdfam10")
344			      (and (eq_attr "amdfam10_decode" "direct")
345				   (and (eq_attr "unit" "integer,unknown")
346					(eq_attr "memory" "both"))))
347			 "athlon-direct,athlon-load,
348			  athlon-ieu,athlon-store,
349			  athlon-store")
350
;; Read-modify-write integer insns, vector path: latency 6; load, two ieu
;; steps, then a single store cycle.
351(define_insn_reservation "athlon_ivector_both" 6
352			 (and (eq_attr "cpu" "athlon,k8,generic")
353			      (and (eq_attr "athlon_decode" "vector")
354				   (and (eq_attr "unit" "integer,unknown")
355					(eq_attr "memory" "both"))))
356			 "athlon-vector,athlon-load,
357			  athlon-ieu,
358			  athlon-ieu,
359			  athlon-store")
360(define_insn_reservation "athlon_ivector_both_amdfam10" 6
361			 (and (eq_attr "cpu" "amdfam10")
362			      (and (eq_attr "amdfam10_decode" "vector")
363				   (and (eq_attr "unit" "integer,unknown")
364					(eq_attr "memory" "both"))))
365			 "athlon-vector,athlon-load,
366			  athlon-ieu,
367			  athlon-ieu,
368			  athlon-store")
369
;; Store-only integer insns, direct path: latency 1; ieu and agu together,
;; then the store.
370(define_insn_reservation "athlon_idirect_store" 1
371			 (and (eq_attr "cpu" "athlon,k8,generic")
372			      (and (eq_attr "athlon_decode" "direct")
373				   (and (eq_attr "unit" "integer,unknown")
374					(eq_attr "memory" "store"))))
375			 "athlon-direct,(athlon-ieu+athlon-agu),
376			  athlon-store")
377(define_insn_reservation "athlon_idirect_store_amdfam10" 1
378			 (and (eq_attr "cpu" "amdfam10")
379			      (and (eq_attr "amdfam10_decode" "direct")
380				   (and (eq_attr "unit" "integer,unknown")
381					(eq_attr "memory" "store"))))
382			 "athlon-direct,(athlon-ieu+athlon-agu),
383			  athlon-store")
384
;; Store-only integer insns, vector path: latency 2; one extra ieu step
;; before the store.
385(define_insn_reservation "athlon_ivector_store" 2
386			 (and (eq_attr "cpu" "athlon,k8,generic")
387			      (and (eq_attr "athlon_decode" "vector")
388				   (and (eq_attr "unit" "integer,unknown")
389					(eq_attr "memory" "store"))))
390			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
391			  athlon-store")
392(define_insn_reservation "athlon_ivector_store_amdfam10" 2
393			 (and (eq_attr "cpu" "amdfam10")
394			      (and (eq_attr "amdfam10_decode" "vector")
395				   (and (eq_attr "unit" "integer,unknown")
396					(eq_attr "memory" "store"))))
397			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
398			  athlon-store")
399
400;; Athlon floating point unit
;; XFmode loads are vector decoded, use the double-width FP load and then
;; occupy all three FP units for nine cycles.  These entries precede the
;; generic fmov-load entries below so that they are the ones selected for
;; XFmode.
401(define_insn_reservation "athlon_fldxf" 12
402			 (and (eq_attr "cpu" "athlon")
403			      (and (eq_attr "type" "fmov")
404				   (and (eq_attr "memory" "load")
405					(eq_attr "mode" "XF"))))
406			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
407(define_insn_reservation "athlon_fldxf_k8" 13
408			 (and (eq_attr "cpu" "k8,generic,amdfam10")
409			      (and (eq_attr "type" "fmov")
410				   (and (eq_attr "memory" "load")
411					(eq_attr "mode" "XF"))))
412			 "athlon-vector,athlon-fpload2k8,athlon-fvector*9")
413;; Assume superforwarding to take place so effective latency of fany op is 0.
414(define_insn_reservation "athlon_fld" 0
415			 (and (eq_attr "cpu" "athlon")
416			      (and (eq_attr "type" "fmov")
417				   (eq_attr "memory" "load")))
418			 "athlon-direct,athlon-fpload,athlon-fany")
;; On k8/generic/amdfam10 the load gets latency 2 and uses the fstore unit.
419(define_insn_reservation "athlon_fld_k8" 2
420			 (and (eq_attr "cpu" "k8,generic,amdfam10")
421			      (and (eq_attr "type" "fmov")
422				   (eq_attr "memory" "load")))
423			 "athlon-direct,athlon-fploadk8,athlon-fstore")
424
;; XFmode FP stores: vector decoded; after the FP scheduling delay they take
;; both store ports together with all three FP units (for 7 cycles on
;; Athlon, 6 on k8/generic/amdfam10).  Listed before the generic FP store
;; entries so that XFmode selects these.
425(define_insn_reservation "athlon_fstxf" 10
426			 (and (eq_attr "cpu" "athlon")
427			      (and (eq_attr "type" "fmov")
428				   (and (eq_attr "memory" "store,both")
429					(eq_attr "mode" "XF"))))
430			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
431(define_insn_reservation "athlon_fstxf_k8" 8
432			 (and (eq_attr "cpu" "k8,generic,amdfam10")
433			      (and (eq_attr "type" "fmov")
434				   (and (eq_attr "memory" "store,both")
435					(eq_attr "mode" "XF"))))
436			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*6))")
;; Other FP stores: direct decoded, fstore unit plus one store port.
;; Latency 4 on Athlon, 2 on k8/generic/amdfam10.
437(define_insn_reservation "athlon_fst" 4
438			 (and (eq_attr "cpu" "athlon")
439			      (and (eq_attr "type" "fmov")
440				   (eq_attr "memory" "store,both")))
441			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
442(define_insn_reservation "athlon_fst_k8" 2
443			 (and (eq_attr "cpu" "k8,generic,amdfam10")
444			      (and (eq_attr "type" "fmov")
445				   (eq_attr "memory" "store,both")))
446			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; fistp/fisttp use the same reservation as a plain FP store, latency 4.
447(define_insn_reservation "athlon_fist" 4
448			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
449			      (eq_attr "type" "fistp,fisttp"))
450			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
;; Catch-all for the remaining fmov insns (no memory condition): latency 2 on
;; either the adder or the multiplier.
451(define_insn_reservation "athlon_fmov" 2
452			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
453			      (eq_attr "type" "fmov"))
454			 "athlon-direct,athlon-fpsched,athlon-faddmul")
;; FP add class ("fop") on the fadd unit.  The *_load entries come first and
;; handle a memory source; the final entry has no memory condition and so
;; catches the rest.
455(define_insn_reservation "athlon_fadd_load" 4
456			 (and (eq_attr "cpu" "athlon")
457			      (and (eq_attr "type" "fop")
458				   (eq_attr "memory" "load")))
459			 "athlon-direct,athlon-fpload,athlon-fadd")
460(define_insn_reservation "athlon_fadd_load_k8" 6
461			 (and (eq_attr "cpu" "k8,generic,amdfam10")
462			      (and (eq_attr "type" "fop")
463				   (eq_attr "memory" "load")))
464			 "athlon-direct,athlon-fploadk8,athlon-fadd")
465(define_insn_reservation "athlon_fadd" 4
466			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
467			      (eq_attr "type" "fop"))
468			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; FP multiply on the fmul unit, structured like the fadd entries above.
469(define_insn_reservation "athlon_fmul_load" 4
470			 (and (eq_attr "cpu" "athlon")
471			      (and (eq_attr "type" "fmul")
472				   (eq_attr "memory" "load")))
473			 "athlon-direct,athlon-fpload,athlon-fmul")
474(define_insn_reservation "athlon_fmul_load_k8" 6
475			 (and (eq_attr "cpu" "k8,generic,amdfam10")
476			      (and (eq_attr "type" "fmul")
477				   (eq_attr "memory" "load")))
478			 "athlon-direct,athlon-fploadk8,athlon-fmul")
479(define_insn_reservation "athlon_fmul" 4
480			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
481			      (eq_attr "type" "fmul"))
482			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; Sign operations ("fsgn") use the fmul unit with latency 2.
483(define_insn_reservation "athlon_fsgn" 2
484			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
485			      (eq_attr "type" "fsgn"))
486			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; FP divide is modelled on the fmul unit, which is reserved for a single
;; cycle only; the long latency (24 on Athlon, 11 or 13 with a load on
;; k8/generic/amdfam10) is expressed purely through the latency value.
487(define_insn_reservation "athlon_fdiv_load" 24
488			 (and (eq_attr "cpu" "athlon")
489			      (and (eq_attr "type" "fdiv")
490				   (eq_attr "memory" "load")))
491			 "athlon-direct,athlon-fpload,athlon-fmul")
492(define_insn_reservation "athlon_fdiv_load_k8" 13
493			 (and (eq_attr "cpu" "k8,generic,amdfam10")
494			      (and (eq_attr "type" "fdiv")
495				   (eq_attr "memory" "load")))
496			 "athlon-direct,athlon-fploadk8,athlon-fmul")
497(define_insn_reservation "athlon_fdiv" 24
498			 (and (eq_attr "cpu" "athlon")
499			      (eq_attr "type" "fdiv"))
500			 "athlon-direct,athlon-fpsched,athlon-fmul")
501(define_insn_reservation "athlon_fdiv_k8" 11
502			 (and (eq_attr "cpu" "k8,generic,amdfam10")
503			      (eq_attr "type" "fdiv"))
504			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; Special FP insns ("fpspc"): vector decoded, all three FP units for one
;; cycle, latency 100 (103 with a memory source).
505(define_insn_reservation "athlon_fpspc_load" 103
506			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
507			      (and (eq_attr "type" "fpspc")
508				   (eq_attr "memory" "load")))
509			 "athlon-vector,athlon-fpload,athlon-fvector")
510(define_insn_reservation "athlon_fpspc" 100
511			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
512			      (eq_attr "type" "fpspc"))
513			 "athlon-vector,athlon-fpsched,athlon-fvector")
;; FP conditional move: vector decoded, all three FP units.  Latency 7 on
;; Athlon (with or without a load); 15, or 17 with a load, on
;; k8/generic/amdfam10.
514(define_insn_reservation "athlon_fcmov_load" 7
515			 (and (eq_attr "cpu" "athlon")
516			      (and (eq_attr "type" "fcmov")
517				   (eq_attr "memory" "load")))
518			 "athlon-vector,athlon-fpload,athlon-fvector")
519(define_insn_reservation "athlon_fcmov" 7
520			 (and (eq_attr "cpu" "athlon")
521			      (eq_attr "type" "fcmov"))
522			 "athlon-vector,athlon-fpsched,athlon-fvector")
523(define_insn_reservation "athlon_fcmov_load_k8" 17
524			 (and (eq_attr "cpu" "k8,generic,amdfam10")
525			      (and (eq_attr "type" "fcmov")
526				   (eq_attr "memory" "load")))
527			 "athlon-vector,athlon-fploadk8,athlon-fvector")
528(define_insn_reservation "athlon_fcmov_k8" 15
529			 (and (eq_attr "cpu" "k8,generic,amdfam10")
530			      (eq_attr "type" "fcmov"))
531			 "athlon-vector,athlon-fpsched,athlon-fvector")
532;; fcomi is vector decoded but uses only one pipe.
;; These entries select FP compares whose "athlon_decode" is "vector"; they
;; precede the direct-path fcom entries below, which do not test the decode
;; path and therefore catch the remaining compares.
533(define_insn_reservation "athlon_fcomi_load" 3
534			 (and (eq_attr "cpu" "athlon")
535			      (and (eq_attr "type" "fcmp")
536				   (and (eq_attr "athlon_decode" "vector")
537				        (eq_attr "memory" "load"))))
538			 "athlon-vector,athlon-fpload,athlon-fadd")
539(define_insn_reservation "athlon_fcomi_load_k8" 5
540			 (and (eq_attr "cpu" "k8,generic,amdfam10")
541			      (and (eq_attr "type" "fcmp")
542				   (and (eq_attr "athlon_decode" "vector")
543				        (eq_attr "memory" "load"))))
544			 "athlon-vector,athlon-fploadk8,athlon-fadd")
545(define_insn_reservation "athlon_fcomi" 3
546			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
547			      (and (eq_attr "athlon_decode" "vector")
548				   (eq_attr "type" "fcmp")))
549			 "athlon-vector,athlon-fpsched,athlon-fadd")
;; Remaining FP compares: direct decoded on the fadd unit, latency 2
;; (4 with a load on k8/generic/amdfam10).
550(define_insn_reservation "athlon_fcom_load" 2
551			 (and (eq_attr "cpu" "athlon")
552			      (and (eq_attr "type" "fcmp")
553				   (eq_attr "memory" "load")))
554			 "athlon-direct,athlon-fpload,athlon-fadd")
555(define_insn_reservation "athlon_fcom_load_k8" 4
556			 (and (eq_attr "cpu" "k8,generic,amdfam10")
557			      (and (eq_attr "type" "fcmp")
558				   (eq_attr "memory" "load")))
559			 "athlon-direct,athlon-fploadk8,athlon-fadd")
560(define_insn_reservation "athlon_fcom" 2
561			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
562			      (eq_attr "type" "fcmp"))
563			 "athlon-direct,athlon-fpsched,athlon-fadd")
564;; Never seen by the scheduler because we still don't do post reg-stack
565;; scheduling.
566;(define_insn_reservation "athlon_fxch" 2
567;			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
568;			      (eq_attr "type" "fxch"))
569;			 "athlon-direct,athlon-fpsched,athlon-fany")
570
571;; Athlon handles MMX operations in the FPU unit with shorter latencies
572
;; SSE/MMX loads.  The entries run from most specific to least specific: DF
;; memory-operand moves first, then 128-bit modes, then SF/DI modes, and
;; finally the mode-independent catch-alls.
573(define_insn_reservation "athlon_movlpd_load" 0
574			 (and (eq_attr "cpu" "athlon")
575			      (and (eq_attr "type" "ssemov")
576				   (match_operand:DF 1 "memory_operand")))
577			 "athlon-direct,athlon-fpload,athlon-fany")
578(define_insn_reservation "athlon_movlpd_load_k8" 2
579			 (and (eq_attr "cpu" "k8")
580			      (and (eq_attr "type" "ssemov")
581				   (match_operand:DF 1 "memory_operand")))
582			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; For "generic" the same DF load is double decoded and takes fstore and
;; fmul together.
583(define_insn_reservation "athlon_movsd_load_generic" 2
584			 (and (eq_attr "cpu" "generic")
585			      (and (eq_attr "type" "ssemov")
586				   (match_operand:DF 1 "memory_operand")))
587			 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fmul)")
;; 128-bit (V4SF/V2DF/TI) loads use the double-width load: double decoded on
;; k8/generic, vector decoded on Athlon.
588(define_insn_reservation "athlon_movaps_load_k8" 2
589			 (and (eq_attr "cpu" "k8,generic")
590			      (and (eq_attr "type" "ssemov")
591				   (and (eq_attr "mode" "V4SF,V2DF,TI")
592					(eq_attr "memory" "load"))))
593			 "athlon-double,athlon-fpload2k8,athlon-fstore,athlon-fstore")
594(define_insn_reservation "athlon_movaps_load" 0
595			 (and (eq_attr "cpu" "athlon")
596			      (and (eq_attr "type" "ssemov")
597				   (and (eq_attr "mode" "V4SF,V2DF,TI")
598					(eq_attr "memory" "load"))))
599			 "athlon-vector,athlon-fpload2,(athlon-fany+athlon-fany)")
;; SF/DI mode loads: vector decoded on Athlon, double decoded on k8/generic.
600(define_insn_reservation "athlon_movss_load" 1
601			 (and (eq_attr "cpu" "athlon")
602			      (and (eq_attr "type" "ssemov")
603				   (and (eq_attr "mode" "SF,DI")
604					(eq_attr "memory" "load"))))
605			 "athlon-vector,athlon-fpload,(athlon-fany*2)")
606(define_insn_reservation "athlon_movss_load_k8" 1
607			 (and (eq_attr "cpu" "k8,generic")
608			      (and (eq_attr "type" "ssemov")
609				   (and (eq_attr "mode" "SF,DI")
610					(eq_attr "memory" "load"))))
611			 "athlon-double,athlon-fploadk8,(athlon-fstore+athlon-fany)")
;; Catch-all MMX/SSE loads: direct decoded.
612(define_insn_reservation "athlon_mmxsseld" 0
613			 (and (eq_attr "cpu" "athlon")
614			      (and (eq_attr "type" "mmxmov,ssemov")
615				   (eq_attr "memory" "load")))
616			 "athlon-direct,athlon-fpload,athlon-fany")
617(define_insn_reservation "athlon_mmxsseld_k8" 2
618			 (and (eq_attr "cpu" "k8,generic")
619			      (and (eq_attr "type" "mmxmov,ssemov")
620				   (eq_attr "memory" "load")))
621			 "athlon-direct,athlon-fploadk8,athlon-fstore")
622;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
623;; loads generated are direct path, latency of 2 and do not use any FP
624;; execution units.  No separate entries for movlpx/movhpx loads, which
625;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
626;; as they will not be generated.
627(define_insn_reservation "athlon_sseld_amdfam10" 2
628			 (and (eq_attr "cpu" "amdfam10")
629			      (and (eq_attr "type" "ssemov")
630				   (eq_attr "memory" "load")))
631			 "athlon-direct,athlon-fploadk8")
632;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
633;; and can use any FP execution unit
634(define_insn_reservation "athlon_mmxld_amdfam10" 4
635			 (and (eq_attr "cpu" "amdfam10")
636			      (and (eq_attr "type" "mmxmov")
637				   (eq_attr "memory" "load")))
638			 "athlon-direct,athlon-fploadk8, athlon-fany")
;; 128-bit (V4SF/V2DF/TI) MMX/SSE stores: latency 3, fstore plus both store
;; ports for two cycles.  They are vector decoded on Athlon and double
;; decoded on k8/generic, mirroring the athlon_movaps_load /
;; athlon_movaps_load_k8 split.  The Athlon entry must test cpu "athlon":
;; with the same cpu list as the _k8 entry the two conditions would be
;; identical, the _k8 entry could never be selected, and Athlon itself would
;; fall through to the short-store entry below.
639(define_insn_reservation "athlon_mmxssest" 3
640			 (and (eq_attr "cpu" "athlon")
641			      (and (eq_attr "type" "mmxmov,ssemov")
642				   (and (eq_attr "mode" "V4SF,V2DF,TI")
643					(eq_attr "memory" "store,both"))))
644			 "athlon-vector,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
645(define_insn_reservation "athlon_mmxssest_k8" 3
646			 (and (eq_attr "cpu" "k8,generic")
647			      (and (eq_attr "type" "mmxmov,ssemov")
648				   (and (eq_attr "mode" "V4SF,V2DF,TI")
649					(eq_attr "memory" "store,both"))))
650			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store2)*2)")
;; All other MMX/SSE stores: direct decoded, latency 2, fstore plus one store
;; port.
651(define_insn_reservation "athlon_mmxssest_short" 2
652			 (and (eq_attr "cpu" "athlon,k8,generic")
653			      (and (eq_attr "type" "mmxmov,ssemov")
654				   (eq_attr "memory" "store,both")))
655			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
656;; On AMDFAM10 all double, single and integer packed SSEx data stores
657;; generated are all double path, latency of 2 and use the FSTORE FP
658;; execution unit. No separate entries for movupx/movdqu, which are
659;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
660;; as they will not be generated.
661(define_insn_reservation "athlon_ssest_amdfam10" 2
662			 (and (eq_attr "cpu" "amdfam10")
663			      (and (eq_attr "type" "ssemov")
664				   (and (eq_attr "mode" "V4SF,V2DF,TI")
665					(eq_attr "memory" "store,both"))))
666			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
667;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
668;; data stores generated are all direct path, latency of 2 and use
669;; the FSTORE FP execution unit
670(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
671			 (and (eq_attr "cpu" "amdfam10")
672			      (and (eq_attr "type" "mmxmov,ssemov")
673				   (eq_attr "memory" "store,both")))
674			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
675(define_insn_reservation "athlon_movaps_k8" 2
676			 (and (eq_attr "cpu" "k8,generic")
677			      (and (eq_attr "type" "ssemov")
678				   (eq_attr "mode" "V4SF,V2DF,TI")))
679			 "athlon-double,athlon-fpsched,((athlon-faddmul+athlon-faddmul) | (athlon-faddmul, athlon-faddmul))")
680(define_insn_reservation "athlon_movaps" 2
681			 (and (eq_attr "cpu" "athlon")
682			      (and (eq_attr "type" "ssemov")
683				   (eq_attr "mode" "V4SF,V2DF,TI")))
684			 "athlon-vector,athlon-fpsched,(athlon-faddmul+athlon-faddmul)")
;; Any mmxmov or ssemov on athlon, k8 and generic, with no mode or memory
;; test.  Default latency 2.
;; NOTE(review): presumably the fallback for moves not claimed by the more
;; specific mmxmov/ssemov reservations listed earlier -- confirm.
685(define_insn_reservation "athlon_mmxssemov" 2
686			 (and (eq_attr "cpu" "athlon,k8,generic")
687			      (eq_attr "type" "mmxmov,ssemov"))
688			 "athlon-direct,athlon-fpsched,athlon-faddmul")
;; Type mmxmul with memory "load" on athlon, k8 and generic.  Default
;; latency 4.  Reserves athlon-direct, athlon-fpload, then athlon-fmul.
689(define_insn_reservation "athlon_mmxmul_load" 4
690			 (and (eq_attr "cpu" "athlon,k8,generic")
691			      (and (eq_attr "type" "mmxmul")
692				   (eq_attr "memory" "load")))
693			 "athlon-direct,athlon-fpload,athlon-fmul")
;; Type mmxmul on athlon, k8 and generic, with no memory test.  Default
;; latency 3.  Reserves athlon-direct, athlon-fpsched, then athlon-fmul.
694(define_insn_reservation "athlon_mmxmul" 3
695			 (and (eq_attr "cpu" "athlon,k8,generic")
696			      (eq_attr "type" "mmxmul"))
697			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; Any insn whose unit attribute is "mmx" and whose memory attribute is
;; "load", on athlon, k8 and generic.  Default latency 3.
698(define_insn_reservation "athlon_mmx_load" 3
699			 (and (eq_attr "cpu" "athlon,k8,generic")
700			      (and (eq_attr "unit" "mmx")
701				   (eq_attr "memory" "load")))
702			 "athlon-direct,athlon-fpload,athlon-faddmul")
;; Any insn whose unit attribute is "mmx", on athlon, k8 and generic, with
;; no memory test.  Default latency 2.
703(define_insn_reservation "athlon_mmx" 2
704			 (and (eq_attr "cpu" "athlon,k8,generic")
705			      (eq_attr "unit" "mmx"))
706			 "athlon-direct,athlon-fpsched,athlon-faddmul")
707;; SSE operations are handled by the i387 unit as well.  For scalar
708;; operations the latency is the same as for the i387 operations.
709
;; SSE logic/shuffle insns (type sselog, sselog1, sseshuf or sseshuf1) with
;; memory "load" on cpu athlon.  Default latency 3.  Reserves athlon-vector,
;; athlon-fpload2, then athlon-fmul for two cycles.
710(define_insn_reservation "athlon_sselog_load" 3
711			 (and (eq_attr "cpu" "athlon")
712			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
713				   (eq_attr "memory" "load")))
714			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
;; SSE logic/shuffle insns with memory "load" on k8 and generic.  Default
;; latency 5.  Reserves athlon-double, athlon-fpload2k8, then athlon-fmul
;; for two cycles.
715(define_insn_reservation "athlon_sselog_load_k8" 5
716			 (and (eq_attr "cpu" "k8,generic")
717			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
718				   (eq_attr "memory" "load")))
719			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
;; SSE logic/shuffle insns with memory "load" on amdfam10.  Default
;; latency 4.  Reserves athlon-direct, athlon-fploadk8, then either
;; athlon-fadd or athlon-fmul for one cycle.
720(define_insn_reservation "athlon_sselog_load_amdfam10" 4
721			 (and (eq_attr "cpu" "amdfam10")
722			      (and (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")
723				   (eq_attr "memory" "load")))
724			 "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
;; SSE logic/shuffle insns on cpu athlon, with no memory test.  Default
;; latency 3.  Reserves athlon-vector, athlon-fpsched, then athlon-fmul
;; for two cycles.
725(define_insn_reservation "athlon_sselog" 3
726			 (and (eq_attr "cpu" "athlon")
727			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
728			 "athlon-vector,athlon-fpsched,athlon-fmul*2")
;; SSE logic/shuffle insns on k8 and generic, with no memory test.  Default
;; latency 3.
;; NOTE(review): athlon-fmul is held for one cycle here, whereas the load
;; form athlon_sselog_load_k8 holds it for two -- confirm this is intended.
729(define_insn_reservation "athlon_sselog_k8" 3
730			 (and (eq_attr "cpu" "k8,generic")
731			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
732			 "athlon-double,athlon-fpsched,athlon-fmul")
;; SSE logic/shuffle insns on amdfam10, with no memory test.  Default
;; latency 2.  Reserves athlon-direct, athlon-fpsched, then either
;; athlon-fadd or athlon-fmul.
733(define_insn_reservation "athlon_sselog_amdfam10" 2
734			 (and (eq_attr "cpu" "amdfam10")
735			      (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))
736			 "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
737
738;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
;; Type ssecmp in mode SF, DF or DI with memory "load" on cpu athlon.
;; Default latency 2.  Reserves athlon-direct, athlon-fpload, athlon-fadd.
739(define_insn_reservation "athlon_ssecmp_load" 2
740			 (and (eq_attr "cpu" "athlon")
741			      (and (eq_attr "type" "ssecmp")
742				   (and (eq_attr "mode" "SF,DF,DI")
743					(eq_attr "memory" "load"))))
744			 "athlon-direct,athlon-fpload,athlon-fadd")
;; Type ssecmp in mode SF, DF, DI or TI with memory "load" on k8, generic
;; and amdfam10.  Default latency 4.  Unlike the athlon-only entry, the
;; mode list here also includes TI.
745(define_insn_reservation "athlon_ssecmp_load_k8" 4
746			 (and (eq_attr "cpu" "k8,generic,amdfam10")
747			      (and (eq_attr "type" "ssecmp")
748				   (and (eq_attr "mode" "SF,DF,DI,TI")
749					(eq_attr "memory" "load"))))
750			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Type ssecmp in mode SF, DF, DI or TI on athlon, k8, generic and amdfam10,
;; with no memory test.  Default latency 2.
751(define_insn_reservation "athlon_ssecmp" 2
752			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
753			      (and (eq_attr "type" "ssecmp")
754				   (eq_attr "mode" "SF,DF,DI,TI")))
755			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Type ssecmp with memory "load" on cpu athlon, with no mode test.
;; Default latency 3; athlon-fadd is held for two cycles.
;; NOTE(review): presumably reached only for modes not claimed by the
;; mode-restricted ssecmp reservations listed earlier -- confirm.
756(define_insn_reservation "athlon_ssecmpvector_load" 3
757			 (and (eq_attr "cpu" "athlon")
758			      (and (eq_attr "type" "ssecmp")
759				   (eq_attr "memory" "load")))
760			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
;; Type ssecmp with memory "load" on k8 and generic, with no mode test.
;; Default latency 5.  Reserves athlon-double, athlon-fpload2k8, then
;; athlon-fadd for two cycles.
761(define_insn_reservation "athlon_ssecmpvector_load_k8" 5
762			 (and (eq_attr "cpu" "k8,generic")
763			      (and (eq_attr "type" "ssecmp")
764				   (eq_attr "memory" "load")))
765			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
;; Type ssecmp with memory "load" on amdfam10, with no mode test.
;; Default latency 4.  Reserves athlon-direct, athlon-fploadk8, athlon-fadd.
766(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
767			 (and (eq_attr "cpu" "amdfam10")
768			      (and (eq_attr "type" "ssecmp")
769				   (eq_attr "memory" "load")))
770			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Type ssecmp on cpu athlon, with no mode or memory test.  Default
;; latency 3.  Reserves athlon-vector, athlon-fpsched, then athlon-fadd
;; for two cycles.
771(define_insn_reservation "athlon_ssecmpvector" 3
772			 (and (eq_attr "cpu" "athlon")
773			      (eq_attr "type" "ssecmp"))
774			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
;; Type ssecmp on k8 and generic, with no mode or memory test.  Default
;; latency 3.  Reserves athlon-double, athlon-fpsched, then athlon-fadd
;; for two cycles.
775(define_insn_reservation "athlon_ssecmpvector_k8" 3
776			 (and (eq_attr "cpu" "k8,generic")
777			      (eq_attr "type" "ssecmp"))
778			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
;; Type ssecmp on amdfam10, with no mode or memory test.  Default
;; latency 2.  Reserves athlon-direct, athlon-fpsched, athlon-fadd.
779(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
780			 (and (eq_attr "cpu" "amdfam10")
781			      (eq_attr "type" "ssecmp"))
782			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Type ssecomi with memory "load" on cpu athlon.  Default latency 4.
;; Reserves athlon-vector, athlon-fpload, athlon-fadd.
783(define_insn_reservation "athlon_ssecomi_load" 4
784			 (and (eq_attr "cpu" "athlon")
785			      (and (eq_attr "type" "ssecomi")
786				   (eq_attr "memory" "load")))
787			 "athlon-vector,athlon-fpload,athlon-fadd")
;; Type ssecomi with memory "load" on k8 and generic.  Default latency 6.
;; Reserves athlon-vector, athlon-fploadk8, athlon-fadd.
788(define_insn_reservation "athlon_ssecomi_load_k8" 6
789			 (and (eq_attr "cpu" "k8,generic")
790			      (and (eq_attr "type" "ssecomi")
791				   (eq_attr "memory" "load")))
792			 "athlon-vector,athlon-fploadk8,athlon-fadd")
;; Type ssecomi with memory "load" on amdfam10.  Default latency 5.
;; Reserves athlon-direct, athlon-fploadk8, athlon-fadd.
793(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
794			 (and (eq_attr "cpu" "amdfam10")
795			      (and (eq_attr "type" "ssecomi")
796				   (eq_attr "memory" "load")))
797			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Type ssecomi on athlon, k8 and generic, with no memory test.  Default
;; latency 4.  Reserves athlon-vector, athlon-fpsched, athlon-fadd.
798(define_insn_reservation "athlon_ssecomi" 4
799			 (and (eq_attr "cpu" "athlon,k8,generic")
800			      (eq_attr "type" "ssecomi"))
801			 "athlon-vector,athlon-fpsched,athlon-fadd")
;; Type ssecomi on amdfam10, with no memory test.  Default latency 3.
;; Reserves athlon-direct, athlon-fpsched, athlon-fadd.
;; NOTE(review): the remark embedded in the condition below looks stale;
;; athlon_ssecomi as it appears in this file tests type "ssecomi" as well.
802(define_insn_reservation "athlon_ssecomi_amdfam10" 3
803			 (and (eq_attr "cpu" "amdfam10")
804;; It seems athlon_ssecomi has a bug in the attr_type, fixed for amdfam10
805			      (eq_attr "type" "ssecomi"))
806			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Type sseadd or sseadd1 in mode SF, DF or DI with memory "load" on cpu
;; athlon.  Default latency 4.
807(define_insn_reservation "athlon_sseadd_load" 4
808			 (and (eq_attr "cpu" "athlon")
809			      (and (eq_attr "type" "sseadd,sseadd1")
810				   (and (eq_attr "mode" "SF,DF,DI")
811					(eq_attr "memory" "load"))))
812			 "athlon-direct,athlon-fpload,athlon-fadd")
;; Type sseadd or sseadd1 in mode SF, DF or DI with memory "load" on k8,
;; generic and amdfam10.  Default latency 6.
813(define_insn_reservation "athlon_sseadd_load_k8" 6
814			 (and (eq_attr "cpu" "k8,generic,amdfam10")
815			      (and (eq_attr "type" "sseadd,sseadd1")
816				   (and (eq_attr "mode" "SF,DF,DI")
817					(eq_attr "memory" "load"))))
818			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Type sseadd or sseadd1 in mode SF, DF or DI on athlon, k8, generic and
;; amdfam10, with no memory test.  Default latency 4.
819(define_insn_reservation "athlon_sseadd" 4
820			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
821			      (and (eq_attr "type" "sseadd,sseadd1")
822				   (eq_attr "mode" "SF,DF,DI")))
823			 "athlon-direct,athlon-fpsched,athlon-fadd")
;; Type sseadd or sseadd1 with memory "load" on cpu athlon, with no mode
;; test.  Default latency 5; athlon-fadd is held for two cycles.
;; NOTE(review): presumably reached only for modes other than SF/DF/DI,
;; which the mode-restricted sseadd reservations above claim -- confirm.
824(define_insn_reservation "athlon_sseaddvector_load" 5
825			 (and (eq_attr "cpu" "athlon")
826			      (and (eq_attr "type" "sseadd,sseadd1")
827				   (eq_attr "memory" "load")))
828			 "athlon-vector,athlon-fpload2,(athlon-fadd*2)")
;; Type sseadd or sseadd1 with memory "load" on k8 and generic, with no
;; mode test.  Default latency 7; athlon-fadd is held for two cycles.
829(define_insn_reservation "athlon_sseaddvector_load_k8" 7
830			 (and (eq_attr "cpu" "k8,generic")
831			      (and (eq_attr "type" "sseadd,sseadd1")
832				   (eq_attr "memory" "load")))
833			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
;; Type sseadd or sseadd1 with memory "load" on amdfam10, with no mode
;; test.  Default latency 6; athlon-fadd is held for one cycle.
834(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
835			 (and (eq_attr "cpu" "amdfam10")
836			      (and (eq_attr "type" "sseadd,sseadd1")
837				   (eq_attr "memory" "load")))
838			 "athlon-direct,athlon-fploadk8,athlon-fadd")
;; Type sseadd or sseadd1 on cpu athlon, with no mode or memory test.
;; Default latency 5; athlon-fadd is held for two cycles.
839(define_insn_reservation "athlon_sseaddvector" 5
840			 (and (eq_attr "cpu" "athlon")
841			      (eq_attr "type" "sseadd,sseadd1"))
842			 "athlon-vector,athlon-fpsched,(athlon-fadd*2)")
;; Type sseadd or sseadd1 on k8 and generic, with no mode or memory test.
;; Default latency 5; athlon-fadd is held for two cycles.
843(define_insn_reservation "athlon_sseaddvector_k8" 5
844			 (and (eq_attr "cpu" "k8,generic")
845			      (eq_attr "type" "sseadd,sseadd1"))
846			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
;; Type sseadd or sseadd1 on amdfam10, with no mode or memory test.
;; Default latency 4; athlon-fadd is held for one cycle.
847(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
848			 (and (eq_attr "cpu" "amdfam10")
849			      (eq_attr "type" "sseadd,sseadd1"))
850			 "athlon-direct,athlon-fpsched,athlon-fadd")
851
852;; Conversions behave very irregularly and their scheduling is critical
853;; here, so each instruction is handled separately.  Assume that the mode is
854;; always set to that of the destination and athlon_decode to the K8 values.
855
856;; cvtss2sd
;; Load form: type ssecvt, athlon_decode "direct", mode DF, memory "load",
;; on k8, athlon and generic.  Default latency 4.
857(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_k8" 4
858			 (and (eq_attr "cpu" "k8,athlon,generic")
859			      (and (eq_attr "type" "ssecvt")
860				   (and (eq_attr "athlon_decode" "direct")
861					(and (eq_attr "mode" "DF")
862					     (eq_attr "memory" "load")))))
863			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; amdfam10 load form: type ssecvt, amdfam10_decode "double", mode DF,
;; memory "load".  Default latency 7.  The final stage reserves
;; athlon-faddmul and athlon-fstore in the same cycle.
864(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
865			 (and (eq_attr "cpu" "amdfam10")
866			      (and (eq_attr "type" "ssecvt")
867				   (and (eq_attr "amdfam10_decode" "double")
868					(and (eq_attr "mode" "DF")
869					     (eq_attr "memory" "load")))))
870			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
;; Type ssecvt, athlon_decode "direct", mode DF on athlon, k8 and generic,
;; with no memory test.  Default latency 2.
871(define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
872			 (and (eq_attr "cpu" "athlon,k8,generic")
873			      (and (eq_attr "type" "ssecvt")
874				   (and (eq_attr "athlon_decode" "direct")
875					(eq_attr "mode" "DF"))))
876			 "athlon-direct,athlon-fpsched,athlon-fstore")
;; Type ssecvt, amdfam10_decode "vector", mode DF on amdfam10, with no
;; memory test.  Default latency 7.  Reserves athlon-vector, athlon-fpsched,
;; athlon-faddmul, then athlon-fstore for two cycles.
877(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
878			 (and (eq_attr "cpu" "amdfam10")
879			      (and (eq_attr "type" "ssecvt")
880				   (and (eq_attr "amdfam10_decode" "vector")
881					(eq_attr "mode" "DF"))))
882			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
883;; cvtps2pd.  Model it the same way as the other double decoded FP conversions.
;; Load form: type ssecvt, athlon_decode "double", mode V2DF, V4SF or TI,
;; memory "load", on k8, athlon and generic.  Default latency 5.
884(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
885			 (and (eq_attr "cpu" "k8,athlon,generic")
886			      (and (eq_attr "type" "ssecvt")
887				   (and (eq_attr "athlon_decode" "double")
888					(and (eq_attr "mode" "V2DF,V4SF,TI")
889					     (eq_attr "memory" "load")))))
890			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
;; amdfam10 load form: type ssecvt, amdfam10_decode "direct", mode V2DF,
;; V4SF or TI, memory "load".  Default latency 4.
891(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
892			 (and (eq_attr "cpu" "amdfam10")
893			      (and (eq_attr "type" "ssecvt")
894				   (and (eq_attr "amdfam10_decode" "direct")
895					(and (eq_attr "mode" "V2DF,V4SF,TI")
896					     (eq_attr "memory" "load")))))
897			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; Type ssecvt, athlon_decode "double", mode V2DF, V4SF or TI on k8, athlon
;; and generic, with no memory test.  Default latency 3.  athlon-fstore is
;; reserved in two consecutive cycles.
898(define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
899			 (and (eq_attr "cpu" "k8,athlon,generic")
900			      (and (eq_attr "type" "ssecvt")
901				   (and (eq_attr "athlon_decode" "double")
902					(eq_attr "mode" "V2DF,V4SF,TI"))))
903			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
;; Type ssecvt, amdfam10_decode "direct", mode V2DF, V4SF or TI on amdfam10,
;; with no memory test.  Default latency 2.
904(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
905			 (and (eq_attr "cpu" "amdfam10")
906			      (and (eq_attr "type" "ssecvt")
907				   (and (eq_attr "amdfam10_decode" "direct")
908					(eq_attr "mode" "V2DF,V4SF,TI"))))
909			 "athlon-direct,athlon-fpsched,athlon-fstore")
910;; cvtsi2sd mem,reg is directpath (cvtsi2sd reg,reg is doublepath).
911;; cvtsi2sd has throughput 1 and is executed in the store unit with a latency of 6.
;; NOTE(review): the cpu list here is "athlon,k8" only; the neighbouring
;; reservations also list "generic" -- confirm whether that is intended.
912(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
913			 (and (eq_attr "cpu" "athlon,k8")
914			      (and (eq_attr "type" "sseicvt")
915				   (and (eq_attr "athlon_decode" "direct")
916					(and (eq_attr "mode" "SF,DF")
917					     (eq_attr "memory" "load")))))
918			 "athlon-direct,athlon-fploadk8,athlon-fstore")
;; amdfam10 load form: type sseicvt, amdfam10_decode "double", mode SF or
;; DF, memory "load".  Default latency 9.
919(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
920			 (and (eq_attr "cpu" "amdfam10")
921			      (and (eq_attr "type" "sseicvt")
922				   (and (eq_attr "amdfam10_decode" "double")
923					(and (eq_attr "mode" "SF,DF")
924					     (eq_attr "memory" "load")))))
925			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
926;; cvtsi2ss mem, reg is doublepath
;; cpu athlon only: the condition tests athlon_decode "double", while the
;; reservation starts with athlon-vector.  Default latency 9; athlon-fstore
;; is held for two cycles.
927(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
928			 (and (eq_attr "cpu" "athlon")
929			      (and (eq_attr "type" "sseicvt")
930				   (and (eq_attr "athlon_decode" "double")
931					(and (eq_attr "mode" "SF,DF")
932					     (eq_attr "memory" "load")))))
933			 "athlon-vector,athlon-fpload,(athlon-fstore*2)")
;; k8 and generic load form: type sseicvt, athlon_decode "double", mode SF
;; or DF, memory "load".  Default latency 9; athlon-fstore is held for two
;; cycles.
934(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_k8" 9
935			 (and (eq_attr "cpu" "k8,generic")
936			      (and (eq_attr "type" "sseicvt")
937				   (and (eq_attr "athlon_decode" "double")
938					(and (eq_attr "mode" "SF,DF")
939					     (eq_attr "memory" "load")))))
940			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
;; amdfam10 load form: type sseicvt, amdfam10_decode "double", mode SF or
;; DF, memory "load".  Default latency 9.
;; NOTE(review): condition, latency and reservation string are identical to
;; athlon_sseicvt_cvtsi2sd_load_amdfam10, so this entry looks redundant --
;; confirm before relying on it being selected.
941(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
942			 (and (eq_attr "cpu" "amdfam10")
943			      (and (eq_attr "type" "sseicvt")
944				   (and (eq_attr "amdfam10_decode" "double")
945					(and (eq_attr "mode" "SF,DF")
946					     (eq_attr "memory" "load")))))
947			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
948;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
;; Register form (memory "none") for k8, athlon and generic.  Default
;; latency 11.
;; NOTE(review): the reservation uses athlon-fploadk8 although no memory
;; operand is involved -- confirm this is the intended stage.
949(define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
950			 (and (eq_attr "cpu" "k8,athlon,generic")
951			      (and (eq_attr "type" "sseicvt")
952				   (and (eq_attr "athlon_decode" "double")
953					(and (eq_attr "mode" "SF,DF")
954					     (eq_attr "memory" "none")))))
955			 "athlon-double,athlon-fploadk8,athlon-fstore")
;; amdfam10 register form: type sseicvt, amdfam10_decode "vector", mode SF
;; or DF, memory "none".  Default latency 14.
956(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
957			 (and (eq_attr "cpu" "amdfam10")
958			      (and (eq_attr "type" "sseicvt")
959				   (and (eq_attr "amdfam10_decode" "vector")
960					(and (eq_attr "mode" "SF,DF")
961					     (eq_attr "memory" "none")))))
962			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
963;; cvtsi2ss reg, reg is vectorpath (the condition tests athlon_decode "vector")
;; Register form (memory "none") for athlon, k8 and generic.  Default
;; latency 14; athlon-fvector is held for two cycles.
964(define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
965			 (and (eq_attr "cpu" "athlon,k8,generic")
966			      (and (eq_attr "type" "sseicvt")
967				   (and (eq_attr "athlon_decode" "vector")
968					(and (eq_attr "mode" "SF,DF")
969					     (eq_attr "memory" "none")))))
970			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
;; amdfam10 register form: type sseicvt, amdfam10_decode "vector", mode SF
;; or DF, memory "none".  Default latency 14.
;; NOTE(review): condition, latency and reservation string are identical to
;; athlon_sseicvt_cvtsi2sd_amdfam10, so this entry looks redundant --
;; confirm before relying on it being selected.
971(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
972			 (and (eq_attr "cpu" "amdfam10")
973			      (and (eq_attr "type" "sseicvt")
974				   (and (eq_attr "amdfam10_decode" "vector")
975					(and (eq_attr "mode" "SF,DF")
976					     (eq_attr "memory" "none")))))
977			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
978;; cvtsd2ss mem,reg is doublepath, throughput unknown, latency 9
;; Matches type ssecvt, athlon_decode "double", mode SF, memory "load" on
;; k8, athlon and generic; athlon-fstore is held for three cycles.
979(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
980			 (and (eq_attr "cpu" "k8,athlon,generic")
981			      (and (eq_attr "type" "ssecvt")
982				   (and (eq_attr "athlon_decode" "double")
983					(and (eq_attr "mode" "SF")
984					     (eq_attr "memory" "load")))))
985			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
;; amdfam10 load form: type ssecvt, amdfam10_decode "double", mode SF,
;; memory "load".  Default latency 9.
986(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
987			 (and (eq_attr "cpu" "amdfam10")
988			      (and (eq_attr "type" "ssecvt")
989				   (and (eq_attr "amdfam10_decode" "double")
990					(and (eq_attr "mode" "SF")
991					     (eq_attr "memory" "load")))))
992			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
993;; cvtsd2ss reg,reg is vectorpath, throughput unknown, latency 12
;; Matches type ssecvt, athlon_decode "vector", mode SF, memory "none" on
;; athlon, k8 and generic; athlon-fvector is held for three cycles.
994(define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
995			 (and (eq_attr "cpu" "athlon,k8,generic")
996			      (and (eq_attr "type" "ssecvt")
997				   (and (eq_attr "athlon_decode" "vector")
998					(and (eq_attr "mode" "SF")
999					     (eq_attr "memory" "none")))))
1000			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
;; amdfam10 register form: type ssecvt, amdfam10_decode "vector", mode SF,
;; memory "none".  Default latency 8.  Reserves athlon-vector,
;; athlon-fpsched, athlon-faddmul, then athlon-fstore for two cycles.
1001(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
1002			 (and (eq_attr "cpu" "amdfam10")
1003			      (and (eq_attr "type" "ssecvt")
1004				   (and (eq_attr "amdfam10_decode" "vector")
1005					(and (eq_attr "mode" "SF")
1006					     (eq_attr "memory" "none")))))
1007			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
;; Load form named for cvtpd2ps: type ssecvt, athlon_decode "vector", mode
;; V4SF, V2DF or TI, memory "load", on athlon, k8 and generic.  Default
;; latency 8; athlon-fstore is held for three cycles.
;; NOTE(review): the condition tests athlon_decode "vector" but the
;; reservation starts with athlon-double -- confirm which is intended.
1008(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
1009			 (and (eq_attr "cpu" "athlon,k8,generic")
1010			      (and (eq_attr "type" "ssecvt")
1011				   (and (eq_attr "athlon_decode" "vector")
1012					(and (eq_attr "mode" "V4SF,V2DF,TI")
1013					     (eq_attr "memory" "load")))))
1014			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
;; amdfam10 load form: type ssecvt, amdfam10_decode "double", mode V4SF,
;; V2DF or TI, memory "load".  Default latency 9.
1015(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
1016			 (and (eq_attr "cpu" "amdfam10")
1017			      (and (eq_attr "type" "ssecvt")
1018				   (and (eq_attr "amdfam10_decode" "double")
1019					(and (eq_attr "mode" "V4SF,V2DF,TI")
1020					     (eq_attr "memory" "load")))))
1021			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
1022;; cvtpd2ps mem,reg is vectorpath, throughput unknown, latency 10
1023;; ??? Why is it faster than cvtsd2ss?
;; NOTE(review): the reservation below is the register form (memory "none")
;; with default latency 8, so the "mem,reg ... latency 10" remark above does
;; not describe it directly -- confirm where that remark belongs.
1024(define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
1025			 (and (eq_attr "cpu" "athlon,k8,generic")
1026			      (and (eq_attr "type" "ssecvt")
1027				   (and (eq_attr "athlon_decode" "vector")
1028					(and (eq_attr "mode" "V4SF,V2DF,TI")
1029					     (eq_attr "memory" "none")))))
1030			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
;; amdfam10 register form: type ssecvt, amdfam10_decode "double", mode V4SF,
;; V2DF or TI, memory "none".  Default latency 7.
1031(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
1032			 (and (eq_attr "cpu" "amdfam10")
1033			      (and (eq_attr "type" "ssecvt")
1034				   (and (eq_attr "amdfam10_decode" "double")
1035					(and (eq_attr "mode" "V4SF,V2DF,TI")
1036					     (eq_attr "memory" "none")))))
1037			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
1038;; cvtsd2si mem,reg is vectorpath, throughput 1, latency 9
;; Matches type sseicvt, athlon_decode "vector", mode SI or DI, memory
;; "load" on athlon, k8 and generic.
1039(define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
1040			 (and (eq_attr "cpu" "athlon,k8,generic")
1041			      (and (eq_attr "type" "sseicvt")
1042				   (and (eq_attr "athlon_decode" "vector")
1043					(and (eq_attr "mode" "SI,DI")
1044					     (eq_attr "memory" "load")))))
1045			 "athlon-vector,athlon-fploadk8,athlon-fvector")
;; amdfam10 load form: type sseicvt, amdfam10_decode "double", mode SI or
;; DI, memory "load".  Default latency 10.  The final stage reserves
;; athlon-fadd and athlon-fstore in the same cycle.
1046(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
1047			 (and (eq_attr "cpu" "amdfam10")
1048			      (and (eq_attr "type" "sseicvt")
1049				   (and (eq_attr "amdfam10_decode" "double")
1050					(and (eq_attr "mode" "SI,DI")
1051					     (eq_attr "memory" "load")))))
1052			 "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
1053;; cvtsd2si reg,reg is doublepath, throughput 1, latency 9
;; cpu athlon only: the condition tests athlon_decode "double", while the
;; reservation uses athlon-vector and athlon-fvector.
1054(define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
1055			 (and (eq_attr "cpu" "athlon")
1056			      (and (eq_attr "type" "sseicvt")
1057				   (and (eq_attr "athlon_decode" "double")
1058					(and (eq_attr "mode" "SI,DI")
1059					     (eq_attr "memory" "none")))))
1060			 "athlon-vector,athlon-fpsched,athlon-fvector")
;; k8 and generic register form: type sseicvt, athlon_decode "double", mode
;; SI or DI, memory "none".  Default latency 9.
1061(define_insn_reservation "athlon_ssecvt_cvtsX2si_k8" 9
1062			 (and (eq_attr "cpu" "k8,generic")
1063			      (and (eq_attr "type" "sseicvt")
1064				   (and (eq_attr "athlon_decode" "double")
1065					(and (eq_attr "mode" "SI,DI")
1066					     (eq_attr "memory" "none")))))
1067			 "athlon-double,athlon-fpsched,athlon-fstore")
;; amdfam10 register form: type sseicvt, amdfam10_decode "double", mode SI
;; or DI, memory "none".  Default latency 8.
1068(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
1069			 (and (eq_attr "cpu" "amdfam10")
1070			      (and (eq_attr "type" "sseicvt")
1071				   (and (eq_attr "amdfam10_decode" "double")
1072					(and (eq_attr "mode" "SI,DI")
1073					     (eq_attr "memory" "none")))))
1074			 "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
1075;; cvtpd2dq reg,mem is doublepath, throughput 1, latency 9 on amdfam10
;; Matches type sseicvt, amdfam10_decode "double", mode TI, memory "load".
1076(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
1077			 (and (eq_attr "cpu" "amdfam10")
1078			      (and (eq_attr "type" "sseicvt")
1079				   (and (eq_attr "amdfam10_decode" "double")
1080					(and (eq_attr "mode" "TI")
1081					     (eq_attr "memory" "load")))))
1082			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
1083;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
;; Matches type sseicvt, amdfam10_decode "double", mode TI, memory "none"
;; (i.e. no memory operand, hence "reg,reg" above).
1084(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
1085			 (and (eq_attr "cpu" "amdfam10")
1086			      (and (eq_attr "type" "sseicvt")
1087				   (and (eq_attr "amdfam10_decode" "double")
1088					(and (eq_attr "mode" "TI")
1089					     (eq_attr "memory" "none")))))
1090			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
1091
1092
;; Type ssemul in mode SF or DF with memory "load" on cpu athlon.
;; Default latency 4.  Reserves athlon-direct, athlon-fpload, athlon-fmul.
1093(define_insn_reservation "athlon_ssemul_load" 4
1094			 (and (eq_attr "cpu" "athlon")
1095			      (and (eq_attr "type" "ssemul")
1096				   (and (eq_attr "mode" "SF,DF")
1097					(eq_attr "memory" "load"))))
1098			 "athlon-direct,athlon-fpload,athlon-fmul")
;; Type ssemul in mode SF or DF with memory "load" on k8, generic and
;; amdfam10.  Default latency 6.
1099(define_insn_reservation "athlon_ssemul_load_k8" 6
1100			 (and (eq_attr "cpu" "k8,generic,amdfam10")
1101			      (and (eq_attr "type" "ssemul")
1102				   (and (eq_attr "mode" "SF,DF")
1103					(eq_attr "memory" "load"))))
1104			 "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Type ssemul in mode SF or DF on athlon, k8, generic and amdfam10, with
;; no memory test.  Default latency 4.
1105(define_insn_reservation "athlon_ssemul" 4
1106			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
1107			      (and (eq_attr "type" "ssemul")
1108				   (eq_attr "mode" "SF,DF")))
1109			 "athlon-direct,athlon-fpsched,athlon-fmul")
;; Type ssemul with memory "load" on cpu athlon, with no mode test.
;; Default latency 5; athlon-fmul is held for two cycles.
;; NOTE(review): presumably reached only for modes other than SF/DF, which
;; the mode-restricted ssemul reservations above claim -- confirm.
1110(define_insn_reservation "athlon_ssemulvector_load" 5
1111			 (and (eq_attr "cpu" "athlon")
1112			      (and (eq_attr "type" "ssemul")
1113				   (eq_attr "memory" "load")))
1114			 "athlon-vector,athlon-fpload2,(athlon-fmul*2)")
;; Type ssemul with memory "load" on k8 and generic, with no mode test.
;; Default latency 7; athlon-fmul is held for two cycles.
1115(define_insn_reservation "athlon_ssemulvector_load_k8" 7
1116			 (and (eq_attr "cpu" "k8,generic")
1117			      (and (eq_attr "type" "ssemul")
1118				   (eq_attr "memory" "load")))
1119			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
;; Type ssemul with memory "load" on amdfam10, with no mode test.
;; Default latency 6; athlon-fmul is held for one cycle.
1120(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
1121			 (and (eq_attr "cpu" "amdfam10")
1122			      (and (eq_attr "type" "ssemul")
1123				   (eq_attr "memory" "load")))
1124			 "athlon-direct,athlon-fploadk8,athlon-fmul")
;; Type ssemul on cpu athlon, with no mode or memory test.  Default
;; latency 5; athlon-fmul is held for two cycles.
1125(define_insn_reservation "athlon_ssemulvector" 5
1126			 (and (eq_attr "cpu" "athlon")
1127			      (eq_attr "type" "ssemul"))
1128			 "athlon-vector,athlon-fpsched,(athlon-fmul*2)")
;; Type ssemul on k8 and generic, with no mode or memory test.  Default
;; latency 5; athlon-fmul is held for two cycles.
1129(define_insn_reservation "athlon_ssemulvector_k8" 5
1130			 (and (eq_attr "cpu" "k8,generic")
1131			      (eq_attr "type" "ssemul"))
1132			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
;; Type ssemul on amdfam10, with no mode or memory test.  Default
;; latency 4; athlon-fmul is held for one cycle.
1133(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
1134			 (and (eq_attr "cpu" "amdfam10")
1135			      (eq_attr "type" "ssemul"))
1136			 "athlon-direct,athlon-fpsched,athlon-fmul")
1137;; divsd timings.  divss is faster
;; Type ssediv in mode SF or DF with memory "load" on cpu athlon.  Default
;; latency 20; athlon-fmul is held for 17 cycles.
1138(define_insn_reservation "athlon_ssediv_load" 20
1139			 (and (eq_attr "cpu" "athlon")
1140			      (and (eq_attr "type" "ssediv")
1141				   (and (eq_attr "mode" "SF,DF")
1142					(eq_attr "memory" "load"))))
1143			 "athlon-direct,athlon-fpload,athlon-fmul*17")
;; Type ssediv in mode SF or DF with memory "load" on k8, generic and
;; amdfam10.  Default latency 22; athlon-fmul is held for 17 cycles.
1144(define_insn_reservation "athlon_ssediv_load_k8" 22
1145			 (and (eq_attr "cpu" "k8,generic,amdfam10")
1146			      (and (eq_attr "type" "ssediv")
1147				   (and (eq_attr "mode" "SF,DF")
1148					(eq_attr "memory" "load"))))
1149			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; Type ssediv in mode SF or DF on athlon, k8, generic and amdfam10, with
;; no memory test.  Default latency 20; athlon-fmul is held for 17 cycles.
1150(define_insn_reservation "athlon_ssediv" 20
1151			 (and (eq_attr "cpu" "athlon,k8,generic,amdfam10")
1152			      (and (eq_attr "type" "ssediv")
1153				   (eq_attr "mode" "SF,DF")))
1154			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
;; Type ssediv with memory "load" on cpu athlon, with no mode test.
;; Default latency 39; athlon-fmul is held for 34 cycles.
;; NOTE(review): presumably reached only for modes other than SF/DF, which
;; the mode-restricted ssediv reservations above claim -- confirm.
1155(define_insn_reservation "athlon_ssedivvector_load" 39
1156			 (and (eq_attr "cpu" "athlon")
1157			      (and (eq_attr "type" "ssediv")
1158				   (eq_attr "memory" "load")))
1159			 "athlon-vector,athlon-fpload2,athlon-fmul*34")
;; Type ssediv with memory "load" on k8 and generic, with no mode test.
;; Default latency 35; athlon-fmul is held for 34 cycles.
;; NOTE(review): the load form's latency (35) is lower than the register
;; form athlon_ssedivvector_k8 (39) -- confirm the value.
1160(define_insn_reservation "athlon_ssedivvector_load_k8" 35
1161			 (and (eq_attr "cpu" "k8,generic")
1162			      (and (eq_attr "type" "ssediv")
1163				   (eq_attr "memory" "load")))
1164			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
;; Type ssediv with memory "load" on amdfam10, with no mode test.
;; Default latency 22; athlon-fmul is held for 17 cycles.
1165(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
1166			 (and (eq_attr "cpu" "amdfam10")
1167			      (and (eq_attr "type" "ssediv")
1168				   (eq_attr "memory" "load")))
1169			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
;; Type ssediv on cpu athlon, with no mode or memory test.  Default
;; latency 39; athlon-fmul is held for 34 cycles.
;; NOTE(review): unlike the other register-form reservations here, this
;; string has no athlon-fpsched stage -- confirm that is intended.
1170(define_insn_reservation "athlon_ssedivvector" 39
1171			 (and (eq_attr "cpu" "athlon")
1172			      (eq_attr "type" "ssediv"))
1173			 "athlon-vector,athlon-fmul*34")
;; Type ssediv on k8 and generic, with no mode or memory test.  Default
;; latency 39; athlon-fmul is held for 34 cycles.
;; NOTE(review): this string has no athlon-fpsched stage, unlike most
;; register-form reservations in this file -- confirm that is intended.
1174(define_insn_reservation "athlon_ssedivvector_k8" 39
1175			 (and (eq_attr "cpu" "k8,generic")
1176			      (eq_attr "type" "ssediv"))
1177			 "athlon-double,athlon-fmul*34")
;; Type ssediv on amdfam10, with no mode or memory test.  Default
;; latency 20; athlon-fmul is held for 17 cycles.
;; NOTE(review): this string has no athlon-fpsched stage, unlike most
;; register-form reservations in this file -- confirm that is intended.
1178(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
1179			 (and (eq_attr "cpu" "amdfam10")
1180			      (eq_attr "type" "ssediv"))
1181			 "athlon-direct,athlon-fmul*17")
;; Type sseins in mode TI on amdfam10.  Default latency 5.  Reserves
;; athlon-vector, athlon-fpsched, then athlon-faddmul.
1182(define_insn_reservation "athlon_sseins_amdfam10" 5
1183                         (and (eq_attr "cpu" "amdfam10")
1184                              (and (eq_attr "type" "sseins")
1185                                   (eq_attr "mode" "TI")))
1186                         "athlon-vector,athlon-fpsched,athlon-faddmul")
1187