;; athlon.md revision 117395
;; AMD Athlon Scheduling
;; Copyright (C) 2002 Free Software Foundation, Inc.
;;
;; This file is part of GNU CC.
;;
;; GNU CC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;;
;; GNU CC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GNU CC; see the file COPYING.  If not, write to
;; the Free Software Foundation, 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;; Decoder path used by an insn: "direct" (DirectPath) or "vector"
;; (VectorPath).  The first matching clause of the cond wins; anything that
;; matches no clause defaults to "direct".
(define_attr "athlon_decode" "direct,vector"
  (cond [;; Insn types that are always vector decoded.
         (eq_attr "type" "call,imul,idiv,other,multi,fcmov,fpspc,str,pop,cld")
           (const_string "vector")
         ;; A push whose operand 1 is a memory operand.
         (and (eq_attr "type" "push")
              (match_operand 1 "memory_operand" ""))
           (const_string "vector")
         ;; XFmode FP moves whose "memory" attribute is load or store.
         (and (eq_attr "type" "fmov")
              (and (eq_attr "memory" "load,store")
                   (eq_attr "mode" "XF")))
           (const_string "vector")]
        (const_string "direct")))

;; The Athlon contains three pipelined FP units, three integer units and
;; three address generation units.
;;
;; The predecode logic determines the boundaries of instructions in the
;; 64-byte cache line, so the cache line straddling problem of the K6 might
;; be an issue here as well, but it is not noted in the documentation.
;;
;; Three DirectPath instruction decoders and only one VectorPath decoder
;; are available.  They can decode three DirectPath instructions or one
;; VectorPath instruction per cycle.
;; Decoded macro instructions are then passed to the 72-entry instruction
;; control unit, which passes them to the specialized integer (18-entry)
;; and FP (36-entry) schedulers.
;;
;; The load/store queue unit is not attached to the schedulers but
;; communicates with all the execution units separately instead.

;; VectorPath decoder: one unit (multiplicity 1, simultaneity 0).
;; A vector-decoded insn has a ready delay of 1 cycle and an issue delay of
;; 1 cycle.  No conflict list is given, so the issue delay applies to any
;; insn that follows it on this unit.
(define_function_unit "athlon_vectordec" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "vector"))
  1 1)

;; DirectPath decoders: three identical units (multiplicity 3,
;; simultaneity 0).  A direct-decoded insn has a ready delay of 1 cycle and
;; an issue delay of 1 cycle.
(define_function_unit "athlon_directdec" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1)

;; Direct-decoded insns are also booked on the "athlon_vectordec" unit.
;; The conflict list limits the 1-cycle issue delay to a following
;; vector-decoded insn, so direct-decoded insns do not delay one another on
;; this unit.
(define_function_unit "athlon_vectordec" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_decode" "direct"))
  1 1 [(eq_attr "athlon_decode" "vector")])

;; Integer execution units: three identical units (multiplicity 3,
;; simultaneity 0).  The insn types listed here have a ready delay of
;; 1 cycle and an issue delay of 1 cycle.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "alu1,negnot,alu,icmp,test,imov,imovx,lea,incdec,ishift,ishift1,rotate,rotate1,ibr,call,callv,icmov,cld,pop,setcc,push"))
  1 1)

;; Insns of type "str" on the integer units: ready delay 15 cycles, issue
;; delay 15 cycles, so the unit is held for the whole latency.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "str"))
  15 15)

;; Integer multiply on the integer units: ready delay 5 cycles, issue
;; delay 0, so it does not hold the unit against following insns.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)

;; Integer divide on the integer units: ready delay 42 cycles, issue
;; delay 0, so it does not hold the unit against following insns.
(define_function_unit "athlon_ieu" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 0)

;; Multiply/divide unit: one unit (multiplicity 1, simultaneity 0).
;; Integer multiply has a ready delay of 5 cycles and an issue delay of 0.
(define_function_unit "athlon_muldiv" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "imul"))
  5 0)

;; Integer divide on the multiply/divide unit: ready delay 42 cycles and
;; issue delay 42 cycles, so the unit is held for the whole latency.
(define_function_unit "athlon_muldiv" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "idiv"))
  42 42)

;; FP unit class of an insn, tested by the "athlon_fp_*" function units.
;; The first matching clause of the cond wins, so the order of the "fmov"
;; clauses matters; anything that matches no clause is "none".
(define_attr "athlon_fpunits" "none,store,mul,add,muladd,any"
  (cond [(eq_attr "type" "fop,fcmp,fistp")
           (const_string "add")
         (eq_attr "type" "fmul,fdiv,fpspc,fsgn,fcmov")
           (const_string "mul")
         ;; FP moves whose "memory" attribute is store or both.
         (and (eq_attr "type" "fmov") (eq_attr "memory" "store,both"))
           (const_string "store")
         ;; FP moves whose "memory" attribute is load.
         (and (eq_attr "type" "fmov") (eq_attr "memory" "load"))
           (const_string "any")
         ;; FP moves whose operand 1 is an SImode register or an immediate.
         (and (eq_attr "type" "fmov")
              (ior (match_operand:SI 1 "register_operand" "")
                   (match_operand 1 "immediate_operand" "")))
           (const_string "store")
         ;; All remaining FP moves.
         (eq_attr "type" "fmov")
           (const_string "muladd")]
        (const_string "none")))

;; We use a latency of 1 for the definitions.  This is OK for modeling
;; collisions in the execution units.  The real latencies are modeled in the
;; "fp" pipeline.

;; fsin, fcos: 96-192
;; fsincos: 107-211
;; fsqrt: 19 for SFmode, 27 for DFmode, 35 for XFmode.
;; Insns of type "fpspc" are modeled on the "athlon_fp" unit (multiplicity
;; 3, simultaneity 0) with a single ready delay of 100 cycles and an issue
;; delay of 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fpspc"))
  100 1)

;; 16 cycles for SFmode, 20 for DFmode and 24 for XFmode.
;; The ready delay of 24 used below equals the XFmode figure; it is applied
;; to every insn of type "fdiv".  Issue delay is 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fdiv"))
  24 1)

;; Insns of type "fop", "fmul" and "fistp": ready delay 4 cycles, issue
;; delay 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fop,fmul,fistp"))
  4 1)

;; XFmode loads are slow.
;; XFmode stores are slow too (8 cycles), but we don't need to model them,
;; because there are no dependent instructions.

;; XFmode FP moves whose "memory" attribute is load: ready delay 10 cycles,
;; issue delay 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fmov")
            (and (eq_attr "memory" "load")
                 (eq_attr "mode" "XF"))))
  10 1)

;; Insns of type "fmov" and "fsgn": ready delay 2 cycles, issue delay
;; 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fmov,fsgn"))
  2 1)

;; fcmp and ftst instructions
;; Selected as type "fcmp" with direct decode: ready delay 3 cycles, issue
;; delay 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "athlon_decode" "direct")))
  3 1)

;; fcmpi instructions.
;; Selected as type "fcmp" with vector decode: ready delay 3 cycles, issue
;; delay 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (and (eq_attr "type" "fcmp")
            (eq_attr "athlon_decode" "vector")))
  3 1)

;; Insns of type "fcmov": ready delay 7 cycles, issue delay 1 cycle.
(define_function_unit "athlon_fp" 3 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "type" "fcmov"))
  7 1)

;; FP multiply unit: one unit (multiplicity 1, simultaneity 0), used by
;; insns whose "athlon_fpunits" is "mul".  Ready delay and issue delay are
;; both 1 cycle.
(define_function_unit "athlon_fp_mul" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "mul"))
  1 1)

;; FP add unit: one unit (multiplicity 1, simultaneity 0), used by insns
;; whose "athlon_fpunits" is "add".  Ready delay and issue delay are both
;; 1 cycle.
(define_function_unit "athlon_fp_add" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "add"))
  1 1)

;; Combined multiply/add resource: two units (multiplicity 2,
;; simultaneity 0).  It is used by insns whose "athlon_fpunits" is "muladd",
;; "mul" or "add".  Ready delay and issue delay are both 1 cycle.
(define_function_unit "athlon_fp_muladd" 2 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "muladd,mul,add"))
  1 1)

;; FP store unit: one unit (multiplicity 1, simultaneity 0), used by insns
;; whose "athlon_fpunits" is "store".  Ready delay and issue delay are both
;; 1 cycle.
(define_function_unit "athlon_fp_store" 1 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "athlon_fpunits" "store"))
  1 1)

;; We don't need to model the Address Generation Unit, since we don't model
;; the re-order buffer yet and thus we never schedule more than three
;; operations at a time.  Later we may want to experiment with MD_SCHED
;; macros modeling the decoders independently of the functional units.

;(define_function_unit "athlon_agu" 3 0
;  (and (eq_attr "cpu" "athlon")
;       (and (eq_attr "memory" "!none")
;            (eq_attr "athlon_fpunits" "none")))
;  1 1)

;; Model the load unit to avoid overly long sequences of loads.  We don't
;; need to model the store queue, since it is unlikely to be a bottleneck.

;; Load unit: two identical units (multiplicity 2, simultaneity 0), used by
;; insns whose "memory" attribute is "load" or "both".  Ready delay and
;; issue delay are both 1 cycle.
(define_function_unit "athlon_load" 2 0
  (and (eq_attr "cpu" "athlon")
       (eq_attr "memory" "load,both"))
  1 1)
206
207