1;; Scheduling description for IBM Power4 and PowerPC 970 processors.
2;;   Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify it
7;; under the terms of the GNU General Public License as published
8;; by the Free Software Foundation; either version 2, or (at your
9;; option) any later version.
10;;
11;; GCC is distributed in the hope that it will be useful, but WITHOUT
12;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
14;; License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING.  If not, write to the
18;; Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
19;; MA 02110-1301, USA.
20
21;; Sources: IBM Red Book and White Paper on POWER4
22
23;; The POWER4 has 2 iu, 2 fpu, 2 lsu per engine (2 engines per chip).
24;; Instructions that update more than one register get broken into two
25;; (split) or more internal ops.  The chip can issue up to 5
26;; internal ops per cycle.
27
; Declare the four automata used by this description.  Every unit declared
; below is assigned to exactly one of them.
28(define_automaton "power4iu,power4fpu,power4vec,power4misc")
29
; Two integer units, tracked by the "power4iu" automaton.
30(define_cpu_unit "iu1_power4,iu2_power4" "power4iu")
; Two load/store units, tracked by the "power4misc" automaton.
31(define_cpu_unit "lsu1_power4,lsu2_power4" "power4misc")
; Two floating-point units, tracked by the "power4fpu" automaton.
32(define_cpu_unit "fpu1_power4,fpu2_power4" "power4fpu")
; bpu is reserved by the branch reservations and cru by the condition-register
; reservations further down in this file.
33(define_cpu_unit "bpu_power4,cru_power4" "power4misc")
; Two vector units, reserved through vq_power4 and vpq_power4 respectively.
34(define_cpu_unit "vec_power4,vecperm_power4" "power4vec")
; Five dispatch slots; every insn reservation in this file starts by
; reserving one or more of them.
35(define_cpu_unit "du1_power4,du2_power4,du3_power4,du4_power4,du5_power4"
36		 "power4misc")
37
; lsq_power4: one dispatch slot, then (next cycle) the load/store unit paired
; with that slot.  Slots du1/du4 feed lsu1; slots du2/du3 feed lsu2.
38(define_reservation "lsq_power4"
39		    "(du1_power4,lsu1_power4)\
40		    |(du2_power4,lsu2_power4)\
41		    |(du3_power4,lsu2_power4)\
42		    |(du4_power4,lsu1_power4)")
43
; lsuq_power4: two adjacent dispatch slots in the same cycle, then (next
; cycle) a load/store unit together with an integer unit.  Only three
; alternatives exist: du1+du2, du2+du3 and du3+du4.
44(define_reservation "lsuq_power4"
45		    "(du1_power4+du2_power4,lsu1_power4+iu2_power4)\
46		    |(du2_power4+du3_power4,lsu2_power4+iu2_power4)\
47		    |(du3_power4+du4_power4,lsu2_power4+iu1_power4)")
48
; iq_power4: one dispatch slot, then (next cycle) the integer unit paired
; with that slot.  Slots du1/du4 feed iu1; slots du2/du3 feed iu2.
49(define_reservation "iq_power4"
50		    "(du1_power4,iu1_power4)\
51		    |(du2_power4,iu2_power4)\
52		    |(du3_power4,iu2_power4)\
53		    |(du4_power4,iu1_power4)")
54
; fpq_power4: one dispatch slot, then (next cycle) the floating-point unit
; paired with that slot.  Slots du1/du4 feed fpu1; slots du2/du3 feed fpu2.
55(define_reservation "fpq_power4"
56		    "(du1_power4,fpu1_power4)\
57		    |(du2_power4,fpu2_power4)\
58		    |(du3_power4,fpu2_power4)\
59		    |(du4_power4,fpu1_power4)")
60
; vq_power4: any one of dispatch slots du1..du4, then (next cycle) the single
; vec_power4 unit.  All four alternatives share that one unit.
61(define_reservation "vq_power4"
62		    "(du1_power4,vec_power4)\
63		    |(du2_power4,vec_power4)\
64		    |(du3_power4,vec_power4)\
65		    |(du4_power4,vec_power4)")
66
; vpq_power4: any one of dispatch slots du1..du4, then (next cycle) the single
; vecperm_power4 unit.  All four alternatives share that one unit.
67(define_reservation "vpq_power4"
68		    "(du1_power4,vecperm_power4)\
69		    |(du2_power4,vecperm_power4)\
70		    |(du3_power4,vecperm_power4)\
71		    |(du4_power4,vecperm_power4)")
72
73
74; Dispatch slots are allocated in order conforming to program order.
; Each absence_set says: the slot named first may be reserved only while none
; of the slots in the second list is reserved.  Together the four sets mean a
; lower-numbered slot cannot be taken once a higher-numbered one is in use.
75(absence_set "du1_power4" "du2_power4,du3_power4,du4_power4,du5_power4")
76(absence_set "du2_power4" "du3_power4,du4_power4,du5_power4")
77(absence_set "du3_power4" "du4_power4,du5_power4")
78(absence_set "du4_power4" "du5_power4")
79
80
81; Load/store
; Insns of type "load" on power4: default latency 4, reserved via lsq_power4
; (one dispatch slot, then one load/store unit).
; NOTE(review): the trailing "; 3" presumably records an alternative latency
; figure -- confirm against the processor documentation.
82(define_insn_reservation "power4-load" 4 ; 3
83  (and (eq_attr "type" "load")
84       (eq_attr "cpu" "power4"))
85  "lsq_power4")
86
; Insns of type "load_ext" on power4: default latency 5.  Takes two adjacent
; dispatch slots, then a load/store unit, then two cycles with no unit
; reserved, then an integer unit.
87(define_insn_reservation "power4-load-ext" 5
88  (and (eq_attr "type" "load_ext")
89       (eq_attr "cpu" "power4"))
90  "(du1_power4+du2_power4,lsu1_power4,nothing,nothing,iu2_power4)\
91  |(du2_power4+du3_power4,lsu2_power4,nothing,nothing,iu2_power4)\
92  |(du3_power4+du4_power4,lsu2_power4,nothing,nothing,iu1_power4)")
93
; Insns of type "load_ext_u" on power4: default latency 5.  Takes dispatch
; slots du1..du4 in one cycle (no alternatives), then lsu1 and iu2 together,
; two cycles with no unit reserved, and finally iu2 again.
94(define_insn_reservation "power4-load-ext-update" 5
95  (and (eq_attr "type" "load_ext_u")
96       (eq_attr "cpu" "power4"))
97  "du1_power4+du2_power4+du3_power4+du4_power4,\
98   lsu1_power4+iu2_power4,nothing,nothing,iu2_power4")
99
; Insns of type "load_ext_ux" on power4: default latency 5.  Takes dispatch
; slots du1..du4 in one cycle, then iu1, then lsu2 and iu1 together, two
; cycles with no unit reserved, and finally iu2.
100(define_insn_reservation "power4-load-ext-update-indexed" 5
101  (and (eq_attr "type" "load_ext_ux")
102       (eq_attr "cpu" "power4"))
103  "du1_power4+du2_power4+du3_power4+du4_power4,\
104   iu1_power4,lsu2_power4+iu1_power4,nothing,nothing,iu2_power4")
105
; Insns of type "load_ux" on power4: default latency 3.  Takes dispatch slots
; du1..du4 in one cycle, then iu1, then lsu2 and iu2 together.
106(define_insn_reservation "power4-load-update-indexed" 3
107  (and (eq_attr "type" "load_ux")
108       (eq_attr "cpu" "power4"))
109  "du1_power4+du2_power4+du3_power4+du4_power4,\
110   iu1_power4,lsu2_power4+iu2_power4")
111
; Insns of type "load_u" on power4: default latency 4, reserved via
; lsuq_power4 (two dispatch slots, then a load/store unit plus an integer
; unit).
; NOTE(review): the trailing "; 3" presumably records an alternative latency
; figure -- confirm.
112(define_insn_reservation "power4-load-update" 4 ; 3
113  (and (eq_attr "type" "load_u")
114       (eq_attr "cpu" "power4"))
115  "lsuq_power4")
116
; Insns of type "fpload" on power4: default latency 6, reserved via
; lsq_power4.
; NOTE(review): the trailing "; 5" presumably records an alternative latency
; figure -- confirm.
117(define_insn_reservation "power4-fpload" 6 ; 5
118  (and (eq_attr "type" "fpload")
119       (eq_attr "cpu" "power4"))
120  "lsq_power4")
121
; Insns of type "fpload_u" or "fpload_ux" on power4: default latency 6,
; reserved via lsuq_power4.
; NOTE(review): the trailing "; 5" presumably records an alternative latency
; figure -- confirm.
122(define_insn_reservation "power4-fpload-update" 6 ; 5
123  (and (eq_attr "type" "fpload_u,fpload_ux")
124       (eq_attr "cpu" "power4"))
125  "lsuq_power4")
126
; Insns of type "vecload" on power4: default latency 6, reserved via
; lsq_power4.  A define_bypass later in this file gives a different latency
; when the consumer is power4-vecperm.
; NOTE(review): the trailing "; 5" presumably records an alternative latency
; figure -- confirm.
127(define_insn_reservation "power4-vecload" 6 ; 5
128  (and (eq_attr "type" "vecload")
129       (eq_attr "cpu" "power4"))
130  "lsq_power4")
131
; Insns of type "store" on power4: default latency 12.  Takes one dispatch
; slot, then the load/store unit paired with it, then the integer unit with
; the same number (du1/du4 -> lsu1 then iu1, du2/du3 -> lsu2 then iu2).
132(define_insn_reservation "power4-store" 12
133  (and (eq_attr "type" "store")
134       (eq_attr "cpu" "power4"))
135  "(du1_power4,lsu1_power4,iu1_power4)\
136  |(du2_power4,lsu2_power4,iu2_power4)\
137  |(du3_power4,lsu2_power4,iu2_power4)\
138  |(du4_power4,lsu1_power4,iu1_power4)")
139
; Insns of type "store_u" on power4: default latency 12.  Takes two adjacent
; dispatch slots, then a load/store unit together with an integer unit, then
; a further integer unit.  The last alternative reuses du3+du4 but reserves
; lsu2, iu1 and iu2 in three consecutive cycles instead.
140(define_insn_reservation "power4-store-update" 12
141  (and (eq_attr "type" "store_u")
142       (eq_attr "cpu" "power4"))
143  "(du1_power4+du2_power4,lsu1_power4+iu2_power4,iu1_power4)\
144  |(du2_power4+du3_power4,lsu2_power4+iu2_power4,iu2_power4)\
145  |(du3_power4+du4_power4,lsu2_power4+iu1_power4,iu2_power4)\
146  |(du3_power4+du4_power4,lsu2_power4,iu1_power4,iu2_power4)")
147
; Insns of type "store_ux" on power4: default latency 12.  Takes dispatch
; slots du1..du4 in one cycle, then iu1, then lsu2 and iu2 together, then
; iu2 again.
148(define_insn_reservation "power4-store-update-indexed" 12
149  (and (eq_attr "type" "store_ux")
150       (eq_attr "cpu" "power4"))
151   "du1_power4+du2_power4+du3_power4+du4_power4,\
152    iu1_power4,lsu2_power4+iu2_power4,iu2_power4")
153
; Insns of type "fpstore" on power4: default latency 12.  Takes one dispatch
; slot, then the load/store unit paired with it, then the floating-point unit
; with the same number (du1/du4 -> lsu1 then fpu1, du2/du3 -> lsu2 then fpu2).
154(define_insn_reservation "power4-fpstore" 12
155  (and (eq_attr "type" "fpstore")
156       (eq_attr "cpu" "power4"))
157  "(du1_power4,lsu1_power4,fpu1_power4)\
158  |(du2_power4,lsu2_power4,fpu2_power4)\
159  |(du3_power4,lsu2_power4,fpu2_power4)\
160  |(du4_power4,lsu1_power4,fpu1_power4)")
161
; Insns of type "fpstore_u" or "fpstore_ux" on power4: default latency 12.
; Takes two adjacent dispatch slots, then a load/store unit together with an
; integer unit, then a floating-point unit.
162(define_insn_reservation "power4-fpstore-update" 12
163  (and (eq_attr "type" "fpstore_u,fpstore_ux")
164       (eq_attr "cpu" "power4"))
165  "(du1_power4+du2_power4,lsu1_power4+iu2_power4,fpu1_power4)\
166  |(du2_power4+du3_power4,lsu2_power4+iu2_power4,fpu2_power4)\
167  |(du3_power4+du4_power4,lsu2_power4+iu1_power4,fpu2_power4)")
168
; Insns of type "vecstore" on power4: default latency 12.  Takes one dispatch
; slot, then the load/store unit paired with it, then vec_power4.
; define_bypass entries later in this file set other latencies from the
; vector reservations into this one.
169(define_insn_reservation "power4-vecstore" 12
170  (and (eq_attr "type" "vecstore")
171       (eq_attr "cpu" "power4"))
172  "(du1_power4,lsu1_power4,vec_power4)\
173  |(du2_power4,lsu2_power4,vec_power4)\
174  |(du3_power4,lsu2_power4,vec_power4)\
175  |(du4_power4,lsu1_power4,vec_power4)")
176
; Insns of type "load_l", "store_c" or "sync" on power4: default latency 11.
; Takes dispatch slots du1..du4 in one cycle (no alternatives), then lsu1.
177(define_insn_reservation "power4-llsc" 11
178  (and (eq_attr "type" "load_l,store_c,sync")
179       (eq_attr "cpu" "power4"))
180  "du1_power4+du2_power4+du3_power4+du4_power4,\
181  lsu1_power4")
182
183
184; Integer latency is 2 cycles
; Insns of type "integer" on power4: default latency 2, reserved via
; iq_power4 (one dispatch slot, then the paired integer unit).
185(define_insn_reservation "power4-integer" 2
186  (and (eq_attr "type" "integer")
187       (eq_attr "cpu" "power4"))
188  "iq_power4")
189
; Insns of type "two" on power4: default latency 2.  Takes two dispatch slots
; in one cycle, then an integer unit, one cycle with no unit reserved, and a
; second integer unit.  The last alternative pairs du4 with du1.
190(define_insn_reservation "power4-two" 2
191  (and (eq_attr "type" "two")
192       (eq_attr "cpu" "power4"))
193  "(du1_power4+du2_power4,iu1_power4,nothing,iu2_power4)\
194  |(du2_power4+du3_power4,iu2_power4,nothing,iu2_power4)\
195  |(du3_power4+du4_power4,iu2_power4,nothing,iu1_power4)\
196  |(du4_power4+du1_power4,iu1_power4,nothing,iu1_power4)")
197
; Insns of type "three" on power4: default latency 2.  Takes three dispatch
; slots in one cycle, then three integer-unit reservations, each separated
; from the next by one cycle with no unit reserved.  The last two
; alternatives wrap around from du4 back to du1.
198(define_insn_reservation "power4-three" 2
199  (and (eq_attr "type" "three")
200       (eq_attr "cpu" "power4"))
201  "(du1_power4+du2_power4+du3_power4,\
202    iu1_power4,nothing,iu2_power4,nothing,iu2_power4)\
203  |(du2_power4+du3_power4+du4_power4,\
204    iu2_power4,nothing,iu2_power4,nothing,iu1_power4)\
205  |(du3_power4+du4_power4+du1_power4,\
206    iu2_power4,nothing,iu1_power4,nothing,iu1_power4)\
207  |(du4_power4+du1_power4+du2_power4,\
208    iu1_power4,nothing,iu2_power4,nothing,iu2_power4)")
209
; Insns of type "insert_word" on power4: default latency 4.  Takes two
; adjacent dispatch slots, then an integer unit, one cycle with no unit
; reserved, and a second integer unit.
210(define_insn_reservation "power4-insert" 4
211  (and (eq_attr "type" "insert_word")
212       (eq_attr "cpu" "power4"))
213  "(du1_power4+du2_power4,iu1_power4,nothing,iu2_power4)\
214  |(du2_power4+du3_power4,iu2_power4,nothing,iu2_power4)\
215  |(du3_power4+du4_power4,iu2_power4,nothing,iu1_power4)")
216
; Insns of type "cmp" or "fast_compare" on power4: default latency 3,
; reserved via iq_power4.
217(define_insn_reservation "power4-cmp" 3
218  (and (eq_attr "type" "cmp,fast_compare")
219       (eq_attr "cpu" "power4"))
220  "iq_power4")
221
; Insns of type "compare" or "delayed_compare" on power4: default latency 2.
; Takes two adjacent dispatch slots, then two integer-unit reservations in
; consecutive cycles.
222(define_insn_reservation "power4-compare" 2
223  (and (eq_attr "type" "compare,delayed_compare")
224       (eq_attr "cpu" "power4"))
225  "(du1_power4+du2_power4,iu1_power4,iu2_power4)\
226  |(du2_power4+du3_power4,iu2_power4,iu2_power4)\
227  |(du3_power4+du4_power4,iu2_power4,iu1_power4)")
228
; When the consumer is one of the branch or condition-register reservations
; listed here, the latency is 4 instead of the default 2.
229(define_bypass 4 "power4-compare" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
230
; Insns of type "lmul_compare" on power4: default latency 7.  Takes two
; adjacent dispatch slots, holds one integer unit for 6 consecutive cycles,
; then reserves an integer unit for one more cycle.
231(define_insn_reservation "power4-lmul-cmp" 7
232  (and (eq_attr "type" "lmul_compare")
233       (eq_attr "cpu" "power4"))
234  "(du1_power4+du2_power4,iu1_power4*6,iu2_power4)\
235  |(du2_power4+du3_power4,iu2_power4*6,iu2_power4)\
236  |(du3_power4+du4_power4,iu2_power4*6,iu1_power4)")
237
; When the consumer is one of the branch or condition-register reservations
; listed here, the latency is 10 instead of the default 7.
238(define_bypass 10 "power4-lmul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
239
; Insns of type "imul_compare" on power4: default latency 5.  Takes two
; adjacent dispatch slots, holds one integer unit for 4 consecutive cycles,
; then reserves an integer unit for one more cycle.
240(define_insn_reservation "power4-imul-cmp" 5
241  (and (eq_attr "type" "imul_compare")
242       (eq_attr "cpu" "power4"))
243  "(du1_power4+du2_power4,iu1_power4*4,iu2_power4)\
244  |(du2_power4+du3_power4,iu2_power4*4,iu2_power4)\
245  |(du3_power4+du4_power4,iu2_power4*4,iu1_power4)")
246
; When the consumer is one of the branch or condition-register reservations
; listed here, the latency is 8 instead of the default 5.
247(define_bypass 8 "power4-imul-cmp" "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
248
; Insns of type "lmul" on power4: default latency 7.  Takes one dispatch
; slot, then holds the paired integer unit for 6 consecutive cycles
; (du1/du4 -> iu1, du2/du3 -> iu2).
249(define_insn_reservation "power4-lmul" 7
250  (and (eq_attr "type" "lmul")
251       (eq_attr "cpu" "power4"))
252  "(du1_power4,iu1_power4*6)\
253  |(du2_power4,iu2_power4*6)\
254  |(du3_power4,iu2_power4*6)\
255  |(du4_power4,iu1_power4*6)")
256
; Insns of type "imul" on power4: default latency 5.  Takes one dispatch
; slot, then holds the paired integer unit for 4 consecutive cycles
; (du1/du4 -> iu1, du2/du3 -> iu2).
257(define_insn_reservation "power4-imul" 5
258  (and (eq_attr "type" "imul")
259       (eq_attr "cpu" "power4"))
260  "(du1_power4,iu1_power4*4)\
261  |(du2_power4,iu2_power4*4)\
262  |(du3_power4,iu2_power4*4)\
263  |(du4_power4,iu1_power4*4)")
264
; Insns of type "imul2" or "imul3" on power4: default latency 4.  Takes one
; dispatch slot, then holds the paired integer unit for 3 consecutive cycles
; (du1/du4 -> iu1, du2/du3 -> iu2).
265(define_insn_reservation "power4-imul3" 4
266  (and (eq_attr "type" "imul2,imul3")
267       (eq_attr "cpu" "power4"))
268  "(du1_power4,iu1_power4*3)\
269  |(du2_power4,iu2_power4*3)\
270  |(du3_power4,iu2_power4*3)\
271  |(du4_power4,iu1_power4*3)")
272
273
274; SPR move only executes in first IU.
275; Integer division only executes in second IU.
; Insns of type "idiv" on power4: default latency 36.  Takes du1+du2 in one
; cycle (no alternatives), then holds iu2 for 35 consecutive cycles.
276(define_insn_reservation "power4-idiv" 36
277  (and (eq_attr "type" "idiv")
278       (eq_attr "cpu" "power4"))
279  "du1_power4+du2_power4,iu2_power4*35")
280
; Insns of type "ldiv" on power4: default latency 68.  Takes du1+du2 in one
; cycle (no alternatives), then holds iu2 for 67 consecutive cycles.
281(define_insn_reservation "power4-ldiv" 68
282  (and (eq_attr "type" "ldiv")
283       (eq_attr "cpu" "power4"))
284  "du1_power4+du2_power4,iu2_power4*67")
285
286
; Insns of type "mtjmpr" or "mfjmpr" on power4: default latency 3.  Takes
; dispatch slot du1 only, then bpu_power4.
287(define_insn_reservation "power4-mtjmpr" 3
288  (and (eq_attr "type" "mtjmpr,mfjmpr")
289       (eq_attr "cpu" "power4"))
290  "du1_power4,bpu_power4")
291
292
293; Branches take dispatch Slot 4 (du5_power4, counting slots from 0).  The
294; absence_sets above prevent other insns from grabbing previous dispatch slots
; once this is assigned.
; Insns of type "jmpreg" or "branch" on power4: default latency 2.  Every
; alternative reserves du5; each further alternative also reserves one more
; of the slots immediately below it (du4, then du3, ...) in the same cycle.
; bpu_power4 is reserved in the following cycle.
295(define_insn_reservation "power4-branch" 2
296  (and (eq_attr "type" "jmpreg,branch")
297       (eq_attr "cpu" "power4"))
298  "(du5_power4\
299   |du4_power4+du5_power4\
300   |du3_power4+du4_power4+du5_power4\
301   |du2_power4+du3_power4+du4_power4+du5_power4\
302   |du1_power4+du2_power4+du3_power4+du4_power4+du5_power4),bpu_power4")
303
304
305; Condition Register logical ops are split if non-destructive (RT != RB)
; Insns of type "cr_logical" on power4: default latency 2.  Takes dispatch
; slot du1 only, then cru_power4 for one cycle.
306(define_insn_reservation "power4-crlogical" 2
307  (and (eq_attr "type" "cr_logical")
308       (eq_attr "cpu" "power4"))
309  "du1_power4,cru_power4")
310
; Insns of type "delayed_cr" on power4: default latency 4.  Takes du1+du2 in
; one cycle, then cru_power4 for two consecutive cycles.
311(define_insn_reservation "power4-delayedcr" 4
312  (and (eq_attr "type" "delayed_cr")
313       (eq_attr "cpu" "power4"))
314  "du1_power4+du2_power4,cru_power4,cru_power4")
315
316; 4 mfcrf (each 3 cyc, 1/cyc) + 3 fxu
; Insns of type "mfcr" on power4: default latency 6.  Holds dispatch slots
; du1..du4 for two consecutive cycles; cru_power4 is reserved from the second
; of those cycles and for three more, four cycles in total.
317(define_insn_reservation "power4-mfcr" 6
318  (and (eq_attr "type" "mfcr")
319       (eq_attr "cpu" "power4"))
320  "du1_power4+du2_power4+du3_power4+du4_power4,\
321   du1_power4+du2_power4+du3_power4+du4_power4+cru_power4,\
322   cru_power4,cru_power4,cru_power4")
323
324; mfcrf (1 field)
; Insns of type "mfcrf" on power4: default latency 3.  Takes dispatch slot
; du1 only, then cru_power4 for one cycle.
325(define_insn_reservation "power4-mfcrf" 3
326  (and (eq_attr "type" "mfcrf")
327       (eq_attr "cpu" "power4"))
328  "du1_power4,cru_power4")
329
330; mtcrf (1 field)
; Insns of type "mtcr" on power4: default latency 4.  Takes dispatch slot du1
; only, then iu1_power4 for one cycle.
331(define_insn_reservation "power4-mtcr" 4
332  (and (eq_attr "type" "mtcr")
333       (eq_attr "cpu" "power4"))
334  "du1_power4,iu1_power4")
335
336; Basic FP latency is 6 cycles
; Insns of type "fp" or "dmul" on power4: default latency 6, reserved via
; fpq_power4 (one dispatch slot, then the paired floating-point unit).
337(define_insn_reservation "power4-fp" 6
338  (and (eq_attr "type" "fp,dmul")
339       (eq_attr "cpu" "power4"))
340  "fpq_power4")
341
; Insns of type "fpcompare" on power4: default latency 5, reserved via
; fpq_power4.
342(define_insn_reservation "power4-fpcompare" 5
343  (and (eq_attr "type" "fpcompare")
344       (eq_attr "cpu" "power4"))
345  "fpq_power4")
346
; Insns of type "sdiv" or "ddiv" on power4: default latency 33.  Takes one
; dispatch slot, then holds the paired floating-point unit for 28 consecutive
; cycles (du1/du4 -> fpu1, du2/du3 -> fpu2).
347(define_insn_reservation "power4-sdiv" 33
348  (and (eq_attr "type" "sdiv,ddiv")
349       (eq_attr "cpu" "power4"))
350  "(du1_power4,fpu1_power4*28)\
351  |(du2_power4,fpu2_power4*28)\
352  |(du3_power4,fpu2_power4*28)\
353  |(du4_power4,fpu1_power4*28)")
354
; Insns of type "ssqrt" or "dsqrt" on power4: default latency 40.  Takes one
; dispatch slot, then holds the paired floating-point unit for 35 consecutive
; cycles.  The slot/unit pairing matches fpq_power4 and power4-sdiv:
; du1/du4 -> fpu1, du2/du3 -> fpu2.  The du4 alternative previously named
; fpu2, which left fpu1 reachable from only one dispatch slot.
355(define_insn_reservation "power4-sqrt" 40
356  (and (eq_attr "type" "ssqrt,dsqrt")
357       (eq_attr "cpu" "power4"))
358  "(du1_power4,fpu1_power4*35)\
359  |(du2_power4,fpu2_power4*35)\
360  |(du3_power4,fpu2_power4*35)\
361  |(du4_power4,fpu1_power4*35)")
362
; Insns of type "isync" on power4: default latency 2.  Takes dispatch slots
; du1..du4 in one cycle (no alternatives), then lsu1.
363(define_insn_reservation "power4-isync" 2
364  (and (eq_attr "type" "isync")
365       (eq_attr "cpu" "power4"))
366  "du1_power4+du2_power4+du3_power4+du4_power4,\
367  lsu1_power4")
368
369
370; VMX
; Insns of type "vecsimple" on power4: default latency 2, reserved via
; vq_power4 (one dispatch slot, then vec_power4).  define_bypass entries
; later in this file override the latency for some consumers.
371(define_insn_reservation "power4-vecsimple" 2
372  (and (eq_attr "type" "vecsimple")
373       (eq_attr "cpu" "power4"))
374  "vq_power4")
375
; Insns of type "veccomplex" on power4: default latency 5, reserved via
; vq_power4.  define_bypass entries later in this file override the latency
; for some consumers.
376(define_insn_reservation "power4-veccomplex" 5
377  (and (eq_attr "type" "veccomplex")
378       (eq_attr "cpu" "power4"))
379  "vq_power4")
380
381; vecfp compare
; Insns of type "veccmp" on power4: default latency 8, reserved via
; vq_power4.
382(define_insn_reservation "power4-veccmp" 8
383  (and (eq_attr "type" "veccmp")
384       (eq_attr "cpu" "power4"))
385  "vq_power4")
386
; Insns of type "vecfloat" on power4: default latency 8, reserved via
; vq_power4.  define_bypass entries later in this file override the latency
; for some consumers.
387(define_insn_reservation "power4-vecfloat" 8
388  (and (eq_attr "type" "vecfloat")
389       (eq_attr "cpu" "power4"))
390  "vq_power4")
391
; Insns of type "vecperm" on power4: default latency 2, reserved via
; vpq_power4 (one dispatch slot, then vecperm_power4).  define_bypass entries
; later in this file override the latency for some consumers.
392(define_insn_reservation "power4-vecperm" 2
393  (and (eq_attr "type" "vecperm")
394       (eq_attr "cpu" "power4"))
395  "vpq_power4")
396
; Producer/consumer latency overrides for the vector reservations.  In each
; define_bypass the number is the latency, the first string lists producers
; and the second lists consumers.

; A vector load feeding a permute: 4 instead of the default 6.
397(define_bypass 4 "power4-vecload" "power4-vecperm")
398
; Results moving between the vec_power4 reservations and the vecperm_power4
; reservation cost one cycle more than the producer's default latency
; (2->3, 5->6, 2->3, 8->9).
399(define_bypass 3 "power4-vecsimple" "power4-vecperm")
400(define_bypass 6 "power4-veccomplex" "power4-vecperm")
401(define_bypass 3 "power4-vecperm"
402		 "power4-vecsimple,power4-veccomplex,power4-vecfloat")
403(define_bypass 9 "power4-vecfloat" "power4-vecperm")
404
; Simple and complex vector results consumed by the branch or
; condition-register reservations: latency 5.
405(define_bypass 5 "power4-vecsimple,power4-veccomplex"
406		 "power4-branch,power4-crlogical,power4-delayedcr,power4-mfcr,power4-mfcrf")
407
; Vector results consumed by a vector store: two cycles more than the
; producer's default latency (2->4, 5->7, 8->10).
408(define_bypass 4 "power4-vecsimple,power4-vecperm" "power4-vecstore")
409(define_bypass 7 "power4-veccomplex" "power4-vecstore")
410(define_bypass 10 "power4-vecfloat" "power4-vecstore")
411