1;; Scheduling description for IBM POWER9 processor.
2;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
3;;
4;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
5
6;; This file is part of GCC.
7;;
8;; GCC is free software; you can redistribute it and/or modify it
9;; under the terms of the GNU General Public License as published
10;; by the Free Software Foundation; either version 3, or (at your
11;; option) any later version.
12;;
13;; GCC is distributed in the hope that it will be useful, but WITHOUT
14;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
16;; License for more details.
17;;
18;; You should have received a copy of the GNU General Public License
19;; along with GCC; see the file COPYING3.  If not see
20;; <http://www.gnu.org/licenses/>.
21
; Automata used by this description, one each for the dispatch ports, the
; LSU, the VSU, the FP divide pseudo-units and the miscellaneous units.
(define_automaton
  "power9dsp,power9lsu,power9vsu,power9fpdiv,power9misc")
23
; Four LSU units.
(define_cpu_unit
  "lsu0_power9,lsu1_power9,lsu2_power9,lsu3_power9"
  "power9lsu")

; Four VSU units.
(define_cpu_unit
  "vsu0_power9,vsu1_power9,vsu2_power9,vsu3_power9"
  "power9vsu")

; The two vector permute units are part of the VSU and share its automaton.
(define_cpu_unit "prm0_power9,prm1_power9" "power9vsu")

; The two fixed point divide units are not pipelined.
(define_cpu_unit "fx_div0_power9,fx_div1_power9" "power9misc")

; Branch, crypto and DFU units.
(define_cpu_unit "bru_power9,cryptu_power9,dfu_power9" "power9misc")

; False units, used only to model the non-pipelined FP div/sqrt insns.
(define_cpu_unit
  "fp_div0_power9,fp_div1_power9,fp_div2_power9,fp_div3_power9"
  "power9fpdiv")
34
35
; Dispatch port units: x0-x3 for the execution slices, xa0/xa1 and xb0/xb1
; for the 3rd dispatch port of a superslice, br0/br1 for the branch ports.
(define_cpu_unit
  "x0_power9,x1_power9,xa0_power9,xa1_power9,
   x2_power9,x3_power9,xb0_power9,xb1_power9,
   br0_power9,br1_power9"
  "power9dsp")
39
40
41; Dispatch port reservations
42;
43; Power9 can dispatch a maximum of 6 iops per cycle with the following
44; general restrictions (other restrictions also apply):
45;   1) At most 2 iops per execution slice
46;   2) At most 2 iops to the branch unit
; Note that an insn's position in a dispatch group of 6 insns does not imply
; which execution slice the insn is routed to.  The units are used to model
; the conflicts that exist (e.g. an 'even' requirement will preclude dispatch
; with 2 insns with a 'superslice' requirement).
51
; xa0/xa1 together stand for the 3rd dispatch port of a superslice.  They are
; kept as two separate units so that insns which preclude use of that port
; can still be scheduled two to a superslice, each one reserving part of the
; 3rd slot.  xb0/xb1 play the same role.
(define_reservation "DU_xa_power9"
  "xa0_power9+xa1_power9")
(define_reservation "DU_xb_power9"
  "xb0_power9+xb1_power9")
58
; Dispatch to any execution slice: any one of the four slice ports, or
; either complete 3rd port.
(define_reservation "DU_any_power9"
  "x0_power9|
   x1_power9|
   DU_xa_power9|
   x2_power9|
   x3_power9|
   DU_xb_power9")
63
; Even slice.  The reservation actually takes both the even and the odd
; slot of a pair.
(define_reservation "DU_even_power9"
  "x0_power9+x1_power9|
   x2_power9+x3_power9")
66
; One slice port plus the matching part of the 3rd slot.
(define_reservation "DU_slice_3_power9"
  "x0_power9+xa0_power9|
   x1_power9+xa1_power9|
   x2_power9+xb0_power9|
   x3_power9+xb1_power9")
71
; Superslice: both slice ports of one pair.
(define_reservation "DU_super_power9"
  "x0_power9+x1_power9|
   x2_power9+x3_power9")
75
; 2-way cracked insn: reserves a pair of dispatch ports.
(define_reservation "DU_C2_power9"
  "x0_power9+x1_power9|
   x1_power9+DU_xa_power9|
   x1_power9+x2_power9|
   DU_xa_power9+x2_power9|
   x2_power9+x3_power9|
   x3_power9+DU_xb_power9")
83
; 2-way cracked insn that also takes part of the 3rd slot.
(define_reservation "DU_C2_3_power9"
  "x0_power9+x1_power9+xa0_power9|
   x1_power9+x2_power9+xa1_power9|
   x2_power9+x3_power9+xb0_power9")
88
; 3-way cracked insn.  It consumes the whole decode/dispatch cycle, so every
; dispatch unit is reserved at once.
(define_reservation "DU_C3_power9"
  "x0_power9+x1_power9+xa0_power9+xa1_power9+
   x2_power9+x3_power9+xb0_power9+xb1_power9+
   br0_power9+br1_power9")
93
; Either of the two branch dispatch ports.
(define_reservation "DU_branch_power9"
  "br0_power9|br1_power9")
96
97
98; Execution unit reservations
; Any single LSU.
(define_reservation "LSU_power9"
  "lsu0_power9|
   lsu1_power9|
   lsu2_power9|
   lsu3_power9")

; Two LSUs at once, taken as neighbouring pairs (lsu3 pairs with lsu0).
(define_reservation "LSU_pair_power9"
  "lsu0_power9+lsu1_power9|
   lsu1_power9+lsu2_power9|
   lsu2_power9+lsu3_power9|
   lsu3_power9+lsu0_power9")
105
; Any single VSU.
(define_reservation "VSU_power9"
  "vsu0_power9|
   vsu1_power9|
   vsu2_power9|
   vsu3_power9")

; Both VSUs of one pair.
(define_reservation "VSU_super_power9"
  "vsu0_power9+vsu1_power9|
   vsu2_power9+vsu3_power9")

; Either vector permute unit.
(define_reservation "VSU_PRM_power9"
  "prm0_power9|prm1_power9")
113
; Reservations for FP div/sqrt.  They leave the VSU free for other insns but
; block further div/sqrt for a number of cycles.  The real blocking time
; depends on the insn; a single value (8) is used for all of them to keep the
; number of DFA states reasonable.

; Scalar form: one false divide unit for 8 cycles.
(define_reservation "FP_DIV_power9"
  "fp_div0_power9*8|
   fp_div1_power9*8|
   fp_div2_power9*8|
   fp_div3_power9*8")

; Vector form: a pair of false divide units for 8 cycles.
(define_reservation "VEC_DIV_power9"
  "fp_div0_power9*8+fp_div1_power9*8|
   fp_div2_power9*8+fp_div3_power9*8")
125
126
127; LS Unit
; Integer loads, split by sign extension and update form.

; Plain load: latency 4, any dispatch port, then one LSU.
(define_insn_reservation "power9-load" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "no"))
  "DU_any_power9,LSU_power9")

; Update form: dispatched 2-way cracked, then an LSU and a VSU together.
(define_insn_reservation "power9-load-update" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "load")
       (eq_attr "sign_extend" "no")
       (eq_attr "update" "yes"))
  "DU_C2_power9,LSU_power9+VSU_power9")

; Sign-extending load: latency 6, dispatched 2-way cracked.
(define_insn_reservation "power9-load-ext" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "no"))
  "DU_C2_power9,LSU_power9")

; Sign-extending update form: latency 6, dispatched 3-way cracked.
(define_insn_reservation "power9-load-ext-update" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "load")
       (eq_attr "sign_extend" "yes")
       (eq_attr "update" "yes"))
  "DU_C3_power9,LSU_power9+VSU_power9")
155
; FP loads, split by size (64 vs 32) and update form.

; 64-bit load: latency 4.
(define_insn_reservation "power9-fpload-double" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64"))
  "DU_slice_3_power9,LSU_power9")

; 64-bit update form: an LSU and a VSU are reserved together.
(define_insn_reservation "power9-fpload-update-double" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; SFmode loads are cracked and take 2 cycles more than DFmode loads.
(define_insn_reservation "power9-fpload-single" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32"))
  "DU_C2_3_power9,LSU_power9")

; 32-bit update form: dispatched 3-way cracked.
(define_insn_reservation "power9-fpload-update-single" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32"))
  "DU_C3_power9,LSU_power9+VSU_power9")
184
; Vector load: latency 5, any dispatch port, then a pair of LSUs.
(define_insn_reservation "power9-vecload" 5
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecload"))
  "DU_any_power9,LSU_pair_power9")
189
; Store data can issue 2 cycles after AGEN issue (3 cycles for a vector
; store).  Non-update stores are given latency 0.

; Non-indexed form.
(define_insn_reservation "power9-store" 0
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "no"))
  "DU_slice_3_power9,LSU_power9")

; Indexed form; same reservation as the non-indexed form.
(define_insn_reservation "power9-store-indexed" 0
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "store")
       (eq_attr "update" "no")
       (eq_attr "indexed" "yes"))
  "DU_slice_3_power9,LSU_power9")
204
; Update-form stores: the updated address register has a 2 cycle latency.

; Non-indexed form.
(define_insn_reservation "power9-store-update" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "no"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; Indexed form; same reservation as the non-indexed form.
(define_insn_reservation "power9-store-update-indexed" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "store")
       (eq_attr "update" "yes")
       (eq_attr "indexed" "yes"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")
220
; FP store, non-update form: latency 0.
(define_insn_reservation "power9-fpstore" 0
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpstore")
       (eq_attr "update" "no"))
  "DU_slice_3_power9,LSU_power9")

; FP store, update form: the updated address register has a 2 cycle latency.
(define_insn_reservation "power9-fpstore-update" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpstore")
       (eq_attr "update" "yes"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")
233
; Vector store: latency 0, superslice dispatch, then a pair of LSUs.
(define_insn_reservation "power9-vecstore" 0
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecstore"))
  "DU_super_power9,LSU_pair_power9")
238
; Store forwarding: a load fed by any of the store reservations sees a
; latency of 6.
(define_bypass 6
  "power9-*store*"
  "power9-*load*")
241
; load_l insns: latency 4, reserved like a plain load.
(define_insn_reservation "power9-larx" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "load_l"))
  "DU_any_power9,LSU_power9")

; store_c insns: latency 2, reserved like an update-form store.
(define_insn_reservation "power9-stcx" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "store_c"))
  "DU_C2_3_power9,LSU_power9+VSU_power9")

; sync and isync: latency 4, one LSU.
(define_insn_reservation "power9-sync" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "sync,isync"))
  "DU_any_power9,LSU_power9")
256
257
258; VSU Execution Unit
259
260; Fixed point ops
261
; Most ALU insns, record forms included, are simple 2 cycle operations.
(define_insn_reservation "power9-alu" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "add,exts,integer,logical,isel"))
  "DU_any_power9,VSU_power9")

; The CR result has a latency of 5 for the CR-reading insns below.
(define_bypass 5
  "power9-alu"
  "power9-crlogical,power9-mfcr,power9-mfcrf")
270
; Non-record rotate/shift/insert: these prevent use of the third slot, so
; they dispatch with DU_slice_3.
(define_insn_reservation "power9-rot" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no"))
  "DU_slice_3_power9,VSU_power9")
277
; Record-form rotate/shift/insert are cracked.
(define_insn_reservation "power9-cracked-alu" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes"))
  "DU_C2_3_power9,VSU_power9")

; The CR result has a latency of 7 for the CR-reading insns below.
(define_bypass 7
  "power9-cracked-alu"
  "power9-crlogical,power9-mfcr,power9-mfcrf")
287
; cntlz, popcnt and trap: latency 3.
(define_insn_reservation "power9-alu2" 3
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "cntlz,popcnt,trap"))
  "DU_any_power9,VSU_power9")

; The CR result has a latency of 6 for the CR-reading insns below.
(define_bypass 6
  "power9-alu2"
  "power9-crlogical,power9-mfcr,power9-mfcrf")
295
; Compare: latency 2, any dispatch port, then one VSU.
(define_insn_reservation "power9-cmp" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "cmp"))
  "DU_any_power9,VSU_power9")
300
301
; The 'two' and 'three' types are modelled as 2-way and 3-way cracked insns.

; 'two': latency 4, 2-way cracked dispatch.
(define_insn_reservation "power9-two" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "two"))
  "DU_C2_power9,VSU_power9")

; 'three': latency 6, 3-way cracked dispatch.
(define_insn_reservation "power9-three" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "three"))
  "DU_C3_power9,VSU_power9")
312
; Multiply, non-record form: latency 5.
(define_insn_reservation "power9-mul" 5
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mul")
       (eq_attr "dot" "no"))
  "DU_slice_3_power9,VSU_power9")

; Multiply, record form: same latency, dispatched 2-way cracked.
(define_insn_reservation "power9-mul-compare" 5
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mul")
       (eq_attr "dot" "yes"))
  "DU_C2_3_power9,VSU_power9")

; The CR result has a latency of 10 for the CR-reading insns below.
(define_bypass 10
  "power9-mul-compare"
  "power9-crlogical,power9-mfcr,power9-mfcrf")
327
; Fixed point divides hold a divide unit for a minimum of 8 cycles; that is
; the figure used for the reservation in both sizes.

; 32-bit divide: latency 16.
(define_insn_reservation "power9-idiv" 16
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "div")
       (eq_attr "size" "32"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")

; 64-bit divide: latency 24.
(define_insn_reservation "power9-ldiv" 24
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "div")
       (eq_attr "size" "64"))
  "DU_even_power9,fx_div0_power9*8|fx_div1_power9*8")
340
; CR logical: latency 2.
(define_insn_reservation "power9-crlogical" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "cr_logical"))
  "DU_any_power9,VSU_power9")

; mfcrf: latency 2.
(define_insn_reservation "power9-mfcrf" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mfcrf"))
  "DU_any_power9,VSU_power9")

; mfcr: latency 6, dispatched 3-way cracked.
(define_insn_reservation "power9-mfcr" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mfcr"))
  "DU_C3_power9,VSU_power9")
355
; mtcr.  A form that targets more than one CR field is cracked, so the
; 1-field and >1-field cases should really be differentiated; for now both
; share this reservation.
(define_insn_reservation "power9-mtcr" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mtcr"))
  "DU_any_power9,VSU_power9")
362
; Moves to LR/CTR execute in the VSU; latency 5.
(define_insn_reservation "power9-mtjmpr" 5
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mtjmpr"))
  "DU_any_power9,VSU_power9")
368
369; Floating point/Vector ops
; fpsimple: latency 2.
(define_insn_reservation "power9-fpsimple" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpsimple"))
  "DU_slice_3_power9,VSU_power9")

; fp and dmul: latency 5.
(define_insn_reservation "power9-fp" 5
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fp,dmul"))
  "DU_slice_3_power9,VSU_power9")

; fpcompare: latency 3.
(define_insn_reservation "power9-fpcompare" 3
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "fpcompare"))
  "DU_slice_3_power9,VSU_power9")
384
; FP div/sqrt execute in the VSU slices.  They are not pipelined with respect
; to other div/sqrt insns, but for the most part they do not block pipelined
; ops.  Each takes a dispatch slot, then a VSU, then FP_DIV_power9.

; sdiv: latency 22.
(define_insn_reservation "power9-sdiv" 22
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "sdiv"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; ddiv: latency 27.
(define_insn_reservation "power9-ddiv" 27
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "ddiv"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; ssqrt: latency 26.
(define_insn_reservation "power9-sqrt" 26
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "ssqrt"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")

; dsqrt: latency 36.
(define_insn_reservation "power9-dsqrt" 36
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "dsqrt"))
  "DU_slice_3_power9,VSU_power9,FP_DIV_power9")
406
; Vector ops that dispatch to a superslice and execute on a VSU pair.

; 2 cycle vector ops.
(define_insn_reservation "power9-vec-2cyc" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx"))
  "DU_super_power9,VSU_super_power9")

; veccmp: latency 3.
(define_insn_reservation "power9-veccmp" 3
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "veccmp"))
  "DU_super_power9,VSU_super_power9")

; vecsimple: latency 3.
(define_insn_reservation "power9-vecsimple" 3
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecsimple"))
  "DU_super_power9,VSU_super_power9")

; vecfloat/vecdouble of any size other than 128: latency 7.
(define_insn_reservation "power9-vecnormal" 7
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128"))
  "DU_super_power9,VSU_super_power9")
427
; Quad-precision FP ops (vecfloat/vecdouble with size 128) execute in the
; DFU; latency 12.
(define_insn_reservation "power9-qp" 12
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128"))
  "DU_super_power9,dfu_power9")
434
; vecperm: latency 3, executes on one of the permute units.
(define_insn_reservation "power9-vecperm" 3
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecperm"))
  "DU_super_power9,VSU_PRM_power9")

; veccomplex: latency 7, executes on a VSU pair.
(define_insn_reservation "power9-veccomplex" 7
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "veccomplex"))
  "DU_super_power9,VSU_super_power9")
444
; vecfdiv: latency 24; superslice dispatch, a VSU pair, then VEC_DIV_power9.
(define_insn_reservation "power9-vecfdiv" 24
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecfdiv"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")

; vecdiv of any size other than 128: latency 27, same reservation.
(define_insn_reservation "power9-vecdiv" 27
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128"))
  "DU_super_power9,VSU_super_power9,VEC_DIV_power9")
455
; QP div/mul reserve the DFU for only 8 cycles in order to limit the DFA
; state size.

; QP divide (vecdiv with size 128): latency 56.
(define_insn_reservation "power9-qpdiv" 56
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "128"))
  "DU_super_power9,dfu_power9*8")

; QP multiply (qmul with size 128): latency 24.
(define_insn_reservation "power9-qpmul" 24
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "qmul")
       (eq_attr "size" "128"))
  "DU_super_power9,dfu_power9*8")
468
; mffgpr: latency 2.
(define_insn_reservation "power9-mffgpr" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mffgpr"))
  "DU_slice_3_power9,VSU_power9")

; mftgpr: latency 2.
(define_insn_reservation "power9-mftgpr" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mftgpr"))
  "DU_slice_3_power9,VSU_power9")
478
479
480; Branch Unit
; Moves from LR/CTR execute in the BRU, but they also consume a writeback
; port from an execution slice, so a VSU is reserved alongside the BRU.
(define_insn_reservation "power9-mfjmpr" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "mfjmpr"))
  "DU_branch_power9,bru_power9+VSU_power9")
487
; Branches (jmpreg and branch) take 2 cycles.
(define_insn_reservation "power9-branch" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "jmpreg,branch"))
  "DU_branch_power9,bru_power9")
493
494
495; Crypto Unit
; crypto: latency 6, superslice dispatch, then the crypto unit.
(define_insn_reservation "power9-crypto" 6
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "crypto"))
  "DU_super_power9,cryptu_power9")
500
501
502; HTM Unit
; htm: latency 4, dispatched 2-way cracked, then one LSU.
(define_insn_reservation "power9-htm" 4
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "htm"))
  "DU_C2_power9,LSU_power9")

; htmsimple: latency 2, any dispatch port, then one VSU.
(define_insn_reservation "power9-htm-simple" 2
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "htmsimple"))
  "DU_any_power9,VSU_power9")
512
513
514; DFP Unit
; dfp: latency 12, even-slice dispatch, then the DFU.
(define_insn_reservation "power9-dfp" 12
  (and (eq_attr "cpu" "power9")
       (eq_attr "type" "dfp"))
  "DU_even_power9,dfu_power9")
519
520