1;; Scheduling description for IBM POWER10 processor.
2;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
3;;
4;; This is a clone of power9.md.  It is intended to be a placeholder until a
5;; real scheduler model can be contributed.
6;; The original power9.md was contributed by Pat Haugen (pthaugen@us.ibm.com).
7
8;; This file is part of GCC.
9;;
10;; GCC is free software; you can redistribute it and/or modify it
11;; under the terms of the GNU General Public License as published
12;; by the Free Software Foundation; either version 3, or (at your
13;; option) any later version.
14;;
15;; GCC is distributed in the hope that it will be useful, but WITHOUT
16;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
18;; License for more details.
19;;
20;; You should have received a copy of the GNU General Public License
21;; along with GCC; see the file COPYING3.  If not see
22;; <http://www.gnu.org/licenses/>.
23
24;; This file was cloned from power9.md; it does not (yet) describe the actual
25;; POWER10 processor.
26
; Five automata are declared here; each define_cpu_unit below assigns its
; units to one of them through its second operand.
27(define_automaton "power10dsp,power10lsu,power10vsu,power10fpdiv,power10misc")
28
; Four lsu units and four vsu units, each group in its own automaton.
29(define_cpu_unit "lsu0_power10,lsu1_power10,lsu2_power10,lsu3_power10" "power10lsu")
30(define_cpu_unit "vsu0_power10,vsu1_power10,vsu2_power10,vsu3_power10" "power10vsu")
31; Two vector permute units, part of vsu
32(define_cpu_unit "prm0_power10,prm1_power10" "power10vsu")
33; Two fixed point divide units, not pipelined
34(define_cpu_unit "fx_div0_power10,fx_div1_power10" "power10misc")
; The bru, cryptu and dfu units share the misc automaton with the fixed point
; divide units.
35(define_cpu_unit "bru_power10,cryptu_power10,dfu_power10" "power10misc")
36; Create false units (four of them) for use by non-pipelined FP div/sqrt
37(define_cpu_unit "fp_div0_power10,fp_div1_power10,fp_div2_power10,fp_div3_power10"
38		 "power10fpdiv")
39
40
; Units of the dispatch automaton: x0-x3, the xa0/xa1 and xb0/xb1 units, and
; the br0/br1 branch ports.  They are only combined into DU_* reservations.
41(define_cpu_unit "x0_power10,x1_power10,xa0_power10,xa1_power10,
42		  x2_power10,x3_power10,xb0_power10,xb1_power10,
43		  br0_power10,br1_power10" "power10dsp")
44
45
46; Dispatch port reservations
47;
48; The processor can dispatch a maximum of 6 iops per cycle with the following
49; general restrictions (other restrictions also apply):
50;   1) At most 2 iops per execution slice
51;   2) At most 2 iops to the branch unit
52; Note that insn position in a dispatch group of 6 insns does not imply which
53; execution slice the insn is routed to.  The units are used to model the
54; conflicts that exist (e.g. an 'even' requirement will preclude dispatch
55; with 2 insns with 'superslice' requirement).
56
57; The xa0/xa1 units really represent the 3rd dispatch port for a superslice but
58; are listed as separate units to allow those insns that preclude its use to
59; still be scheduled two to a superslice while reserving the 3rd slot.  The
60; same applies for xb0/xb1.
; In the reservation strings below, '+' reserves the listed units together in
; the same cycle and '|' separates alternative reservations.
61(define_reservation "DU_xa_power10" "xa0_power10+xa1_power10")
62(define_reservation "DU_xb_power10" "xb0_power10+xb1_power10")
63
64; Any execution slice dispatch
; Choosing the DU_xa/DU_xb alternative takes both units of that pair.
65(define_reservation "DU_any_power10"
66		    "x0_power10|x1_power10|DU_xa_power10|x2_power10|x3_power10|
67		     DU_xb_power10")
68
69; Even slice, actually takes even/odd slots
; (reserves the same unit pairs as DU_super_power10 below)
70(define_reservation "DU_even_power10" "x0_power10+x1_power10|x2_power10+x3_power10")
71
72; Slice plus 3rd slot
; Each alternative pairs one x unit with a single xa/xb unit.
73(define_reservation "DU_slice_3_power10"
74		    "x0_power10+xa0_power10|x1_power10+xa1_power10|
75		     x2_power10+xb0_power10|x3_power10+xb1_power10")
76
77; Superslice
78(define_reservation "DU_super_power10"
79		    "x0_power10+x1_power10|x2_power10+x3_power10")
80
81; 2-way cracked
; Six alternatives, each reserving two entries from the sequence
; x0, x1, DU_xa, x2, x3, DU_xb.
82(define_reservation "DU_C2_power10" "x0_power10+x1_power10|
83				    x1_power10+DU_xa_power10|
84				    x1_power10+x2_power10|
85				    DU_xa_power10+x2_power10|
86				    x2_power10+x3_power10|
87				    x3_power10+DU_xb_power10")
88
89; 2-way cracked plus 3rd slot
; Three alternatives: two adjacent x units plus one xa/xb unit.
90(define_reservation "DU_C2_3_power10" "x0_power10+x1_power10+xa0_power10|
91				      x1_power10+x2_power10+xa1_power10|
92				      x2_power10+x3_power10+xb0_power10")
93
94; 3-way cracked (consumes whole decode/dispatch cycle)
; Reserves every unit of the dispatch automaton, including both branch ports.
95(define_reservation "DU_C3_power10"
96		    "x0_power10+x1_power10+xa0_power10+xa1_power10+x2_power10+
97		     x3_power10+xb0_power10+xb1_power10+br0_power10+br1_power10")
98
99; Branch ports
100(define_reservation "DU_branch_power10" "br0_power10|br1_power10")
101
102
103; Execution unit reservations
; Any one of the four lsu units.
104(define_reservation "LSU_power10"
105		    "lsu0_power10|lsu1_power10|lsu2_power10|lsu3_power10")
106
; Two lsu units at once; the pairs are 0+1, 1+2, 2+3 and 3+0.
107(define_reservation "LSU_pair_power10"
108		    "lsu0_power10+lsu1_power10|lsu1_power10+lsu2_power10|
109		     lsu2_power10+lsu3_power10|lsu3_power10+lsu0_power10")
110
; Any one of the four vsu units.
111(define_reservation "VSU_power10"
112		    "vsu0_power10|vsu1_power10|vsu2_power10|vsu3_power10")
113
; Two vsu units at once: either 0+1 or 2+3.
114(define_reservation "VSU_super_power10"
115		    "vsu0_power10+vsu1_power10|vsu2_power10+vsu3_power10")
116
; Either of the two vector permute units.
117(define_reservation "VSU_PRM_power10" "prm0_power10|prm1_power10")
118
119; Define the reservation to be used by FP div/sqrt which allows other insns
120; to be issued to the VSU, but blocks other div/sqrt for a number of cycles.
121; Note that the number of cycles blocked varies depending on insn, but we
122; just use the same number for all in order to keep the number of DFA states
123; reasonable.
; '*8' repeats the unit reservation for 8 consecutive cycles.
124(define_reservation "FP_DIV_power10"
125		    "fp_div0_power10*8|fp_div1_power10*8|fp_div2_power10*8|
126		     fp_div3_power10*8")
; Vector form: holds a pair of fp_div units (0+1 or 2+3) for the same 8 cycles.
127(define_reservation "VEC_DIV_power10"
128		    "fp_div0_power10*8+fp_div1_power10*8|
129		     fp_div2_power10*8+fp_div3_power10*8")
130
131
132; LS Unit
; In each define_insn_reservation the number after the name is the default
; latency, the (and ...) condition selects the insns it applies to, and the
; final string is the reservation.  A ',' in that string advances to the next
; cycle, so the dispatch reservation comes first and the execution unit after.

; Plain integer load: any dispatch slot, then one lsu.
133(define_insn_reservation "power10-load" 4
134  (and (eq_attr "type" "load")
135       (eq_attr "sign_extend" "no")
136       (eq_attr "update" "no")
137       (eq_attr "cpu" "power10"))
138  "DU_any_power10,LSU_power10")
139
; Update form: 2-way cracked dispatch, and a vsu is reserved with the lsu.
140(define_insn_reservation "power10-load-update" 4
141  (and (eq_attr "type" "load")
142       (eq_attr "sign_extend" "no")
143       (eq_attr "update" "yes")
144       (eq_attr "cpu" "power10"))
145  "DU_C2_power10,LSU_power10+VSU_power10")
146
; Sign-extending load: 2-way cracked dispatch, latency 6 instead of 4.
147(define_insn_reservation "power10-load-ext" 6
148  (and (eq_attr "type" "load")
149       (eq_attr "sign_extend" "yes")
150       (eq_attr "update" "no")
151       (eq_attr "cpu" "power10"))
152  "DU_C2_power10,LSU_power10")
153
; Sign-extending update form: 3-way cracked dispatch, lsu and vsu together.
154(define_insn_reservation "power10-load-ext-update" 6
155  (and (eq_attr "type" "load")
156       (eq_attr "sign_extend" "yes")
157       (eq_attr "update" "yes")
158       (eq_attr "cpu" "power10"))
159  "DU_C3_power10,LSU_power10+VSU_power10")
160
; FP loads are split on the "size" attribute ("64" here, "32" further down).
161(define_insn_reservation "power10-fpload-double" 4
162  (and (eq_attr "type" "fpload")
163       (eq_attr "update" "no")
164       (eq_attr "size" "64")
165       (eq_attr "cpu" "power10"))
166  "DU_slice_3_power10,LSU_power10")
167
168(define_insn_reservation "power10-fpload-update-double" 4
169  (and (eq_attr "type" "fpload")
170       (eq_attr "update" "yes")
171       (eq_attr "size" "64")
172       (eq_attr "cpu" "power10"))
173  "DU_C2_3_power10,LSU_power10+VSU_power10")
174
175; SFmode loads are cracked and have additional 2 cycles over DFmode
176(define_insn_reservation "power10-fpload-single" 6
177  (and (eq_attr "type" "fpload")
178       (eq_attr "update" "no")
179       (eq_attr "size" "32")
180       (eq_attr "cpu" "power10"))
181  "DU_C2_3_power10,LSU_power10")
182
183(define_insn_reservation "power10-fpload-update-single" 6
184  (and (eq_attr "type" "fpload")
185       (eq_attr "update" "yes")
186       (eq_attr "size" "32")
187       (eq_attr "cpu" "power10"))
188  "DU_C3_power10,LSU_power10+VSU_power10")
189
; Vector load: any dispatch slot, then a pair of lsu units.
190(define_insn_reservation "power10-vecload" 5
191  (and (eq_attr "type" "vecload")
192       (eq_attr "cpu" "power10"))
193  "DU_any_power10,LSU_pair_power10")
194
195; Store data can issue 2 cycles after AGEN issue, 3 cycles for vector store
; Non-update stores are given a default latency of 0.
196(define_insn_reservation "power10-store" 0
197  (and (eq_attr "type" "store")
198       (eq_attr "update" "no")
199       (eq_attr "indexed" "no")
200       (eq_attr "cpu" "power10"))
201  "DU_slice_3_power10,LSU_power10")
202
; Indexed form: same latency and reservation as the non-indexed store above.
203(define_insn_reservation "power10-store-indexed" 0
204  (and (eq_attr "type" "store")
205       (eq_attr "update" "no")
206       (eq_attr "indexed" "yes")
207       (eq_attr "cpu" "power10"))
208  "DU_slice_3_power10,LSU_power10")
209
210; Update forms have 2 cycle latency for updated addr reg
; They use the 2-way cracked plus 3rd slot dispatch and reserve a vsu together
; with the lsu.
211(define_insn_reservation "power10-store-update" 2
212  (and (eq_attr "type" "store")
213       (eq_attr "update" "yes")
214       (eq_attr "indexed" "no")
215       (eq_attr "cpu" "power10"))
216  "DU_C2_3_power10,LSU_power10+VSU_power10")
217
218; Update forms have 2 cycle latency for updated addr reg
219(define_insn_reservation "power10-store-update-indexed" 2
220  (and (eq_attr "type" "store")
221       (eq_attr "update" "yes")
222       (eq_attr "indexed" "yes")
223       (eq_attr "cpu" "power10"))
224  "DU_C2_3_power10,LSU_power10+VSU_power10")
225
; FP stores are not split on the "indexed" attribute.
226(define_insn_reservation "power10-fpstore" 0
227  (and (eq_attr "type" "fpstore")
228       (eq_attr "update" "no")
229       (eq_attr "cpu" "power10"))
230  "DU_slice_3_power10,LSU_power10")
231
232; Update forms have 2 cycle latency for updated addr reg
233(define_insn_reservation "power10-fpstore-update" 2
234  (and (eq_attr "type" "fpstore")
235       (eq_attr "update" "yes")
236       (eq_attr "cpu" "power10"))
237  "DU_C2_3_power10,LSU_power10+VSU_power10")
238
; Vector store: superslice dispatch, then a pair of lsu units.
239(define_insn_reservation "power10-vecstore" 0
240  (and (eq_attr "type" "vecstore")
241       (eq_attr "cpu" "power10"))
242  "DU_super_power10,LSU_pair_power10")
243
; load_l insns use the same latency and reservation as a plain integer load.
244(define_insn_reservation "power10-larx" 4
245  (and (eq_attr "type" "load_l")
246       (eq_attr "cpu" "power10"))
247  "DU_any_power10,LSU_power10")
248
; store_c insns use the same latency and reservation as the update-form stores.
249(define_insn_reservation "power10-stcx" 2
250  (and (eq_attr "type" "store_c")
251       (eq_attr "cpu" "power10"))
252  "DU_C2_3_power10,LSU_power10+VSU_power10")
253
; sync and isync are modelled together: any dispatch slot, then one lsu.
254(define_insn_reservation "power10-sync" 4
255  (and (eq_attr "type" "sync,isync")
256       (eq_attr "cpu" "power10"))
257  "DU_any_power10,LSU_power10")
258
259
260; VSU Execution Unit
261
262; Fixed point ops
; Each define_bypass in this group replaces the producer's default latency
; with the given value when the consumer is one of the listed CR-reading
; reservations (power10-crlogical, power10-mfcr, power10-mfcrf).
263
264; Most ALU insns are simple 2 cycle, including record form
265(define_insn_reservation "power10-alu" 2
266  (and (eq_attr "type" "add,exts,integer,logical,isel")
267       (eq_attr "cpu" "power10"))
268  "DU_any_power10,VSU_power10")
269; 5 cycle CR latency
270(define_bypass 5 "power10-alu"
271		 "power10-crlogical,power10-mfcr,power10-mfcrf")
272
273; Rotate/shift prevent use of third slot
; Selected by the "dot" attribute being "no"; no CR bypass is defined for it.
274(define_insn_reservation "power10-rot" 2
275  (and (eq_attr "type" "insert,shift")
276       (eq_attr "dot" "no")
277       (eq_attr "cpu" "power10"))
278  "DU_slice_3_power10,VSU_power10")
279
280; Record form rotate/shift are cracked
281(define_insn_reservation "power10-cracked-alu" 2
282  (and (eq_attr "type" "insert,shift")
283       (eq_attr "dot" "yes")
284       (eq_attr "cpu" "power10"))
285  "DU_C2_3_power10,VSU_power10")
286; 7 cycle CR latency
287(define_bypass 7 "power10-cracked-alu"
288		 "power10-crlogical,power10-mfcr,power10-mfcrf")
289
; cntlz, popcnt and trap: same reservation as power10-alu, latency 3.
290(define_insn_reservation "power10-alu2" 3
291  (and (eq_attr "type" "cntlz,popcnt,trap")
292       (eq_attr "cpu" "power10"))
293  "DU_any_power10,VSU_power10")
294; 6 cycle CR latency
295(define_bypass 6 "power10-alu2"
296		 "power10-crlogical,power10-mfcr,power10-mfcrf")
297
; Compares have latency 2 and no CR bypass is defined for them here.
298(define_insn_reservation "power10-cmp" 2
299  (and (eq_attr "type" "cmp")
300       (eq_attr "cpu" "power10"))
301  "DU_any_power10,VSU_power10")
302
303
304; Treat 'two' and 'three' types as 2 or 3 way cracked
; Latencies are 4 and 6 respectively; both execute on a single vsu.
305(define_insn_reservation "power10-two" 4
306  (and (eq_attr "type" "two")
307       (eq_attr "cpu" "power10"))
308  "DU_C2_power10,VSU_power10")
309
310(define_insn_reservation "power10-three" 6
311  (and (eq_attr "type" "three")
312       (eq_attr "cpu" "power10"))
313  "DU_C3_power10,VSU_power10")
314
; Multiplies are split on the "dot" attribute; only the dispatch reservation
; differs between the two forms.
315(define_insn_reservation "power10-mul" 5
316  (and (eq_attr "type" "mul")
317       (eq_attr "dot" "no")
318       (eq_attr "cpu" "power10"))
319  "DU_slice_3_power10,VSU_power10")
320
321(define_insn_reservation "power10-mul-compare" 5
322  (and (eq_attr "type" "mul")
323       (eq_attr "dot" "yes")
324       (eq_attr "cpu" "power10"))
325  "DU_C2_3_power10,VSU_power10")
326; 10 cycle CR latency
327(define_bypass 10 "power10-mul-compare"
328		 "power10-crlogical,power10-mfcr,power10-mfcrf")
329
330; Fixed point divides reserve the divide units for a minimum of 8 cycles
; The 32-bit and 64-bit forms differ only in latency (16 vs 24); each holds
; one of the two fx_div units for 8 cycles after an 'even' dispatch.
331(define_insn_reservation "power10-idiv" 16
332  (and (eq_attr "type" "div")
333       (eq_attr "size" "32")
334       (eq_attr "cpu" "power10"))
335  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
336
337(define_insn_reservation "power10-ldiv" 24
338  (and (eq_attr "type" "div")
339       (eq_attr "size" "64")
340       (eq_attr "cpu" "power10"))
341  "DU_even_power10,fx_div0_power10*8|fx_div1_power10*8")
342
; CR insns.  power10-crlogical, power10-mfcr and power10-mfcrf are the
; consumer names listed in the define_bypass forms of this file.
343(define_insn_reservation "power10-crlogical" 2
344  (and (eq_attr "type" "cr_logical")
345       (eq_attr "cpu" "power10"))
346  "DU_any_power10,VSU_power10")
347
348(define_insn_reservation "power10-mfcrf" 2
349  (and (eq_attr "type" "mfcrf")
350       (eq_attr "cpu" "power10"))
351  "DU_any_power10,VSU_power10")
352
; mfcr uses the 3-way cracked dispatch and latency 6, unlike mfcrf above.
353(define_insn_reservation "power10-mfcr" 6
354  (and (eq_attr "type" "mfcr")
355       (eq_attr "cpu" "power10"))
356  "DU_C3_power10,VSU_power10")
357
358; Should differentiate between 1 cr field and > 1 since target of > 1 cr
359; is cracked
360(define_insn_reservation "power10-mtcr" 2
361  (and (eq_attr "type" "mtcr")
362       (eq_attr "cpu" "power10"))
363  "DU_any_power10,VSU_power10")
364
365; Move to LR/CTR are executed in VSU
366(define_insn_reservation "power10-mtjmpr" 5
367  (and (eq_attr "type" "mtjmpr")
368       (eq_attr "cpu" "power10"))
369  "DU_any_power10,VSU_power10")
370
371; Floating point/Vector ops
; The scalar FP reservations in this group all dispatch with
; DU_slice_3_power10 and then take one vsu.
372(define_insn_reservation "power10-fpsimple" 2
373  (and (eq_attr "type" "fpsimple")
374       (eq_attr "cpu" "power10"))
375  "DU_slice_3_power10,VSU_power10")
376
; Types fp and dmul share one reservation with latency 5.
377(define_insn_reservation "power10-fp" 5
378  (and (eq_attr "type" "fp,dmul")
379       (eq_attr "cpu" "power10"))
380  "DU_slice_3_power10,VSU_power10")
381
382(define_insn_reservation "power10-fpcompare" 3
383  (and (eq_attr "type" "fpcompare")
384       (eq_attr "cpu" "power10"))
385  "DU_slice_3_power10,VSU_power10")
386
387; FP div/sqrt are executed in the VSU slices.  They are not pipelined wrt other
388; div/sqrt insns, but for the most part do not block pipelined ops.
; Three stages: dispatch, one cycle on a vsu, then FP_DIV_power10.  The four
; reservations differ only in matched type and latency (22, 27, 26, 36).
389(define_insn_reservation "power10-sdiv" 22
390  (and (eq_attr "type" "sdiv")
391       (eq_attr "cpu" "power10"))
392  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
393
394(define_insn_reservation "power10-ddiv" 27
395  (and (eq_attr "type" "ddiv")
396       (eq_attr "cpu" "power10"))
397  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
398
; Matches type "ssqrt", although the reservation name has no leading 's'.
399(define_insn_reservation "power10-sqrt" 26
400  (and (eq_attr "type" "ssqrt")
401       (eq_attr "cpu" "power10"))
402  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
403
404(define_insn_reservation "power10-dsqrt" 36
405  (and (eq_attr "type" "dsqrt")
406       (eq_attr "cpu" "power10"))
407  "DU_slice_3_power10,VSU_power10,FP_DIV_power10")
408
; Vector ops dispatch with DU_super_power10.  Most then reserve a pair of vsu
; units; the exceptions below use the permute, dfu or fp_div units instead of
; or in addition to that pair.
409(define_insn_reservation "power10-vec-2cyc" 2
410  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
411       (eq_attr "cpu" "power10"))
412  "DU_super_power10,VSU_super_power10")
413
414(define_insn_reservation "power10-veccmp" 3
415  (and (eq_attr "type" "veccmp")
416       (eq_attr "cpu" "power10"))
417  "DU_super_power10,VSU_super_power10")
418
419(define_insn_reservation "power10-vecsimple" 3
420  (and (eq_attr "type" "vecsimple")
421       (eq_attr "cpu" "power10"))
422  "DU_super_power10,VSU_super_power10")
423
; vecfloat/vecdouble are split on "size": anything other than "128" is
; handled here, while size "128" is handled by power10-qp below.
424(define_insn_reservation "power10-vecnormal" 7
425  (and (eq_attr "type" "vecfloat,vecdouble")
426       (eq_attr "size" "!128")
427       (eq_attr "cpu" "power10"))
428  "DU_super_power10,VSU_super_power10")
429
430; Quad-precision FP ops, execute in DFU
431(define_insn_reservation "power10-qp" 12
432  (and (eq_attr "type" "vecfloat,vecdouble")
433       (eq_attr "size" "128")
434       (eq_attr "cpu" "power10"))
435  "DU_super_power10,dfu_power10")
436
; Permutes use one of the two prm units rather than a vsu pair.
437(define_insn_reservation "power10-vecperm" 3
438  (and (eq_attr "type" "vecperm")
439       (eq_attr "cpu" "power10"))
440  "DU_super_power10,VSU_PRM_power10")
441
442(define_insn_reservation "power10-veccomplex" 7
443  (and (eq_attr "type" "veccomplex")
444       (eq_attr "cpu" "power10"))
445  "DU_super_power10,VSU_super_power10")
446
; Vector divides add a third stage that holds a pair of fp_div units.
447(define_insn_reservation "power10-vecfdiv" 24
448  (and (eq_attr "type" "vecfdiv")
449       (eq_attr "cpu" "power10"))
450  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")
451
452(define_insn_reservation "power10-vecdiv" 27
453  (and (eq_attr "type" "vecdiv")
454       (eq_attr "size" "!128")
455       (eq_attr "cpu" "power10"))
456  "DU_super_power10,VSU_super_power10,VEC_DIV_power10")
457
458; Use 8 for DFU reservation on QP div/mul to limit DFA state size
; Default latencies are 56 (div) and 24 (mul); the dfu is held for 8 cycles.
459(define_insn_reservation "power10-qpdiv" 56
460  (and (eq_attr "type" "vecdiv")
461       (eq_attr "size" "128")
462       (eq_attr "cpu" "power10"))
463  "DU_super_power10,dfu_power10*8")
464
465(define_insn_reservation "power10-qpmul" 24
466  (and (eq_attr "type" "qmul")
467       (eq_attr "size" "128")
468       (eq_attr "cpu" "power10"))
469  "DU_super_power10,dfu_power10*8")
470
; Types mffgpr and mftgpr get identical treatment: latency 2, slice plus 3rd
; slot dispatch, then one vsu.
471(define_insn_reservation "power10-mffgpr" 2
472  (and (eq_attr "type" "mffgpr")
473       (eq_attr "cpu" "power10"))
474  "DU_slice_3_power10,VSU_power10")
475
476(define_insn_reservation "power10-mftgpr" 2
477  (and (eq_attr "type" "mftgpr")
478       (eq_attr "cpu" "power10"))
479  "DU_slice_3_power10,VSU_power10")
480
481
482; Branch Unit
483; Move from LR/CTR are executed in BRU but consume a writeback port from an
484; execution slice.
; Modelled as a branch-port dispatch followed by the bru and a vsu together.
485(define_insn_reservation "power10-mfjmpr" 6
486  (and (eq_attr "type" "mfjmpr")
487       (eq_attr "cpu" "power10"))
488  "DU_branch_power10,bru_power10+VSU_power10")
489
490; Branch is 2 cycles
491(define_insn_reservation "power10-branch" 2
492  (and (eq_attr "type" "jmpreg,branch")
493       (eq_attr "cpu" "power10"))
494  "DU_branch_power10,bru_power10")
495
496
497; Crypto Unit
; Superslice dispatch, then the single cryptu unit.
498(define_insn_reservation "power10-crypto" 6
499  (and (eq_attr "type" "crypto")
500       (eq_attr "cpu" "power10"))
501  "DU_super_power10,cryptu_power10")
502
503
504; HTM Unit
; No dedicated HTM unit is declared: type htm is reserved on an lsu and type
; htmsimple on a vsu.
505(define_insn_reservation "power10-htm" 4
506  (and (eq_attr "type" "htm")
507       (eq_attr "cpu" "power10"))
508  "DU_C2_power10,LSU_power10")
509
510(define_insn_reservation "power10-htm-simple" 2
511  (and (eq_attr "type" "htmsimple")
512       (eq_attr "cpu" "power10"))
513  "DU_any_power10,VSU_power10")
514
515
516; DFP Unit
; Uses the dfu unit, which the quad-precision reservations also use, for one
; cycle after an 'even' dispatch.
517(define_insn_reservation "power10-dfp" 12
518  (and (eq_attr "type" "dfp")
519       (eq_attr "cpu" "power10"))
520  "DU_even_power10,dfu_power10")
521
522