1;; Cavium ThunderX 3 CN11xx pipeline description
2;; Copyright (C) 2020 Free Software Foundation, Inc.
3;;
4;; Contributed by Marvell
5
6;; This file is part of GCC.
7
8;; GCC is free software; you can redistribute it and/or modify
9;; it under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 3, or (at your option)
11;; any later version.
12
13;; GCC is distributed in the hope that it will be useful,
14;; but WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16;; GNU General Public License for more details.
17
18;; You should have received a copy of the GNU General Public License
19;; along with GCC; see the file COPYING3.  If not see
20;; <http://www.gnu.org/licenses/>.
21
; Automata for this description.  The first declaration creates three
; automata (integer pipes, Advanced SIMD/FP pipes, load/store pipes); the
; multiply pseudo-units get a fourth automaton of their own.
22(define_automaton "thunderx3t110, thunderx3t110_advsimd, thunderx3t110_ldst")
23(define_automaton "thunderx3t110_mult")
24
; Integer issue pipes I0-I3, all in the "thunderx3t110" automaton.
25(define_cpu_unit "thunderx3t110_i0" "thunderx3t110")
26(define_cpu_unit "thunderx3t110_i1" "thunderx3t110")
27(define_cpu_unit "thunderx3t110_i2" "thunderx3t110")
28(define_cpu_unit "thunderx3t110_i3" "thunderx3t110")
29
; Load/store pipes LS0 and LS1, plus the SD unit (presumably "store data" --
; confirm) that the store reservations claim after an LS pipe.
30(define_cpu_unit "thunderx3t110_ls0" "thunderx3t110_ldst")
31(define_cpu_unit "thunderx3t110_ls1" "thunderx3t110_ldst")
32(define_cpu_unit "thunderx3t110_sd" "thunderx3t110_ldst")
33
34; Pseudo-units for the multiply pipeline.
35; Unchanged from TX2: occupies I1 for four (1 + 3 additional) slots.
; The multiply reservations step through m1, m2, m3 on the three cycles
; that follow the issue cycle.
36
37(define_cpu_unit "thunderx3t110_i1m1" "thunderx3t110_mult")
38(define_cpu_unit "thunderx3t110_i1m2" "thunderx3t110_mult")
39(define_cpu_unit "thunderx3t110_i1m3" "thunderx3t110_mult")
40
41; Pseudo-units for load delay (assuming dcache hit).
; One chain of three delay stages per load/store pipe; they are consumed by
; the thunderx3t110_l0delay and thunderx3t110_l1delay reservations.
42
43(define_cpu_unit "thunderx3t110_ls0d1" "thunderx3t110_ldst")
44(define_cpu_unit "thunderx3t110_ls0d2" "thunderx3t110_ldst")
45(define_cpu_unit "thunderx3t110_ls0d3" "thunderx3t110_ldst")
46
47(define_cpu_unit "thunderx3t110_ls1d1" "thunderx3t110_ldst")
48(define_cpu_unit "thunderx3t110_ls1d2" "thunderx3t110_ldst")
49(define_cpu_unit "thunderx3t110_ls1d3" "thunderx3t110_ldst")
50
51; FP/Advanced SIMD units F0-F3, all in the thunderx3t110_advsimd automaton.
52(define_cpu_unit "thunderx3t110_f0" "thunderx3t110_advsimd")
53(define_cpu_unit "thunderx3t110_f1" "thunderx3t110_advsimd")
54(define_cpu_unit "thunderx3t110_f2" "thunderx3t110_advsimd")
55(define_cpu_unit "thunderx3t110_f3" "thunderx3t110_advsimd")
56
; Shorthand reservations.  Each name stands for "any one of the listed
; units" ('|' separates alternatives); the digits in the name say which
; pipes are eligible.
57(define_reservation "thunderx3t110_i23" "thunderx3t110_i2|thunderx3t110_i3")
58(define_reservation "thunderx3t110_i01"
59    "thunderx3t110_i0|thunderx3t110_i1")
60(define_reservation "thunderx3t110_i012"
61    "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2")
62(define_reservation "thunderx3t110_i0123"
63    "thunderx3t110_i0|thunderx3t110_i1|thunderx3t110_i2|thunderx3t110_i3")
64(define_reservation "thunderx3t110_ls01" "thunderx3t110_ls0|thunderx3t110_ls1")
65(define_reservation "thunderx3t110_f01" "thunderx3t110_f0|thunderx3t110_f1")
66(define_reservation "thunderx3t110_f23" "thunderx3t110_f2|thunderx3t110_f3")
67(define_reservation "thunderx3t110_f0123"
68    "thunderx3t110_f0|thunderx3t110_f1|thunderx3t110_f2|thunderx3t110_f3")
69
70; A load with delay in the ls0/ls1 pipes.
71; This is always a delay of four: the LS pipe, then its three delay stages.
72(define_reservation "thunderx3t110_l0delay"
73    "thunderx3t110_ls0,thunderx3t110_ls0d1,thunderx3t110_ls0d2,\
74     thunderx3t110_ls0d3")
75(define_reservation "thunderx3t110_l1delay"
76    "thunderx3t110_ls1,thunderx3t110_ls1d1,thunderx3t110_ls1d2,\
77     thunderx3t110_ls1d3")
; Either of the two chains above.
78(define_reservation "thunderx3t110_l01delay"
79    "thunderx3t110_l0delay|thunderx3t110_l1delay")
80;; Branch and call instructions.
81
; Calls, branches and traps: latency 1, issued on I2 or I3.
82(define_insn_reservation "thunderx3t110_branch" 1
83  (and (eq_attr "tune" "thunderx3t110")
84       (eq_attr "type" "call,branch,trap"))
85  "thunderx3t110_i23")
86
87;; Misc instructions.
88
89; Speculation barrier (type "block"): latency 0, no unit reserved.
90(define_insn_reservation "thunderx3t110_nothing" 0
91  (and (eq_attr "tune" "thunderx3t110")
92       (eq_attr "type" "block"))
93  "nothing")
94
; MRS: latency 0, but it still occupies I2 for the issue cycle.
95(define_insn_reservation "thunderx3t110_mrs" 0
96  (and (eq_attr "tune" "thunderx3t110")
97       (eq_attr "type" "mrs"))
98  "thunderx3t110_i2")
99
; Insns of type "multiple": latency 1, claiming all of the listed units in
; the same cycle ('+' means "together").
; NOTE(review): I2, F2 and F3 are not in the list -- confirm whether that
; is intentional for TX3 or a leftover from a description with fewer pipes.
100(define_insn_reservation "thunderx3t110_multiple" 1
101  (and (eq_attr "tune" "thunderx3t110")
102       (eq_attr "type" "multiple"))
103  "thunderx3t110_i0+thunderx3t110_i1+thunderx3t110_i3+thunderx3t110_ls0+\
104   thunderx3t110_ls1+thunderx3t110_sd+thunderx3t110_i1m1+thunderx3t110_i1m2+\
105   thunderx3t110_i1m3+thunderx3t110_f0+thunderx3t110_f1")
106
107;; Integer arithmetic/logic instructions.
108
109; Plain register moves are handled by renaming and don't create any
110; uops, so they have latency 0 and reserve no unit.
111(define_insn_reservation "thunderx3t110_regmove" 0
112  (and (eq_attr "tune" "thunderx3t110")
113       (eq_attr "type" "mov_reg"))
114  "nothing")
115
; Basic ALU, logic, conditional-select, move-immediate, shift, bitfield and
; extend operations: latency 1 on any of I0-I3.
116(define_insn_reservation "thunderx3t110_alu_basic" 1
117  (and (eq_attr "tune" "thunderx3t110")
118       (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
119			adc_reg,adc_imm,adcs_reg,adcs_imm,\
120			logic_reg,logic_imm,logics_reg,logics_imm,\
121			csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
122			bfx,rbit,rev,extend,rotate_imm"))
123  "thunderx3t110_i0123")
124
125; distinguish between latency 1|2 and throughput 1/4|2/4?
126; is it actually 1,1/2,{i0,i1} vs 2,1/4,{i0,i1,i2,i3}
; ALU/logic operations with a shifted or extended operand: latency 2 on any
; of I0-I3.
127(define_insn_reservation "thunderx3t110_alu_shift" 2
128  (and (eq_attr "tune" "thunderx3t110")
129       (eq_attr "type" "alu_shift_imm,alu_ext,\
130			alus_shift_imm,alus_ext,\
131			logic_shift_imm,logics_shift_imm"))
132  "thunderx3t110_i0123")
133
; Latency-1 variant restricted to I0/I1.
; NOTE(review): the tune/type condition is identical to
; thunderx3t110_alu_shift above; confirm which of the two the generated
; scheduler actually selects for these insns.
134(define_insn_reservation "thunderx3t110_alu_shift1" 1
135  (and (eq_attr "tune" "thunderx3t110")
136       (eq_attr "type" "alu_shift_imm,alu_ext,\
137			alus_shift_imm,alus_ext,\
138			logic_shift_imm,logics_shift_imm"))
139  "thunderx3t110_i01")
140
141; We are going for the optimistic answer (13)
142; for now; the worst case is 23.
; Integer divide holds I1 for three consecutive cycles.
143(define_insn_reservation "thunderx3t110_div" 13
144  (and (eq_attr "tune" "thunderx3t110")
145       (eq_attr "type" "sdiv,udiv"))
146  "thunderx3t110_i1*3")
147
; Multiply-accumulate: latency 5.  Issues on any integer pipe, walks the
; three multiply pseudo-units on the following cycles, then takes one of
; I0/I1/I2 (presumably for the accumulate step -- confirm).
148(define_insn_reservation "thunderx3t110_madd" 5
149  (and (eq_attr "tune" "thunderx3t110")
150       (eq_attr "type" "mla,smlal,umlal"))
151  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3,\
152   thunderx3t110_i012")
153
154; NOTE: smull, umull are used for "high part" multiplies too.
155; mul is an alias for MADD.
156; smulh, umulh (4,1) ought to be distinguished from the
157; others (5,1), but there is no separate type for them, so we go for
158; the conservative approach of (5,1) for now.
159; smulh, umulh only run on I1.
160(define_insn_reservation "thunderx3t110_mul" 5
161  (and (eq_attr "tune" "thunderx3t110")
162       (eq_attr "type" "mul,smull,umull"))
163  "thunderx3t110_i0123,thunderx3t110_i1m1,thunderx3t110_i1m2,thunderx3t110_i1m3")
164
; Count leading zeros: latency 3, I1 only.
165(define_insn_reservation "thunderx3t110_countbits" 3
166  (and (eq_attr "tune" "thunderx3t110")
167       (eq_attr "type" "clz"))
168  "thunderx3t110_i1")
169
170;; Integer loads and stores.
171
172; load_4 matches prefetch, a multitude of move/str/dup variants,
173; sign extend.
; Modelled as latency 4 on either load/store pipe.
174(define_insn_reservation "thunderx3t110_load_basic" 4
175  (and (eq_attr "tune" "thunderx3t110")
176       (eq_attr "type" "load_4"))
177  "thunderx3t110_ls01")
178
179; TODO: model use of I0/I1/I2 for index versions only, model 4|8 2nd on load
; Current model: latency 5; one of I0/I1/I2 in the issue cycle, then an LS
; pipe in the next cycle.
180(define_insn_reservation "thunderx3t110_loadpair" 5
181  (and (eq_attr "tune" "thunderx3t110")
182       (eq_attr "type" "load_8,load_16"))
183  "thunderx3t110_i012,thunderx3t110_ls01")
184
; Single-register store: latency 1; an LS pipe, then the SD unit in the
; following cycle.
185(define_insn_reservation "thunderx3t110_store_basic" 1
186  (and (eq_attr "tune" "thunderx3t110")
187       (eq_attr "type" "store_4"))
188  "thunderx3t110_ls01,thunderx3t110_sd")
189
190; TODO: model use of I0/I1/I2/I3 for index versions, model differing
191; throughputs.
; Current model is the same as thunderx3t110_store_basic.
192(define_insn_reservation "thunderx3t110_storepair_basic" 1
193  (and (eq_attr "tune" "thunderx3t110")
194       (eq_attr "type" "store_8,store_16"))
195  "thunderx3t110_ls01,thunderx3t110_sd")
196
197;; FP data processing instructions.
198
; Scalar FP "ffarith" and min/max types: latency 5 on any FP pipe.
199(define_insn_reservation "thunderx3t110_fp_simple" 5
200  (and (eq_attr "tune" "thunderx3t110")
201       (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
202  "thunderx3t110_f0123")
203
204; distinguish latency 3/4 throughput 1/2|1/4
; Scalar FP add/sub, fast case: latency 3 on F2/F3.
205(define_insn_reservation "thunderx3t110_fp_addsub3" 3
206  (and (eq_attr "tune" "thunderx3t110")
207       (eq_attr "type" "fadds,faddd"))
208  "thunderx3t110_f23")
; Scalar FP add/sub, slow case: latency 4 on any FP pipe.
; NOTE(review): same tune/type condition as thunderx3t110_fp_addsub3;
; confirm which of the two the generated scheduler selects.
209(define_insn_reservation "thunderx3t110_fp_addsub4" 4
210  (and (eq_attr "tune" "thunderx3t110")
211       (eq_attr "type" "fadds,faddd"))
212  "thunderx3t110_f0123")
213
; Scalar FP compare and conditional compare: latency 4 on any FP pipe.
214(define_insn_reservation "thunderx3t110_fp_cmp" 4
215  (and (eq_attr "tune" "thunderx3t110")
216       (eq_attr "type" "fcmps,fcmpd,fccmps,fccmpd"))
217  "thunderx3t110_f0123")
218
219; need to split out latency 23 throughput 23/4: F64 from
220; latency 16 throughput  16/4: FDIV F32
; Single precision divide/sqrt: latency 16, holding one FP pipe for three
; consecutive cycles.
221(define_insn_reservation "thunderx3t110_fp_divsqrt_s" 16
222  (and (eq_attr "tune" "thunderx3t110")
223       (eq_attr "type" "fdivs,fsqrts"))
224  "thunderx3t110_f0*3|thunderx3t110_f1*3|\
225   thunderx3t110_f2*3|thunderx3t110_f3*3")
226
; Double precision divide/sqrt: latency 23, holding one FP pipe for five
; consecutive cycles.
227(define_insn_reservation "thunderx3t110_fp_divsqrt_d" 23
228  (and (eq_attr "tune" "thunderx3t110")
229       (eq_attr "type" "fdivd,fsqrtd"))
230  "thunderx3t110_f0*5|thunderx3t110_f1*5|\
231   thunderx3t110_f2*5|thunderx3t110_f3*5")
232
; Scalar FP multiply and multiply-accumulate: latency 5, F0/F1 only.
233(define_insn_reservation "thunderx3t110_fp_mul_mac" 5
234  (and (eq_attr "tune" "thunderx3t110")
235       (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
236  "thunderx3t110_f01")
237
; Scalar FP round-to-integral: latency 5 on any FP pipe.
238(define_insn_reservation "thunderx3t110_frint" 5
239  (and (eq_attr "tune" "thunderx3t110")
240       (eq_attr "type" "f_rints,f_rintd"))
241  "thunderx3t110_f0123")
242
243; mimic latency 3|4 throughput 1/2|1/4
; FP conditional select, fast case: latency 3 on F2/F3.
244(define_insn_reservation "thunderx3t110_fcsel3" 3
245  (and (eq_attr "tune" "thunderx3t110")
246       (eq_attr "type" "fcsel"))
247  "thunderx3t110_f23")
248
; FP conditional select, slow case: latency 4 on any FP pipe.
; NOTE(review): same tune/type condition as thunderx3t110_fcsel3; confirm
; which of the two the generated scheduler selects.
249(define_insn_reservation "thunderx3t110_fcsel4" 4
250  (and (eq_attr "tune" "thunderx3t110")
251       (eq_attr "type" "fcsel"))
252  "thunderx3t110_f0123")
253
254;; FP miscellaneous instructions.
255
; Scalar FP conversions (FP<->integer and FP<->FP): latency 5 on any FP pipe.
256(define_insn_reservation "thunderx3t110_fp_cvt" 5
257  (and (eq_attr "tune" "thunderx3t110")
258       (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
259  "thunderx3t110_f0123")
260
261; Even though f_mrc ought to belong to fp_mov_to_gen,
262; we retain it here for the sake of legacy, as codegen
263; doesn't use it anyway.
; FP constant loads and moves, fast case: latency 3 on F2/F3.
264(define_insn_reservation "thunderx3t110_fp_mov3" 3
265  (and (eq_attr "tune" "thunderx3t110")
266       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
267  "thunderx3t110_f23")
268
; FP constant loads and moves, slow case: latency 4 on any FP pipe.
; NOTE(review): same tune/type condition as thunderx3t110_fp_mov3; confirm
; which of the two the generated scheduler selects.
269(define_insn_reservation "thunderx3t110_fp_mov" 4
270  (and (eq_attr "tune" "thunderx3t110")
271       (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
272  "thunderx3t110_f0123")
273
; Insns of type f_mcr: latency 4 on any FP pipe.
274(define_insn_reservation "thunderx3t110_fp_mov_to_gen" 4
275  (and (eq_attr "tune" "thunderx3t110")
276       (eq_attr "type" "f_mcr"))
277  "thunderx3t110_f0123")
278
279;; FP loads and stores.
280;  model use of I0/I1/I2 for post/pre index modes
281
; Scalar FP loads: latency 4 on either load/store pipe.
282(define_insn_reservation "thunderx3t110_fp_load_basic" 4
283  (and (eq_attr "tune" "thunderx3t110")
284       (eq_attr "type" "f_loads,f_loadd"))
285  "thunderx3t110_ls01")
286
287; model throughput 1
; Scalar FP stores: latency 1; an LS pipe, then the SD unit in the next cycle.
288(define_insn_reservation "thunderx3t110_fp_store_basic" 1
289  (and (eq_attr "tune" "thunderx3t110")
290       (eq_attr "type" "f_stores,f_stored"))
291  "thunderx3t110_ls01,thunderx3t110_sd")
292
293;; ASIMD integer instructions.
294
; ASIMD integer arithmetic, min/max, multiply, multiply-accumulate and shift
; types (plain, saturating, long/widening and narrowing forms): all modelled
; as latency 5 on any FP pipe.
295(define_insn_reservation "thunderx3t110_asimd_int" 5
296  (and (eq_attr "tune" "thunderx3t110")
297       (eq_attr "type" "neon_abd,neon_abd_q,\
298			neon_arith_acc,neon_arith_acc_q,\
299			neon_abs,neon_abs_q,\
300			neon_add,neon_add_q,\
301			neon_sub,neon_sub_q,\
302			neon_neg,neon_neg_q,\
303			neon_add_long,neon_add_widen,\
304			neon_add_halve,neon_add_halve_q,\
305			neon_sub_long,neon_sub_widen,\
306			neon_sub_halve,neon_sub_halve_q,\
307			neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
308			neon_qabs,neon_qabs_q,\
309			neon_qadd,neon_qadd_q,\
310			neon_qneg,neon_qneg_q,\
311			neon_qsub,neon_qsub_q,\
312			neon_minmax,neon_minmax_q,\
313			neon_reduc_minmax,neon_reduc_minmax_q,\
314			neon_mul_b,neon_mul_h,neon_mul_s,\
315			neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
316			neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
317			neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
318			neon_mla_b,neon_mla_h,neon_mla_s,\
319			neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
320			neon_mul_b_long,neon_mul_h_long,\
321			neon_mul_s_long,neon_mul_d_long,\
322			neon_sat_mul_b_long,neon_sat_mul_h_long,\
323			neon_sat_mul_s_long,\
324			neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
325			neon_sat_mla_b_long,neon_sat_mla_h_long,\
326			neon_sat_mla_s_long,\
327			neon_shift_acc,neon_shift_acc_q,\
328			neon_shift_imm,neon_shift_imm_q,\
329			neon_shift_reg,neon_shift_reg_q,\
330			neon_shift_imm_long,neon_shift_imm_narrow_q,\
331			neon_sat_shift_imm,neon_sat_shift_imm_q,\
332			neon_sat_shift_reg,neon_sat_shift_reg_q,\
333			neon_sat_shift_imm_narrow_q"))
334  "thunderx3t110_f0123")
335
336; neon_reduc_add is used for both addp and [su]adalp.
; Latency 5, F0/F1 only.
337(define_insn_reservation "thunderx3t110_asimd_reduc_add" 5
338  (and (eq_attr "tune" "thunderx3t110")
339       (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
340  "thunderx3t110_f01")
341
; ASIMD integer compare and bit-test types: latency 5 on any FP pipe.
342(define_insn_reservation "thunderx3t110_asimd_cmp" 5
343  (and (eq_attr "tune" "thunderx3t110")
344       (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
345			neon_tst,neon_tst_q"))
346  "thunderx3t110_f0123")
347
348; neon_logic used in ldr, str, mov, umov, fmov, mov; orn; bic; and,
349;   simd mov immediate; orr, simd mov immediate; eor; not (mvn)
350; latency 4 throughput 1/2 LS0/LS1: ldr
351; latency 1 throughput 1 LS0/LS1,SDI,I0/I1/I2: str
352; latency 3|4 throughput 1/2|1/4 F2/F3 F0/F1/F2/F3: fmov immed, orn,
353;   bic, and, orr, eor, not (mvn)
354; latency 4 throughput 1/4 F0/F1/F2/F3: fmov register, fmov gen to vec
355; latency 5 throughput 1/4 F0/F1/F2/F3: fmov vec to gen, umov, fmov
; Because one type covers all of the above, two compromise reservations
; are given: latency 4 on F2/F3 ...
356(define_insn_reservation "thunderx3t110_asimd_logic4" 4
357  (and (eq_attr "tune" "thunderx3t110")
358       (eq_attr "type" "neon_logic,neon_logic_q"))
359  "thunderx3t110_f23")
360
; ... and latency 5 on any FP pipe.
; NOTE(review): same tune/type condition as thunderx3t110_asimd_logic4;
; confirm which of the two the generated scheduler selects.
361(define_insn_reservation "thunderx3t110_asimd_logic5" 5
362  (and (eq_attr "tune" "thunderx3t110")
363       (eq_attr "type" "neon_logic,neon_logic_q"))
364  "thunderx3t110_f0123")
365
366;; ASIMD floating-point instructions.
367
368; TODO: distinguish between latency 5 throughput 1/4: fabs, fmax, fmin, fneg
369; and latency 4 throughput 1/4: fcmp.
; For now every type below, compares included, gets latency 5 on any FP pipe.
370(define_insn_reservation "thunderx3t110_asimd_fp_simple" 5
371  (and (eq_attr "tune" "thunderx3t110")
372       (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
373			neon_fp_abs_s_q,neon_fp_abs_d_q,\
374			neon_fp_compare_s,neon_fp_compare_d,\
375			neon_fp_compare_s_q,neon_fp_compare_d_q,\
376			neon_fp_minmax_s,neon_fp_minmax_d,\
377			neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
378			neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
379			neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
380			neon_fp_neg_s,neon_fp_neg_d,\
381			neon_fp_neg_s_q,neon_fp_neg_d_q"))
382  "thunderx3t110_f0123")
383
384; distinguish between latency 3 throughput 1/2,
385; latency 4 throughput 1/4
386; neon_fp_reduc_add_<stype><q> is used for both faddp and
387; vector reduction add. On TX3, faddp is 3|4 1/2|1/4 and reduction is 5 1/4
; Fast case: latency 3 on F2/F3.
388(define_insn_reservation "thunderx3t110_asimd_fp_arith3" 3
389  (and (eq_attr "tune" "thunderx3t110")
390       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
391			neon_fp_abd_s_q,neon_fp_abd_d_q,\
392			neon_fp_addsub_s,neon_fp_addsub_d,\
393			neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
394			neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
395			neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
396  "thunderx3t110_f23")
397
; Slow case: latency 4 on any FP pipe.
; NOTE(review): same tune/type condition as thunderx3t110_asimd_fp_arith3;
; confirm which of the two the generated scheduler selects.
398(define_insn_reservation "thunderx3t110_asimd_fp_arith4" 4
399  (and (eq_attr "tune" "thunderx3t110")
400       (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
401			neon_fp_abd_s_q,neon_fp_abd_d_q,\
402			neon_fp_addsub_s,neon_fp_addsub_d,\
403			neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
404			neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
405			neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q"))
406  "thunderx3t110_f0123")
407
; ASIMD FP multiply and multiply-accumulate: latency 5 on any FP pipe.
408(define_insn_reservation "thunderx3t110_asimd_fp_arith5" 5
409  (and (eq_attr "tune" "thunderx3t110")
410       (eq_attr "type" "neon_fp_mul_s,neon_fp_mul_d,\
411			neon_fp_mul_s_q,neon_fp_mul_d_q,\
412			neon_fp_mul_s_scalar_q,neon_fp_mul_d_scalar_q,\
413			neon_fp_mla_s,neon_fp_mla_d,\
414			neon_fp_mla_s_q,neon_fp_mla_d_q"))
415  "thunderx3t110_f0123")
416
417; neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q: fcvtl,fcvtl2,fcvtn,fcvtn2
418; neon_fp_to_int_s,neon_fp_to_int_d: fcvt{<frint_suffix><su>,z<su>}
419;   where frint_suffix: zpmixan, su: su (plus other sign/unsign/extract...
420; neon_fp_to_int_s_q,neon_fp_to_int_d_q: fcvtz<su> other
421; The int_to_fp* is complicated
422;   neon_int_to_fp_s,neon_int_to_fp_d: <su_optab>cvtf
423;   neon_int_to_fp_s_q,neon_int_to_fp_d_q
424; Round matches single define_insn, frint<frint_suffix>
425;   neon_fp_round_s,neon_fp_round_d,neon_fp_round_s_q,
426;   neon_fp_round_d_q: frint<frint_suffix>
427; FCVT*,VCVTAU,[SU]CVTF: latency 5 throughput 1/4
428; FRINT*: latency 5 throughput 1/4
; Hence one reservation for all of them: latency 5 on any FP pipe.
429(define_insn_reservation "thunderx3t110_asimd_fp_conv" 5
430  (and (eq_attr "tune" "thunderx3t110")
431       (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
432			neon_fp_to_int_s,neon_fp_to_int_d,\
433			neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
434			neon_int_to_fp_s,neon_int_to_fp_d,\
435			neon_int_to_fp_s_q,neon_int_to_fp_d_q,\
436			neon_fp_round_s,neon_fp_round_d,\
437			neon_fp_round_s_q,neon_fp_round_d_q"))
438  "thunderx3t110_f0123")
439
440; model that pipeline is occupied the whole time D/F32, Q/F32: 16/4
441; Q/F64: 23/4
; NOTE(review): as written, both reservations below claim one FP pipe for a
; single cycle only; the occupancy described above is not modelled (compare
; the "*3"/"*5" repeats in the scalar thunderx3t110_fp_divsqrt_* entries).
; ASIMD single precision divide: latency 16.
442(define_insn_reservation "thunderx3t110_asimd_fp_div_s" 16
443  (and (eq_attr "tune" "thunderx3t110")
444       (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
445  "thunderx3t110_f0123")
446
; ASIMD double precision divide: latency 23.
447(define_insn_reservation "thunderx3t110_asimd_fp_div_d" 23
448  (and (eq_attr "tune" "thunderx3t110")
449       (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
450  "thunderx3t110_f0123")
451
452;; ASIMD miscellaneous instructions.
453
454;  divided out:
455;  rbit,bsl,bsl_q,cls,cls_q,cnt,cnt_q,move,move_q: 3|4 1/2 | 1/4
456;  from_gp,from_gp_q : 4 | 1/4
457;  dup,dup_q,ext,ext_q,ins,ins_q,all recpe forms, rev,rev_q: 5 1/4
458;  permute,permute_q: the right model depends on what
459;  aarch64_expand_vec_perm_const does on TX3
; Fast case for the first group: latency 3 on F2/F3.
460(define_insn_reservation "thunderx3t110_asimd_misc3" 3
461  (and (eq_attr "tune" "thunderx3t110")
462       (eq_attr "type" "neon_rbit,\
463			neon_bsl,neon_bsl_q,\
464			neon_cls,neon_cls_q,\
465			neon_cnt,neon_cnt_q,\
466			neon_move,neon_move_q"))
467  "thunderx3t110_f23")
468
; Slow case for the first group, plus neon_from_gp: latency 4 on any FP pipe.
; NOTE(review): every type here except neon_from_gp/neon_from_gp_q is also
; matched by thunderx3t110_asimd_misc3; confirm which one the generated
; scheduler selects for the shared types.
469(define_insn_reservation "thunderx3t110_asimd_misc4" 4
470  (and (eq_attr "tune" "thunderx3t110")
471       (eq_attr "type" "neon_rbit,\
472			neon_bsl,neon_bsl_q,\
473			neon_cls,neon_cls_q,\
474			neon_cnt,neon_cnt_q,\
475			neon_from_gp,neon_from_gp_q,\
476			neon_move,neon_move_q"))
477  "thunderx3t110_f0123")
478
; ASIMD dup/ext/ins/rev/permute and the reciprocal estimate types
; (recpe/recpx): latency 5 on any FP pipe.
; The type list starts on the eq_attr line and uses backslash continuations
; like every other list in this file.
; NOTE(review): neon_move/neon_move_q are also listed by
; thunderx3t110_asimd_misc3 and thunderx3t110_asimd_misc4; confirm whether
; they are still wanted here.
479(define_insn_reservation "thunderx3t110_asimd_misc" 5
480  (and (eq_attr "tune" "thunderx3t110")
481       (eq_attr "type" "neon_dup,neon_dup_q,\
483			neon_ext,neon_ext_q,\
484			neon_ins,neon_ins_q,\
485			neon_move,neon_move_q,\
486			neon_fp_recpe_s,neon_fp_recpe_d,\
487			neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
488			neon_fp_recpx_s,neon_fp_recpx_d,\
489			neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
490			neon_rev,neon_rev_q,\
491			neon_permute,neon_permute_q"))
492  "thunderx3t110_f0123")
493
; ASIMD reciprocal step (recps), sqrt, and reciprocal-sqrt estimate/step
; (rsqrte/rsqrts) types: latency 5 on any FP pipe.
494(define_insn_reservation "thunderx3t110_asimd_recip_step" 5
495  (and (eq_attr "tune" "thunderx3t110")
496       (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
497			neon_fp_recps_d,neon_fp_recps_d_q,\
498			neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
499			neon_fp_sqrt_d,neon_fp_sqrt_d_q,\
500			neon_fp_rsqrte_s, neon_fp_rsqrte_s_q,\
501			neon_fp_rsqrte_d, neon_fp_rsqrte_d_q,\
502			neon_fp_rsqrts_s, neon_fp_rsqrts_s_q,\
503			neon_fp_rsqrts_d, neon_fp_rsqrts_d_q"))
504  "thunderx3t110_f0123")
505
; Table lookups, neon_tbl1 .. neon_tbl4.  Latency rises by 5 with each step
; (5, 10, 15, 20); each claims any one FP pipe for a single cycle.
506(define_insn_reservation "thunderx3t110_asimd_lut1" 5
507  (and (eq_attr "tune" "thunderx3t110")
508       (eq_attr "type" "neon_tbl1,neon_tbl1_q"))
509  "thunderx3t110_f0123")
510
511(define_insn_reservation "thunderx3t110_asimd_lut2" 10
512  (and (eq_attr "tune" "thunderx3t110")
513       (eq_attr "type" "neon_tbl2,neon_tbl2_q"))
514  "thunderx3t110_f0123")
515
516(define_insn_reservation "thunderx3t110_asimd_lut3" 15
517  (and (eq_attr "tune" "thunderx3t110")
518       (eq_attr "type" "neon_tbl3,neon_tbl3_q"))
519  "thunderx3t110_f0123")
520
521(define_insn_reservation "thunderx3t110_asimd_lut4" 20
522  (and (eq_attr "tune" "thunderx3t110")
523       (eq_attr "type" "neon_tbl4,neon_tbl4_q"))
524  "thunderx3t110_f0123")
525
; Vector element to general register (neon_to_gp): latency 5 on any FP pipe.
526(define_insn_reservation "thunderx3t110_asimd_elt_to_gr" 5
527  (and (eq_attr "tune" "thunderx3t110")
528       (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
529  "thunderx3t110_f0123")
530
531;; ASIMD load instructions.
532
533; NOTE: These reservations attempt to model latency and throughput
534; correctly, but the cycle timing of unit allocation is not
535; necessarily accurate (because insns are split into uops, and those
536; may be issued out-of-order).
537
538; The LDP/LDNP imm-offset S/D/Q supplies the first arg with latency 4
539; and the 2nd at 5 (Q form) or 8 (S/D form). Can this be modeled? These
540; forms, as documented, do not use the I0/I1/I2 units (no I3), but the
541; other LDP ones do.
; Current model: latency 5; one of I0/I1/I2, then an LS pipe next cycle.
542(define_insn_reservation "thunderx3t110_asimd_load1_ldp" 5
543  (and (eq_attr "tune" "thunderx3t110")
544       (eq_attr "type" "neon_ldp,neon_ldp_q"))
545  "thunderx3t110_i012,thunderx3t110_ls01")
546
547; Need to distinguish latency 6 throughput 2: 4 reg D/Q
548; latency 5 throughput 3/2: 3 reg D/Q
549; latency 4 throughput 1: 2 reg D/Q
550; latency 4 throughput 1/2: 1 reg D/Q
; Until then all register counts share latency 4 on either LS pipe.
551(define_insn_reservation "thunderx3t110_asimd_load1" 4
552  (and (eq_attr "tune" "thunderx3t110")
553       (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
554			neon_load1_2reg,neon_load1_2reg_q,\
555			neon_load1_3reg,neon_load1_3reg_q,\
556			neon_load1_4reg,neon_load1_4reg_q"))
557  "thunderx3t110_ls01")
558
; LD1 to one lane: latency 5; the four-cycle load chain on LS0 or LS1,
; followed by any FP pipe.
559(define_insn_reservation "thunderx3t110_asimd_load1_onelane" 5
560  (and (eq_attr "tune" "thunderx3t110")
561       (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
562  "thunderx3t110_l01delay,thunderx3t110_f0123")
563
; LD1 to all lanes: same model as the one-lane form.
564(define_insn_reservation "thunderx3t110_asimd_load1_all" 5
565  (and (eq_attr "tune" "thunderx3t110")
566       (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
567  "thunderx3t110_l01delay,thunderx3t110_f0123")
568
; LD2/LD3/LD4 (multi-register, one-lane and all-lanes forms).  All three use
; the four-cycle load chain followed by any FP pipe; only the latency
; differs: 5, 7 and 8 respectively.
569(define_insn_reservation "thunderx3t110_asimd_load2" 5
570  (and (eq_attr "tune" "thunderx3t110")
571       (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
572			neon_load2_one_lane,neon_load2_one_lane_q,\
573			neon_load2_all_lanes,neon_load2_all_lanes_q"))
574  "thunderx3t110_l01delay,thunderx3t110_f0123")
575
576(define_insn_reservation "thunderx3t110_asimd_load3" 7
577  (and (eq_attr "tune" "thunderx3t110")
578       (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q,\
579			neon_load3_one_lane,neon_load3_one_lane_q,\
580			neon_load3_all_lanes,neon_load3_all_lanes_q"))
581  "thunderx3t110_l01delay,thunderx3t110_f0123")
582
583(define_insn_reservation "thunderx3t110_asimd_load4" 8
584  (and (eq_attr "tune" "thunderx3t110")
585       (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q,\
586			neon_load4_one_lane,neon_load4_one_lane_q,\
587			neon_load4_all_lanes,neon_load4_all_lanes_q"))
588  "thunderx3t110_l01delay,thunderx3t110_f0123")
589
590;; ASIMD store instructions.
591
592; Same note applies as for ASIMD load instructions.
593
594; Vector store pair.  Need to distinguish:
595; 5 throughput: imm-offset S/D; imm-postindex S/D; imm-preindex S/D
596; 2 throughput: imm-offset Q; imm-postindex Q; imm-preindex Q
597; all index modes use I0/I1/I2
; Current model: latency 1; an LS pipe, then the SD unit in the next cycle.
598(define_insn_reservation "thunderx3t110_asimd_store_stp" 1
599  (and (eq_attr "tune" "thunderx3t110")
600       (eq_attr "type" "neon_stp,neon_stp_q"))
601  "thunderx3t110_ls01,thunderx3t110_sd")
602
603; There are multiple forms of ST1
604; The following two groups, as documented, do not use the FP pipelines.
605; multiple, 1 reg, D-form     ST1
606; tx2_ltp:    x    1/2     LS0/LS1
607; tx3_ltp:    x    1/2     LS0/LS1
608; multiple, 1 reg, Q-form     ST1
609; tx2_ltp:    x    1/2     LS0/LS1
610; tx3_ltp:    x    1/2     LS0/LS1
611;
612; one lane, B/H/S         ST1
613; tx2_ltp:    x       1/2     LS0/LS1,F0/F1
614; tx3_ltp:    x       1/2     LS0/LS1,F0/F1/F2/F3
615; one lane, D             ST1
616; tx2_ltp:    x       1/2     LS0/LS1,F0/F1
617; tx3_ltp:    x       1/2     LS0/LS1,F0/F1/F2/F3
618;; Model for st1 insn needs refinement for different register forms
619; multiple, 2 reg, D-form     ST1     x    1     LS0/LS1
620; multiple, 2 reg, Q-form     ST1     x    1     LS0/LS1
621; multiple, 3 reg, D-form     ST1     x    3/2     LS0/LS1
622; multiple, 3 reg, Q-form     ST1     x    3/2     LS0/LS1
623; multiple, 4 reg, D-form     ST1     x    2     LS0/LS1
624; multiple, 4 reg, Q-form     ST1     x    2     LS0/LS1
; ST1 multiple-register forms (1 to 4 registers, D and Q): latency 1 on
; either load/store pipe; no FP pipe is reserved.  The Q variants of the
; 3- and 4-register forms are listed as well, so that every register count
; is covered in both widths, as in thunderx3t110_asimd_load1.
625(define_insn_reservation "thunderx3t110_asimd_store1" 1
626  (and (eq_attr "tune" "thunderx3t110")
627       (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q,\
628			neon_store1_2reg,neon_store1_2reg_q,\
629			neon_store1_3reg,neon_store1_3reg_q,\
			neon_store1_4reg,neon_store1_4reg_q"))
630  "thunderx3t110_ls01")
631
; ST1 from one lane: latency 1; an LS pipe, then any FP pipe next cycle.
632(define_insn_reservation "thunderx3t110_asimd_store1_onelane" 1
633  (and (eq_attr "tune" "thunderx3t110")
634       (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
635  "thunderx3t110_ls01,thunderx3t110_f0123")
636
; ST2/ST3/ST4 currently share one model: latency 1; an LS pipe, then any FP
; pipe in the next cycle.  The per-entry notes record the throughput
; differences that are not modelled yet.
637; distinguish between throughput 1: D/Q-form B/H/S, Q-form D and
638; throughput 1/2: one lane B/H/S/D
639(define_insn_reservation "thunderx3t110_asimd_store2" 1
640  (and (eq_attr "tune" "thunderx3t110")
641       (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q,\
642			neon_store2_one_lane,neon_store2_one_lane_q"))
643  "thunderx3t110_ls01,thunderx3t110_f0123")
644
645; distinguish between throughput 3: D/Q-form B/H/S, Q-form D and
646; throughput 1: one lane B/H/S/D
647(define_insn_reservation "thunderx3t110_asimd_store3" 1
648  (and (eq_attr "tune" "thunderx3t110")
649       (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q,\
650			neon_store3_one_lane,neon_store3_one_lane_q"))
651  "thunderx3t110_ls01,thunderx3t110_f0123")
652
653; distinguish between throughput 4: D/Q-form B/H/S, Q-form D and
654; throughput 1: one lane B/H/S/D? (not in doc)
655(define_insn_reservation "thunderx3t110_asimd_store4" 1
656  (and (eq_attr "tune" "thunderx3t110")
657       (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q,\
658			neon_store4_one_lane,neon_store4_one_lane_q"))
659  "thunderx3t110_ls01,thunderx3t110_f0123")
660
661;; Crypto extensions.
662
; AES encrypt round and mix-columns types: latency 4 on any FP pipe.
663(define_insn_reservation "thunderx3t110_aes" 4
664  (and (eq_attr "tune" "thunderx3t110")
665       (eq_attr "type" "crypto_aese,crypto_aesmc"))
666  "thunderx3t110_f0123")
667
; SHA1 and SHA256 types: latency 5 on any FP pipe.
668(define_insn_reservation "thunderx3t110_sha" 5
669  (and (eq_attr "tune" "thunderx3t110")
670       (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
671			crypto_sha256_fast,crypto_sha256_slow"))
672  "thunderx3t110_f0123")
673
674;; CRC extension.
675
; CRC32 instructions: latency 3, I1 only.
676(define_insn_reservation "thunderx3t110_crc" 3
677  (and (eq_attr "tune" "thunderx3t110")
678       (eq_attr "type" "crc"))
679  "thunderx3t110_i1")
680
681;; PMULL extension.
682
; Polynomial multiply long (crypto_pmull): latency 5 on any FP pipe.
683(define_insn_reservation "thunderx3t110_pmull" 5
684  (and (eq_attr "tune" "thunderx3t110")
685       (eq_attr "type" "crypto_pmull"))
686  "thunderx3t110_f0123")
687