1;; ARM Cortex-A53 pipeline description
2;; Copyright (C) 2013-2015 Free Software Foundation, Inc.
3;;
4;; Contributed by ARM Ltd.
5;;
6;; This file is part of GCC.
7;;
8;; GCC is free software; you can redistribute it and/or modify it
9;; under the terms of the GNU General Public License as published by
10;; the Free Software Foundation; either version 3, or (at your option)
11;; any later version.
12;;
13;; GCC is distributed in the hope that it will be useful, but
14;; WITHOUT ANY WARRANTY; without even the implied warranty of
15;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16;; General Public License for more details.
17;;
18;; You should have received a copy of the GNU General Public License
19;; along with GCC; see the file COPYING3.  If not see
20;; <http://www.gnu.org/licenses/>.
21
;; The automaton to which the Cortex-A53 cpu units below are assigned.
(define_automaton
  "cortex_a53")
23
24;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
25;; Functional units.
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27
;; The two main integer execution pipelines are modelled as issue
;; slot 0 and issue slot 1.

(define_cpu_unit "cortex_a53_slot0,cortex_a53_slot1" "cortex_a53")

;; "slot_any" reserves either one of the two issue slots;
;; "single_issue" reserves both slots in the same cycle.

(define_reservation "cortex_a53_slot_any"
  "cortex_a53_slot0|cortex_a53_slot1")
(define_reservation "cortex_a53_single_issue"
  "cortex_a53_slot0+cortex_a53_slot1")
36
;; Load/store pipeline: a load or store can dual-issue from either
;; issue slot, but two load/stores cannot issue in the same cycle.
;; Store pipeline: shared between both execution pipelines.

(define_cpu_unit "cortex_a53_ls,cortex_a53_store" "cortex_a53")

;; Branch pipeline.  A branch can dual-issue with another instruction,
;; except when that instruction takes multiple cycles to issue.

(define_cpu_unit
  "cortex_a53_branch" "cortex_a53")

;; Integer divider.

(define_cpu_unit
  "cortex_a53_idiv" "cortex_a53")

;; Floating-point add pipeline.  It exists in the model so that the
;; use of the add pipeline by fmac instructions can be described.

(define_cpu_unit
  "cortex_a53_fpadd_pipe" "cortex_a53")

;; Floating-point divide/square-root unit (long latency, out-of-order
;; completion).

(define_cpu_unit
  "cortex_a53_fp_div_sqrt" "cortex_a53")

;; Advanced SIMD pipelines.

(define_cpu_unit "cortex_a53_simd0,cortex_a53_simd1" "cortex_a53")
68
69;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
70;; ALU instructions.
71;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
72
;; Unshifted ALU, move and miscellaneous integer operations:
;; result latency 2, issued from either slot.
(define_insn_reservation "cortex_a53_alu" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type"
      "alu_imm,alus_imm,logic_imm,logics_imm,alu_sreg,alus_sreg,\
       logic_reg,logics_reg,adc_imm,adcs_imm,adc_reg,adcs_reg,\
       adr,bfm,csel,clz,rbit,rev,alu_dsp_reg,shift_imm,shift_reg,\
       mov_imm,mov_reg,mvn_imm,mvn_reg,mrs,multiple,no_insn"))
  "cortex_a53_slot_any")
83
;; ALU operations with a shifted or extended operand (plus crc):
;; result latency 2, issued from either slot.
(define_insn_reservation "cortex_a53_alu_shift" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type"
      "alu_shift_imm,alus_shift_imm,crc,\
       logic_shift_imm,logics_shift_imm,alu_ext,alus_ext,\
       alu_shift_reg,alus_shift_reg,logic_shift_reg,logics_shift_reg,\
       extend,mov_shift,mov_shift_reg,mvn_shift,mvn_shift_reg"))
  "cortex_a53_slot_any")
93
;; Forwarding path for unshifted operands: an ALU result reaches a
;; following ALU operation with latency 1.  For an alu_shift consumer
;; the bypass only applies when arm_no_early_alu_shift_dep accepts
;; the producer/consumer pair.

(define_bypass 1
  "cortex_a53_alu,cortex_a53_alu_shift"
  "cortex_a53_alu")

(define_bypass 1
  "cortex_a53_alu,cortex_a53_alu_shift"
  "cortex_a53_alu_shift"
  "arm_no_early_alu_shift_dep")
102
;; Multiplies (32- and 64-bit): latency 3.  The multiplier pipeline can
;; forward results, so no general bypasses are specified for it.
;; Multiplies are currently modelled as single-issue only.

(define_insn_reservation "cortex_a53_mul" 3
  (and
    (eq_attr "tune" "cortexa53")
    (ior
      (eq_attr "mul32" "yes")
      (eq_attr "mul64" "yes")))
  "cortex_a53_single_issue")

;; When a multiply with a single-register result (or an MLA) feeds the
;; accumulator of a following MLA, the result is forwarded and the two
;; instructions can issue back-to-back.

(define_bypass 1
  "cortex_a53_mul"
  "cortex_a53_mul"
  "arm_mac_accumulator_is_mul_result")
119
;; Integer divides are not modelled precisely; a sufficiently high
;; latency is used instead.  Each divide issues from slot 0 and then
;; keeps the integer divider reserved for the rest of its latency.

(define_insn_reservation "cortex_a53_udiv" 8
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "udiv"))
  "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")

(define_insn_reservation "cortex_a53_sdiv" 9
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "sdiv"))
  "(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
130
131
;; Multiply and divide results reach a following ALU operation with
;; latency 2.  For an alu_shift consumer the bypass is guarded by
;; arm_no_early_alu_shift_dep.

(define_bypass 2
  "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
  "cortex_a53_alu")

(define_bypass 2
  "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv"
  "cortex_a53_alu_shift"
  "arm_no_early_alu_shift_dep")
137
138;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
139;; Load/store instructions.
140;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
141
;; Address generation happens in the issue stage.

;; Single-register accesses: either issue slot plus the load/store unit
;; (stores also take the store unit).

(define_insn_reservation "cortex_a53_load1" 3
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "load_byte,load1,load_acq"))
  "cortex_a53_slot_any+cortex_a53_ls")

(define_insn_reservation "cortex_a53_store1" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "store1,store_rel"))
  "cortex_a53_slot_any+cortex_a53_ls+cortex_a53_store")

;; Two-register accesses: both issue slots are reserved for one cycle.

(define_insn_reservation "cortex_a53_load2" 3
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "load2"))
  "cortex_a53_single_issue+cortex_a53_ls")

(define_insn_reservation "cortex_a53_store2" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "store2"))
  "cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store")

;; Three- and four-register accesses: both issue slots are reserved
;; for two consecutive cycles.

(define_insn_reservation "cortex_a53_load3plus" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "load3,load4"))
  "(cortex_a53_single_issue+cortex_a53_ls)*2")

(define_insn_reservation "cortex_a53_store3plus" 3
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "store3,store4"))
  "(cortex_a53_single_issue+cortex_a53_ls+cortex_a53_store)*2")
173
;; Load/store addresses are required early in Issue: a load or ALU
;; result used as the address of a following load or store has
;; latency 3.  The consumer lists are glob patterns over the
;; reservation names.

(define_bypass 3
  "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,\
   cortex_a53_alu,cortex_a53_alu_shift"
  "cortex_a53_load*"
  "arm_early_load_addr_dep")

(define_bypass 3
  "cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus,\
   cortex_a53_alu,cortex_a53_alu_shift"
  "cortex_a53_store*"
  "arm_early_store_addr_dep")
181
;; Load data can be forwarded into the ALU pipeline (latency 2); for an
;; alu_shift consumer this is guarded by arm_no_early_alu_shift_dep.

(define_bypass 2
  "cortex_a53_load1,cortex_a53_load2"
  "cortex_a53_alu")

(define_bypass 2
  "cortex_a53_load1,cortex_a53_load2"
  "cortex_a53_alu_shift"
  "arm_no_early_alu_shift_dep")

;; ALU results can be forwarded to stores (latency 0), guarded by
;; arm_no_early_store_addr_dep.

(define_bypass 0
  "cortex_a53_alu,cortex_a53_alu_shift"
  "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
  "arm_no_early_store_addr_dep")

;; Multiply, divide and load results reach stores with latency 1 under
;; the same guard.

(define_bypass 1
  "cortex_a53_mul,cortex_a53_udiv,cortex_a53_sdiv,\
   cortex_a53_load1,cortex_a53_load2,cortex_a53_load3plus"
  "cortex_a53_store1,cortex_a53_store2,cortex_a53_store3plus"
  "arm_no_early_store_addr_dep")
197
198;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
199;; Branches.
200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
201
;; All branches are currently modelled as dual-issuable from either
;; execution slot, which is not true in every case.  Indirect branches
;; still need to be modelled.

(define_insn_reservation "cortex_a53_branch" 0
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "branch,call"))
  "cortex_a53_slot_any+cortex_a53_branch")
210
211;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
212;; Floating-point arithmetic.
213;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
214
;; Floating-point add, compare, convert, move, select, round and
;; min/max operations: latency 4, issued from slot 0 and occupying the
;; floating-point add pipeline.
;;
;; "fmuls" is intentionally not listed here: it is covered by the
;; cortex_a53_fpmul reservation.  Listing it in both reservations made
;; the mapping ambiguous and charged single-precision multiplies
;; against the add pipeline.

(define_insn_reservation "cortex_a53_fpalu" 4
  (and (eq_attr "tune" "cortexa53")
       (eq_attr "type" "ffariths, fadds, ffarithd, faddd, fmov,\
                        f_cvt,f_cvtf2i,f_cvti2f,\
                        fcmps, fcmpd, fcsel, f_rints, f_rintd, f_minmaxs,\
                        f_minmaxd"))
  "cortex_a53_slot0+cortex_a53_fpadd_pipe")
222
;; Floating-point constant loads: latency 2, slot 0 plus the add pipe.
(define_insn_reservation "cortex_a53_fconst" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "fconsts,fconstd"))
  "cortex_a53_slot0+cortex_a53_fpadd_pipe")

;; Floating-point multiplies: latency 4, slot 0 only.
(define_insn_reservation "cortex_a53_fpmul" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "fmuls,fmuld"))
  "cortex_a53_slot0")
232
;; Multiply-accumulate: the add (accumulate) is issued after the
;; multiply completes, so the add pipe is reserved only after three
;; idle cycles following issue.  The reservation covers the single- and
;; double-precision types listed below.

(define_insn_reservation "cortex_a53_fpmac" 8
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "fmacs,fmacd,ffmas,ffmad"))
  "cortex_a53_slot0, nothing*3, cortex_a53_fpadd_pipe")
240
241;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
242;; Floating-point divide/square root instructions.
243;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Divide and square root share one reservation per precision.  fsqrt
;; really takes one cycle less than the divide, but that difference is
;; not modelled.

(define_insn_reservation "cortex_a53_fdivs" 14
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "fdivs, fsqrts"))
  "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 5")

(define_insn_reservation "cortex_a53_fdivd" 29
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "fdivd, fsqrtd"))
  "cortex_a53_slot0, cortex_a53_fp_div_sqrt * 8")
255
256;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
257;; ARMv8-A Cryptographic extensions.
258;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
259
;; AES: aese uses SIMD pipe 0; aesmc may use either SIMD pipe.

(define_insn_reservation "cortex_a53_crypto_aese" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "crypto_aese"))
  "cortex_a53_simd0")

(define_insn_reservation "cortex_a53_crypto_aesmc" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "crypto_aesmc"))
  "cortex_a53_simd0 | cortex_a53_simd1")

;; SHA: all variants use SIMD pipe 0 and differ only in latency.

(define_insn_reservation "cortex_a53_crypto_sha1_fast" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "crypto_sha1_fast, crypto_sha256_fast"))
  "cortex_a53_simd0")

(define_insn_reservation "cortex_a53_crypto_sha1_xor" 3
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "crypto_sha1_xor"))
  "cortex_a53_simd0")

(define_insn_reservation "cortex_a53_crypto_sha_slow" 5
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "crypto_sha1_slow, crypto_sha256_slow"))
  "cortex_a53_simd0")

;; An aese feeding an aesmc has latency 0 when the guard
;; aarch_crypto_can_dual_issue accepts the pair.

(define_bypass 0
  "cortex_a53_crypto_aese"
  "cortex_a53_crypto_aesmc"
  "aarch_crypto_can_dual_issue")
288
289;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
290;; VFP to/from core transfers.
291;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
292
;; Core register to VFP register transfers: latency 4, slot 0.
(define_insn_reservation "cortex_a53_r2f" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_mcr,f_mcrr"))
  "cortex_a53_slot0")

;; VFP register to core register transfers: latency 2, slot 0.
(define_insn_reservation "cortex_a53_f2r" 2
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_mrc,f_mrrc"))
  "cortex_a53_slot0")
302
303;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
304;; VFP flag transfer.
305;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
306
;; Transfer of the VFP flags: latency 4, slot 0.
(define_insn_reservation "cortex_a53_f_flags" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_flag"))
  "cortex_a53_slot0")
311
312;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
313;; VFP load/store.
314;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
315
;; Scalar VFP loads are modelled as issuing from slot 0 only.

(define_insn_reservation "cortex_a53_f_loads" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_loads"))
  "cortex_a53_slot0")

(define_insn_reservation "cortex_a53_f_loadd" 5
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_loadd"))
  "cortex_a53_slot0")

;; The two Advanced SIMD load types given dedicated reservations use
;; either issue slot together with the load/store unit; the
;; two-register form holds them for two cycles.

(define_insn_reservation "cortex_a53_f_load_2reg" 5
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "neon_load2_2reg_q"))
  "(cortex_a53_slot_any+cortex_a53_ls)*2")

(define_insn_reservation "cortex_a53_f_loadq" 5
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "neon_load1_1reg_q"))
  "cortex_a53_slot_any+cortex_a53_ls")

;; Scalar VFP stores: latency 0, slot 0 only.

(define_insn_reservation "cortex_a53_f_stores" 0
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_stores"))
  "cortex_a53_slot0")

(define_insn_reservation "cortex_a53_f_stored" 0
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "type" "f_stored"))
  "cortex_a53_slot0")
345
;; Load-to-use for floating-point values has a penalty of one cycle,
;; i.e. a latency of two.  Both scalar load reservations share the same
;; latency and the same consumers, so a single bypass covers them.

(define_bypass 2
  "cortex_a53_f_loads,cortex_a53_f_loadd"
  "cortex_a53_fpalu, cortex_a53_fpmac, cortex_a53_fpmul,\
   cortex_a53_fdivs, cortex_a53_fdivd,\
   cortex_a53_f2r")
358
359;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
360;; Crude Advanced SIMD approximation.
361;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
362
;; Catch-all for instructions whose is_neon_type attribute is "yes":
;; latency 4 on SIMD pipe 0.
;; NOTE(review): the name lacks the "a" used by every other
;; reservation here ("cortex_a53_..."); it is left unchanged in case
;; it is referenced elsewhere -- confirm before renaming.
(define_insn_reservation "cortex_53_advsimd" 4
  (and
    (eq_attr "tune" "cortexa53")
    (eq_attr "is_neon_type" "yes"))
  "cortex_a53_simd0")
367