1;; ARM Cortex-A8 NEON scheduling description.
2;; Copyright (C) 2007-2015 Free Software Foundation, Inc.
3;; Contributed by CodeSourcery.
4
5;; This file is part of GCC.
6
7;; GCC is free software; you can redistribute it and/or modify it
8;; under the terms of the GNU General Public License as published
9;; by the Free Software Foundation; either version 3, or (at your
10;; option) any later version.
11
12;; GCC is distributed in the hope that it will be useful, but WITHOUT
13;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15;; License for more details.
16
17;; You should have received a copy of the GNU General Public License
18;; along with GCC; see the file COPYING3.  If not see
19;; <http://www.gnu.org/licenses/>.
20
;; Scheduling class of an instruction on the Cortex-A8 NEON unit.
;;
;; The first string enumerates every class.  The COND that follows derives
;; the class from the generic "type" attribute: each arm tests "type"
;; against a comma-separated list and yields the class named by the
;; CONST_STRING that follows it.  An instruction whose "type" appears in no
;; arm gets "unknown".
;;
;; The classes neon_vld3_vld4_all_lanes, neon_ldm_2, neon_stm_2 and none
;; are declared in the value list but are never produced by this COND.
(define_attr "cortex_a8_neon_type"
   "neon_int_1,neon_int_2,neon_int_3,neon_int_4,neon_int_5,neon_vqneg_vqabs,
   neon_bit_ops_q,
   neon_vaba,neon_vaba_qqq, neon_vmov,
   neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,neon_mul_qqq_8_16_32_ddd_32,
   neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar,
   neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,neon_mla_qqq_8_16,
   neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long,
   neon_mla_qqq_32_qqd_32_scalar,neon_mul_ddd_16_scalar_32_16_long_scalar,
   neon_mul_qqd_32_scalar,neon_mla_ddd_16_scalar_qdd_32_16_long_scalar,
   neon_shift_1,neon_shift_2,neon_shift_3,
   neon_vqshl_vrshl_vqrshl_qqq,neon_vsra_vrsra,neon_fp_vadd_ddd_vabs_dd,
   neon_fp_vadd_qqq_vabs_qq,neon_fp_vsum,neon_fp_vmul_ddd,neon_fp_vmul_qqd,
   neon_fp_vmla_ddd,neon_fp_vmla_qqq,neon_fp_vmla_ddd_scalar,
   neon_fp_vmla_qqq_scalar,neon_fp_vrecps_vrsqrts_ddd,
   neon_fp_vrecps_vrsqrts_qqq,neon_bp_simple,neon_bp_2cycle,neon_bp_3cycle,
   neon_ldr,neon_str,neon_vld1_1_2_regs,neon_vld1_3_4_regs,
   neon_vld2_2_regs_vld1_vld2_all_lanes,neon_vld2_4_regs,neon_vld3_vld4,
   neon_vst1_1_2_regs_vst2_2_regs,neon_vst1_3_4_regs,
   neon_vst2_4_regs_vst3_vst4,neon_vld1_vld2_lane,
   neon_vld3_vld4_lane,neon_vst1_vst2_lane,neon_vst3_vst4_lane,
   neon_vld3_vld4_all_lanes,neon_mcr,neon_mcr_2_mcrr,neon_mrc,neon_mrrc,
   neon_ldm_2,neon_stm_2,none,unknown"
  (cond [
          (eq_attr "type" "neon_logic, neon_logic_q,\
                           neon_bsl, neon_cls, neon_cnt,\
                           neon_add, neon_add_q")
                          (const_string "neon_int_1")
          (eq_attr "type" "neon_add_widen, neon_sub_widen,\
                           neon_sub, neon_sub_q")
                          (const_string "neon_int_2")
          (eq_attr "type" "neon_neg, neon_neg_q,\
                           neon_reduc_add, neon_reduc_add_q,\
                           neon_reduc_add_long,\
                           neon_add_long, neon_sub_long")
                          (const_string "neon_int_3")
          (eq_attr "type" "neon_abs, neon_abs_q,
                           neon_compare_zero, neon_compare_zero_q,\
                           neon_add_halve_narrow_q,\
                           neon_sub_halve_narrow_q,\
                           neon_add_halve, neon_add_halve_q,\
                           neon_qadd, neon_qadd_q,\
                           neon_tst, neon_tst_q")
                          (const_string "neon_int_4")
          (eq_attr "type" "neon_abd_long, neon_sub_halve, neon_sub_halve_q,\
                           neon_qsub, neon_qsub_q,\
                           neon_abd, neon_abd_q,\
                           neon_compare, neon_compare_q,\
                           neon_minmax, neon_minmax_q, neon_reduc_minmax,\
                           neon_reduc_minmax_q")
                          (const_string "neon_int_5")
          (eq_attr "type" "neon_qneg, neon_qneg_q, neon_qabs, neon_qabs_q")
                           (const_string "neon_vqneg_vqabs")
          (eq_attr "type" "neon_move, neon_move_q")
                           (const_string "neon_vmov")
          (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q")
                           (const_string "neon_bit_ops_q")
          (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc")
                          (const_string "neon_vaba")
          (eq_attr "type" "neon_arith_acc_q")
                          (const_string "neon_vaba_qqq")
          (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                           neon_shift_imm_long, neon_shift_imm_narrow_q,\
                           neon_shift_reg")
                           (const_string "neon_shift_1")
          (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,
                           neon_sat_shift_imm_narrow_q,\
                           neon_sat_shift_reg")
                           (const_string "neon_shift_2")
          (eq_attr "type" "neon_shift_reg_q")
                           (const_string "neon_shift_3")
          (eq_attr "type" "neon_sat_shift_reg_q")
                           (const_string "neon_vqshl_vrshl_vqrshl_qqq")
          (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
                           (const_string "neon_vsra_vrsra")
          (eq_attr "type" "neon_mul_b, neon_mul_h,\
                           neon_mul_b_long, neon_mul_h_long,\
                           neon_sat_mul_b, neon_sat_mul_h,\
                           neon_sat_mul_b_long, neon_sat_mul_h_long")
                           (const_string
                            "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long")
          (eq_attr "type" "neon_mul_b_q, neon_mul_h_q,\
                           neon_sat_mul_b_q, neon_sat_mul_h_q")
                           (const_string "neon_mul_qqq_8_16_32_ddd_32")
          (eq_attr "type" "neon_mul_s, neon_mul_s_long,\
                           neon_sat_mul_s, neon_sat_mul_s_long,\
                           neon_mul_h_scalar_q, neon_sat_mul_h_scalar_q,\
                           neon_mul_s_scalar, neon_sat_mul_s_scalar,\
                           neon_mul_s_scalar_long,\
                           neon_sat_mul_s_scalar_long")
                           (const_string
             "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")
          (eq_attr "type" "neon_mla_b, neon_mla_h,\
                           neon_mla_b_long, neon_mla_h_long,\
                           neon_sat_mla_b_long, neon_sat_mla_h_long,\
                           neon_sat_mla_h_scalar_long")
                           (const_string
                             "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long")
          (eq_attr "type" "neon_mla_b_q, neon_mla_h_q")
                           (const_string "neon_mla_qqq_8_16")
          (eq_attr "type" "neon_mla_s, neon_mla_s_long,\
                           neon_sat_mla_s_long,\
                           neon_mla_h_scalar_q, neon_mla_s_scalar,\
                           neon_mla_s_scalar_long,\
                           neon_sat_mla_s_scalar_long")
                           (const_string
 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")
          (eq_attr "type" "neon_mla_s_q, neon_mla_s_scalar_q")
                           (const_string "neon_mla_qqq_32_qqd_32_scalar")
          (eq_attr "type" "neon_mul_h_scalar, neon_sat_mul_h_scalar,\
                           neon_mul_h_scalar_long,\
                           neon_sat_mul_h_scalar_long")
                          (const_string
                            "neon_mul_ddd_16_scalar_32_16_long_scalar")
          (eq_attr "type" "neon_mul_s_q, neon_sat_mul_s_q,\
                           neon_mul_s_scalar_q")
                           (const_string "neon_mul_qqd_32_scalar")
          (eq_attr "type" "neon_mla_h_scalar, neon_mla_h_scalar_long")
                           (const_string
                             "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar")
          (eq_attr "type" "neon_fp_abd_s, neon_fp_abs_s, neon_fp_neg_s,\
                           neon_fp_addsub_s, neon_fp_compare_s,\
                           neon_fp_minmax_s, neon_fp_mul_s,\
                           neon_fp_recpe_s, neon_fp_rsqrte_s,\
                           neon_fp_to_int_s, neon_int_to_fp_s")
                           (const_string "neon_fp_vadd_ddd_vabs_dd")
          (eq_attr "type" "neon_fp_abd_s_q, neon_fp_abs_s_q,\
                           neon_fp_neg_s_q,\
                           neon_fp_addsub_s_q, neon_fp_compare_s_q,\
                           neon_fp_minmax_s_q, neon_fp_mul_s_q,\
                           neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
                           neon_fp_to_int_s_q, neon_int_to_fp_s_q")
                           (const_string "neon_fp_vadd_qqq_vabs_qq")
          (eq_attr "type" "neon_fp_reduc_add_s, neon_fp_reduc_minmax_s,\
                           neon_fp_reduc_add_s_q, neon_fp_reduc_minmax_s_q")
                           (const_string "neon_fp_vsum")
          (eq_attr "type" "neon_fp_mul_s_scalar")
                           (const_string "neon_fp_vmul_ddd")
          (eq_attr "type" "neon_fp_mul_s_scalar_q")
                           (const_string "neon_fp_vmul_qqd")
          (eq_attr "type" "neon_fp_mla_s")
                           (const_string "neon_fp_vmla_ddd")
          (eq_attr "type" "neon_fp_mla_s_q")
                           (const_string "neon_fp_vmla_qqq")
          (eq_attr "type" "neon_fp_mla_s_scalar")
                           (const_string "neon_fp_vmla_ddd_scalar")
          (eq_attr "type" "neon_fp_mla_s_scalar_q")
                           (const_string "neon_fp_vmla_qqq_scalar")
          (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s")
                           (const_string "neon_fp_vrecps_vrsqrts_ddd")
          (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q")
                           (const_string "neon_fp_vrecps_vrsqrts_qqq")
          (eq_attr "type" "neon_move_narrow_q, neon_dup,\
                           neon_dup_q, neon_permute, neon_zip,\
                           neon_ext, neon_rev, neon_rev_q")
                           (const_string "neon_bp_simple")
          (eq_attr "type" "neon_permute_q, neon_ext_q, neon_tbl1, neon_tbl2")
                           (const_string "neon_bp_2cycle")
          (eq_attr "type" "neon_zip_q, neon_tbl3, neon_tbl4")
                           (const_string "neon_bp_3cycle")
          (eq_attr "type" "neon_ldr")
                           (const_string "neon_ldr")
          (eq_attr "type" "neon_str")
                           (const_string "neon_str")
          (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q,\
                           neon_load1_2reg, neon_load1_2reg_q,\
                           neon_load2_2reg, neon_load2_2reg_q")
                           (const_string "neon_vld1_1_2_regs")
          (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
                           neon_load1_4reg, neon_load1_4reg_q")
                           (const_string "neon_vld1_3_4_regs")
          (eq_attr "type" "neon_load1_all_lanes, neon_load1_all_lanes_q,\
                           neon_load2_all_lanes, neon_load2_all_lanes_q")
                           (const_string
                              "neon_vld2_2_regs_vld1_vld2_all_lanes")
          (eq_attr "type" "neon_load3_all_lanes, neon_load3_all_lanes_q,\
                           neon_load4_all_lanes, neon_load4_all_lanes_q,\
                           neon_load2_4reg, neon_load2_4reg_q")
                           (const_string "neon_vld2_4_regs")
          (eq_attr "type" "neon_load3_3reg, neon_load3_3reg_q,\
                           neon_load4_4reg, neon_load4_4reg_q")
                           (const_string "neon_vld3_vld4")
          ;; NOTE(review): the VFP store types f_stores/f_stored share this
          ;; load-lane class with f_loads/f_loadd -- confirm this is intended.
          (eq_attr "type" "f_loads, f_loadd, f_stores, f_stored,\
                           neon_load1_one_lane, neon_load1_one_lane_q,\
                           neon_load2_one_lane, neon_load2_one_lane_q")
                           (const_string "neon_vld1_vld2_lane")
          (eq_attr "type" "neon_load3_one_lane, neon_load3_one_lane_q,\
                           neon_load4_one_lane, neon_load4_one_lane_q")
                           (const_string "neon_vld3_vld4_lane")
          (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q,\
                           neon_store1_2reg, neon_store1_2reg_q,\
                           neon_store2_2reg, neon_store2_2reg_q")
                           (const_string "neon_vst1_1_2_regs_vst2_2_regs")
          (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
                           neon_store1_4reg, neon_store1_4reg_q")
                           (const_string "neon_vst1_3_4_regs")
          (eq_attr "type" "neon_store2_4reg, neon_store2_4reg_q,\
                           neon_store3_3reg, neon_store3_3reg_q,\
                           neon_store4_4reg, neon_store4_4reg_q")
                           (const_string "neon_vst2_4_regs_vst3_vst4")
          (eq_attr "type" "neon_store1_one_lane, neon_store1_one_lane_q,\
                           neon_store2_one_lane, neon_store2_one_lane_q")
                           (const_string "neon_vst1_vst2_lane")
          (eq_attr "type" "neon_store3_one_lane, neon_store3_one_lane_q,\
                           neon_store4_one_lane, neon_store4_one_lane_q")
                           (const_string "neon_vst3_vst4_lane")
          (eq_attr "type" "neon_from_gp, f_mcr")
                           (const_string "neon_mcr")
          (eq_attr "type" "neon_from_gp_q, f_mcrr")
                           (const_string "neon_mcr_2_mcrr")
          (eq_attr "type" "neon_to_gp, f_mrc")
                           (const_string "neon_mrc")
          (eq_attr "type" "neon_to_gp_q, f_mrrc")
                           (const_string "neon_mrrc")]
          (const_string "unknown")))
236
;; Automaton that holds the four NEON/VFP units declared below.
(define_automaton "cortex_a8_neon")

;; Only one load, store, permute, MCR or MRC instruction can be issued
;; per cycle.
(define_cpu_unit "cortex_a8_neon_issue_perm" "cortex_a8_neon")

;; Only one data-processing instruction can be issued per cycle.
(define_cpu_unit "cortex_a8_neon_issue_dp" "cortex_a8_neon")

;; The VFPLite unit (non-pipelined).
(define_cpu_unit "cortex_a8_vfplite" "cortex_a8_neon")

;; We need a special mutual exclusion (to be used in addition to
;; cortex_a8_neon_issue_dp) for the case when an instruction such as
;; vmla.f is forwarded from E5 of the floating-point multiply pipeline to
;; E2 of the floating-point add pipeline.  On the cycle previous to that
;; forward we must prevent issue of any instruction to the floating-point
;; add pipeline, but still allow issue of a data-processing instruction
;; to any of the other pipelines.
(define_cpu_unit "cortex_a8_neon_issue_fadd" "cortex_a8_neon")
257
258;; Patterns of reservation.
259;; We model the NEON issue units as running in parallel with the core ones.
260;; We assume that multi-cycle NEON instructions get decomposed into
261;; micro-ops as they are issued into the NEON pipeline, and not as they
262;; are issued into the ARM pipeline.  Dual issue may not occur except
263;; upon the first and last cycles of a multi-cycle instruction, but it
264;; is unclear whether two multi-cycle instructions can issue together (in
265;; this model they cannot).  It is also unclear whether a pair of
266;; a multi-cycle and single-cycle instructions, that could potentially
267;; issue together, only do so if (say) the single-cycle one precedes
268;; the other.
269
;; Data-processing issue.  Each variant claims one of cortex_a8_alu0 or
;; cortex_a8_alu1 together with cortex_a8_neon_issue_dp on its first cycle.
;; The _2 and _4 variants keep cortex_a8_neon_issue_dp busy for two and
;; four consecutive cycles respectively; the _4 variant also holds
;; cortex_a8_neon_issue_perm during its two middle cycles.
;; cortex_a8_alu0/cortex_a8_alu1 are not declared in this part of the
;; file; presumably they come from the core Cortex-A8 description.
(define_reservation "cortex_a8_neon_dp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp")
(define_reservation "cortex_a8_neon_dp_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp")
(define_reservation "cortex_a8_neon_dp_4"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp")
280
;; Floating-point add issue.  Same shape as the data-processing
;; reservations, but cortex_a8_neon_issue_fadd is claimed alongside
;; cortex_a8_neon_issue_dp on every cycle (one cycle, or two for _2).
(define_reservation "cortex_a8_neon_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd")
(define_reservation "cortex_a8_neon_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_fadd,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_fadd")
288
;; Permute issue.  Each variant claims one of cortex_a8_alu0 or
;; cortex_a8_alu1 together with cortex_a8_neon_issue_perm on its first
;; cycle and keeps cortex_a8_neon_issue_perm busy for one, two or three
;; cycles.  The _3 variant also holds cortex_a8_neon_issue_dp during its
;; middle cycle.
(define_reservation "cortex_a8_neon_perm"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_perm_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_perm_3"
                    "(cortex_a8_alu0|cortex_a8_alu1)+\
                     cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
301
;; Load/store issue.  Each variant claims cortex_a8_issue_ls together with
;; cortex_a8_neon_issue_perm on its first cycle and keeps
;; cortex_a8_neon_issue_perm busy for one to five cycles, as given by the
;; suffix.  From _3 upwards, every cycle other than the first and the last
;; also holds cortex_a8_neon_issue_dp.
;; cortex_a8_issue_ls is not declared in this part of the file;
;; presumably it comes from the core Cortex-A8 description.
(define_reservation "cortex_a8_neon_ls"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_2"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_3"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_4"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
(define_reservation "cortex_a8_neon_ls_5"
                    "cortex_a8_issue_ls+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_dp+cortex_a8_neon_issue_perm,\
                     cortex_a8_neon_issue_perm")
322
;; Multiply followed by a delayed claim on the floating-point add issue
;; unit.  The single-cycle form claims cortex_a8_neon_issue_dp (plus one
;; of cortex_a8_alu0/cortex_a8_alu1) on cycle 1, reserves nothing for three
;; cycles, then claims cortex_a8_neon_issue_fadd on cycle 5.  The _2 form
;; holds cortex_a8_neon_issue_dp on cycles 1-2, reserves nothing on cycles
;; 3-4, then holds cortex_a8_neon_issue_fadd on cycles 5-6.
(define_reservation "cortex_a8_neon_fmul_then_fadd"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
		     nothing*3,\
		     cortex_a8_neon_issue_fadd")
(define_reservation "cortex_a8_neon_fmul_then_fadd_2"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp,\
		     cortex_a8_neon_issue_dp,\
		     nothing*2,\
		     cortex_a8_neon_issue_fadd,\
		     cortex_a8_neon_issue_fadd")
333
;; VFP instructions can only be single-issued into the NEON pipeline.
;; This is modelled by claiming both NEON issue units
;; (cortex_a8_neon_issue_dp and cortex_a8_neon_issue_perm) in the same
;; cycle, together with cortex_a8_vfplite and one of
;; cortex_a8_alu0/cortex_a8_alu1.
(define_reservation "cortex_a8_vfp"
                    "(cortex_a8_alu0|cortex_a8_alu1)+cortex_a8_neon_issue_dp+\
                     cortex_a8_neon_issue_perm+cortex_a8_vfplite")
338
339;; VFP instructions.
340;; The VFPLite unit that executes these isn't pipelined; we give the
341;; worst-case latencies (and choose the double-precision ones where we
342;; do not distinguish on precision).  We assume RunFast mode is not
343;; enabled and therefore do not model the possible VFP instruction
344;; execution in the NEON floating point pipelines, nor additional
345;; latencies for the processing of subnormals.
346;;
347;; TODO: RunFast mode could potentially be enabled when -ffast-math
348;; is specified.
349
;; Every reservation in this group has the same shape: for a latency of
;; N, the unit string is cortex_a8_vfp for the issue cycle followed by N-1
;; further cycles of cortex_a8_vfplite, so cortex_a8_vfplite stays busy
;; for the whole latency.  All of them apply only when "tune" is cortexa8.

;; Latency 10: constant loads and additions (fconsts, fconstd, fadds,
;; faddd).
(define_insn_reservation "cortex_a8_vfp_add_sub" 10
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
  "cortex_a8_vfp,cortex_a8_vfplite*9")

;; Latency 12: fmuls.
(define_insn_reservation "cortex_a8_vfp_muls" 12
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuls"))
  "cortex_a8_vfp,cortex_a8_vfplite*11")

;; Latency 17: fmuld.
(define_insn_reservation "cortex_a8_vfp_muld" 17
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmuld"))
  "cortex_a8_vfp,cortex_a8_vfplite*16")

;; Latency 21: fmacs and ffmas.
(define_insn_reservation "cortex_a8_vfp_macs" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacs,ffmas"))
  "cortex_a8_vfp,cortex_a8_vfplite*20")

;; Latency 26: fmacd and ffmad.
(define_insn_reservation "cortex_a8_vfp_macd" 26
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmacd,ffmad"))
  "cortex_a8_vfp,cortex_a8_vfplite*25")

;; Latency 37: fdivs and fsqrts.
(define_insn_reservation "cortex_a8_vfp_divs" 37
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivs, fsqrts"))
  "cortex_a8_vfp,cortex_a8_vfplite*36")

;; Latency 65: fdivd and fsqrtd.
(define_insn_reservation "cortex_a8_vfp_divd" 65
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fdivd, fsqrtd"))
  "cortex_a8_vfp,cortex_a8_vfplite*64")
384
;; Comparisons can actually take 7 cycles sometimes instead of four,
;; but given all the other instructions lumped into type=ffarith that
;; take four cycles, we pick that latency.
;;
;; The unit string is cortex_a8_vfp for the issue cycle followed by three
;; further cycles of cortex_a8_vfplite, matching the latency of 4.
;;
;; fconsts and fconstd are deliberately not listed here.  They are already
;; claimed by cortex_a8_vfp_add_sub, and the conditions of different
;; define_insn_reservation constructs must not be true for the same insn
;; (which reservation is used in that case is not defined).
;; NOTE(review): if constant loads are really four-cycle operations on
;; this core, move the two types here from cortex_a8_vfp_add_sub instead
;; of listing them in both places -- confirm against the processor manual.
(define_insn_reservation "cortex_a8_vfp_farith" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "fmov,ffariths,ffarithd,fcmps,fcmpd"))
  "cortex_a8_vfp,cortex_a8_vfplite*3")
392
;; Latency 7: conversions (f_cvt, f_cvtf2i, f_cvti2f).  The unit string is
;; cortex_a8_vfp for the issue cycle followed by six further cycles of
;; cortex_a8_vfplite.
(define_insn_reservation "cortex_a8_vfp_cvt" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "cortex_a8_vfp,cortex_a8_vfplite*6")
397
;; NEON -> core transfers.
;; These are selected by the cortex_a8_neon_type classes neon_mrc and
;; neon_mrrc and reuse the load/store issue reservations: one cycle
;; (cortex_a8_neon_ls) with latency 20 for neon_mrc, two cycles
;; (cortex_a8_neon_ls_2) with latency 21 for neon_mrrc.

(define_insn_reservation "cortex_a8_neon_mrc" 20
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mrc"))
  "cortex_a8_neon_ls")

(define_insn_reservation "cortex_a8_neon_mrrc" 21
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mrrc"))
  "cortex_a8_neon_ls_2")
409
;; Arithmetic Operations
;;
;; Each reservation below applies when "tune" is cortexa8 and the insn's
;; cortex_a8_neon_type equals the class named in the reservation.
;; In this section a result described as "at Nk" is paired with a latency
;; of k and the one-cycle cortex_a8_neon_dp reservation; "at Nk on cycle 2"
;; is paired with a latency of k+1 and the two-cycle cortex_a8_neon_dp_2.
;; (N1, N2, ... presumably name NEON pipeline stages -- confirm against
;; the processor manual.)

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N3.
(define_insn_reservation "cortex_a8_neon_int_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N3.
(define_insn_reservation "cortex_a8_neon_int_2" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
(define_insn_reservation "cortex_a8_neon_int_3" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_3"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N4.
(define_insn_reservation "cortex_a8_neon_int_4" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_4"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)n operands at N2, and produce a result at N4.
(define_insn_reservation "cortex_a8_neon_int_5" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_int_5"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
(define_insn_reservation "cortex_a8_neon_vqneg_vqabs" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vqneg_vqabs"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation produce a result at N3.
(define_insn_reservation "cortex_a8_neon_vmov" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vmov"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "cortex_a8_neon_vaba" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vaba"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "cortex_a8_neon_vaba_qqq" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vaba_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N3 on cycle 2.
(define_insn_reservation "cortex_a8_neon_bit_ops_q" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_bit_ops_q"))
  "cortex_a8_neon_dp_2")
482
;; Integer Multiply/Accumulate Operations
;;
;; Each reservation below applies when "tune" is cortexa8 and the insn's
;; cortex_a8_neon_type equals the class named in the reservation.
;; In this section a result "at N6" is paired with latency 6 and
;; cortex_a8_neon_dp, "at N6 on cycle 2" with latency 7 and
;; cortex_a8_neon_dp_2, and "at N6 on cycle 4" with latency 9 and
;; cortex_a8_neon_dp_4.

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6.
(define_insn_reservation "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
         "neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N2, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "cortex_a8_neon_mul_qqq_8_16_32_ddd_32" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mul_qqq_8_16_32_ddd_32"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 2.
(define_insn_reservation "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
            "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "cortex_a8_neon_mla_qqq_8_16" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_8_16"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 2.
(define_insn_reservation "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long" 7
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
 "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6 on cycle 4.
(define_insn_reservation "cortex_a8_neon_mla_qqq_32_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mla_qqq_32_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6.
(define_insn_reservation "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mul_ddd_16_scalar_32_16_long_scalar"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, and produce a result at N6 on cycle 4.
(define_insn_reservation "cortex_a8_neon_mul_qqd_32_scalar" 9
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_mul_qqd_32_scalar"))
  "cortex_a8_neon_dp_4")

;; Instructions using this reservation read their (D|Q)n operands at N2,
;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
;; produce a result at N6.
(define_insn_reservation "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type"
                  "neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"))
  "cortex_a8_neon_dp")
565
;; Shift Operations
;;
;; Each reservation below applies when "tune" is cortexa8 and the insn's
;; cortex_a8_neon_type equals the class named in the reservation.
;; Single-cycle forms use cortex_a8_neon_dp; the forms whose result is
;; described as arriving "on cycle 2" use cortex_a8_neon_dp_2 and carry a
;; latency one higher than the stage number.

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3.
(define_insn_reservation "cortex_a8_neon_shift_1" 3
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_1"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4.
(define_insn_reservation "cortex_a8_neon_shift_2" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_2"))
  "cortex_a8_neon_dp")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N3 on cycle 2.
(define_insn_reservation "cortex_a8_neon_shift_3" 4
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_shift_3"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their source operands at N1, and
;; produce a result at N4 on cycle 2.
(define_insn_reservation "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" 5
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vqshl_vrshl_vqrshl_qqq"))
  "cortex_a8_neon_dp_2")

;; Instructions using this reservation read their (D|Q)m operands at N1,
;; their (D|Q)d operands at N3, and produce a result at N6.
(define_insn_reservation "cortex_a8_neon_vsra_vrsra" 6
  (and (eq_attr "tune" "cortexa8")
       (eq_attr "cortex_a8_neon_type" "neon_vsra_vrsra"))
  "cortex_a8_neon_dp")
602
603;; Floating point Operations
604
605;; Instructions using this reservation read their source operands at N2, and
606;; produce a result at N5.
607(define_insn_reservation "cortex_a8_neon_fp_vadd_ddd_vabs_dd" 5
608  (and (eq_attr "tune" "cortexa8")
609       (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_ddd_vabs_dd"))
610 "cortex_a8_neon_fadd")
611
612;; Instructions using this reservation read their source operands at N2, and
613;; produce a result at N5 on cycle 2.
614(define_insn_reservation "cortex_a8_neon_fp_vadd_qqq_vabs_qq" 6
615  (and (eq_attr "tune" "cortexa8")
616       (eq_attr "cortex_a8_neon_type" "neon_fp_vadd_qqq_vabs_qq"))
617  "cortex_a8_neon_fadd_2")
618
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vsum": default latency 5
;; (equal to the N5 result stage below), reserving "cortex_a8_neon_fadd".
619;; Instructions using this reservation read their source operands at N1, and
620;; produce a result at N5.
621(define_insn_reservation "cortex_a8_neon_fp_vsum" 5
622  (and (eq_attr "tune" "cortexa8")
623       (eq_attr "cortex_a8_neon_type" "neon_fp_vsum"))
624  "cortex_a8_neon_fadd")
625
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmul_ddd": default latency 5
;; (equal to the N5 result stage below), reserving "cortex_a8_neon_dp".
626;; Instructions using this reservation read their (D|Q)n operands at N2,
627;; their (D|Q)m operands at N1, and produce a result at N5.
628(define_insn_reservation "cortex_a8_neon_fp_vmul_ddd" 5
629  (and (eq_attr "tune" "cortexa8")
630       (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_ddd"))
631  "cortex_a8_neon_dp")
632
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmul_qqd": default latency 6
;; (the N5 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_dp_2".
633;; Instructions using this reservation read their (D|Q)n operands at N2,
634;; their (D|Q)m operands at N1, and produce a result at N5 on cycle 2.
635(define_insn_reservation "cortex_a8_neon_fp_vmul_qqd" 6
636  (and (eq_attr "tune" "cortexa8")
637       (eq_attr "cortex_a8_neon_type" "neon_fp_vmul_qqd"))
638  "cortex_a8_neon_dp_2")
639
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmla_ddd": default latency 9
;; (equal to the N9 result stage below), reserving
;; "cortex_a8_neon_fmul_then_fadd".
640;; Instructions using this reservation read their (D|Q)n operands at N2,
641;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
642;; produce a result at N9.
643(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd" 9
644  (and (eq_attr "tune" "cortexa8")
645       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd"))
646  "cortex_a8_neon_fmul_then_fadd")
647
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmla_qqq": default latency
;; 10 (the N9 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_fmul_then_fadd_2".
648;; Instructions using this reservation read their (D|Q)n operands at N2,
649;; their (D|Q)m operands at N2, their (D|Q)d operands at N3, and
650;; produce a result at N9 on cycle 2.
651(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq" 10
652  (and (eq_attr "tune" "cortexa8")
653       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq"))
654  "cortex_a8_neon_fmul_then_fadd_2")
655
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmla_ddd_scalar": default
;; latency 9 (equal to the N9 result stage below), reserving
;; "cortex_a8_neon_fmul_then_fadd".
656;; Instructions using this reservation read their (D|Q)n operands at N2,
657;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
658;; produce a result at N9.
659(define_insn_reservation "cortex_a8_neon_fp_vmla_ddd_scalar" 9
660  (and (eq_attr "tune" "cortexa8")
661       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_ddd_scalar"))
662  "cortex_a8_neon_fmul_then_fadd")
663
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vmla_qqq_scalar": default
;; latency 10 (the N9 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_fmul_then_fadd_2".
664;; Instructions using this reservation read their (D|Q)n operands at N2,
665;; their (D|Q)m operands at N1, their (D|Q)d operands at N3, and
666;; produce a result at N9 on cycle 2.
667(define_insn_reservation "cortex_a8_neon_fp_vmla_qqq_scalar" 10
668  (and (eq_attr "tune" "cortexa8")
669       (eq_attr "cortex_a8_neon_type" "neon_fp_vmla_qqq_scalar"))
670  "cortex_a8_neon_fmul_then_fadd_2")
671
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vrecps_vrsqrts_ddd": default
;; latency 9 (equal to the N9 result stage below), reserving
;; "cortex_a8_neon_fmul_then_fadd".
672;; Instructions using this reservation read their source operands at N2, and
673;; produce a result at N9.
674(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" 9
675  (and (eq_attr "tune" "cortexa8")
676       (eq_attr "cortex_a8_neon_type" "neon_fp_vrecps_vrsqrts_ddd"))
677  "cortex_a8_neon_fmul_then_fadd")
678
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_fp_vrecps_vrsqrts_qqq": default
;; latency 10 (the N9 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_fmul_then_fadd_2".
;; The insn class is selected through the derived cortex_a8_neon_type attribute,
;; like the _ddd sibling and every other reservation in this file, rather than
;; through the raw "type" attribute.
;; NOTE(review): assumes the cortex_a8_neon_type mapping assigns
;; "neon_fp_vrecps_vrsqrts_qqq" to exactly the neon_fp_recps_s_q and
;; neon_fp_rsqrts_s_q insn types -- confirm against the define_attr at the top
;; of the file.
679;; Instructions using this reservation read their source operands at N2, and
680;; produce a result at N9 on cycle 2.
681(define_insn_reservation "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" 10
682  (and (eq_attr "tune" "cortexa8")
683       (eq_attr "cortex_a8_neon_type" "neon_fp_vrecps_vrsqrts_qqq"))
684  "cortex_a8_neon_fmul_then_fadd_2")
685
686;; Permute operations.
687
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_bp_simple": default latency 2
;; (equal to the N2 result stage below), reserving "cortex_a8_neon_perm".
688;; Instructions using this reservation read their source operands at N1, and
689;; produce a result at N2.
690(define_insn_reservation "cortex_a8_neon_bp_simple" 2
691  (and (eq_attr "tune" "cortexa8")
692       (eq_attr "cortex_a8_neon_type" "neon_bp_simple"))
693  "cortex_a8_neon_perm")
694
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_bp_2cycle": default latency 3
;; (the N2 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_perm_2".
695;; Instructions using this reservation read their source operands at N1, and
696;; produce a result at N2 on cycle 2.
697(define_insn_reservation "cortex_a8_neon_bp_2cycle" 3
698  (and (eq_attr "tune" "cortexa8")
699       (eq_attr "cortex_a8_neon_type" "neon_bp_2cycle"))
700  "cortex_a8_neon_perm_2")
701
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_bp_3cycle": default latency 4
;; (the N2 result stage plus two for the third cycle), reserving
;; "cortex_a8_neon_perm_3".
702;; Instructions using this reservation read their source operands at N1, and
703;; produce a result at N2 on cycle 3.
704(define_insn_reservation "cortex_a8_neon_bp_3cycle" 4
705  (and (eq_attr "tune" "cortexa8")
706       (eq_attr "cortex_a8_neon_type" "neon_bp_3cycle"))
707  "cortex_a8_neon_perm_3")
708
709;; Load Operations.
710
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_ldr": default latency 1
;; (equal to the N1 result stage below), reserving "cortex_a8_neon_ls".
711;; Instructions using this reservation produce a result at N1.
712(define_insn_reservation "cortex_a8_neon_ldr" 1
713  (and (eq_attr "tune" "cortexa8")
714       (eq_attr "cortex_a8_neon_type" "neon_ldr"))
715  "cortex_a8_neon_ls")
716
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_str": default latency 0 (no
;; result stage is listed below), reserving "cortex_a8_neon_ls".
717;; Instructions using this reservation read their source operands at N1.
718(define_insn_reservation "cortex_a8_neon_str" 0
719  (and (eq_attr "tune" "cortexa8")
720       (eq_attr "cortex_a8_neon_type" "neon_str"))
721  "cortex_a8_neon_ls")
722
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld1_1_2_regs": default latency
;; 2 (the N1 result stage plus one for the second cycle), reserving
;; "cortex_a8_neon_ls_2".
723;; Instructions using this reservation produce a result at N1 on cycle 2.
724(define_insn_reservation "cortex_a8_neon_vld1_1_2_regs" 2
725  (and (eq_attr "tune" "cortexa8")
726       (eq_attr "cortex_a8_neon_type" "neon_vld1_1_2_regs"))
727  "cortex_a8_neon_ls_2")
728
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld1_3_4_regs": default latency
;; 3 (the N1 result stage plus two for the third cycle), reserving
;; "cortex_a8_neon_ls_3".
729;; Instructions using this reservation produce a result at N1 on cycle 3.
730(define_insn_reservation "cortex_a8_neon_vld1_3_4_regs" 3
731  (and (eq_attr "tune" "cortexa8")
732       (eq_attr "cortex_a8_neon_type" "neon_vld1_3_4_regs"))
733  "cortex_a8_neon_ls_3")
734
;; Cortex-A8 tuning, cortex_a8_neon_type
;; "neon_vld2_2_regs_vld1_vld2_all_lanes": default latency 3 (the N2 result
;; stage plus one for the second cycle), reserving "cortex_a8_neon_ls_2".
735;; Instructions using this reservation produce a result at N2 on cycle 2.
736(define_insn_reservation "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" 3
737  (and (eq_attr "tune" "cortexa8")
738       (eq_attr "cortex_a8_neon_type" "neon_vld2_2_regs_vld1_vld2_all_lanes"))
739  "cortex_a8_neon_ls_2")
740
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld2_4_regs": default latency 4
;; (the N2 result stage plus two for the third cycle), reserving
;; "cortex_a8_neon_ls_3".
741;; Instructions using this reservation produce a result at N2 on cycle 3.
742(define_insn_reservation "cortex_a8_neon_vld2_4_regs" 4
743  (and (eq_attr "tune" "cortexa8")
744       (eq_attr "cortex_a8_neon_type" "neon_vld2_4_regs"))
745  "cortex_a8_neon_ls_3")
746
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld3_vld4": default latency 5
;; (the N2 result stage plus three for the fourth cycle), reserving
;; "cortex_a8_neon_ls_4".
747;; Instructions using this reservation produce a result at N2 on cycle 4.
748(define_insn_reservation "cortex_a8_neon_vld3_vld4" 5
749  (and (eq_attr "tune" "cortexa8")
750       (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4"))
751  "cortex_a8_neon_ls_4")
752
753;; Store operations.
754
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vst1_1_2_regs_vst2_2_regs":
;; default latency 0 (no result stage is listed below), reserving
;; "cortex_a8_neon_ls_2".
755;; Instructions using this reservation read their source operands at N1.
756(define_insn_reservation "cortex_a8_neon_vst1_1_2_regs_vst2_2_regs" 0
757  (and (eq_attr "tune" "cortexa8")
758       (eq_attr "cortex_a8_neon_type" "neon_vst1_1_2_regs_vst2_2_regs"))
759  "cortex_a8_neon_ls_2")
760
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vst1_3_4_regs": default latency
;; 0 (no result stage is listed below), reserving "cortex_a8_neon_ls_3".
761;; Instructions using this reservation read their source operands at N1.
762(define_insn_reservation "cortex_a8_neon_vst1_3_4_regs" 0
763  (and (eq_attr "tune" "cortexa8")
764       (eq_attr "cortex_a8_neon_type" "neon_vst1_3_4_regs"))
765  "cortex_a8_neon_ls_3")
766
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vst2_4_regs_vst3_vst4": default
;; latency 0 (no result stage is listed below), reserving
;; "cortex_a8_neon_ls_4".
767;; Instructions using this reservation read their source operands at N1.
768(define_insn_reservation "cortex_a8_neon_vst2_4_regs_vst3_vst4" 0
769  (and (eq_attr "tune" "cortexa8")
770       (eq_attr "cortex_a8_neon_type" "neon_vst2_4_regs_vst3_vst4"))
771  "cortex_a8_neon_ls_4")
772
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld1_vld2_lane": default latency
;; 4 (the N2 result stage plus two for the third cycle), reserving
;; "cortex_a8_neon_ls_3".
773;; Instructions using this reservation read their source operands at N1, and
774;; produce a result at N2 on cycle 3.
775(define_insn_reservation "cortex_a8_neon_vld1_vld2_lane" 4
776  (and (eq_attr "tune" "cortexa8")
777       (eq_attr "cortex_a8_neon_type" "neon_vld1_vld2_lane"))
778  "cortex_a8_neon_ls_3")
779
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vld3_vld4_lane": default latency
;; 6 (the N2 result stage plus four for the fifth cycle), reserving
;; "cortex_a8_neon_ls_5".
780;; Instructions using this reservation read their source operands at N1, and
781;; produce a result at N2 on cycle 5.
782(define_insn_reservation "cortex_a8_neon_vld3_vld4_lane" 6
783  (and (eq_attr "tune" "cortexa8")
784       (eq_attr "cortex_a8_neon_type" "neon_vld3_vld4_lane"))
785  "cortex_a8_neon_ls_5")
786
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vst1_vst2_lane": default latency
;; 0 (no result stage is listed below), reserving "cortex_a8_neon_ls_2".
787;; Instructions using this reservation read their source operands at N1.
788(define_insn_reservation "cortex_a8_neon_vst1_vst2_lane" 0
789  (and (eq_attr "tune" "cortexa8")
790       (eq_attr "cortex_a8_neon_type" "neon_vst1_vst2_lane"))
791  "cortex_a8_neon_ls_2")
792
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_vst3_vst4_lane": default latency
;; 0 (no result stage is listed below), reserving "cortex_a8_neon_ls_3".
793;; Instructions using this reservation read their source operands at N1.
794(define_insn_reservation "cortex_a8_neon_vst3_vst4_lane" 0
795  (and (eq_attr "tune" "cortexa8")
796       (eq_attr "cortex_a8_neon_type" "neon_vst3_vst4_lane"))
797  "cortex_a8_neon_ls_3")
798
799;; Register Transfer Operations
800
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_mcr": default latency 2 (equal
;; to the N2 result stage below), reserving "cortex_a8_neon_perm".
801;; Instructions using this reservation produce a result at N2.
802(define_insn_reservation "cortex_a8_neon_mcr" 2
803  (and (eq_attr "tune" "cortexa8")
804       (eq_attr "cortex_a8_neon_type" "neon_mcr"))
805  "cortex_a8_neon_perm")
806
;; Cortex-A8 tuning, cortex_a8_neon_type "neon_mcr_2_mcrr": default latency 2
;; (equal to the N2 result stage below), reserving "cortex_a8_neon_perm_2".
807;; Instructions using this reservation produce a result at N2.
808(define_insn_reservation "cortex_a8_neon_mcr_2_mcrr" 2
809  (and (eq_attr "tune" "cortexa8")
810       (eq_attr "cortex_a8_neon_type" "neon_mcr_2_mcrr"))
811  "cortex_a8_neon_perm_2")
812
813;; Exceptions to the default latencies.
814
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_mcr_2_mcrr" result with latency 1 instead of the default 2.
815(define_bypass 1 "cortex_a8_neon_mcr_2_mcrr"
816               "cortex_a8_neon_int_1,\
817               cortex_a8_neon_int_4,\
818               cortex_a8_neon_bit_ops_q,\
819               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
820               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
821               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
822               cortex_a8_neon_mla_qqq_8_16,\
823               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
824               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
825               cortex_a8_neon_fp_vmla_ddd,\
826               cortex_a8_neon_fp_vmla_qqq,\
827               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
828               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
829
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_mcr" result with latency 1 instead of the default 2.
830(define_bypass 1 "cortex_a8_neon_mcr"
831               "cortex_a8_neon_int_1,\
832               cortex_a8_neon_int_4,\
833               cortex_a8_neon_bit_ops_q,\
834               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
835               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
836               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
837               cortex_a8_neon_mla_qqq_8_16,\
838               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
839               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
840               cortex_a8_neon_fp_vmla_ddd,\
841               cortex_a8_neon_fp_vmla_qqq,\
842               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
843               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
844
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld3_vld4_lane" result with latency 5 instead of the
;; default 6.
845(define_bypass 5 "cortex_a8_neon_vld3_vld4_lane"
846               "cortex_a8_neon_int_1,\
847               cortex_a8_neon_int_4,\
848               cortex_a8_neon_bit_ops_q,\
849               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
850               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
851               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
852               cortex_a8_neon_mla_qqq_8_16,\
853               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
854               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
855               cortex_a8_neon_fp_vmla_ddd,\
856               cortex_a8_neon_fp_vmla_qqq,\
857               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
858               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
859
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld1_vld2_lane" result with latency 3 instead of the
;; default 4.
860(define_bypass 3 "cortex_a8_neon_vld1_vld2_lane"
861               "cortex_a8_neon_int_1,\
862               cortex_a8_neon_int_4,\
863               cortex_a8_neon_bit_ops_q,\
864               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
865               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
866               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
867               cortex_a8_neon_mla_qqq_8_16,\
868               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
869               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
870               cortex_a8_neon_fp_vmla_ddd,\
871               cortex_a8_neon_fp_vmla_qqq,\
872               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
873               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
874
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld3_vld4" result with latency 4 instead of the default 5.
875(define_bypass 4 "cortex_a8_neon_vld3_vld4"
876               "cortex_a8_neon_int_1,\
877               cortex_a8_neon_int_4,\
878               cortex_a8_neon_bit_ops_q,\
879               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
880               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
881               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
882               cortex_a8_neon_mla_qqq_8_16,\
883               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
884               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
885               cortex_a8_neon_fp_vmla_ddd,\
886               cortex_a8_neon_fp_vmla_qqq,\
887               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
888               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
889
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld2_4_regs" result with latency 3 instead of the default 4.
890(define_bypass 3 "cortex_a8_neon_vld2_4_regs"
891               "cortex_a8_neon_int_1,\
892               cortex_a8_neon_int_4,\
893               cortex_a8_neon_bit_ops_q,\
894               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
895               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
896               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
897               cortex_a8_neon_mla_qqq_8_16,\
898               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
899               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
900               cortex_a8_neon_fp_vmla_ddd,\
901               cortex_a8_neon_fp_vmla_qqq,\
902               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
903               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
904
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes" result with latency 2
;; instead of the default 3.
905(define_bypass 2 "cortex_a8_neon_vld2_2_regs_vld1_vld2_all_lanes"
906               "cortex_a8_neon_int_1,\
907               cortex_a8_neon_int_4,\
908               cortex_a8_neon_bit_ops_q,\
909               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
910               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
911               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
912               cortex_a8_neon_mla_qqq_8_16,\
913               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
914               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
915               cortex_a8_neon_fp_vmla_ddd,\
916               cortex_a8_neon_fp_vmla_qqq,\
917               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
918               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
919
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld1_3_4_regs" result with latency 2 instead of the
;; default 3.
920(define_bypass 2 "cortex_a8_neon_vld1_3_4_regs"
921               "cortex_a8_neon_int_1,\
922               cortex_a8_neon_int_4,\
923               cortex_a8_neon_bit_ops_q,\
924               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
925               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
926               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
927               cortex_a8_neon_mla_qqq_8_16,\
928               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
929               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
930               cortex_a8_neon_fp_vmla_ddd,\
931               cortex_a8_neon_fp_vmla_qqq,\
932               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
933               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
934
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vld1_1_2_regs" result with latency 1 instead of the
;; default 2.
935(define_bypass 1 "cortex_a8_neon_vld1_1_2_regs"
936               "cortex_a8_neon_int_1,\
937               cortex_a8_neon_int_4,\
938               cortex_a8_neon_bit_ops_q,\
939               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
940               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
941               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
942               cortex_a8_neon_mla_qqq_8_16,\
943               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
944               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
945               cortex_a8_neon_fp_vmla_ddd,\
946               cortex_a8_neon_fp_vmla_qqq,\
947               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
948               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
949
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_ldr" result with latency 0 instead of the default 1.
950(define_bypass 0 "cortex_a8_neon_ldr"
951               "cortex_a8_neon_int_1,\
952               cortex_a8_neon_int_4,\
953               cortex_a8_neon_bit_ops_q,\
954               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
955               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
956               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
957               cortex_a8_neon_mla_qqq_8_16,\
958               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
959               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
960               cortex_a8_neon_fp_vmla_ddd,\
961               cortex_a8_neon_fp_vmla_qqq,\
962               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
963               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
964
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_bp_3cycle" result with latency 3 instead of the default 4.
965(define_bypass 3 "cortex_a8_neon_bp_3cycle"
966               "cortex_a8_neon_int_1,\
967               cortex_a8_neon_int_4,\
968               cortex_a8_neon_bit_ops_q,\
969               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
970               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
971               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
972               cortex_a8_neon_mla_qqq_8_16,\
973               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
974               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
975               cortex_a8_neon_fp_vmla_ddd,\
976               cortex_a8_neon_fp_vmla_qqq,\
977               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
978               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
979
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_bp_2cycle" result with latency 2 instead of the default 3.
980(define_bypass 2 "cortex_a8_neon_bp_2cycle"
981               "cortex_a8_neon_int_1,\
982               cortex_a8_neon_int_4,\
983               cortex_a8_neon_bit_ops_q,\
984               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
985               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
986               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
987               cortex_a8_neon_mla_qqq_8_16,\
988               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
989               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
990               cortex_a8_neon_fp_vmla_ddd,\
991               cortex_a8_neon_fp_vmla_qqq,\
992               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
993               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
994
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_bp_simple" result with latency 1 instead of the default 2.
995(define_bypass 1 "cortex_a8_neon_bp_simple"
996               "cortex_a8_neon_int_1,\
997               cortex_a8_neon_int_4,\
998               cortex_a8_neon_bit_ops_q,\
999               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1000               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1001               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1002               cortex_a8_neon_mla_qqq_8_16,\
1003               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1004               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1005               cortex_a8_neon_fp_vmla_ddd,\
1006               cortex_a8_neon_fp_vmla_qqq,\
1007               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1008               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1009
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vrecps_vrsqrts_qqq" result with latency 9 instead of the
;; default 10.
1010(define_bypass 9 "cortex_a8_neon_fp_vrecps_vrsqrts_qqq"
1011               "cortex_a8_neon_int_1,\
1012               cortex_a8_neon_int_4,\
1013               cortex_a8_neon_bit_ops_q,\
1014               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1015               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1016               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1017               cortex_a8_neon_mla_qqq_8_16,\
1018               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1019               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1020               cortex_a8_neon_fp_vmla_ddd,\
1021               cortex_a8_neon_fp_vmla_qqq,\
1022               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1023               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1024
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vrecps_vrsqrts_ddd" result with latency 8 instead of the
;; default 9.
1025(define_bypass 8 "cortex_a8_neon_fp_vrecps_vrsqrts_ddd"
1026               "cortex_a8_neon_int_1,\
1027               cortex_a8_neon_int_4,\
1028               cortex_a8_neon_bit_ops_q,\
1029               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1030               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1031               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1032               cortex_a8_neon_mla_qqq_8_16,\
1033               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1034               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1035               cortex_a8_neon_fp_vmla_ddd,\
1036               cortex_a8_neon_fp_vmla_qqq,\
1037               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1038               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1039
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmla_qqq_scalar" result with latency 9 instead of the
;; default 10.
1040(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq_scalar"
1041               "cortex_a8_neon_int_1,\
1042               cortex_a8_neon_int_4,\
1043               cortex_a8_neon_bit_ops_q,\
1044               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1045               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1046               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1047               cortex_a8_neon_mla_qqq_8_16,\
1048               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1049               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1050               cortex_a8_neon_fp_vmla_ddd,\
1051               cortex_a8_neon_fp_vmla_qqq,\
1052               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1053               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1054
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmla_ddd_scalar" result with latency 8 instead of the
;; default 9.
1055(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd_scalar"
1056               "cortex_a8_neon_int_1,\
1057               cortex_a8_neon_int_4,\
1058               cortex_a8_neon_bit_ops_q,\
1059               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1060               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1061               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1062               cortex_a8_neon_mla_qqq_8_16,\
1063               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1064               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1065               cortex_a8_neon_fp_vmla_ddd,\
1066               cortex_a8_neon_fp_vmla_qqq,\
1067               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1068               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1069
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmla_qqq" result with latency 9 instead of the
;; default 10.
1070(define_bypass 9 "cortex_a8_neon_fp_vmla_qqq"
1071               "cortex_a8_neon_int_1,\
1072               cortex_a8_neon_int_4,\
1073               cortex_a8_neon_bit_ops_q,\
1074               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1075               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1076               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1077               cortex_a8_neon_mla_qqq_8_16,\
1078               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1079               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1080               cortex_a8_neon_fp_vmla_ddd,\
1081               cortex_a8_neon_fp_vmla_qqq,\
1082               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1083               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1084
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmla_ddd" result with latency 8 instead of the default 9.
1085(define_bypass 8 "cortex_a8_neon_fp_vmla_ddd"
1086               "cortex_a8_neon_int_1,\
1087               cortex_a8_neon_int_4,\
1088               cortex_a8_neon_bit_ops_q,\
1089               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1090               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1091               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1092               cortex_a8_neon_mla_qqq_8_16,\
1093               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1094               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1095               cortex_a8_neon_fp_vmla_ddd,\
1096               cortex_a8_neon_fp_vmla_qqq,\
1097               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1098               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1099
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmul_qqd" result with latency 5 instead of the default 6.
1100(define_bypass 5 "cortex_a8_neon_fp_vmul_qqd"
1101               "cortex_a8_neon_int_1,\
1102               cortex_a8_neon_int_4,\
1103               cortex_a8_neon_bit_ops_q,\
1104               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1105               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1106               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1107               cortex_a8_neon_mla_qqq_8_16,\
1108               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1109               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1110               cortex_a8_neon_fp_vmla_ddd,\
1111               cortex_a8_neon_fp_vmla_qqq,\
1112               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1113               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1114
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vmul_ddd" result with latency 4 instead of the default 5.
1115(define_bypass 4 "cortex_a8_neon_fp_vmul_ddd"
1116               "cortex_a8_neon_int_1,\
1117               cortex_a8_neon_int_4,\
1118               cortex_a8_neon_bit_ops_q,\
1119               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1120               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1121               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1122               cortex_a8_neon_mla_qqq_8_16,\
1123               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1124               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1125               cortex_a8_neon_fp_vmla_ddd,\
1126               cortex_a8_neon_fp_vmla_qqq,\
1127               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1128               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1129
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vsum" result with latency 4 instead of the default 5.
1130(define_bypass 4 "cortex_a8_neon_fp_vsum"
1131               "cortex_a8_neon_int_1,\
1132               cortex_a8_neon_int_4,\
1133               cortex_a8_neon_bit_ops_q,\
1134               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1135               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1136               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1137               cortex_a8_neon_mla_qqq_8_16,\
1138               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1139               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1140               cortex_a8_neon_fp_vmla_ddd,\
1141               cortex_a8_neon_fp_vmla_qqq,\
1142               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1143               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1144
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vadd_qqq_vabs_qq" result with latency 5 instead of the
;; default 6.
1145(define_bypass 5 "cortex_a8_neon_fp_vadd_qqq_vabs_qq"
1146               "cortex_a8_neon_int_1,\
1147               cortex_a8_neon_int_4,\
1148               cortex_a8_neon_bit_ops_q,\
1149               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1150               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1151               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1152               cortex_a8_neon_mla_qqq_8_16,\
1153               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1154               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1155               cortex_a8_neon_fp_vmla_ddd,\
1156               cortex_a8_neon_fp_vmla_qqq,\
1157               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1158               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1159
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_fp_vadd_ddd_vabs_dd" result with latency 4 instead of the
;; default 5.
1160(define_bypass 4 "cortex_a8_neon_fp_vadd_ddd_vabs_dd"
1161               "cortex_a8_neon_int_1,\
1162               cortex_a8_neon_int_4,\
1163               cortex_a8_neon_bit_ops_q,\
1164               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1165               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1166               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1167               cortex_a8_neon_mla_qqq_8_16,\
1168               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1169               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1170               cortex_a8_neon_fp_vmla_ddd,\
1171               cortex_a8_neon_fp_vmla_qqq,\
1172               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1173               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1174
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vsra_vrsra" result with latency 5 instead of the default 6.
1175(define_bypass 5 "cortex_a8_neon_vsra_vrsra"
1176               "cortex_a8_neon_int_1,\
1177               cortex_a8_neon_int_4,\
1178               cortex_a8_neon_bit_ops_q,\
1179               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1180               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1181               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1182               cortex_a8_neon_mla_qqq_8_16,\
1183               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1184               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1185               cortex_a8_neon_fp_vmla_ddd,\
1186               cortex_a8_neon_fp_vmla_qqq,\
1187               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1188               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1189
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq" result with latency 4 instead of the
;; default 5.
1190(define_bypass 4 "cortex_a8_neon_vqshl_vrshl_vqrshl_qqq"
1191               "cortex_a8_neon_int_1,\
1192               cortex_a8_neon_int_4,\
1193               cortex_a8_neon_bit_ops_q,\
1194               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1195               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1196               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1197               cortex_a8_neon_mla_qqq_8_16,\
1198               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1199               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1200               cortex_a8_neon_fp_vmla_ddd,\
1201               cortex_a8_neon_fp_vmla_qqq,\
1202               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1203               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1204
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_shift_3" result with latency 3 instead of the default 4.
1205(define_bypass 3 "cortex_a8_neon_shift_3"
1206               "cortex_a8_neon_int_1,\
1207               cortex_a8_neon_int_4,\
1208               cortex_a8_neon_bit_ops_q,\
1209               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1210               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1211               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1212               cortex_a8_neon_mla_qqq_8_16,\
1213               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1214               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1215               cortex_a8_neon_fp_vmla_ddd,\
1216               cortex_a8_neon_fp_vmla_qqq,\
1217               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1218               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1219
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_shift_2" result with latency 3 instead of the default 4.
1220(define_bypass 3 "cortex_a8_neon_shift_2"
1221               "cortex_a8_neon_int_1,\
1222               cortex_a8_neon_int_4,\
1223               cortex_a8_neon_bit_ops_q,\
1224               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1225               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1226               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1227               cortex_a8_neon_mla_qqq_8_16,\
1228               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1229               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1230               cortex_a8_neon_fp_vmla_ddd,\
1231               cortex_a8_neon_fp_vmla_qqq,\
1232               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1233               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1234
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_shift_1" result with latency 2, overriding the default
;; latency given by that reservation's define_insn_reservation.
1235(define_bypass 2 "cortex_a8_neon_shift_1"
1236               "cortex_a8_neon_int_1,\
1237               cortex_a8_neon_int_4,\
1238               cortex_a8_neon_bit_ops_q,\
1239               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1240               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1241               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1242               cortex_a8_neon_mla_qqq_8_16,\
1243               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1244               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1245               cortex_a8_neon_fp_vmla_ddd,\
1246               cortex_a8_neon_fp_vmla_qqq,\
1247               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1248               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1249
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar" result with
;; latency 5, overriding the default latency given by that reservation's
;; define_insn_reservation.
1250(define_bypass 5 "cortex_a8_neon_mla_ddd_16_scalar_qdd_32_16_long_scalar"
1251               "cortex_a8_neon_int_1,\
1252               cortex_a8_neon_int_4,\
1253               cortex_a8_neon_bit_ops_q,\
1254               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1255               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1256               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1257               cortex_a8_neon_mla_qqq_8_16,\
1258               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1259               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1260               cortex_a8_neon_fp_vmla_ddd,\
1261               cortex_a8_neon_fp_vmla_qqq,\
1262               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1263               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1264
;; Bypass: consumers in the reservations listed below see a
;; "cortex_a8_neon_mul_qqd_32_scalar" result with latency 8, overriding the
;; default latency given by that reservation's define_insn_reservation.
1265(define_bypass 8 "cortex_a8_neon_mul_qqd_32_scalar"
1266               "cortex_a8_neon_int_1,\
1267               cortex_a8_neon_int_4,\
1268               cortex_a8_neon_bit_ops_q,\
1269               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1270               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1271               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1272               cortex_a8_neon_mla_qqq_8_16,\
1273               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1274               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1275               cortex_a8_neon_fp_vmla_ddd,\
1276               cortex_a8_neon_fp_vmla_qqq,\
1277               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1278               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1279
;; Bypass: a result produced by
;; cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar is ready for the
;; consumer reservations listed in the second string after 5 cycles.
1280(define_bypass 5 "cortex_a8_neon_mul_ddd_16_scalar_32_16_long_scalar"
1281               "cortex_a8_neon_int_1,\
1282               cortex_a8_neon_int_4,\
1283               cortex_a8_neon_bit_ops_q,\
1284               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1285               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1286               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1287               cortex_a8_neon_mla_qqq_8_16,\
1288               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1289               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1290               cortex_a8_neon_fp_vmla_ddd,\
1291               cortex_a8_neon_fp_vmla_qqq,\
1292               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1293               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1294
;; Bypass: a result produced by cortex_a8_neon_mla_qqq_32_qqd_32_scalar is
;; ready for the consumer reservations listed in the second string after
;; 8 cycles.
1295(define_bypass 8 "cortex_a8_neon_mla_qqq_32_qqd_32_scalar"
1296               "cortex_a8_neon_int_1,\
1297               cortex_a8_neon_int_4,\
1298               cortex_a8_neon_bit_ops_q,\
1299               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1300               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1301               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1302               cortex_a8_neon_mla_qqq_8_16,\
1303               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1304               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1305               cortex_a8_neon_fp_vmla_ddd,\
1306               cortex_a8_neon_fp_vmla_qqq,\
1307               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1308               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1309
;; Bypass: a result produced by the
;; cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long
;; reservation is ready for the consumer reservations listed in the second
;; string after 6 cycles.
1310(define_bypass 6 "cortex_a8_neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long"
1311               "cortex_a8_neon_int_1,\
1312               cortex_a8_neon_int_4,\
1313               cortex_a8_neon_bit_ops_q,\
1314               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1315               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1316               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1317               cortex_a8_neon_mla_qqq_8_16,\
1318               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1319               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1320               cortex_a8_neon_fp_vmla_ddd,\
1321               cortex_a8_neon_fp_vmla_qqq,\
1322               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1323               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1324
;; Bypass: a result produced by cortex_a8_neon_mla_qqq_8_16 is ready for the
;; consumer reservations listed in the second string after 6 cycles.  The
;; producer is itself one of the listed consumers.
1325(define_bypass 6 "cortex_a8_neon_mla_qqq_8_16"
1326               "cortex_a8_neon_int_1,\
1327               cortex_a8_neon_int_4,\
1328               cortex_a8_neon_bit_ops_q,\
1329               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1330               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1331               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1332               cortex_a8_neon_mla_qqq_8_16,\
1333               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1334               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1335               cortex_a8_neon_fp_vmla_ddd,\
1336               cortex_a8_neon_fp_vmla_qqq,\
1337               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1338               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1339
;; Bypass: a result produced by
;; cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long is ready for the
;; consumer reservations listed in the second string after 5 cycles.  The
;; producer is itself one of the listed consumers.
1340(define_bypass 5 "cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long"
1341               "cortex_a8_neon_int_1,\
1342               cortex_a8_neon_int_4,\
1343               cortex_a8_neon_bit_ops_q,\
1344               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1345               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1346               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1347               cortex_a8_neon_mla_qqq_8_16,\
1348               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1349               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1350               cortex_a8_neon_fp_vmla_ddd,\
1351               cortex_a8_neon_fp_vmla_qqq,\
1352               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1353               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1354
;; Bypass: a result produced by the
;; cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar
;; reservation is ready for the consumer reservations listed in the second
;; string after 6 cycles.
1355(define_bypass 6 "cortex_a8_neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar"
1356               "cortex_a8_neon_int_1,\
1357               cortex_a8_neon_int_4,\
1358               cortex_a8_neon_bit_ops_q,\
1359               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1360               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1361               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1362               cortex_a8_neon_mla_qqq_8_16,\
1363               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1364               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1365               cortex_a8_neon_fp_vmla_ddd,\
1366               cortex_a8_neon_fp_vmla_qqq,\
1367               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1368               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1369
;; Bypass: a result produced by cortex_a8_neon_mul_qqq_8_16_32_ddd_32 is
;; ready for the consumer reservations listed in the second string after
;; 6 cycles.  The producer is itself one of the listed consumers.
1370(define_bypass 6 "cortex_a8_neon_mul_qqq_8_16_32_ddd_32"
1371               "cortex_a8_neon_int_1,\
1372               cortex_a8_neon_int_4,\
1373               cortex_a8_neon_bit_ops_q,\
1374               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1375               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1376               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1377               cortex_a8_neon_mla_qqq_8_16,\
1378               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1379               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1380               cortex_a8_neon_fp_vmla_ddd,\
1381               cortex_a8_neon_fp_vmla_qqq,\
1382               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1383               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1384
;; Bypass: a result produced by
;; cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long is ready for the
;; consumer reservations listed in the second string after 5 cycles.  The
;; producer is itself one of the listed consumers.
1385(define_bypass 5 "cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long"
1386               "cortex_a8_neon_int_1,\
1387               cortex_a8_neon_int_4,\
1388               cortex_a8_neon_bit_ops_q,\
1389               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1390               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1391               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1392               cortex_a8_neon_mla_qqq_8_16,\
1393               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1394               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1395               cortex_a8_neon_fp_vmla_ddd,\
1396               cortex_a8_neon_fp_vmla_qqq,\
1397               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1398               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1399
;; Bypass: a result produced by cortex_a8_neon_vaba_qqq is ready for the
;; consumer reservations listed in the second string after 6 cycles.
1400(define_bypass 6 "cortex_a8_neon_vaba_qqq"
1401               "cortex_a8_neon_int_1,\
1402               cortex_a8_neon_int_4,\
1403               cortex_a8_neon_bit_ops_q,\
1404               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1405               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1406               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1407               cortex_a8_neon_mla_qqq_8_16,\
1408               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1409               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1410               cortex_a8_neon_fp_vmla_ddd,\
1411               cortex_a8_neon_fp_vmla_qqq,\
1412               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1413               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1414
;; Bypass: a result produced by cortex_a8_neon_vaba is ready for the
;; consumer reservations listed in the second string after 5 cycles.
1415(define_bypass 5 "cortex_a8_neon_vaba"
1416               "cortex_a8_neon_int_1,\
1417               cortex_a8_neon_int_4,\
1418               cortex_a8_neon_bit_ops_q,\
1419               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1420               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1421               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1422               cortex_a8_neon_mla_qqq_8_16,\
1423               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1424               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1425               cortex_a8_neon_fp_vmla_ddd,\
1426               cortex_a8_neon_fp_vmla_qqq,\
1427               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1428               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1429
;; Bypass: a result produced by cortex_a8_neon_bit_ops_q is ready for the
;; consumer reservations listed in the second string after 3 cycles.  The
;; producer is itself one of the listed consumers.
1430(define_bypass 3 "cortex_a8_neon_bit_ops_q"
1431               "cortex_a8_neon_int_1,\
1432               cortex_a8_neon_int_4,\
1433               cortex_a8_neon_bit_ops_q,\
1434               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1435               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1436               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1437               cortex_a8_neon_mla_qqq_8_16,\
1438               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1439               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1440               cortex_a8_neon_fp_vmla_ddd,\
1441               cortex_a8_neon_fp_vmla_qqq,\
1442               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1443               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1444
;; Bypass: a result produced by cortex_a8_neon_vqneg_vqabs is ready for the
;; consumer reservations listed in the second string after 3 cycles.
1445(define_bypass 3 "cortex_a8_neon_vqneg_vqabs"
1446               "cortex_a8_neon_int_1,\
1447               cortex_a8_neon_int_4,\
1448               cortex_a8_neon_bit_ops_q,\
1449               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1450               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1451               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1452               cortex_a8_neon_mla_qqq_8_16,\
1453               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1454               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1455               cortex_a8_neon_fp_vmla_ddd,\
1456               cortex_a8_neon_fp_vmla_qqq,\
1457               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1458               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1459
;; Bypass: a result produced by cortex_a8_neon_int_5 is ready for the
;; consumer reservations listed in the second string after 3 cycles.
1460(define_bypass 3 "cortex_a8_neon_int_5"
1461               "cortex_a8_neon_int_1,\
1462               cortex_a8_neon_int_4,\
1463               cortex_a8_neon_bit_ops_q,\
1464               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1465               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1466               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1467               cortex_a8_neon_mla_qqq_8_16,\
1468               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1469               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1470               cortex_a8_neon_fp_vmla_ddd,\
1471               cortex_a8_neon_fp_vmla_qqq,\
1472               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1473               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1474
;; Bypass: a result produced by cortex_a8_neon_int_4 is ready for the
;; consumer reservations listed in the second string after 3 cycles.  The
;; producer is itself one of the listed consumers.
1475(define_bypass 3 "cortex_a8_neon_int_4"
1476               "cortex_a8_neon_int_1,\
1477               cortex_a8_neon_int_4,\
1478               cortex_a8_neon_bit_ops_q,\
1479               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1480               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1481               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1482               cortex_a8_neon_mla_qqq_8_16,\
1483               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1484               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1485               cortex_a8_neon_fp_vmla_ddd,\
1486               cortex_a8_neon_fp_vmla_qqq,\
1487               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1488               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1489
;; Bypass: a result produced by cortex_a8_neon_int_3 is ready for the
;; consumer reservations listed in the second string after 2 cycles.
1490(define_bypass 2 "cortex_a8_neon_int_3"
1491               "cortex_a8_neon_int_1,\
1492               cortex_a8_neon_int_4,\
1493               cortex_a8_neon_bit_ops_q,\
1494               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1495               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1496               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1497               cortex_a8_neon_mla_qqq_8_16,\
1498               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1499               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1500               cortex_a8_neon_fp_vmla_ddd,\
1501               cortex_a8_neon_fp_vmla_qqq,\
1502               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1503               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1504
;; Bypass: a result produced by cortex_a8_neon_int_2 is ready for the
;; consumer reservations listed in the second string after 2 cycles.
1505(define_bypass 2 "cortex_a8_neon_int_2"
1506               "cortex_a8_neon_int_1,\
1507               cortex_a8_neon_int_4,\
1508               cortex_a8_neon_bit_ops_q,\
1509               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1510               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1511               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1512               cortex_a8_neon_mla_qqq_8_16,\
1513               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1514               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1515               cortex_a8_neon_fp_vmla_ddd,\
1516               cortex_a8_neon_fp_vmla_qqq,\
1517               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1518               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1519
;; Bypass: a result produced by cortex_a8_neon_int_1 is ready for the
;; consumer reservations listed in the second string after 2 cycles.  The
;; producer is itself one of the listed consumers.
1520(define_bypass 2 "cortex_a8_neon_int_1"
1521               "cortex_a8_neon_int_1,\
1522               cortex_a8_neon_int_4,\
1523               cortex_a8_neon_bit_ops_q,\
1524               cortex_a8_neon_mul_ddd_8_16_qdd_16_8_long_32_16_long,\
1525               cortex_a8_neon_mul_qqq_8_16_32_ddd_32,\
1526               cortex_a8_neon_mla_ddd_8_16_qdd_16_8_long_32_16_long,\
1527               cortex_a8_neon_mla_qqq_8_16,\
1528               cortex_a8_neon_fp_vadd_ddd_vabs_dd,\
1529               cortex_a8_neon_fp_vadd_qqq_vabs_qq,\
1530               cortex_a8_neon_fp_vmla_ddd,\
1531               cortex_a8_neon_fp_vmla_qqq,\
1532               cortex_a8_neon_fp_vrecps_vrsqrts_ddd,\
1533               cortex_a8_neon_fp_vrecps_vrsqrts_qqq")
1534
1535