;; ARM Cortex-A57 pipeline description
;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_automaton "cortex_a57")

;; Collapse the fine-grained "type" attribute into the coarser classes
;; that the Advanced SIMD / FP load, store and arithmetic reservations
;; in this file test via (eq_attr "cortex_a57_neon_type" ...).
;;
;; The clauses of the cond are tried in order and the first true test
;; supplies the value, so a type must appear in at most one clause.
;; Any type that is not listed ends up as "unknown".
(define_attr "cortex_a57_neon_type"
  "neon_abd, neon_abd_q, neon_arith_acc, neon_arith_acc_q,
   neon_arith_basic, neon_arith_complex,
   neon_reduc_add_acc, neon_multiply, neon_multiply_q,
   neon_multiply_long, neon_mla, neon_mla_q, neon_mla_long,
   neon_sat_mla_long, neon_shift_acc, neon_shift_imm_basic,
   neon_shift_imm_complex,
   neon_shift_reg_basic, neon_shift_reg_basic_q, neon_shift_reg_complex,
   neon_shift_reg_complex_q, neon_fp_negabs, neon_fp_arith,
   neon_fp_arith_q, neon_fp_reductions_q, neon_fp_cvt_int,
   neon_fp_cvt_int_q, neon_fp_cvt16, neon_fp_minmax, neon_fp_mul,
   neon_fp_mul_q, neon_fp_mla, neon_fp_mla_q, neon_fp_recpe_rsqrte,
   neon_fp_recpe_rsqrte_q, neon_fp_recps_rsqrts, neon_fp_recps_rsqrts_q,
   neon_bitops, neon_bitops_q, neon_from_gp,
   neon_from_gp_q, neon_move, neon_tbl3_tbl4, neon_zip_q, neon_to_gp,
   neon_load_a, neon_load_b, neon_load_c, neon_load_d, neon_load_e,
   neon_load_f, neon_store_a, neon_store_b, neon_store_complex,
   unknown"
  (cond [
          ;; Integer arithmetic.
          (eq_attr "type" "neon_abd, neon_abd_long")
            (const_string "neon_abd")
          (eq_attr "type" "neon_abd_q")
            (const_string "neon_abd_q")
          (eq_attr "type" "neon_arith_acc, neon_reduc_add_acc,\
                           neon_reduc_add_acc_q")
            (const_string "neon_arith_acc")
          (eq_attr "type" "neon_arith_acc_q")
            (const_string "neon_arith_acc_q")
          (eq_attr "type" "neon_add, neon_add_q, neon_add_long,\
                           neon_add_widen, neon_neg, neon_neg_q,\
                           neon_reduc_add, neon_reduc_add_q,\
                           neon_reduc_add_long, neon_sub, neon_sub_q,\
                           neon_sub_long, neon_sub_widen, neon_logic,\
                           neon_logic_q, neon_tst, neon_tst_q")
            (const_string "neon_arith_basic")
          (eq_attr "type" "neon_abs, neon_abs_q, neon_add_halve_narrow_q,\
                           neon_add_halve, neon_add_halve_q,\
                           neon_sub_halve, neon_sub_halve_q, neon_qabs,\
                           neon_qabs_q, neon_qadd, neon_qadd_q, neon_qneg,\
                           neon_qneg_q, neon_qsub, neon_qsub_q,\
                           neon_sub_halve_narrow_q,\
                           neon_compare, neon_compare_q,\
                           neon_compare_zero, neon_compare_zero_q,\
                           neon_minmax, neon_minmax_q, neon_reduc_minmax,\
                           neon_reduc_minmax_q")
            (const_string "neon_arith_complex")

          ;; Integer multiply and multiply-accumulate.
          (eq_attr "type" "neon_mul_b, neon_mul_h, neon_mul_s,\
                           neon_mul_h_scalar, neon_mul_s_scalar,\
                           neon_sat_mul_b, neon_sat_mul_h,\
                           neon_sat_mul_s, neon_sat_mul_h_scalar,\
                           neon_sat_mul_s_scalar,\
                           neon_mul_b_long, neon_mul_h_long,\
                           neon_mul_s_long, neon_mul_d_long,\
                           neon_mul_h_scalar_long, neon_mul_s_scalar_long,\
                           neon_sat_mul_b_long, neon_sat_mul_h_long,\
                           neon_sat_mul_s_long, neon_sat_mul_h_scalar_long,\
                           neon_sat_mul_s_scalar_long")
            (const_string "neon_multiply")
          (eq_attr "type" "neon_mul_b_q, neon_mul_h_q, neon_mul_s_q,\
                           neon_mul_h_scalar_q, neon_mul_s_scalar_q,\
                           neon_sat_mul_b_q, neon_sat_mul_h_q,\
                           neon_sat_mul_s_q, neon_sat_mul_h_scalar_q,\
                           neon_sat_mul_s_scalar_q")
            (const_string "neon_multiply_q")
          (eq_attr "type" "neon_mla_b, neon_mla_h, neon_mla_s,\
                           neon_mla_h_scalar, neon_mla_s_scalar,\
                           neon_mla_b_long, neon_mla_h_long,\
                           neon_mla_s_long,\
                           neon_mla_h_scalar_long, neon_mla_s_scalar_long")
            (const_string "neon_mla")
          (eq_attr "type" "neon_mla_b_q, neon_mla_h_q, neon_mla_s_q,\
                           neon_mla_h_scalar_q, neon_mla_s_scalar_q")
            (const_string "neon_mla_q")
          (eq_attr "type" "neon_sat_mla_b_long, neon_sat_mla_h_long,\
                           neon_sat_mla_s_long, neon_sat_mla_h_scalar_long,\
                           neon_sat_mla_s_scalar_long")
            (const_string "neon_sat_mla_long")

          ;; Integer shifts.
          (eq_attr "type" "neon_shift_acc, neon_shift_acc_q")
            (const_string "neon_shift_acc")
          (eq_attr "type" "neon_shift_imm, neon_shift_imm_q,\
                           neon_shift_imm_narrow_q, neon_shift_imm_long")
            (const_string "neon_shift_imm_basic")
          (eq_attr "type" "neon_sat_shift_imm, neon_sat_shift_imm_q,\
                           neon_sat_shift_imm_narrow_q")
            (const_string "neon_shift_imm_complex")
          (eq_attr "type" "neon_shift_reg")
            (const_string "neon_shift_reg_basic")
          (eq_attr "type" "neon_shift_reg_q")
            (const_string "neon_shift_reg_basic_q")
          (eq_attr "type" "neon_sat_shift_reg")
            (const_string "neon_shift_reg_complex")
          (eq_attr "type" "neon_sat_shift_reg_q")
            (const_string "neon_shift_reg_complex_q")

          ;; Floating point.
          (eq_attr "type" "neon_fp_neg_s, neon_fp_neg_s_q,\
                           neon_fp_abs_s, neon_fp_abs_s_q,\
                           neon_fp_neg_d, neon_fp_neg_d_q,\
                           neon_fp_abs_d, neon_fp_abs_d_q")
            (const_string "neon_fp_negabs")
          (eq_attr "type" "neon_fp_addsub_s, neon_fp_abd_s,\
                           neon_fp_reduc_add_s, neon_fp_compare_s,\
                           neon_fp_minmax_s, neon_fp_round_s,\
                           neon_fp_addsub_d, neon_fp_abd_d,\
                           neon_fp_reduc_add_d, neon_fp_compare_d,\
                           neon_fp_minmax_d, neon_fp_round_d,\
                           neon_fp_reduc_minmax_s, neon_fp_reduc_minmax_d")
            (const_string "neon_fp_arith")
          (eq_attr "type" "neon_fp_addsub_s_q, neon_fp_abd_s_q,\
                           neon_fp_reduc_add_s_q, neon_fp_compare_s_q,\
                           neon_fp_minmax_s_q, neon_fp_round_s_q,\
                           neon_fp_addsub_d_q, neon_fp_abd_d_q,\
                           neon_fp_reduc_add_d_q, neon_fp_compare_d_q,\
                           neon_fp_minmax_d_q, neon_fp_round_d_q")
            (const_string "neon_fp_arith_q")
          ;; neon_fp_reduc_add_s_q and neon_fp_reduc_add_d_q are not
          ;; listed here: the neon_fp_arith_q clause above already claims
          ;; them, and the first true test in a cond wins, so an entry
          ;; here could never match.
          ;; NOTE(review): if the neon_fp_reductions_q timing is the one
          ;; wanted for those two types, move them out of the
          ;; neon_fp_arith_q list and into this one.
          (eq_attr "type" "neon_fp_reduc_minmax_s_q,\
                           neon_fp_reduc_minmax_d_q")
            (const_string "neon_fp_reductions_q")
          (eq_attr "type" "neon_fp_to_int_s, neon_int_to_fp_s,\
                           neon_fp_to_int_d, neon_int_to_fp_d")
            (const_string "neon_fp_cvt_int")
          (eq_attr "type" "neon_fp_to_int_s_q, neon_int_to_fp_s_q,\
                           neon_fp_to_int_d_q, neon_int_to_fp_d_q")
            (const_string "neon_fp_cvt_int_q")
          (eq_attr "type" "neon_fp_cvt_narrow_s_q, neon_fp_cvt_widen_h")
            (const_string "neon_fp_cvt16")
          (eq_attr "type" "neon_fp_mul_s, neon_fp_mul_s_scalar,\
                           neon_fp_mul_d")
            (const_string "neon_fp_mul")
          (eq_attr "type" "neon_fp_mul_s_q, neon_fp_mul_s_scalar_q,\
                           neon_fp_mul_d_q, neon_fp_mul_d_scalar_q")
            (const_string "neon_fp_mul_q")
          (eq_attr "type" "neon_fp_mla_s, neon_fp_mla_s_scalar,\
                           neon_fp_mla_d")
            (const_string "neon_fp_mla")
          (eq_attr "type" "neon_fp_mla_s_q, neon_fp_mla_s_scalar_q,\
                           neon_fp_mla_d_q, neon_fp_mla_d_scalar_q")
            (const_string "neon_fp_mla_q")
          (eq_attr "type" "neon_fp_recpe_s, neon_fp_rsqrte_s,\
                           neon_fp_recpx_s,\
                           neon_fp_recpe_d, neon_fp_rsqrte_d,\
                           neon_fp_recpx_d")
            (const_string "neon_fp_recpe_rsqrte")
          (eq_attr "type" "neon_fp_recpe_s_q, neon_fp_rsqrte_s_q,\
                           neon_fp_recpx_s_q,\
                           neon_fp_recpe_d_q, neon_fp_rsqrte_d_q,\
                           neon_fp_recpx_d_q")
            (const_string "neon_fp_recpe_rsqrte_q")
          (eq_attr "type" "neon_fp_recps_s, neon_fp_rsqrts_s,\
                           neon_fp_recps_d, neon_fp_rsqrts_d")
            (const_string "neon_fp_recps_rsqrts")
          (eq_attr "type" "neon_fp_recps_s_q, neon_fp_rsqrts_s_q,\
                           neon_fp_recps_d_q, neon_fp_rsqrts_d_q")
            (const_string "neon_fp_recps_rsqrts_q")

          ;; Bit operations, permutes and register transfers.
          (eq_attr "type" "neon_bsl, neon_cls, neon_cnt,\
                           neon_rev, neon_permute, neon_rbit,\
                           neon_tbl1, neon_tbl2, neon_zip,\
                           neon_dup, neon_dup_q, neon_ext, neon_ext_q,\
                           neon_move, neon_move_q, neon_move_narrow_q")
            (const_string "neon_bitops")
          (eq_attr "type" "neon_bsl_q, neon_cls_q, neon_cnt_q,\
                           neon_rev_q, neon_permute_q, neon_rbit_q")
            (const_string "neon_bitops_q")
          (eq_attr "type" "neon_from_gp,f_mcr,f_mcrr")
            (const_string "neon_from_gp")
          (eq_attr "type" "neon_from_gp_q")
            (const_string "neon_from_gp_q")
          (eq_attr "type" "neon_tbl3, neon_tbl4")
            (const_string "neon_tbl3_tbl4")
          (eq_attr "type" "neon_zip_q")
            (const_string "neon_zip_q")
          (eq_attr "type" "neon_to_gp, neon_to_gp_q,f_mrc,f_mrrc")
            (const_string "neon_to_gp")

          ;; Loads.
          (eq_attr "type" "f_loads, f_loadd,\
                           neon_load1_1reg, neon_load1_1reg_q,\
                           neon_load1_2reg, neon_load1_2reg_q")
            (const_string "neon_load_a")
          (eq_attr "type" "neon_load1_3reg, neon_load1_3reg_q,\
                           neon_load1_4reg, neon_load1_4reg_q")
            (const_string "neon_load_b")
          (eq_attr "type" "neon_load1_one_lane, neon_load1_one_lane_q,\
                           neon_load1_all_lanes, neon_load1_all_lanes_q,\
                           neon_load2_2reg, neon_load2_2reg_q,\
                           neon_load2_all_lanes, neon_load2_all_lanes_q")
            (const_string "neon_load_c")
          (eq_attr "type" "neon_load2_4reg, neon_load2_4reg_q,\
                           neon_load3_3reg, neon_load3_3reg_q,\
                           neon_load3_one_lane, neon_load3_one_lane_q,\
                           neon_load4_4reg, neon_load4_4reg_q")
            (const_string "neon_load_d")
          (eq_attr "type" "neon_load2_one_lane, neon_load2_one_lane_q,\
                           neon_load3_all_lanes, neon_load3_all_lanes_q,\
                           neon_load4_all_lanes, neon_load4_all_lanes_q")
            (const_string "neon_load_e")
          (eq_attr "type" "neon_load4_one_lane, neon_load4_one_lane_q")
            (const_string "neon_load_f")

          ;; Stores.
          ;; NOTE(review): neon_store1_2reg_q is the only "_q" store
          ;; variant not named in any of the three store classes below;
          ;; if that type exists it falls through to "unknown" -- confirm
          ;; against the list of "type" values.
          (eq_attr "type" "f_stores, f_stored,\
                           neon_store1_1reg")
            (const_string "neon_store_a")
          (eq_attr "type" "neon_store1_2reg, neon_store1_1reg_q")
            (const_string "neon_store_b")
          (eq_attr "type" "neon_store1_3reg, neon_store1_3reg_q,\
                           neon_store3_3reg, neon_store3_3reg_q,\
                           neon_store2_4reg, neon_store2_4reg_q,\
                           neon_store4_4reg, neon_store4_4reg_q,\
                           neon_store2_2reg, neon_store2_2reg_q,\
                           neon_store3_one_lane, neon_store3_one_lane_q,\
                           neon_store4_one_lane, neon_store4_one_lane_q,\
                           neon_store1_4reg, neon_store1_4reg_q,\
                           neon_store1_one_lane, neon_store1_one_lane_q,\
                           neon_store2_one_lane, neon_store2_one_lane_q")
            (const_string "neon_store_complex")]
          (const_string "unknown")))

;; The Cortex-A57 core is modelled as a triple issue pipeline that has
;; the following functional units.
;; 1. Two pipelines for integer operations: SX1, SX2

(define_cpu_unit "ca57_sx1_issue" "cortex_a57")
(define_reservation "ca57_sx1" "ca57_sx1_issue")

(define_cpu_unit "ca57_sx2_issue" "cortex_a57")
(define_reservation "ca57_sx2" "ca57_sx2_issue")

;; 2. One pipeline for complex integer operations: MX

(define_cpu_unit "ca57_mx_issue"
                 "cortex_a57")
(define_reservation "ca57_mx" "ca57_mx_issue")
(define_reservation "ca57_mx_block" "ca57_mx_issue")

;; 3. Two asymmetric pipelines for Neon and FP operations: CX1, CX2
;;    These units live in their own automaton, "cortex_a57_cx".
(define_automaton "cortex_a57_cx")

(define_cpu_unit "ca57_cx1_issue"
                 "cortex_a57_cx")
(define_cpu_unit "ca57_cx2_issue"
                 "cortex_a57_cx")

(define_reservation "ca57_cx1" "ca57_cx1_issue")

(define_reservation "ca57_cx2" "ca57_cx2_issue")
;; Holds the CX2 issue unit for two consecutive cycles.
(define_reservation "ca57_cx2_block" "ca57_cx2_issue*2")

;; 4. One pipeline for branch operations: BX

(define_cpu_unit "ca57_bx_issue" "cortex_a57")
(define_reservation "ca57_bx" "ca57_bx_issue")

;; 5. Two pipelines for load and store operations: LS1, LS2.  The most
;;    valuable thing we can do is force a structural hazard to split
;;    up loads/stores.

(define_cpu_unit "ca57_ls_issue" "cortex_a57")
(define_cpu_unit "ca57_ldr, ca57_str" "cortex_a57")
(define_reservation "ca57_load_model" "ca57_ls_issue,ca57_ldr*2")
(define_reservation "ca57_store_model" "ca57_ls_issue,ca57_str")

;; Block all issue queues.
;; ca57_block claims the CX1, CX2, MX, SX1, SX2 and LS issue units in
;; the same cycle.  The branch unit (ca57_bx_issue) is not part of it.
(define_reservation "ca57_block" "ca57_cx1_issue + ca57_cx2_issue
                                  + ca57_mx_issue + ca57_sx1_issue
                                  + ca57_sx2_issue + ca57_ls_issue")

;; Simple Execution Unit:
;;
;; Simple ALU without shift
;; Latency 2; may issue on either SX1 or SX2.
(define_insn_reservation "cortex_a57_alu" 2
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "alu_imm,alus_imm,logic_imm,logics_imm,\
                        alu_sreg,alus_sreg,logic_reg,logics_reg,\
                        adc_imm,adcs_imm,adc_reg,adcs_reg,\
                        adr,bfm,clz,rbit,rev,alu_dsp_reg,\
                        shift_imm,shift_reg,\
                        mov_imm,mov_reg,\
                        mvn_imm,mvn_reg,\
                        mrs,multiple,no_insn"))
  "ca57_sx1|ca57_sx2")

;; ALU ops with immediate shift
;; Latency 3; issued on MX.
(define_insn_reservation "cortex_a57_alu_shift" 3
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "extend,\
                        alu_shift_imm,alus_shift_imm,\
                        crc,logic_shift_imm,logics_shift_imm,\
                        mov_shift,mvn_shift"))
  "ca57_mx")

;; Multi-Cycle Execution Unit:
;;
;; ALU ops with register controlled shift
;; Latency 3; issued on MX.
(define_insn_reservation "cortex_a57_alu_shift_reg" 3
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "alu_shift_reg,alus_shift_reg,\
                        logic_shift_reg,logics_shift_reg,\
                        mov_shift_reg,mvn_shift_reg"))
  "ca57_mx")

;; All multiplies
;; Matches any insn whose "mul32" or "mul64" attribute is "yes".
;; TODO: AArch32 and AArch64 have different behaviour
(define_insn_reservation "cortex_a57_mult32" 3
  (and (eq_attr "tune" "cortexa57")
       (ior (eq_attr "mul32" "yes")
            (eq_attr "mul64" "yes")))
  "ca57_mx")

;; Integer divide
;; Latency 10; takes the MX issue unit, then ca57_mx_block three times.
(define_insn_reservation "cortex_a57_div" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "udiv,sdiv"))
  "ca57_mx_issue,ca57_mx_block*3")

;; Block all issue pipes for a cycle
(define_insn_reservation "cortex_a57_block" 1
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "block"))
  "ca57_block")

;; Branch execution Unit
;;
;; Branches take one issue slot.
;; No latency as there is no result
(define_insn_reservation "cortex_a57_branch" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "branch"))
  "ca57_bx")

;; Load-store execution Unit
;;
;; Loads of up to two words.
;; Latency 5, using the shared load model.
(define_insn_reservation "cortex_a57_load1" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "load_byte,load1,load2"))
  "ca57_load_model")

;; Loads of three or four words.
;; Same latency, but the LS issue unit is held for two extra cycles
;; before the load model starts.
(define_insn_reservation "cortex_a57_load3" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "load3,load4"))
  "ca57_ls_issue*2,ca57_load_model")

;; Stores of up to two words.
;; Latency 0, using the shared store model.
(define_insn_reservation "cortex_a57_store1" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "store1,store2"))
  "ca57_store_model")

;; Stores of three or four words.
;; As above, with the LS issue unit held for two extra cycles first.
(define_insn_reservation "cortex_a57_store3" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "store3,store4"))
  "ca57_ls_issue*2,ca57_store_model")

;; Advanced SIMD Unit - Integer Arithmetic Instructions.
;; Absolute difference: latency 5 on either CX pipe.
(define_insn_reservation "cortex_a57_neon_abd" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_abd"))
  "ca57_cx1|ca57_cx2")

;; Q-form absolute difference: same latency, both CX pipes at once.
(define_insn_reservation "cortex_a57_neon_abd_q" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_abd_q"))
  "ca57_cx1+ca57_cx2")

;; Accumulating arithmetic (class neon_arith_acc): latency 7, CX2 only.
(define_insn_reservation "cortex_a57_neon_aba" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_arith_acc"))
  "ca57_cx2")

;; Q-form accumulating arithmetic: latency 8, CX2 over two cycles.
(define_insn_reservation "cortex_a57_neon_aba_q" 8
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_arith_acc_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

;; Basic arithmetic class: latency 4 on either CX pipe.
(define_insn_reservation "cortex_a57_neon_arith_basic" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_arith_basic"))
  "ca57_cx1|ca57_cx2")

;; Complex arithmetic class: latency 5 on either CX pipe.
(define_insn_reservation "cortex_a57_neon_arith_complex" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_arith_complex"))
  "ca57_cx1|ca57_cx2")

;; Integer Multiply Instructions.
;; All integer multiply classes below are reserved on CX1 only; the
;; Q-form classes take CX1 over two cycles.

(define_insn_reservation "cortex_a57_neon_multiply" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_multiply"))
  "ca57_cx1")

(define_insn_reservation "cortex_a57_neon_multiply_q" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_multiply_q"))
  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")

(define_insn_reservation "cortex_a57_neon_mla" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_mla"))
  "ca57_cx1")

(define_insn_reservation "cortex_a57_neon_mla_q" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_mla_q"))
  "ca57_cx1+(ca57_cx1_issue,ca57_cx1)")

(define_insn_reservation "cortex_a57_neon_sat_mla_long" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_sat_mla_long"))
  "ca57_cx1")

;; Integer Shift Instructions.
;; Every shift class is reserved on CX2 only; the Q-form register
;; shifts take CX2 over two cycles.

(define_insn_reservation "cortex_a57_neon_shift_acc" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_acc"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_imm_basic" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_basic"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_imm_complex" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_imm_complex"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_basic" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_basic_q" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_basic_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_shift_reg_complex" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_neon_shift_reg_complex_q" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_shift_reg_complex_q"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

;; Floating Point Instructions.
;; In most of the pairs below, the plain class may use either CX pipe
;; ("|") while its "_q" counterpart reserves both together ("+").

(define_insn_reservation "cortex_a57_neon_fp_negabs" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_negabs"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_arith" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_arith"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_arith_q" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_arith_q"))
  "(ca57_cx1+ca57_cx2)")

;; Both pipes in the first cycle, then either pipe in the next.
(define_insn_reservation "cortex_a57_neon_fp_reductions_q" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_reductions_q"))
  "(ca57_cx1+ca57_cx2),(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_cvt_int" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_cvt_int_q" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt_int_q"))
  "(ca57_cx1+ca57_cx2)")

;; Both CX issue units in the first cycle, then either pipe.
(define_insn_reservation "cortex_a57_neon_fp_cvt16" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_cvt16"))
  "(ca57_cx1_issue+ca57_cx2_issue),(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_mul" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_mul"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_mul_q" 5
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_mul_q"))
  "(ca57_cx1+ca57_cx2)")

;; Two consecutive cycles on whichever CX pipe is chosen.
(define_insn_reservation "cortex_a57_neon_fp_mla" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_mla"))
  "(ca57_cx1,ca57_cx1)|(ca57_cx2,ca57_cx2)")

;; Both pipes first, then CX1, then CX2 on successive cycles.
(define_insn_reservation "cortex_a57_neon_fp_mla_q" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_mla_q"))
  "(ca57_cx1+ca57_cx2),(ca57_cx1,ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recpe_rsqrte" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recpe_rsqrte_q" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_recpe_rsqrte_q"))
  "(ca57_cx1+ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recps_rsqrts" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_neon_fp_recps_rsqrts_q" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_fp_recps_rsqrts_q"))
  "(ca57_cx1+ca57_cx2)")

;; Miscellaneous Instructions.
;; Bit operations: latency 4, either CX pipe.
(define_insn_reservation "cortex_a57_neon_bitops" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_bitops"))
  "(ca57_cx1|ca57_cx2)")

;; Q-form bit operations: latency 4, both CX pipes together.
(define_insn_reservation "cortex_a57_neon_bitops_q" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_bitops_q"))
  "(ca57_cx1+ca57_cx2)")

;; Transfers into the vector register file reserve the LS issue unit
;; together with a CX issue unit, then that CX pipe.
(define_insn_reservation "cortex_a57_neon_from_gp" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_from_gp"))
  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
               |(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")

;; Q-form variant: both of the above sequences are reserved at once.
(define_insn_reservation "cortex_a57_neon_from_gp_q" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_from_gp_q"))
  "(ca57_ls_issue+ca57_cx1_issue,ca57_cx1)
               +(ca57_ls_issue+ca57_cx2_issue,ca57_cx2)")

;; Three- and four-register table lookups occupy both CX pipes.
(define_insn_reservation "cortex_a57_neon_tbl3_tbl4" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_tbl3_tbl4"))
  "(ca57_cx1_issue,ca57_cx1)
               +(ca57_cx2_issue,ca57_cx2)")

;; Q-form zip uses the same reservation as the table lookups above.
(define_insn_reservation "cortex_a57_neon_zip_q" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_zip_q"))
  "(ca57_cx1_issue,ca57_cx1)
               +(ca57_cx2_issue,ca57_cx2)")

;; Transfers to general-purpose registers reserve the LS issue unit
;; together with an SX issue unit, then that SX pipe.
(define_insn_reservation "cortex_a57_neon_to_gp" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_to_gp"))
  "((ca57_ls_issue+ca57_sx1_issue),ca57_sx1)
               |((ca57_ls_issue+ca57_sx2_issue),ca57_sx2)")

;; Load Instructions.
;; Load classes a-f correspond to the neon_load_a .. neon_load_f values
;; of the cortex_a57_neon_type attribute.

;; Class a: latency 6, plain load model.
(define_insn_reservation "cortex_a57_neon_load_a" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_a"))
  "ca57_load_model")

;; Class b: latency 7, LS issue unit held for two cycles.
(define_insn_reservation "cortex_a57_neon_load_b" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_b"))
  "ca57_ls_issue,ca57_ls_issue+ca57_ldr,ca57_ldr*2")

;; Class c: latency 9, load model plus one CX pipe.
(define_insn_reservation "cortex_a57_neon_load_c" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_c"))
  "ca57_load_model+(ca57_cx1|ca57_cx2)")

;; Class d: latency 11, both CX issue units first, then the LS side.
(define_insn_reservation "cortex_a57_neon_load_d" 11
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_d"))
  "ca57_cx1_issue+ca57_cx2_issue,
               ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")

;; Class e: same timing and reservation as class c.
(define_insn_reservation "cortex_a57_neon_load_e" 9
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_e"))
  "ca57_load_model+(ca57_cx1|ca57_cx2)")

;; Class f: same timing and reservation as class d.
(define_insn_reservation "cortex_a57_neon_load_f" 11
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_load_f"))
  "ca57_cx1_issue+ca57_cx2_issue,
               ca57_ls_issue+ca57_ls_issue,ca57_ldr*2")

;; Store Instructions.

;; Classes a and b both use the plain store model with latency 0.
(define_insn_reservation "cortex_a57_neon_store_a" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_store_a"))
  "ca57_store_model")

(define_insn_reservation "cortex_a57_neon_store_b" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_store_b"))
  "ca57_store_model")

;; These block issue for a number of cycles proportional to the number
;; of 64-bit chunks they will store, we don't attempt to model that
;; precisely, treat them as blocking execution for two cycles when
;; issued.
(define_insn_reservation "cortex_a57_neon_store_complex" 0
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "cortex_a57_neon_type" "neon_store_complex"))
  "ca57_block*2")

;; Floating-Point Operations.

(define_insn_reservation "cortex_a57_fp_const" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fconsts,fconstd"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_fp_add_sub" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fadds,faddd"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_fp_mul" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmuls,fmuld"))
  "(ca57_cx1|ca57_cx2)")

;; Multiply-accumulate uses the chosen CX pipe in the first and fourth
;; cycles and nothing in between.
(define_insn_reservation "cortex_a57_fp_mac" 10
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
  "(ca57_cx1,nothing,nothing,ca57_cx1) \
   |(ca57_cx2,nothing,nothing,ca57_cx2)")

(define_insn_reservation "cortex_a57_fp_cvt" 6
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "(ca57_cx1|ca57_cx2)")

;; Compares are reserved on CX2 only.
(define_insn_reservation "cortex_a57_fp_cmp" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fcmps,fcmpd"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_fp_arith" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "ffariths,ffarithd"))
  "(ca57_cx1|ca57_cx2)")

(define_insn_reservation "cortex_a57_fp_cpys" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fmov"))
  "(ca57_cx1|ca57_cx2)")

;; Divide and square root are reserved as repeats of ca57_cx2_block.

;; Single precision, scalar and non-Q NEON.
(define_insn_reservation "cortex_a57_fp_divs" 12
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fdivs, fsqrts,\
                        neon_fp_div_s, neon_fp_sqrt_s"))
  "ca57_cx2_block*5")

;; Double precision, scalar and non-Q NEON.
(define_insn_reservation "cortex_a57_fp_divd" 16
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "fdivd, fsqrtd, neon_fp_div_d, neon_fp_sqrt_d"))
  "ca57_cx2_block*3")

;; Q-form NEON divide and square root.  The scalar types fdivd and
;; fsqrtd are deliberately not listed: cortex_a57_fp_divd above already
;; matches them, and an insn takes the first reservation whose
;; condition it satisfies, so naming them here had no effect.
(define_insn_reservation "cortex_a57_neon_fp_div_q" 20
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "neon_fp_div_s_q, neon_fp_div_d_q,\
                        neon_fp_sqrt_s_q, neon_fp_sqrt_d_q"))
  "ca57_cx2_block*3")

;; Crypto operations.

(define_insn_reservation "cortex_a57_crypto_simple" 4
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_aese,crypto_aesmc,crypto_sha1_fast,crypto_sha256_fast"))
  "ca57_cx2")

(define_insn_reservation "cortex_a57_crypto_complex" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow"))
  "ca57_cx2+(ca57_cx2_issue,ca57_cx2)")

(define_insn_reservation "cortex_a57_crypto_xor" 7
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "crypto_sha1_xor"))
  "(ca57_cx1+ca57_cx2)")

;; We lie with calls.  They take up all issue slots, but are otherwise
;; not harmful.
(define_insn_reservation "cortex_a57_call" 1
  (and (eq_attr "tune" "cortexa57")
       (eq_attr "type" "call"))
  "ca57_sx1_issue+ca57_sx2_issue+ca57_cx1_issue+ca57_cx2_issue\
    +ca57_mx_issue+ca57_bx_issue+ca57_ls_issue"
)

;; Simple execution unit bypasses
;; Results of the ALU reservations reach other ALU reservations and
;; the integer loads one cycle sooner than their default latency.
(define_bypass 1 "cortex_a57_alu"
                 "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 2 "cortex_a57_alu_shift"
                 "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 2 "cortex_a57_alu_shift_reg"
                 "cortex_a57_alu,cortex_a57_alu_shift,cortex_a57_alu_shift_reg")
(define_bypass 1 "cortex_a57_alu" "cortex_a57_load1,cortex_a57_load3")
(define_bypass 2 "cortex_a57_alu_shift" "cortex_a57_load1,cortex_a57_load3")
(define_bypass 2 "cortex_a57_alu_shift_reg"
                 "cortex_a57_load1,cortex_a57_load3")

;; An MLA or a MUL can feed a dependent MLA.
(define_bypass 5 "cortex_a57_neon_*mla*,cortex_a57_neon_*mul*"
                 "cortex_a57_neon_*mla*")

;; Likewise for scalar FP: a multiply or multiply-accumulate feeding a
;; multiply-accumulate sees a latency of 5.
(define_bypass 5 "cortex_a57_fp_mul,cortex_a57_fp_mac"
                 "cortex_a57_fp_mac")

;; We don't need to care about control hazards, either the branch is
;; predicted in which case we pay no penalty, or the branch is
;; mispredicted in which case instruction scheduling will be unlikely to
;; help.
(define_bypass 1 "cortex_a57_*"
                 "cortex_a57_call,cortex_a57_branch")