;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Standard vector move expander for all AdvSIMD modes (including FP16
;; vectors when moves are available).
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
	(match_operand:VALL_F16MOV 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)

;; Misaligned vector move; only enabled when the target permits unaligned
;; accesses (!STRICT_ALIGNMENT).
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
	(match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

;; Duplicate an integer scalar into all lanes: either lane 0 of a SIMD
;; register, or a general-purpose register (second, discouraged alternative).
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

;; Floating-point variant: the scalar source always lives in a SIMD register.
(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

;; Broadcast one selected lane of a vector into every lane of the result.
;; The lane number is adjusted for big-endian lane numbering at output time.
(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; As above, but the source vector has the opposite (swapped) width from the
;; destination, e.g. duplicating a lane of a 128-bit vector into a 64-bit one.
(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

;; 64-bit vector moves: load, store of zero (xzr), store, SIMD-to-SIMD move,
;; SIMD<->general-register transfers and vector immediates.
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VDMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)

;; 128-bit vector moves.  GP-register alternatives (4-6) are emitted as "#"
;; and split into word-sized moves after reload; storing zero uses STP of
;; two xzr registers, hence the Umn (pair) memory constraint.
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.
(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Load a pair of 64-bit vector registers with one LDP.  The second address
;; must equal the first plus the size of one register.
(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

;; Store a pair of 64-bit vector registers with one STP (mirror of the above).
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

;; Load a pair of 128-bit vector registers with one LDP.
(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
	(match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

;; Store a pair of 128-bit vector registers with one STP.
(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
	(match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
			       plus_constant (Pmode,
					      XEXP (operands[0], 0),
					      GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


;; After reload, split a 128-bit vector move between two general-purpose
;; registers into two DImode register moves.
(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
	(match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

;; After reload, split a 128-bit vector move that crosses the FP/GP register
;; file boundary (either direction) into half-width transfers.
(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
	(match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

;; Expand a 128-bit vector move as two half-vector operations: GP source is
;; inserted as low/high quad moves, otherwise the halves are extracted with
;; vec_selects over the low/high lane sets.
(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
	(match_operand:VQMOV 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
	rtx src_low_part = gen_lowpart (<VHALF>mode, src);
	rtx src_high_part = gen_highpart (<VHALF>mode, src);

	emit_insn
	  (gen_move_lo_quad_<mode> (dst, src_low_part));
	emit_insn
	  (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
	rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
	rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
	emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
	emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
      }
    DONE;
  }
)

;; Extract one half of a 128-bit vector; operand 2 selects which half via an
;; ascending parallel of lane indices.
(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
	(vec_select:<VHALF>
	  (match_operand:VQMOV 1 "register_operand")
	  (match_operand 2 "ascending_int_parallel")))]
  "TARGET_SIMD"
)

;; Move the low half of a 128-bit vector to a 64-bit register: a plain
;; register move (split after reload) when the destination is a SIMD
;; register, or UMOV to a general register.
(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
	(vec_select:<VHALF>
	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
	  (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

;; Move the high half of a 128-bit vector to a 64-bit register: DUP of
;; d[1] for SIMD destinations, UMOV for general-register destinations.
(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
	(vec_select:<VHALF>
	  (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
	  (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD"
  "@
   dup\\t%d0, %1.d[1]
   umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
   (set_attr "length" "4")]
)

;; OR with complement: ORN operand order is (second, inverted-first) in the
;; assembly template.
(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; AND with complement (bit clear).
(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector integer addition.
(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

;; Vector integer subtraction.
(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

;; Vector integer multiply (byte/half/single element sizes only).
(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; Byte-swap each element using the appropriate REV instruction.
(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
	(bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Reverse the bits within each byte element.
(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

;; Count trailing zeros: byte-swap, bit-reverse (viewing the vector as
;; bytes), then count leading zeros of the result.
(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
	(ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
					     <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

;; op0 = op1 with the sign flipped wherever op2 is negative: isolate op2's
;; sign bits with an AND mask and XOR them into op1.
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{

  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

;; The fcadd and fcmla patterns are made UNSPEC because their usage needs to
;; guarantee that the source vectors are contiguous.  It would be wrong to
;; describe the operation without being able to describe the permute that is
;; also required, but even if that is done the permute would have been created
;; as a LOAD_LANES which means the values in the registers are in the wrong
;; order.
;; Complex add with rotation (Armv8.3-A FCADD).
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

;; Complex multiply-accumulate with rotation; the accumulator (operand 1)
;; is tied to the destination.
(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")]
				   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


;; FCMLA by indexed complex element; lane numbers count complex (paired)
;; elements, hence the <VHALF>mode lane adjustment.
(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")
				   (match_operand:SI 4 "const_int_operand" "n")]
				   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; V4HF FCMLA with the index taken from a 128-bit (V8HF) register.
(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
				 (match_operand:V8HF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; Full-width FCMLA with the index taken from a 64-bit (half-width) register;
;; the lane count is halved again because lanes index complex pairs.
(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
				     (match_operand:<VHALF> 3 "register_operand" "w")
				     (match_operand:SI 4 "const_int_operand" "n")]
				     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot
;; (vector) Dot Product operation.
(define_insn "aarch64_usdot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
		      (match_operand:<VSI2QI> 3 "register_operand" "w")]
	  UNSPEC_USDOT)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
;; Dot product accumulating against a single indexed lane of a 64-bit
;; (V8QI) register; the lane is endian-adjusted at output time.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; As above, but the indexed operand is a 128-bit (V16QI) register.
(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
;; I8MM mixed-sign dot product against an indexed lane; lanes index groups
;; of four bytes, hence the nunits / 4 adjustment.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")
		      (match_operand:SI 4 "immediate_operand" "i")]
	  DOTPROD_I8MM)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
    int lane = INTVAL (operands[4]);
    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<VS:q>")]
)

;; copysign: select the sign bit of operand 2 and the remaining bits of
;; operand 1 with a single BSL over a sign-bit mask.
(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)

;; Multiply a vector by one broadcast lane of another vector of the same mode.
(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the opposite width.
(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

;; Multiply by a scalar broadcast from a register (always uses lane 0).
(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; Reciprocal square-root estimate.
(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

;; Reciprocal square-root Newton-Raphson step.
(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

;; Expand 1/sqrt(x) via the iterative approximation helper.
(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

;; Scalar DF multiply by an indexed lane of a V2DF register.
(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

;; Vector integer negation.
(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

;; Vector integer absolute value.
(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
;; Signed/unsigned absolute difference, represented as max - min (see the
;; comment above for why ABS (MINUS ...) would be wrong).
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI
	  (USMAX:VDQ_BHSI
	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
	  (<max_opp>:VDQ_BHSI
	    (match_dup 1)
	    (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute difference of the high halves (SABDL2/UABDL2).
(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			 (match_operand:VDQV_S 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

;; Widening absolute-difference-and-accumulate (SABAL/UABAL); the
;; accumulator (operand 3) is tied to the destination.
(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			 (match_operand:VDQV_S 2 "register_operand" "w")
			(match_operand:<VDBLW> 3 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Pairwise widening add-and-accumulate (SADALP/UADALP).
(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			(match_operand:<VDBLW> 2 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    if (TARGET_DOTPROD)
      {
	rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
	rtx abd = gen_reg_rtx (V16QImode);
	emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
	emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
					  abd, ones));
	DONE;
      }
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
					      operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
					     operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
					     operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

;; Signed absolute-difference-and-accumulate (SABA).
(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

;; Floating-point absolute difference (FABD).
(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector exclusive OR (EOR).
(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Vector bitwise NOT.
(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

;; Insert a scalar into one lane of a vector: from a SIMD register, a
;; general register, or directly from memory via LD1 (single structure).
;; Operand 2 arrives as a one-hot vec_merge mask; it is converted to a
;; lane number (endian-adjusted) before printing.
(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
	return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

;; Copy one lane of a vector into one lane of another vector of the same
;; mode (INS element form).
(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
(vec_duplicate:VALL_F16_NO_V2Q 997 (vec_select:<VEL> 998 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w") 999 (parallel 1000 [(match_operand:SI 4 "immediate_operand" "i")]))) 1001 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") 1002 (match_operand:SI 2 "immediate_operand" "i")))] 1003 "TARGET_SIMD" 1004 { 1005 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); 1006 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 1007 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, 1008 INTVAL (operands[4])); 1009 1010 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 1011 } 1012 [(set_attr "type" "neon_ins<q>")] 1013) 1014 1015(define_expand "signbit<mode>2" 1016 [(use (match_operand:<V_INT_EQUIV> 0 "register_operand")) 1017 (use (match_operand:VDQSF 1 "register_operand"))] 1018 "TARGET_SIMD" 1019{ 1020 int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1; 1021 rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 1022 shift_amount); 1023 operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); 1024 1025 emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1], 1026 shift_vector)); 1027 DONE; 1028}) 1029 1030(define_insn "aarch64_simd_lshr<mode>" 1031 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1032 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1033 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 1034 "TARGET_SIMD" 1035 "ushr\t%0.<Vtype>, %1.<Vtype>, %2" 1036 [(set_attr "type" "neon_shift_imm<q>")] 1037) 1038 1039(define_insn "aarch64_simd_ashr<mode>" 1040 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1041 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1042 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))] 1043 "TARGET_SIMD" 1044 "sshr\t%0.<Vtype>, %1.<Vtype>, %2" 1045 [(set_attr "type" "neon_shift_imm<q>")] 1046) 1047 1048(define_insn "*aarch64_simd_sra<mode>" 1049 [(set (match_operand:VDQ_I 0 
"register_operand" "=w") 1050 (plus:VDQ_I 1051 (SHIFTRT:VDQ_I 1052 (match_operand:VDQ_I 1 "register_operand" "w") 1053 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")) 1054 (match_operand:VDQ_I 3 "register_operand" "0")))] 1055 "TARGET_SIMD" 1056 "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2" 1057 [(set_attr "type" "neon_shift_acc<q>")] 1058) 1059 1060(define_insn "aarch64_simd_imm_shl<mode>" 1061 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1062 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1063 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))] 1064 "TARGET_SIMD" 1065 "shl\t%0.<Vtype>, %1.<Vtype>, %2" 1066 [(set_attr "type" "neon_shift_imm<q>")] 1067) 1068 1069(define_insn "aarch64_simd_reg_sshl<mode>" 1070 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1071 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w") 1072 (match_operand:VDQ_I 2 "register_operand" "w")))] 1073 "TARGET_SIMD" 1074 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1075 [(set_attr "type" "neon_shift_reg<q>")] 1076) 1077 1078(define_insn "aarch64_simd_reg_shl<mode>_unsigned" 1079 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1080 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 1081 (match_operand:VDQ_I 2 "register_operand" "w")] 1082 UNSPEC_ASHIFT_UNSIGNED))] 1083 "TARGET_SIMD" 1084 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1085 [(set_attr "type" "neon_shift_reg<q>")] 1086) 1087 1088(define_insn "aarch64_simd_reg_shl<mode>_signed" 1089 [(set (match_operand:VDQ_I 0 "register_operand" "=w") 1090 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w") 1091 (match_operand:VDQ_I 2 "register_operand" "w")] 1092 UNSPEC_ASHIFT_SIGNED))] 1093 "TARGET_SIMD" 1094 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1095 [(set_attr "type" "neon_shift_reg<q>")] 1096) 1097 1098(define_expand "ashl<mode>3" 1099 [(match_operand:VDQ_I 0 "register_operand") 1100 (match_operand:VDQ_I 1 "register_operand") 1101 (match_operand:SI 2 "general_operand")] 
1102 "TARGET_SIMD" 1103{ 1104 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1105 int shift_amount; 1106 1107 if (CONST_INT_P (operands[2])) 1108 { 1109 shift_amount = INTVAL (operands[2]); 1110 if (shift_amount >= 0 && shift_amount < bit_width) 1111 { 1112 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 1113 shift_amount); 1114 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0], 1115 operands[1], 1116 tmp)); 1117 DONE; 1118 } 1119 } 1120 1121 operands[2] = force_reg (SImode, operands[2]); 1122 1123 rtx tmp = gen_reg_rtx (<MODE>mode); 1124 emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode, 1125 operands[2], 1126 0))); 1127 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp)); 1128 DONE; 1129}) 1130 1131(define_expand "lshr<mode>3" 1132 [(match_operand:VDQ_I 0 "register_operand") 1133 (match_operand:VDQ_I 1 "register_operand") 1134 (match_operand:SI 2 "general_operand")] 1135 "TARGET_SIMD" 1136{ 1137 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1138 int shift_amount; 1139 1140 if (CONST_INT_P (operands[2])) 1141 { 1142 shift_amount = INTVAL (operands[2]); 1143 if (shift_amount > 0 && shift_amount <= bit_width) 1144 { 1145 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 1146 shift_amount); 1147 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0], 1148 operands[1], 1149 tmp)); 1150 DONE; 1151 } 1152 } 1153 1154 operands[2] = force_reg (SImode, operands[2]); 1155 1156 rtx tmp = gen_reg_rtx (SImode); 1157 rtx tmp1 = gen_reg_rtx (<MODE>mode); 1158 emit_insn (gen_negsi2 (tmp, operands[2])); 1159 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, 1160 convert_to_mode (<VEL>mode, tmp, 0))); 1161 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1], 1162 tmp1)); 1163 DONE; 1164}) 1165 1166(define_expand "ashr<mode>3" 1167 [(match_operand:VDQ_I 0 "register_operand") 1168 (match_operand:VDQ_I 1 "register_operand") 1169 (match_operand:SI 2 
"general_operand")] 1170 "TARGET_SIMD" 1171{ 1172 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT; 1173 int shift_amount; 1174 1175 if (CONST_INT_P (operands[2])) 1176 { 1177 shift_amount = INTVAL (operands[2]); 1178 if (shift_amount > 0 && shift_amount <= bit_width) 1179 { 1180 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode, 1181 shift_amount); 1182 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0], 1183 operands[1], 1184 tmp)); 1185 DONE; 1186 } 1187 } 1188 1189 operands[2] = force_reg (SImode, operands[2]); 1190 1191 rtx tmp = gen_reg_rtx (SImode); 1192 rtx tmp1 = gen_reg_rtx (<MODE>mode); 1193 emit_insn (gen_negsi2 (tmp, operands[2])); 1194 emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode, 1195 tmp, 0))); 1196 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1], 1197 tmp1)); 1198 DONE; 1199}) 1200 1201(define_expand "vashl<mode>3" 1202 [(match_operand:VDQ_I 0 "register_operand") 1203 (match_operand:VDQ_I 1 "register_operand") 1204 (match_operand:VDQ_I 2 "register_operand")] 1205 "TARGET_SIMD" 1206{ 1207 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], 1208 operands[2])); 1209 DONE; 1210}) 1211 1212;; Using mode VDQ_BHSI as there is no V2DImode neg! 1213;; Negating individual lanes most certainly offsets the 1214;; gain from vectorization. 
1215(define_expand "vashr<mode>3" 1216 [(match_operand:VDQ_BHSI 0 "register_operand") 1217 (match_operand:VDQ_BHSI 1 "register_operand") 1218 (match_operand:VDQ_BHSI 2 "register_operand")] 1219 "TARGET_SIMD" 1220{ 1221 rtx neg = gen_reg_rtx (<MODE>mode); 1222 emit (gen_neg<mode>2 (neg, operands[2])); 1223 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1], 1224 neg)); 1225 DONE; 1226}) 1227 1228;; DI vector shift 1229(define_expand "aarch64_ashr_simddi" 1230 [(match_operand:DI 0 "register_operand") 1231 (match_operand:DI 1 "register_operand") 1232 (match_operand:SI 2 "aarch64_shift_imm64_di")] 1233 "TARGET_SIMD" 1234 { 1235 /* An arithmetic shift right by 64 fills the result with copies of the sign 1236 bit, just like asr by 63 - however the standard pattern does not handle 1237 a shift by 64. */ 1238 if (INTVAL (operands[2]) == 64) 1239 operands[2] = GEN_INT (63); 1240 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2])); 1241 DONE; 1242 } 1243) 1244 1245(define_expand "vlshr<mode>3" 1246 [(match_operand:VDQ_BHSI 0 "register_operand") 1247 (match_operand:VDQ_BHSI 1 "register_operand") 1248 (match_operand:VDQ_BHSI 2 "register_operand")] 1249 "TARGET_SIMD" 1250{ 1251 rtx neg = gen_reg_rtx (<MODE>mode); 1252 emit (gen_neg<mode>2 (neg, operands[2])); 1253 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1], 1254 neg)); 1255 DONE; 1256}) 1257 1258(define_expand "aarch64_lshr_simddi" 1259 [(match_operand:DI 0 "register_operand") 1260 (match_operand:DI 1 "register_operand") 1261 (match_operand:SI 2 "aarch64_shift_imm64_di")] 1262 "TARGET_SIMD" 1263 { 1264 if (INTVAL (operands[2]) == 64) 1265 emit_move_insn (operands[0], const0_rtx); 1266 else 1267 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); 1268 DONE; 1269 } 1270) 1271 1272;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. 
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
	(unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

;; Standard vec_set pattern: build the one-hot lane mask expected by
;; aarch64_simd_vec_set<mode>.
(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    /* HOST_WIDE_INT_1 for consistency with the lane-insert patterns;
       identical value to the previous (HOST_WIDE_INT) 1 cast.  */
    HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					       GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (mult:VDQ_BHSI
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")
			 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
		       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLA with a duplicated lane as the multiplier.
(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))
	  (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))
	  (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
			(match_operand:<VEL> 1 "register_operand" "<h_con>"))
		      (match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
			(mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
				       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

;; MLS with a duplicated lane as the multiplier.
(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 4 "register_operand" "0")
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 4 "register_operand" "0")
	  (mult:VDQHS
	    (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		(parallel [(match_operand:SI 2 "immediate_operand")])))
	    (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
			(match_operand:<VEL> 2 "register_operand" "<h_con>"))
		      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
1420(define_insn "<su><maxmin><mode>3" 1421 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 1422 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w") 1423 (match_operand:VDQ_BHSI 2 "register_operand" "w")))] 1424 "TARGET_SIMD" 1425 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1426 [(set_attr "type" "neon_minmax<q>")] 1427) 1428 1429(define_expand "<su><maxmin>v2di3" 1430 [(set (match_operand:V2DI 0 "register_operand") 1431 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand") 1432 (match_operand:V2DI 2 "register_operand")))] 1433 "TARGET_SIMD" 1434{ 1435 enum rtx_code cmp_operator; 1436 rtx cmp_fmt; 1437 1438 switch (<CODE>) 1439 { 1440 case UMIN: 1441 cmp_operator = LTU; 1442 break; 1443 case SMIN: 1444 cmp_operator = LT; 1445 break; 1446 case UMAX: 1447 cmp_operator = GTU; 1448 break; 1449 case SMAX: 1450 cmp_operator = GT; 1451 break; 1452 default: 1453 gcc_unreachable (); 1454 } 1455 1456 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); 1457 emit_insn (gen_vcondv2div2di (operands[0], operands[1], 1458 operands[2], cmp_fmt, operands[1], operands[2])); 1459 DONE; 1460}) 1461 1462;; Pairwise Integer Max/Min operations. 1463(define_insn "aarch64_<maxmin_uns>p<mode>" 1464 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 1465 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") 1466 (match_operand:VDQ_BHSI 2 "register_operand" "w")] 1467 MAXMINV))] 1468 "TARGET_SIMD" 1469 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1470 [(set_attr "type" "neon_minmax<q>")] 1471) 1472 1473;; Pairwise FP Max/Min operations. 
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means, is that the RTL descriptions of the below patterns
;; need to change depending on endianness.

;; Move to the low architectural bits of the register.
;; On little-endian this is { operand, zeroes }
;; On big-endian this is { zeroes, operand }

(define_insn "move_lo_quad_internal_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

(define_insn "move_lo_quad_internal_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   dup\\t%d0, %1.d[0]
   fmov\\t%d0, %1
   dup\\t%d0, %1"
  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
   (set_attr "length" "4")
   (set_attr "arch" "simd,fp,simd")]
)

;; Dispatch on endianness to the pattern with the matching RTL layout.
(define_expand "move_lo_quad_<mode>"
  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx zs = CONST0_RTX (<VHALF>mode);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
  else
    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
  DONE;
}
)

;; Move operand1 to the high architectural bits of the register, keeping
;; the low architectural bits of operand2.
;; For little-endian this is { operand2, operand1 }
;; For big-endian this is { operand1, operand2 }

(define_insn "aarch64_simd_move_hi_quad_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
	(vec_concat:VQMOV
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
	(vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
	  (vec_select:<VHALF>
	    (match_dup 0)
	    (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   ins\\t%0.d[1], %1.d[0]
   ins\\t%0.d[1], %1"
  [(set_attr "type" "neon_ins")]
)

(define_expand "move_hi_quad_<mode>"
  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
							operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
						     operands[1], p));
  DONE;
})

;; Narrowing operations.
1587 1588;; For doubles. 1589(define_insn "aarch64_simd_vec_pack_trunc_<mode>" 1590 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w") 1591 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))] 1592 "TARGET_SIMD" 1593 "xtn\\t%0.<Vntype>, %1.<Vtype>" 1594 [(set_attr "type" "neon_shift_imm_narrow_q")] 1595) 1596 1597(define_expand "vec_pack_trunc_<mode>" 1598 [(match_operand:<VNARROWD> 0 "register_operand") 1599 (match_operand:VDN 1 "register_operand") 1600 (match_operand:VDN 2 "register_operand")] 1601 "TARGET_SIMD" 1602{ 1603 rtx tempreg = gen_reg_rtx (<VDBL>mode); 1604 int lo = BYTES_BIG_ENDIAN ? 2 : 1; 1605 int hi = BYTES_BIG_ENDIAN ? 1 : 2; 1606 1607 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); 1608 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); 1609 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg)); 1610 DONE; 1611}) 1612 1613;; For quads. 1614 1615(define_insn "vec_pack_trunc_<mode>" 1616 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w") 1617 (vec_concat:<VNARROWQ2> 1618 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")) 1619 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))] 1620 "TARGET_SIMD" 1621 { 1622 if (BYTES_BIG_ENDIAN) 1623 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>"; 1624 else 1625 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>"; 1626 } 1627 [(set_attr "type" "multiple") 1628 (set_attr "length" "8")] 1629) 1630 1631;; Widening operations. 
1632 1633(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>" 1634 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1635 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1636 (match_operand:VQW 1 "register_operand" "w") 1637 (match_operand:VQW 2 "vect_par_cnst_lo_half" "") 1638 )))] 1639 "TARGET_SIMD" 1640 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>" 1641 [(set_attr "type" "neon_shift_imm_long")] 1642) 1643 1644(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" 1645 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1646 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1647 (match_operand:VQW 1 "register_operand" "w") 1648 (match_operand:VQW 2 "vect_par_cnst_hi_half" "") 1649 )))] 1650 "TARGET_SIMD" 1651 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>" 1652 [(set_attr "type" "neon_shift_imm_long")] 1653) 1654 1655(define_expand "vec_unpack<su>_hi_<mode>" 1656 [(match_operand:<VWIDE> 0 "register_operand") 1657 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] 1658 "TARGET_SIMD" 1659 { 1660 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 1661 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], 1662 operands[1], p)); 1663 DONE; 1664 } 1665) 1666 1667(define_expand "vec_unpack<su>_lo_<mode>" 1668 [(match_operand:<VWIDE> 0 "register_operand") 1669 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] 1670 "TARGET_SIMD" 1671 { 1672 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 1673 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], 1674 operands[1], p)); 1675 DONE; 1676 } 1677) 1678 1679;; Widening arithmetic. 
1680 1681(define_insn "*aarch64_<su>mlal_lo<mode>" 1682 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1683 (plus:<VWIDE> 1684 (mult:<VWIDE> 1685 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1686 (match_operand:VQW 2 "register_operand" "w") 1687 (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) 1688 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1689 (match_operand:VQW 4 "register_operand" "w") 1690 (match_dup 3)))) 1691 (match_operand:<VWIDE> 1 "register_operand" "0")))] 1692 "TARGET_SIMD" 1693 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" 1694 [(set_attr "type" "neon_mla_<Vetype>_long")] 1695) 1696 1697(define_insn "*aarch64_<su>mlal_hi<mode>" 1698 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1699 (plus:<VWIDE> 1700 (mult:<VWIDE> 1701 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1702 (match_operand:VQW 2 "register_operand" "w") 1703 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) 1704 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1705 (match_operand:VQW 4 "register_operand" "w") 1706 (match_dup 3)))) 1707 (match_operand:<VWIDE> 1 "register_operand" "0")))] 1708 "TARGET_SIMD" 1709 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" 1710 [(set_attr "type" "neon_mla_<Vetype>_long")] 1711) 1712 1713(define_insn "*aarch64_<su>mlsl_lo<mode>" 1714 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1715 (minus:<VWIDE> 1716 (match_operand:<VWIDE> 1 "register_operand" "0") 1717 (mult:<VWIDE> 1718 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1719 (match_operand:VQW 2 "register_operand" "w") 1720 (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) 1721 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1722 (match_operand:VQW 4 "register_operand" "w") 1723 (match_dup 3))))))] 1724 "TARGET_SIMD" 1725 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>" 1726 [(set_attr "type" "neon_mla_<Vetype>_long")] 1727) 1728 1729(define_insn "*aarch64_<su>mlsl_hi<mode>" 1730 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1731 (minus:<VWIDE> 1732 (match_operand:<VWIDE> 1 
"register_operand" "0") 1733 (mult:<VWIDE> 1734 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1735 (match_operand:VQW 2 "register_operand" "w") 1736 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) 1737 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1738 (match_operand:VQW 4 "register_operand" "w") 1739 (match_dup 3))))))] 1740 "TARGET_SIMD" 1741 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>" 1742 [(set_attr "type" "neon_mla_<Vetype>_long")] 1743) 1744 1745(define_insn "*aarch64_<su>mlal<mode>" 1746 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1747 (plus:<VWIDE> 1748 (mult:<VWIDE> 1749 (ANY_EXTEND:<VWIDE> 1750 (match_operand:VD_BHSI 1 "register_operand" "w")) 1751 (ANY_EXTEND:<VWIDE> 1752 (match_operand:VD_BHSI 2 "register_operand" "w"))) 1753 (match_operand:<VWIDE> 3 "register_operand" "0")))] 1754 "TARGET_SIMD" 1755 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" 1756 [(set_attr "type" "neon_mla_<Vetype>_long")] 1757) 1758 1759(define_insn "*aarch64_<su>mlsl<mode>" 1760 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1761 (minus:<VWIDE> 1762 (match_operand:<VWIDE> 1 "register_operand" "0") 1763 (mult:<VWIDE> 1764 (ANY_EXTEND:<VWIDE> 1765 (match_operand:VD_BHSI 2 "register_operand" "w")) 1766 (ANY_EXTEND:<VWIDE> 1767 (match_operand:VD_BHSI 3 "register_operand" "w")))))] 1768 "TARGET_SIMD" 1769 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>" 1770 [(set_attr "type" "neon_mla_<Vetype>_long")] 1771) 1772 1773(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>" 1774 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1775 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1776 (match_operand:VQW 1 "register_operand" "w") 1777 (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) 1778 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1779 (match_operand:VQW 2 "register_operand" "w") 1780 (match_dup 3)))))] 1781 "TARGET_SIMD" 1782 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" 1783 [(set_attr "type" "neon_mul_<Vetype>_long")] 1784) 1785 1786(define_insn 
"aarch64_intrinsic_vec_<su>mult_lo_<mode>" 1787 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1788 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> 1789 (match_operand:VD_BHSI 1 "register_operand" "w")) 1790 (ANY_EXTEND:<VWIDE> 1791 (match_operand:VD_BHSI 2 "register_operand" "w"))))] 1792 "TARGET_SIMD" 1793 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" 1794 [(set_attr "type" "neon_mul_<Vetype>_long")] 1795) 1796 1797(define_expand "vec_widen_<su>mult_lo_<mode>" 1798 [(match_operand:<VWIDE> 0 "register_operand") 1799 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")) 1800 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))] 1801 "TARGET_SIMD" 1802 { 1803 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 1804 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], 1805 operands[1], 1806 operands[2], p)); 1807 DONE; 1808 } 1809) 1810 1811(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>" 1812 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1813 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1814 (match_operand:VQW 1 "register_operand" "w") 1815 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) 1816 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1817 (match_operand:VQW 2 "register_operand" "w") 1818 (match_dup 3)))))] 1819 "TARGET_SIMD" 1820 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" 1821 [(set_attr "type" "neon_mul_<Vetype>_long")] 1822) 1823 1824(define_expand "vec_widen_<su>mult_hi_<mode>" 1825 [(match_operand:<VWIDE> 0 "register_operand") 1826 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")) 1827 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))] 1828 "TARGET_SIMD" 1829 { 1830 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 1831 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], 1832 operands[1], 1833 operands[2], p)); 1834 DONE; 1835 1836 } 1837) 1838 1839;; vmull_lane_s16 intrinsics 1840(define_insn "aarch64_vec_<su>mult_lane<Qlane>" 1841 
[(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1842 (mult:<VWIDE> 1843 (ANY_EXTEND:<VWIDE> 1844 (match_operand:<VCOND> 1 "register_operand" "w")) 1845 (ANY_EXTEND:<VWIDE> 1846 (vec_duplicate:<VCOND> 1847 (vec_select:<VEL> 1848 (match_operand:VDQHS 2 "register_operand" "<vwx>") 1849 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))] 1850 "TARGET_SIMD" 1851 { 1852 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 1853 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]"; 1854 } 1855 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")] 1856) 1857 1858;; vmlal_lane_s16 intrinsics 1859(define_insn "aarch64_vec_<su>mlal_lane<Qlane>" 1860 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1861 (plus:<VWIDE> 1862 (mult:<VWIDE> 1863 (ANY_EXTEND:<VWIDE> 1864 (match_operand:<VCOND> 2 "register_operand" "w")) 1865 (ANY_EXTEND:<VWIDE> 1866 (vec_duplicate:<VCOND> 1867 (vec_select:<VEL> 1868 (match_operand:VDQHS 3 "register_operand" "<vwx>") 1869 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))) 1870 (match_operand:<VWIDE> 1 "register_operand" "0")))] 1871 "TARGET_SIMD" 1872 { 1873 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); 1874 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]"; 1875 } 1876 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")] 1877) 1878 1879;; FP vector operations. 1880;; AArch64 AdvSIMD supports single-precision (32-bit) and 1881;; double-precision (64-bit) floating-point data types and arithmetic as 1882;; defined by the IEEE 754-2008 standard. This makes them vectorizable 1883;; without the need for -ffast-math or -funsafe-math-optimizations. 1884;; 1885;; Floating-point operations can raise an exception. Vectorizing such 1886;; operations are safe because of reasons explained below. 1887;; 1888;; ARMv8 permits an extension to enable trapped floating-point 1889;; exception handling, however this is an optional feature. 
;; In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.

;; FP arithmetic operations.

(define_insn "add<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "sub<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		    (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
)

(define_insn "mul<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		   (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mul_<stype><q>")]
)

;; Try an approximate (reciprocal-based) division sequence first; if that
;; is not wanted/possible, fall through to the *div insn below.
(define_expand "div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand")
		  (match_operand:VHSDF 2 "register_operand")))]
 "TARGET_SIMD"
{
  if
 (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
    DONE;

  operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "*div<mode>3"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_div_<stype><q>")]
)

(define_insn "neg<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fneg\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_neg_<stype><q>")]
)

(define_insn "abs<mode>2"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "fabs\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_abs_<stype><q>")]
)

;; Fused multiply-add; the addend (operand 3) is tied to the destination.
(define_insn "fma<mode>4"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		  (match_operand:VHSDF 2 "register_operand" "w")
		  (match_operand:VHSDF 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMA with one multiplicand broadcast from a lane of operand 1.
(define_insn "*aarch64_fma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 3 "register_operand" "w")
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr
 "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, but the lane comes from a vector of the other width
;; (e.g. a V2SF lane used with a V4SF multiply).
(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 3 "register_operand" "w")
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMA with the multiplicand duplicated from a scalar register.
(define_insn "*aarch64_fma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (vec_duplicate:VMUL
	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF FMA taking the multiplicand from a V2DF lane.
(define_insn "*aarch64_fma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
	(vec_select:DF
	  (match_operand:V2DF 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "immediate_operand")]))
      (match_operand:DF 3 "register_operand" "w")
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Fused multiply-subtract: fma with the first multiplicand negated.
(define_insn "fnma<mode>4"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(fma:VHSDF
	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	  (match_operand:VHSDF 2 "register_operand" "w")
	  (match_operand:VHSDF 3
"register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_mla_<stype><q>")]
)

;; FMLS with the multiplicand broadcast from a lane of operand 1.
(define_insn "*aarch64_fnma4_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
    (fma:VDQF
      (neg:VDQF
	(match_operand:VDQF 3 "register_operand" "w"))
      (vec_duplicate:VDQF
	(vec_select:<VEL>
	  (match_operand:VDQF 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; As above, with the lane taken from a vector of the other width.
(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
    (fma:VDQSF
      (neg:VDQSF
	(match_operand:VDQSF 3 "register_operand" "w"))
      (vec_duplicate:VDQSF
	(vec_select:<VEL>
	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	  (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VDQSF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
)

;; FMLS with the scalar multiplicand duplicated from a scalar register.
(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
    (fma:VMUL
      (neg:VMUL
	(match_operand:VMUL 2 "register_operand" "w"))
      (vec_duplicate:VMUL
	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
)

;; Scalar DF FMLS taking the multiplicand from a V2DF lane.
(define_insn
 "*aarch64_fnma4_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
    (fma:DF
      (vec_select:DF
	(match_operand:V2DF 1 "register_operand" "w")
	(parallel [(match_operand:SI 2 "immediate_operand")]))
      (neg:DF
	(match_operand:DF 3 "register_operand" "w"))
      (match_operand:DF 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
)

;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FRINT))]
  "TARGET_SIMD"
  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_round_<stype><q>")]
)

;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand" "w")]
				FCVT)))]
  "TARGET_SIMD"
  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
)

;; HF Scalar variants of related SIMD instructions.
(define_insn "l<fcvt_pattern><su_optab>hfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
		      FCVT)))]
  "TARGET_SIMD_F16INST"
  "fcvt<frint_suffix><su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>_trunchfhi2"
  [(set (match_operand:HI 0 "register_operand" "=w")
	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "fcvtz<su>\t%h0, %h1"
  [(set_attr "type" "neon_fp_to_int_s")]
)

(define_insn "<optab>hihf2"
  [(set (match_operand:HF 0 "register_operand" "=w")
	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
  "TARGET_SIMD_F16INST"
  "<su_optab>cvtf\t%h0, %h1"
  [(set_attr "type" "neon_int_to_fp_s")]
)

;; Combine a multiply by a power of two with a float->fixed truncation,
;; using the #fbits immediate form of fcvtz.
(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(mult:VDQF
	 (match_operand:VDQF 1 "register_operand" "w")
	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD
   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
  {
    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
    char buf[64];
    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
    output_asm_insn (buf, operands);
    return "";
  }
  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
)

(define_expand "<optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
  [(set (match_operand:<FCVT_TARGET> 0
 "register_operand")
	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
			       [(match_operand:VHSDF 1 "register_operand")]
				UNSPEC_FRINTZ)))]
  "TARGET_SIMD"
  {})

(define_expand "ftrunc<VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		       UNSPEC_FRINTZ))]
  "TARGET_SIMD"
  {})

(define_insn "<optab><fcvt_target><VHSDF:mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FLOATUORS:VHSDF
	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
)

;; Conversions between vectors of floats and doubles.
;; Contains a mix of patterns to match standard pattern names
;; and those for intrinsics.

;; Float widening operations.

(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE> (vec_select:<VHALF>
			       (match_operand:VQ_HSF 1 "register_operand" "w")
			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
			      )))]
  "TARGET_SIMD"
  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Convert between fixed-point and floating-point (vector modes)

(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
	(unspec:<VHSDF:FCVT_TARGET>
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (match_operand:SI 2 "immediate_operand" "i")]
	 FCVT_F2FIXED))]
  "TARGET_SIMD"
  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
)

(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
(unspec:<VDQ_HSDI:FCVT_TARGET> 2251 [(match_operand:VDQ_HSDI 1 "register_operand" "w") 2252 (match_operand:SI 2 "immediate_operand" "i")] 2253 FCVT_FIXED2F))] 2254 "TARGET_SIMD" 2255 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2" 2256 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")] 2257) 2258 2259;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns 2260;; is inconsistent with vector ordering elsewhere in the compiler, in that 2261;; the meaning of HI and LO changes depending on the target endianness. 2262;; While elsewhere we map the higher numbered elements of a vector to 2263;; the lower architectural lanes of the vector, for these patterns we want 2264;; to always treat "hi" as referring to the higher architectural lanes. 2265;; Consequently, while the patterns below look inconsistent with our 2266;; other big-endian patterns their behavior is as required. 2267 2268(define_expand "vec_unpacks_lo_<mode>" 2269 [(match_operand:<VWIDE> 0 "register_operand") 2270 (match_operand:VQ_HSF 1 "register_operand")] 2271 "TARGET_SIMD" 2272 { 2273 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); 2274 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], 2275 operands[1], p)); 2276 DONE; 2277 } 2278) 2279 2280(define_insn "aarch64_simd_vec_unpacks_hi_<mode>" 2281 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 2282 (float_extend:<VWIDE> (vec_select:<VHALF> 2283 (match_operand:VQ_HSF 1 "register_operand" "w") 2284 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "") 2285 )))] 2286 "TARGET_SIMD" 2287 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>" 2288 [(set_attr "type" "neon_fp_cvt_widen_s")] 2289) 2290 2291(define_expand "vec_unpacks_hi_<mode>" 2292 [(match_operand:<VWIDE> 0 "register_operand") 2293 (match_operand:VQ_HSF 1 "register_operand")] 2294 "TARGET_SIMD" 2295 { 2296 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); 2297 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> 
							    (operands[0],
							     operands[1], p));
    DONE;
  }
)

;; Widen a 64-bit vector of floats to the full 128-bit wider mode.
(define_insn "aarch64_float_extend_lo_<Vwide>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(float_extend:<VWIDE>
	  (match_operand:VDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
  [(set_attr "type" "neon_fp_cvt_widen_s")]
)

;; Float narrowing operations.

(define_insn "aarch64_float_truncate_lo_<mode>"
  [(set (match_operand:VDF 0 "register_operand" "=w")
	(float_truncate:VDF
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Narrow into the high half (fcvtn2); little-endian vec_concat order.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDF 1 "register_operand" "0")
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; As above for big-endian: the vec_concat halves are swapped.
(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (float_truncate:VDF
	    (match_operand:<VWIDE> 2 "register_operand" "w"))
	  (match_operand:VDF 1 "register_operand" "0")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)

;; Dispatch to the endian-correct variant above.
(define_expand "aarch64_float_truncate_hi_<Vdbl>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDF 1 "register_operand")
   (match_operand:<VWIDE> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
			     ?
gen_aarch64_float_truncate_hi_<Vdbl>_be 2352 : gen_aarch64_float_truncate_hi_<Vdbl>_le; 2353 emit_insn (gen (operands[0], operands[1], operands[2])); 2354 DONE; 2355} 2356) 2357 2358(define_expand "vec_pack_trunc_v2df" 2359 [(set (match_operand:V4SF 0 "register_operand") 2360 (vec_concat:V4SF 2361 (float_truncate:V2SF 2362 (match_operand:V2DF 1 "register_operand")) 2363 (float_truncate:V2SF 2364 (match_operand:V2DF 2 "register_operand")) 2365 ))] 2366 "TARGET_SIMD" 2367 { 2368 rtx tmp = gen_reg_rtx (V2SFmode); 2369 int lo = BYTES_BIG_ENDIAN ? 2 : 1; 2370 int hi = BYTES_BIG_ENDIAN ? 1 : 2; 2371 2372 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); 2373 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], 2374 tmp, operands[hi])); 2375 DONE; 2376 } 2377) 2378 2379(define_expand "vec_pack_trunc_df" 2380 [(set (match_operand:V2SF 0 "register_operand") 2381 (vec_concat:V2SF 2382 (float_truncate:SF 2383 (match_operand:DF 1 "register_operand")) 2384 (float_truncate:SF 2385 (match_operand:DF 2 "register_operand")) 2386 ))] 2387 "TARGET_SIMD" 2388 { 2389 rtx tmp = gen_reg_rtx (V2SFmode); 2390 int lo = BYTES_BIG_ENDIAN ? 2 : 1; 2391 int hi = BYTES_BIG_ENDIAN ? 1 : 2; 2392 2393 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); 2394 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); 2395 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); 2396 DONE; 2397 } 2398) 2399 2400;; FP Max/Min 2401;; Max/Min are introduced by idiom recognition by GCC's mid-end. An 2402;; expression like: 2403;; a = (b < c) ? b : c; 2404;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and 2405;; -fno-signed-zeros are enabled either explicitly or indirectly via 2406;; -ffast-math. 2407;; 2408;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. 2409;; The 'smax' and 'smin' RTL standard pattern names do not specify which 2410;; operand will be returned when both operands are zero (i.e. 
;; they may not
;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; NaNs.

(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; Vector forms for fmax, fmin, fmaxnm, fminnm.
;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
;; which implement the IEEE fmax ()/fmin () functions.
(define_insn "<maxmin_uns><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMIN_UNS))]
  "TARGET_SIMD"
  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
)

;; 'across lanes' add.

;; Reduce-add across a vector into a scratch, then extract lane 0 of the
;; (endian-adjusted) result into a scalar.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
		 UNSPEC_ADDV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Pairwise FP add of two vectors.
(define_insn "aarch64_faddp<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
	UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
)

;; Integer across-lanes add (ADDV / ADDP depending on mode).
(define_insn "aarch64_reduc_plus_internal<mode>"
 [(set (match_operand:VDQV 0 "register_operand" "=w")
       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; ADDV with result zero-extended to SI/DImode (for popcount).
(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
 [(set (match_operand:GPI 0 "register_operand" "=w")
       (zero_extend:GPI
	(unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
			     UNSPEC_ADDV)))]
 "TARGET_SIMD"
 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
)

;; V2SI has no single-instruction ADDV; use a pairwise add instead.
(define_insn "aarch64_reduc_plus_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    UNSPEC_ADDV))]
 "TARGET_SIMD"
 "addp\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_add")]
)

;; Two-lane FP reduction: a single pairwise faddp yields the scalar sum.
(define_insn "reduc_plus_scal_<mode>"
 [(set (match_operand:<VEL> 0 "register_operand" "=w")
       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
		     UNSPEC_FADDV))]
 "TARGET_SIMD"
 "faddp\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
)

;; V4SF reduction: two rounds of pairwise adds, then extract lane 0.
(define_expand "reduc_plus_scal_v4sf"
 [(set (match_operand:SF 0 "register_operand")
       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
		    UNSPEC_FADDV))]
 "TARGET_SIMD"
{
  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
  rtx scratch = gen_reg_rtx (V4SFmode);
  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
  DONE;
})

;; Count leading redundant sign bits, per lane.
(define_insn "clrsb<mode>2"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cls\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Count leading zeros, per lane.
(define_insn "clz<mode>2"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
 "TARGET_SIMD"
 "clz\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_cls<q>")]
)

;; Population count, per byte lane.
(define_insn "popcount<mode>2"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(popcount:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_cnt<q>")]
)

;; 'across lanes' max and min ops.

;; Template for outputting a scalar, so we can create __builtins which can be
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
		  FMAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Likewise for integer cases, signed and unsigned.
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
		    MAXMINV)]
  "TARGET_SIMD"
  {
    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
    rtx scratch = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
							      operands[1]));
    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
    DONE;
  }
)

;; Integer across-lanes min/max (SMAXV etc.).
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_minmax<q>")]
)

;; V2SI has no across-lanes min/max insn; use the pairwise form.
(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
 [(set (match_operand:V2SI 0 "register_operand" "=w")
       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
		    MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
  [(set_attr "type" "neon_reduc_minmax")]
)

;; FP across-lanes min/max.
(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
)

;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; to select.
;;
;; Thus our BSL is of the form:
;;   op0 = bsl (mask, op2, op3)
;; We can use any of:
;;
;;   if (op0 = mask)
;;     bsl  mask, op1, op2
;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
;;     bit  op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
;;     bif  op0, op1, mask
;;
;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl<mode>_alt.

;; BSL expressed through its component xor/and/xor operations so that
;; combine can optimize it; the three alternatives match whichever input
;; the register allocator ties to the destination.
(define_insn "aarch64_simd_bsl<mode>_internal"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; We need this form in addition to the above pattern to match the case
;; when combine tries merging three insns such that the second operand of
;; the outer XOR matches the second operand of the inner XOR rather than
;; the first.  The two are equivalent but since recog doesn't try all
;; permutations of commutative operations, we have to have a separate pattern.
(define_insn "*aarch64_simd_bsl<mode>_alt"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
	(xor:VDQ_I
	   (and:VDQ_I
	     (xor:VDQ_I
	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
	  (match_dup:<V_INT_EQUIV> 2)))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_bsl<q>")]
)

;; DImode is special, we want to avoid computing operations which are
;; more naturally computed in general purpose registers in the vector
;; registers.  If we do that, we need to move all three operands from general
;; purpose registers to vector registers, then back again.  However, we
;; don't want to make this pattern an UNSPEC as we'd lose scope for
;; optimizations based on the component operations of a BSL.
;;
;; That means we need a splitter back to the individual operations, if they
;; would be better calculated on the integer side.

(define_insn_and_split "aarch64_simd_bsldi_internal"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,0,w,r")
	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 3)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %2.8b, %3.8b
  bit\\t%0.8b, %2.8b, %1.8b
  bif\\t%0.8b, %3.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

;; As aarch64_simd_bsldi_internal with the commuted inner-XOR operand
;; order (same rationale as *aarch64_simd_bsl<mode>_alt).
(define_insn_and_split "aarch64_simd_bsldi_alt"
  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
	(xor:DI
	   (and:DI
	     (xor:DI
	       (match_operand:DI 3 "register_operand" "w,w,0,r")
	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
	  (match_dup:DI 2)
	))]
  "TARGET_SIMD"
  "@
  bsl\\t%0.8b, %3.8b, %2.8b
  bit\\t%0.8b, %3.8b, %1.8b
  bif\\t%0.8b, %2.8b, %1.8b
  #"
  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
{
  /* Split back to individual operations.  If we're before reload, and
     able to create a temporary register, do so.  If we're after reload,
     we've got an early-clobber destination register, so use that.
     Otherwise, we can't create pseudos and we can't yet guarantee that
     operands[0] is safe to write, so FAIL to split.
  */

  rtx scratch;
  if (reload_completed)
    scratch = operands[0];
  else if (can_create_pseudo_p ())
    scratch = gen_reg_rtx (DImode);
  else
    FAIL;

  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
  DONE;
}
  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
   (set_attr "length" "4,4,4,12")]
)

;; Expander for the intrinsics' BSL; lowers FP modes to the integer
;; equivalent since the insn patterns above are integer-moded.
(define_expand "aarch64_simd_bsl<mode>"
  [(match_operand:VALLDIF 0 "register_operand")
   (match_operand:<V_INT_EQUIV> 1 "register_operand")
   (match_operand:VALLDIF 2 "register_operand")
   (match_operand:VALLDIF 3 "register_operand")]
 "TARGET_SIMD"
{
  /* We can't alias operands together if they have different modes.  */
  rtx tmp = operands[0];
  if (FLOAT_MODE_P (<MODE>mode))
    {
      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
    }
  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
							 operands[1],
							 operands[2],
							 operands[3]));
  if (tmp != operands[0])
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));

  DONE;
})

(define_expand "vcond_mask_<mode><v_int_equiv>"
  [(match_operand:VALLDI 0 "register_operand")
   (match_operand:VALLDI 1 "nonmemory_operand")
   (match_operand:VALLDI 2 "nonmemory_operand")
   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
  "TARGET_SIMD"
{
  /* If we have (a = (P) ? -1 : 0);
     Then we can simply move the generated mask (result must be int).  */
  if (operands[1] == CONSTM1_RTX (<MODE>mode)
      && operands[2] == CONST0_RTX (<MODE>mode))
    emit_move_insn (operands[0], operands[3]);
  /* Similarly, (a = (P) ?
0 : -1) is just inverting the generated mask. */ 2795 else if (operands[1] == CONST0_RTX (<MODE>mode) 2796 && operands[2] == CONSTM1_RTX (<MODE>mode)) 2797 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3])); 2798 else 2799 { 2800 if (!REG_P (operands[1])) 2801 operands[1] = force_reg (<MODE>mode, operands[1]); 2802 if (!REG_P (operands[2])) 2803 operands[2] = force_reg (<MODE>mode, operands[2]); 2804 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3], 2805 operands[1], operands[2])); 2806 } 2807 2808 DONE; 2809}) 2810 2811;; Patterns comparing two vectors to produce a mask. 2812 2813(define_expand "vec_cmp<mode><mode>" 2814 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 2815 (match_operator 1 "comparison_operator" 2816 [(match_operand:VSDQ_I_DI 2 "register_operand") 2817 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 2818 "TARGET_SIMD" 2819{ 2820 rtx mask = operands[0]; 2821 enum rtx_code code = GET_CODE (operands[1]); 2822 2823 switch (code) 2824 { 2825 case NE: 2826 case LE: 2827 case LT: 2828 case GE: 2829 case GT: 2830 case EQ: 2831 if (operands[3] == CONST0_RTX (<MODE>mode)) 2832 break; 2833 2834 /* Fall through. 
*/ 2835 default: 2836 if (!REG_P (operands[3])) 2837 operands[3] = force_reg (<MODE>mode, operands[3]); 2838 2839 break; 2840 } 2841 2842 switch (code) 2843 { 2844 case LT: 2845 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3])); 2846 break; 2847 2848 case GE: 2849 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3])); 2850 break; 2851 2852 case LE: 2853 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3])); 2854 break; 2855 2856 case GT: 2857 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3])); 2858 break; 2859 2860 case LTU: 2861 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2])); 2862 break; 2863 2864 case GEU: 2865 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3])); 2866 break; 2867 2868 case LEU: 2869 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2])); 2870 break; 2871 2872 case GTU: 2873 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3])); 2874 break; 2875 2876 case NE: 2877 /* Handle NE as !EQ. 
*/ 2878 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2879 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask)); 2880 break; 2881 2882 case EQ: 2883 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3])); 2884 break; 2885 2886 default: 2887 gcc_unreachable (); 2888 } 2889 2890 DONE; 2891}) 2892 2893(define_expand "vec_cmp<mode><v_int_equiv>" 2894 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") 2895 (match_operator 1 "comparison_operator" 2896 [(match_operand:VDQF 2 "register_operand") 2897 (match_operand:VDQF 3 "nonmemory_operand")]))] 2898 "TARGET_SIMD" 2899{ 2900 int use_zero_form = 0; 2901 enum rtx_code code = GET_CODE (operands[1]); 2902 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode); 2903 2904 rtx (*comparison) (rtx, rtx, rtx) = NULL; 2905 2906 switch (code) 2907 { 2908 case LE: 2909 case LT: 2910 case GE: 2911 case GT: 2912 case EQ: 2913 if (operands[3] == CONST0_RTX (<MODE>mode)) 2914 { 2915 use_zero_form = 1; 2916 break; 2917 } 2918 /* Fall through. */ 2919 default: 2920 if (!REG_P (operands[3])) 2921 operands[3] = force_reg (<MODE>mode, operands[3]); 2922 2923 break; 2924 } 2925 2926 switch (code) 2927 { 2928 case LT: 2929 if (use_zero_form) 2930 { 2931 comparison = gen_aarch64_cmlt<mode>; 2932 break; 2933 } 2934 /* Fall through. */ 2935 case UNLT: 2936 std::swap (operands[2], operands[3]); 2937 /* Fall through. */ 2938 case UNGT: 2939 case GT: 2940 comparison = gen_aarch64_cmgt<mode>; 2941 break; 2942 case LE: 2943 if (use_zero_form) 2944 { 2945 comparison = gen_aarch64_cmle<mode>; 2946 break; 2947 } 2948 /* Fall through. */ 2949 case UNLE: 2950 std::swap (operands[2], operands[3]); 2951 /* Fall through. 
*/ 2952 case UNGE: 2953 case GE: 2954 comparison = gen_aarch64_cmge<mode>; 2955 break; 2956 case NE: 2957 case EQ: 2958 comparison = gen_aarch64_cmeq<mode>; 2959 break; 2960 case UNEQ: 2961 case ORDERED: 2962 case UNORDERED: 2963 case LTGT: 2964 break; 2965 default: 2966 gcc_unreachable (); 2967 } 2968 2969 switch (code) 2970 { 2971 case UNGE: 2972 case UNGT: 2973 case UNLE: 2974 case UNLT: 2975 { 2976 /* All of the above must not raise any FP exceptions. Thus we first 2977 check each operand for NaNs and force any elements containing NaN to 2978 zero before using them in the compare. 2979 Example: UN<cc> (a, b) -> UNORDERED (a, b) | 2980 (cm<cc> (isnan (a) ? 0.0 : a, 2981 isnan (b) ? 0.0 : b)) 2982 We use the following transformations for doing the comparisions: 2983 a UNGE b -> a GE b 2984 a UNGT b -> a GT b 2985 a UNLE b -> b GE a 2986 a UNLT b -> b GT a. */ 2987 2988 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode); 2989 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode); 2990 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode); 2991 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2])); 2992 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3])); 2993 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1)); 2994 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0, 2995 lowpart_subreg (<V_INT_EQUIV>mode, 2996 operands[2], 2997 <MODE>mode))); 2998 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1, 2999 lowpart_subreg (<V_INT_EQUIV>mode, 3000 operands[3], 3001 <MODE>mode))); 3002 gcc_assert (comparison != NULL); 3003 emit_insn (comparison (operands[0], 3004 lowpart_subreg (<MODE>mode, 3005 tmp0, <V_INT_EQUIV>mode), 3006 lowpart_subreg (<MODE>mode, 3007 tmp1, <V_INT_EQUIV>mode))); 3008 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0])); 3009 } 3010 break; 3011 3012 case LT: 3013 case LE: 3014 case GT: 3015 case GE: 3016 case EQ: 3017 case NE: 3018 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. 3019 As a LT b <=> b GE a && a LE b <=> b GT a. 
Our transformations are: 3020 a GE b -> a GE b 3021 a GT b -> a GT b 3022 a LE b -> b GE a 3023 a LT b -> b GT a 3024 a EQ b -> a EQ b 3025 a NE b -> ~(a EQ b) */ 3026 gcc_assert (comparison != NULL); 3027 emit_insn (comparison (operands[0], operands[2], operands[3])); 3028 if (code == NE) 3029 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 3030 break; 3031 3032 case LTGT: 3033 /* LTGT is not guranteed to not generate a FP exception. So let's 3034 go the faster way : ((a > b) || (b > a)). */ 3035 emit_insn (gen_aarch64_cmgt<mode> (operands[0], 3036 operands[2], operands[3])); 3037 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); 3038 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); 3039 break; 3040 3041 case ORDERED: 3042 case UNORDERED: 3043 case UNEQ: 3044 /* cmeq (a, a) & cmeq (b, b). */ 3045 emit_insn (gen_aarch64_cmeq<mode> (operands[0], 3046 operands[2], operands[2])); 3047 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3])); 3048 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp)); 3049 3050 if (code == UNORDERED) 3051 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 3052 else if (code == UNEQ) 3053 { 3054 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3])); 3055 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp)); 3056 } 3057 break; 3058 3059 default: 3060 gcc_unreachable (); 3061 } 3062 3063 DONE; 3064}) 3065 3066(define_expand "vec_cmpu<mode><mode>" 3067 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 3068 (match_operator 1 "comparison_operator" 3069 [(match_operand:VSDQ_I_DI 2 "register_operand") 3070 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))] 3071 "TARGET_SIMD" 3072{ 3073 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1], 3074 operands[2], operands[3])); 3075 DONE; 3076}) 3077 3078(define_expand "vcond<mode><mode>" 3079 [(set (match_operand:VALLDI 0 "register_operand") 3080 (if_then_else:VALLDI 
3081 (match_operator 3 "comparison_operator" 3082 [(match_operand:VALLDI 4 "register_operand") 3083 (match_operand:VALLDI 5 "nonmemory_operand")]) 3084 (match_operand:VALLDI 1 "nonmemory_operand") 3085 (match_operand:VALLDI 2 "nonmemory_operand")))] 3086 "TARGET_SIMD" 3087{ 3088 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 3089 enum rtx_code code = GET_CODE (operands[3]); 3090 3091 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3092 it as well as switch operands 1/2 in order to avoid the additional 3093 NOT instruction. */ 3094 if (code == NE) 3095 { 3096 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3097 operands[4], operands[5]); 3098 std::swap (operands[1], operands[2]); 3099 } 3100 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 3101 operands[4], operands[5])); 3102 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3103 operands[2], mask)); 3104 3105 DONE; 3106}) 3107 3108(define_expand "vcond<v_cmp_mixed><mode>" 3109 [(set (match_operand:<V_cmp_mixed> 0 "register_operand") 3110 (if_then_else:<V_cmp_mixed> 3111 (match_operator 3 "comparison_operator" 3112 [(match_operand:VDQF_COND 4 "register_operand") 3113 (match_operand:VDQF_COND 5 "nonmemory_operand")]) 3114 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand") 3115 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))] 3116 "TARGET_SIMD" 3117{ 3118 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 3119 enum rtx_code code = GET_CODE (operands[3]); 3120 3121 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3122 it as well as switch operands 1/2 in order to avoid the additional 3123 NOT instruction. 
*/ 3124 if (code == NE) 3125 { 3126 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3127 operands[4], operands[5]); 3128 std::swap (operands[1], operands[2]); 3129 } 3130 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3], 3131 operands[4], operands[5])); 3132 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> ( 3133 operands[0], operands[1], 3134 operands[2], mask)); 3135 3136 DONE; 3137}) 3138 3139(define_expand "vcondu<mode><mode>" 3140 [(set (match_operand:VSDQ_I_DI 0 "register_operand") 3141 (if_then_else:VSDQ_I_DI 3142 (match_operator 3 "comparison_operator" 3143 [(match_operand:VSDQ_I_DI 4 "register_operand") 3144 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")]) 3145 (match_operand:VSDQ_I_DI 1 "nonmemory_operand") 3146 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))] 3147 "TARGET_SIMD" 3148{ 3149 rtx mask = gen_reg_rtx (<MODE>mode); 3150 enum rtx_code code = GET_CODE (operands[3]); 3151 3152 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3153 it as well as switch operands 1/2 in order to avoid the additional 3154 NOT instruction. 
*/ 3155 if (code == NE) 3156 { 3157 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3158 operands[4], operands[5]); 3159 std::swap (operands[1], operands[2]); 3160 } 3161 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3], 3162 operands[4], operands[5])); 3163 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3164 operands[2], mask)); 3165 DONE; 3166}) 3167 3168(define_expand "vcondu<mode><v_cmp_mixed>" 3169 [(set (match_operand:VDQF 0 "register_operand") 3170 (if_then_else:VDQF 3171 (match_operator 3 "comparison_operator" 3172 [(match_operand:<V_cmp_mixed> 4 "register_operand") 3173 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")]) 3174 (match_operand:VDQF 1 "nonmemory_operand") 3175 (match_operand:VDQF 2 "nonmemory_operand")))] 3176 "TARGET_SIMD" 3177{ 3178 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode); 3179 enum rtx_code code = GET_CODE (operands[3]); 3180 3181 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert 3182 it as well as switch operands 1/2 in order to avoid the additional 3183 NOT instruction. */ 3184 if (code == NE) 3185 { 3186 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]), 3187 operands[4], operands[5]); 3188 std::swap (operands[1], operands[2]); 3189 } 3190 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> ( 3191 mask, operands[3], 3192 operands[4], operands[5])); 3193 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1], 3194 operands[2], mask)); 3195 DONE; 3196}) 3197 3198;; Patterns for AArch64 SIMD Intrinsics. 3199 3200;; Lane extraction with sign extension to general purpose register. 
;; Extract one 8-bit or 16-bit lane and sign-extend it into a 32-bit or
;; 64-bit general-purpose register (SMOV).
(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(sign_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* RTL lane numbers follow GCC vector extension (memory) order; flip
       to the architectural lane number only for the assembly output.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)

;; Extract one 8-bit or 16-bit lane and zero-extend it into a
;; general-purpose register (UMOV).  The "%w0" (32-bit) form is used even
;; for a DImode destination: writing a W register zero-extends into the
;; full X register.
(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
  [(set (match_operand:GPI 0 "register_operand" "=r")
	(zero_extend:GPI
	  (vec_select:<VDQQH:VEL>
	    (match_operand:VDQQH 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    /* Flip to the architectural lane number for assembly output.  */
    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
					   INTVAL (operands[2]));
    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
  }
  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
)

;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Extract a single lane from a vector.  Three alternatives:
;;   0: lane -> general-purpose register (UMOV)
;;   1: lane -> SIMD scalar register (DUP)
;;   2: lane -> memory (ST1, single structure)
(define_insn "aarch64_get_lane<mode>"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane index to the architectural lane number.  */
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)

;; Load two consecutive 64-bit values from memory as one 128-bit vector.
;; The insn condition requires operand 2's address to be exactly operand
;; 1's address plus the size of the first half, so a single LDR of the
;; double-width mode covers both loads.
(define_insn "load_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "memory_operand" "Utq")
	  (match_operand:VDC 2 "memory_operand" "m")))]
  "TARGET_SIMD && !STRICT_ALIGNMENT
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<MODE>mode)))"
  "ldr\\t%q0, %1"
  [(set_attr "type" "neon_load1_1reg_q")]
)

;; Store both 64-bit halves of a concatenation with one STP, either from
;; SIMD registers (d-regs) or from general-purpose registers (x-regs).
(define_insn "store_pair_lanes<mode>"
  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "register_operand" "w, r")
	  (match_operand:VDC 2 "register_operand" "w, r")))]
  "TARGET_SIMD"
  "@
   stp\\t%d1, %d2, %y0
   stp\\t%x1, %x2, %y0"
  [(set_attr "type" "neon_stp, store_16")]
)

;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.

;; Combine a 64-bit value with a zero high half into a 128-bit vector
;; (little-endian layout).  The zero half needs no extra instruction: a
;; MOV, FMOV or LDR writing the low 64 bits clears the upper 64 bits.
(define_insn "@aarch64_combinez<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 1 "general_operand" "w,?r,m")
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

;; Big-endian counterpart of @aarch64_combinez: the zero half comes first
;; in the vec_concat, but the emitted instructions are identical.
(define_insn "@aarch64_combinez_be<mode>"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
	(vec_concat:<VDBL>
	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN"
  "@
   mov\\t%0.8b, %1.8b
   fmov\t%d0, %1
   ldr\\t%d0, %1"
  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
   (set_attr "arch" "simd,fp,simd")]
)

;; Combine two 64-bit vectors into one 128-bit vector.  A zero high part
;; uses the cheap combinez forms above; the general case is delegated to
;; aarch64_split_simd_combine.
(define_expand "aarch64_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
  "TARGET_SIMD"
{
  if (operands[2] == CONST0_RTX (<MODE>mode))
    {
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
						  operands[2]));
      else
	emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
					       operands[2]));
    }
  else
    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
  DONE;
}
)

;; General combine: move operand 1 into the low quad and operand 2 into
;; the high quad of the destination.
(define_expand "@aarch64_simd_combine<mode>"
  [(match_operand:<VDBL> 0 "register_operand")
   (match_operand:VDC 1 "register_operand")
   (match_operand:VDC 2 "register_operand")]
  "TARGET_SIMD"
  {
    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
    DONE;
  }
[(set_attr "type" "multiple")]
)

;; <su><addsub>l<q>.

;; Widening add/sub of the high halves of two Q-register vectors:
;; [su](add|sub)l2.  Operand 3 is the parallel selecting the high half.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; Widening add/sub of the low halves: [su](add|sub)l.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 1 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)


;; The four expanders below build the high-half selector and delegate to
;; the *_hi_internal patterns above.

;; Signed widening add of the high halves (SADDL2).
(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Unsigned widening add of the high halves (UADDL2).
(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Signed widening subtract of the high halves (SSUBL2).
(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Unsigned widening subtract of the high halves (USUBL2).
(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQW 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; Widening add/sub of whole 64-bit vectors: [su](add|sub)l.
(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 1 "register_operand" "w"))
		       (ANY_EXTEND:<VWIDE>
			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.

;; Widening sum of a full Q-register vector: sign-extend and accumulate
;; via SADDW on the low half followed by SADDW2 on the high half.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (sign_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    /* Selector for the low half of operand 1.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Widening sum of a 64-bit vector: a single SADDW.
(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (sign_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Unsigned counterpart of widen_ssum for Q-register vectors:
;; UADDW on the low half, then UADDW2 on the high half.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (zero_extend:<VDBLW>
		        (match_operand:VQW 1 "register_operand"))
		      (match_operand:<VDBLW> 2 "register_operand")))]
  "TARGET_SIMD"
  {
    /* Selector for the low half of operand 1.  */
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
						 operands[1], p));
    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
    DONE;
  }
)

;; Unsigned widening sum of a 64-bit vector: a single UADDW.
(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand")
	(plus:<VWIDE> (zero_extend:<VWIDE>
		        (match_operand:VD_BHSI 1 "register_operand"))
		      (match_operand:<VWIDE> 2 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

;; Wide minus extended 64-bit vector: [su]subw.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; Wide minus the extended low half of a Q-register vector: [su]subw.
(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; Wide minus the extended high half: [su]subw2.
(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
		       (ANY_EXTEND:<VWIDE>
			 (vec_select:<VHALF>
			   (match_operand:VQW 2 "register_operand" "w")
			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

;; Wide plus extended 64-bit vector: [su]addw.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

;; Wide plus the extended low half of a Q-register vector: [su]addw.
(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)

;; Wide plus the extended high half: [su]addw2.
(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(plus:<VWIDE>
	  (ANY_EXTEND:<VWIDE>
	    (vec_select:<VHALF>
	      (match_operand:VQW 2 "register_operand" "w")
	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

;; The four expanders below build the high-half selector and delegate to
;; the *w2_internal patterns above.

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})


(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQW 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
						operands[2], p));
  DONE;
})

;; <su><r>h<addsub>.

;; Halving (truncating) average, mapped onto the HADD unspecs.
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 HADD))]
  "TARGET_SIMD"
)

;; Halving (rounding) average, mapped onto the RHADD unspecs.
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
			  (match_operand:VDQ_BHSI 2 "register_operand")]
			 RHADD))]
  "TARGET_SIMD"
)

;; Halving add/sub: [su][r]h(add|sub).
(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			  (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			 HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.

;; Narrowing high-half add/sub: [r](add|sub)hn.
(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
			    (match_operand:VQN 2 "register_operand" "w")]
			   ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; Narrowing high-half add/sub writing the upper half of the destination;
;; operand 1 (constraint "0") supplies the already-written lower half.
(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
	(unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
			     (match_operand:VQN 2 "register_operand" "w")
			     (match_operand:VQN 3 "register_operand" "w")]
			    ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

;; Polynomial multiply on byte vectors (PMUL).
(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.

;; FMULX multiply-extended.  Kept as an unspec so the midend never folds
;; it like an ordinary multiply.
(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FMULX))]
 "TARGET_SIMD"
 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 [(set_attr "type" "neon_fp_mul_<stype>")]
)

;; vmulxq_lane_f32 and vmulx_laneq_f32

;; FMULX by one lane of a vector of the other width.
(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(unspec:VDQSF
	  [(match_operand:VDQSF 1 "register_operand" "w")
	   (vec_duplicate:VDQSF
	     (vec_select:<VEL>
	       (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
	       (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	  UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane index to the architectural lane number.  */
    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

;; FMULX by one lane of a vector of the same width.
(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
	(unspec:VDQF
	  [(match_operand:VDQF 1 "register_operand" "w")
	   (vec_duplicate:VDQF
	     (vec_select:<VEL>
	       (match_operand:VDQF 2 "register_operand" "w")
	       (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
	  UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane index to the architectural lane number.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane

;; FMULX by a scalar broadcast to every lane (lane 0 of the scalar
;; register).  Note: a stray ";" after the output template (which in .md
;; syntax silently started a comment) has been removed.
(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF
	  [(match_operand:VHSDF 1 "register_operand" "w")
	   (vec_duplicate:VHSDF
	     (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
	  UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

;; Scalar FMULX by one lane of a vector.
(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL>
	  [(match_operand:<VEL> 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:VDQF 2 "register_operand" "w")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  UNSPEC_FMULX))]
  "TARGET_SIMD"
  {
    /* Convert the GCC lane index to the architectural lane number.  */
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "fmul<Vetype>")]
)

;; <su>q<addsub>

;; Saturating add/sub: [su]q(add|sub).
(define_insn "aarch64_<su_optab>q<addsub><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
			(match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_q<addsub><q>")]
)

;; suqadd and usqadd

;; Saturating accumulate of the opposite signedness; the destination is
;; also the accumulator input (constraint "0").
(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
			(match_operand:VSDQ_I 2 "register_operand" "w")]
		       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)

;; sqmovun

;; Saturating extract-unsigned-narrow (SQXTUN).
(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   UNSPEC_SQXTUN))]
  "TARGET_SIMD"
  "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

;; Saturating extract-narrow ([su]QXTN).
(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
			   SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

;; Saturating absolute value / negate (SQABS, SQNEG).
(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(UNQOPS:VSDQ_I
	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

;; Saturating (rounding) doubling multiply returning high half.
(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
	(unspec:VSDQ_HSI
	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
	  VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

;; Vector form, by one lane of a 64-bit vector.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Vector form, by one lane of a 128-bit vector.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar form, by one lane of a 64-bit vector.
(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; Scalar form, by one lane of a 128-bit vector.
(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
	  VQDMULH))]
  "TARGET_SIMD"
  "*
   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; sqrdml[as]h.
3892 3893(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>" 3894 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w") 3895 (unspec:VSDQ_HSI 3896 [(match_operand:VSDQ_HSI 1 "register_operand" "0") 3897 (match_operand:VSDQ_HSI 2 "register_operand" "w") 3898 (match_operand:VSDQ_HSI 3 "register_operand" "w")] 3899 SQRDMLH_AS))] 3900 "TARGET_SIMD_RDMA" 3901 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>" 3902 [(set_attr "type" "neon_sat_mla_<Vetype>_long")] 3903) 3904 3905;; sqrdml[as]h_lane. 3906 3907(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3908 [(set (match_operand:VDQHS 0 "register_operand" "=w") 3909 (unspec:VDQHS 3910 [(match_operand:VDQHS 1 "register_operand" "0") 3911 (match_operand:VDQHS 2 "register_operand" "w") 3912 (vec_select:<VEL> 3913 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3914 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3915 SQRDMLH_AS))] 3916 "TARGET_SIMD_RDMA" 3917 { 3918 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 3919 return 3920 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3921 } 3922 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3923) 3924 3925(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>" 3926 [(set (match_operand:SD_HSI 0 "register_operand" "=w") 3927 (unspec:SD_HSI 3928 [(match_operand:SD_HSI 1 "register_operand" "0") 3929 (match_operand:SD_HSI 2 "register_operand" "w") 3930 (vec_select:<VEL> 3931 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3932 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3933 SQRDMLH_AS))] 3934 "TARGET_SIMD_RDMA" 3935 { 3936 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); 3937 return 3938 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; 3939 } 3940 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3941) 3942 3943;; sqrdml[as]h_laneq. 
;; Rounding-doubling multiply-accumulate/subtract high half, element taken
;; from a full (128-bit) vector (ARMv8.1-A RDMA).

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(unspec:VDQHS
	  [(match_operand:VDQHS 1 "register_operand" "0")
	   (match_operand:VDQHS 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     ;; Remap the lane number for big-endian element ordering.
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar (SD_HSI) by-element variant.
(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
	(unspec:SD_HSI
	  [(match_operand:SD_HSI 1 "register_operand" "0")
	   (match_operand:SD_HSI 2 "register_operand" "w")
	   (vec_select:<VEL>
	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
	  SQRDMLH_AS))]
   "TARGET_SIMD_RDMA"
   {
     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
     return
      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
   }
   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l

;; Signed saturating doubling multiply-accumulate/subtract long: the product
;; is widened, doubled (ss_ashift by 1), then added to / subtracted from the
;; wide accumulator (operand 1, tied to the destination).
(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		      (match_operand:VSD_HSI 3 "register_operand" "w")))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; vqdml[sa]l_lane

(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; As above, but the element comes from a full (128-bit) vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:VD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_duplicate:VD_HSI
		  (vec_select:<VEL>
		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      ))
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Scalar (SD_HSI) by-element variants: no vec_duplicate, the single
;; selected element is the multiplicand.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	    (mult:<VWIDE>
	      (sign_extend:<VWIDE>
		(match_operand:SD_HSI 2 "register_operand" "w"))
	      (sign_extend:<VWIDE>
		(vec_select:<VEL>
		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
	      )
	    (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n

;; By-scalar form: the scalar is broadcast (vec_duplicate) and always read
;; from lane 0 in the output template.
(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	  (match_operand:<VWIDE> 1 "register_operand" "0")
	  (ss_ashift:<VWIDE>
	      (mult:<VWIDE>
		(sign_extend:<VWIDE>
		      (match_operand:VD_HSI 2 "register_operand" "w"))
		(sign_extend:<VWIDE>
		  (vec_duplicate:VD_HSI
		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	      (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2
;; "2" (high-half) forms: operand 4/5 is a parallel selecting the high half
;; of the 128-bit inputs, supplied by the expanders below.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 3 "register_operand" "w")
		     (match_dup 4))))
	     (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; Expander: build the hi-half selector and emit the internal pattern.
(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:VQ_HSI 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
						  operands[2], operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		   ))))
	     (const_int 1))))]
  "TARGET_SIMD"
  {
    ;; Remap the lane number for big-endian element ordering.
    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; As above, but the element comes from a full (128-bit) vector.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 4 "immediate_operand" "i")])
		   ))))
	     (const_int 1))))]
  "TARGET_SIMD"
  {
    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
    return
     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCOND> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VCONQ> 3 "register_operand")
   (match_operand:SI 4 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							operands[4], p));
  DONE;
})

;; By-scalar high-half form; scalar broadcast, read from lane 0.
(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(SBINQOPS:<VWIDE>
	 (match_operand:<VWIDE> 1 "register_operand" "0")
	 (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		     (match_operand:VQ_HSI 2 "register_operand" "w")
		     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
	     (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:<VWIDE> 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")
   (match_operand:<VEL> 3 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
						    operands[2], operands[3],
						    p));
  DONE;
})

;; vqdmull

;; Signed saturating doubling multiply long (no accumulation).
(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		     (match_operand:VSD_HSI 2 "register_operand" "w")))
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)
;; vqdmull_lane

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    ;; Remap the lane number for big-endian element ordering.
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; As above, but the element comes from a full (128-bit) vector.
(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Scalar (SD_HSI) by-element variants: no vec_duplicate.
(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:SD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_select:<VEL>
		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n

;; By-scalar form: scalar broadcast, always read from lane 0.
(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (match_operand:VD_HSI 1 "register_operand" "w"))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:VD_HSI
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2

;; High-half form: operand 3 selects the high half of both 128-bit inputs.
(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 2 "register_operand" "w")
		   (match_dup 3)))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; Expander: build the hi-half selector and emit the internal pattern.
(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:VQ_HSI 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
						  operands[2], p));
  DONE;
})

;; vqdmull2_lane

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})

;; vqdmull2_n

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
		   (match_operand:VQ_HSI 1 "register_operand" "w")
		   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
		 (vec_duplicate:<VHALF>
		   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})

;; vshl

;; Register-controlled (element-wise) shift; shift amount is per-lane signed.
(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_shift_reg<q>")]
)


;; vqshl

(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n

;; Shift left long by immediate.  When the shift equals the element width the
;; architecture only provides the SHLL alias, hence the special case.
(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
			 VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vrshr_n

(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

;; Shift-right and accumulate; operand 1 is the accumulator (tied to dest).
(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vs<lr>i_n

;; Shift and insert; operand 1 supplies the bits that are kept (tied to dest).
(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u)

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
		      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)


;; vq(r)shr(u)n_n
;; Saturating (rounding) shift-right (unsigned) narrow by immediate.
(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)


;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.

;; The neg of the comparison turns the 0/1 result into an all-zeros /
;; all-ones element mask.  Alternative 2 compares against immediate zero.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)

;; DImode compare: may end up in either register file, so split after reload
;; depending on where the allocator placed the operands.
(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

;; SIMD-register version the split above falls through to (no CC clobber).
(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)

;; cm(hs|hi)

;; Unsigned compares; no compare-against-zero alternative exists for these.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.

;; Vector test-bits: result lane is all-ones where (x & y) != 0.
;; Matched in the plus/eq/-1 canonical form described above.
(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

;; DI-mode cmtst, split after reload into either the SIMD cmtst
;; (below) or, for GP registers, a tst/cset-style sequence via
;; gen_cstoredi_neg (hence the CC clobber).
(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

;; SIMD-register-file form of the DI cmtst (no CC clobber).
(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

;; Floating-point vector compare producing an integer mask; the
;; second alternative matches a zero operand (YDz) and emits the
;; compare-against-zero form.
(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
4936 4937(define_insn "aarch64_fac<optab><mode>" 4938 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w") 4939 (neg:<V_INT_EQUIV> 4940 (FAC_COMPARISONS:<V_INT_EQUIV> 4941 (abs:VHSDF_HSDF 4942 (match_operand:VHSDF_HSDF 1 "register_operand" "w")) 4943 (abs:VHSDF_HSDF 4944 (match_operand:VHSDF_HSDF 2 "register_operand" "w")) 4945 )))] 4946 "TARGET_SIMD" 4947 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>" 4948 [(set_attr "type" "neon_fp_compare_<stype><q>")] 4949) 4950 4951;; addp 4952 4953(define_insn "aarch64_addp<mode>" 4954 [(set (match_operand:VD_BHSI 0 "register_operand" "=w") 4955 (unspec:VD_BHSI 4956 [(match_operand:VD_BHSI 1 "register_operand" "w") 4957 (match_operand:VD_BHSI 2 "register_operand" "w")] 4958 UNSPEC_ADDP))] 4959 "TARGET_SIMD" 4960 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>" 4961 [(set_attr "type" "neon_reduc_add<q>")] 4962) 4963 4964(define_insn "aarch64_addpdi" 4965 [(set (match_operand:DI 0 "register_operand" "=w") 4966 (unspec:DI 4967 [(match_operand:V2DI 1 "register_operand" "w")] 4968 UNSPEC_ADDP))] 4969 "TARGET_SIMD" 4970 "addp\t%d0, %1.2d" 4971 [(set_attr "type" "neon_reduc_add")] 4972) 4973 4974;; sqrt 4975 4976(define_expand "sqrt<mode>2" 4977 [(set (match_operand:VHSDF 0 "register_operand") 4978 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))] 4979 "TARGET_SIMD" 4980{ 4981 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) 4982 DONE; 4983}) 4984 4985(define_insn "*sqrt<mode>2" 4986 [(set (match_operand:VHSDF 0 "register_operand" "=w") 4987 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))] 4988 "TARGET_SIMD" 4989 "fsqrt\\t%0.<Vtype>, %1.<Vtype>" 4990 [(set_attr "type" "neon_fp_sqrt_<stype><q>")] 4991) 4992 4993;; Patterns for vector struct loads and stores. 
4994 4995(define_insn "aarch64_simd_ld2<mode>" 4996 [(set (match_operand:OI 0 "register_operand" "=w") 4997 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") 4998 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 4999 UNSPEC_LD2))] 5000 "TARGET_SIMD" 5001 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 5002 [(set_attr "type" "neon_load2_2reg<q>")] 5003) 5004 5005(define_insn "aarch64_simd_ld2r<mode>" 5006 [(set (match_operand:OI 0 "register_operand" "=w") 5007 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 5008 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 5009 UNSPEC_LD2_DUP))] 5010 "TARGET_SIMD" 5011 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" 5012 [(set_attr "type" "neon_load2_all_lanes<q>")] 5013) 5014 5015(define_insn "aarch64_vec_load_lanesoi_lane<mode>" 5016 [(set (match_operand:OI 0 "register_operand" "=w") 5017 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 5018 (match_operand:OI 2 "register_operand" "0") 5019 (match_operand:SI 3 "immediate_operand" "i") 5020 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 5021 UNSPEC_LD2_LANE))] 5022 "TARGET_SIMD" 5023 { 5024 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 5025 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; 5026 } 5027 [(set_attr "type" "neon_load2_one_lane")] 5028) 5029 5030(define_expand "vec_load_lanesoi<mode>" 5031 [(set (match_operand:OI 0 "register_operand") 5032 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand") 5033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5034 UNSPEC_LD2))] 5035 "TARGET_SIMD" 5036{ 5037 if (BYTES_BIG_ENDIAN) 5038 { 5039 rtx tmp = gen_reg_rtx (OImode); 5040 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 5041 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); 5042 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); 5043 } 5044 else 5045 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); 5046 DONE; 5047}) 5048 
5049(define_insn "aarch64_simd_st2<mode>" 5050 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") 5051 (unspec:OI [(match_operand:OI 1 "register_operand" "w") 5052 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5053 UNSPEC_ST2))] 5054 "TARGET_SIMD" 5055 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" 5056 [(set_attr "type" "neon_store2_2reg<q>")] 5057) 5058 5059;; RTL uses GCC vector extension indices, so flip only for assembly. 5060(define_insn "aarch64_vec_store_lanesoi_lane<mode>" 5061 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv") 5062 (unspec:BLK [(match_operand:OI 1 "register_operand" "w") 5063 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 5064 (match_operand:SI 2 "immediate_operand" "i")] 5065 UNSPEC_ST2_LANE))] 5066 "TARGET_SIMD" 5067 { 5068 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); 5069 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; 5070 } 5071 [(set_attr "type" "neon_store2_one_lane<q>")] 5072) 5073 5074(define_expand "vec_store_lanesoi<mode>" 5075 [(set (match_operand:OI 0 "aarch64_simd_struct_operand") 5076 (unspec:OI [(match_operand:OI 1 "register_operand") 5077 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5078 UNSPEC_ST2))] 5079 "TARGET_SIMD" 5080{ 5081 if (BYTES_BIG_ENDIAN) 5082 { 5083 rtx tmp = gen_reg_rtx (OImode); 5084 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 5085 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); 5086 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); 5087 } 5088 else 5089 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); 5090 DONE; 5091}) 5092 5093(define_insn "aarch64_simd_ld3<mode>" 5094 [(set (match_operand:CI 0 "register_operand" "=w") 5095 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") 5096 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5097 UNSPEC_LD3))] 5098 "TARGET_SIMD" 5099 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" 5100 [(set_attr "type" "neon_load3_3reg<q>")] 5101) 5102 
;; Load one 3-element structure and replicate to all lanes of three
;; registers.
(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

;; Load one 3-element structure into a single lane of three registers;
;; operand 2 supplies the untouched lanes.
(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

;; Standard-name expander; big-endian path loads then reverses the
;; register-list lane order.
(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

;; Three-register structure store (interleaving).
(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

;; Standard-name expander; big-endian path reverses register-list lane
;; order before the store.
(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
	(unspec:CI [(match_operand:CI 1 "register_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

;; Four-register structure load; XI mode holds four Q registers.
(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

;; Load one 4-element structure and replicate to all lanes of four
;; registers.
(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

5211(define_insn "aarch64_vec_load_lanesxi_lane<mode>" 5212 [(set (match_operand:XI 0 "register_operand" "=w") 5213 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv") 5214 (match_operand:XI 2 "register_operand" "0") 5215 (match_operand:SI 3 "immediate_operand" "i") 5216 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5217 UNSPEC_LD4_LANE))] 5218 "TARGET_SIMD" 5219{ 5220 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); 5221 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; 5222} 5223 [(set_attr "type" "neon_load4_one_lane")] 5224) 5225 5226(define_expand "vec_load_lanesxi<mode>" 5227 [(set (match_operand:XI 0 "register_operand") 5228 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand") 5229 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5230 UNSPEC_LD4))] 5231 "TARGET_SIMD" 5232{ 5233 if (BYTES_BIG_ENDIAN) 5234 { 5235 rtx tmp = gen_reg_rtx (XImode); 5236 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); 5237 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); 5238 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); 5239 } 5240 else 5241 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); 5242 DONE; 5243}) 5244 5245(define_insn "aarch64_simd_st4<mode>" 5246 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv") 5247 (unspec:XI [(match_operand:XI 1 "register_operand" "w") 5248 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5249 UNSPEC_ST4))] 5250 "TARGET_SIMD" 5251 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0" 5252 [(set_attr "type" "neon_store4_4reg<q>")] 5253) 5254 5255;; RTL uses GCC vector extension indices, so flip only for assembly. 
;; Store one 4-element structure from a single lane of four registers.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

;; Standard-name expander; big-endian path reverses register-list lane
;; order before the store.
(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
	(unspec:XI [(match_operand:XI 1 "register_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

;; Reverse the lane order within each register of a register list,
;; splitting after reload into one tbl per constituent Q register.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

;; Move expander for register-list (struct) modes; before RA, force a
;; non-register destination's source into a register.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
	(match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})


;; Builtin expander: ld1 of three registers from the address in
;; operand 1 (wrapped into a CImode MEM here).
(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI
	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

;; Builtin expander: ld1 of four registers.
(define_expand "aarch64_ld1x4<VALLDIF:mode>"
  [(match_operand:XI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x4_<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI
	  [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander: st1 of two registers to the address in operand 0.
(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:OI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x2_<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	 (unspec:OI
	  [(match_operand:OI 1 "register_operand" "w")
	   (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)

;; Builtin expander: st1 of three registers.
(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:CI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x3_<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI
         [(match_operand:CI 1 "register_operand" "w")
	  (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)

;; Builtin expander: st1 of four registers.
(define_expand "aarch64_st1x4<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:XI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[0]);
  emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x4_<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI
	   [(match_operand:XI 1 "register_operand" "w")
	    (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_4reg<q>")]
)

;; Little-endian struct-mode move: reg-reg moves split later ("#"),
;; memory forms use multi-register st1/ld1.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

;; Endianness-stable single-register load/store, kept as unspecs so
;; lane numbering is fixed regardless of BYTES_BIG_ENDIAN.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0	"register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

;; Big-endian OI (2 x Q) move: memory forms can use a single ldp/stp
;; of Q registers; reg-reg splits later.
(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

;; Big-endian CI (3 x Q) move: no single instruction covers it, so all
;; alternatives split (see the CI define_split below).
(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

;; Big-endian XI (4 x Q) move, likewise fully split.
(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

;; Split an OI reg-reg move into two TI-register moves.
(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

;; Split CI moves: reg-reg becomes three TI moves; big-endian memory
;; moves become an OI move plus a trailing TI move (via V16QI lowpart
;; so the subreg is valid).
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

;; Split XI moves: reg-reg becomes four TI moves; big-endian memory
;; moves become two OI moves.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})

;; Builtin expander for ld2r/ld3r/ld4r: build a BLK MEM sized to the
;; structure element count and defer to the matching insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})

;; D-register ld2: real de-interleaving ld2 for vector element modes.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

;; D-register ld2 for DI/DF element modes: there is nothing to
;; de-interleave, so a plain ld1 of two registers suffices.
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

;; Builtin expander for D-register struct loads: BLK MEM of nregs * 8
;; bytes, dispatched to the _dreg insns above.
(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

;; Single-register ld1 builtin; big-endian uses the lane-stable
;; aarch64_be_ld1, little-endian is a plain move from memory.
(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

;; Builtin expander for Q-register struct loads.
(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

;; ld1 of two Q registers (non-interleaving pair load).
(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

;; ld1 of two D registers.
(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})


;; Builtin expander for lane loads: bounds-check the lane index, build
;; a sized BLK MEM and defer to the matching _lane insn.
(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
	(match_operand:DI 1 "register_operand")
	(match_operand:VSTRUCT 2 "register_operand")
	(match_operand:SI 3 "immediate_operand")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

;; Extract the 64-bit (VDC) vector numbered by immediate operand 2 from the
;; register tuple in operand 1.
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
  [(match_operand:VDC 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  /* Each register of the tuple occupies 16 bytes of the opaque mode.  */
  int offset = part * 16;

  /* Copy the whole 128-bit register out first, then take its low half.  */
  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

;; Extract the 128-bit (VQ) vector numbered by immediate operand 2 from the
;; register tuple in operand 1, as a direct subreg.
(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VQ 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

;; Standard pattern: permute the bytes of operands 1 and 2 according to the
;; variable selector in operand 3.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})

;; Table lookup with a single 16-byte table register (operand 1) and
;; index vector in operand 2.
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

;; Table lookup with an OImode (two-register) table in operand 1 and
;; V16QI index vector in operand 2.
(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

;; As above, but with any VB index mode.
(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Table extension (tbx) variant: the destination is tied to operand 1.
(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:OI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.

;; Table lookup with a CImode (three-register) table in operand 1.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Three-register table extension; destination tied to operand 1.
(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:CI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

;; Table lookup with an XImode (four-register) table in operand 1.
(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Four-register table extension; destination tied to operand 1.
(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		    (match_operand:XI 2 "register_operand" "w")
		    (match_operand:VB 3 "register_operand" "w")]
		   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Concatenate two V16QI registers into one OImode register; kept as a
;; single insn until after reload, then split by
;; aarch64_split_combinev16qi once hard registers are known.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
			 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
			 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  /* Scale the lane index into the byte offset the EXT instruction takes.  */
  operands[3] = GEN_INT (INTVAL (operands[3])
			 * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
			 REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

;; Interleaved store of an OImode pair of D-register vectors.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

;; For single-element 64-bit (DX) modes, st2 is emitted as a
;; two-register st1.
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

;; Interleaved store of a CImode triple of D-register vectors.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

;; DX variant of st3, emitted as a three-register st1.
(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

;; Interleaved store of an XImode quadruple of D-register vectors.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

;; DX variant of st4, emitted as a four-register st1.
(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

;; Store a D-register tuple (operand 1) to the address in operand 0 via the
;; _dreg patterns above; the BLKmode MEM covers nregs 8-byte registers.
(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

;; Store a Q-register tuple (operand 1) to the address in operand 0.
(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

;; Structure store-lane: store lane number operand 2 of each register of the
;; tuple in operand 1 to the address in operand 0.
(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* One element per register of the tuple is written.  */
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

;; Store one vector register to the address in operand 0.  Big-endian needs
;; the dedicated be_st1 pattern; little-endian is a plain move to memory.
(define_expand "aarch64_st1<VALL_F16:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

;; Insert the 128-bit vector in operand 2 as register number operand 3 of
;; the tuple copied from operand 1 into operand 0.
(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:VSTRUCT 1 "register_operand")
   (match_operand:VQ 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  /* Each register of the tuple occupies 16 bytes of the opaque mode.  */
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.

(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; As above, but initialising a 128-bit vector from half-width subvectors.
(define_expand "vec_init<mode><Vhalf>"
  [(match_operand:VQ_NO2E 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; Load a scalar from memory and replicate it to all lanes.
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

;; Load two consecutive 128-bit (VQ) vectors into an OImode register pair.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

;; As above, but the elements are 64-bit (VDC) vectors.
(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)


;; Floating-point reciprocal estimate.
(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
	  UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

;; Floating-point reciprocal exponent (scalar only).
(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
			UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

;; Floating-point reciprocal step.
(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

;; Unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
;; Only the low (start 0) or high (start nunits/2) half can be extracted;
;; other starting lanes make the expander FAIL.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int start = INTVAL (operands[2]);
  if (start != 0 && start != <nunits> / 2)
    FAIL;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DF is rarely used by other patterns, so it should be better to hide
     it in a subreg destination of a normal DF op.  */
  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
  DONE;
})

;; aes

;; AES round: the XOR of the state (operand 1) with the round key
;; (operand 2) is commutative, hence the "%0" constraint.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(xor:V16QI
	    (match_operand:V16QI 1 "register_operand" "%0")
	    (match_operand:V16QI 2 "register_operand" "w"))]
	  CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; AES (inverse) mix columns.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
		      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(xor:V16QI
	      (match_operand:V16QI 1 "register_operand" "%0")
	      (match_operand:V16QI 2 "register_operand" "w"))]
	    UNSPEC_AESE)]
	  UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
	    [(xor:V16QI
	      (match_operand:V16QI 1 "register_operand" "%0")
	      (match_operand:V16QI 2 "register_operand" "w"))]
	    UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; sha1

;; SHA1 fixed rotate, SI form.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:SI 1
		     "register_operand" "w")]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 fixed rotate on element 0 of a V4SI (little-endian lane order).
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; Big-endian counterpart: architectural lane 0 is RTL lane 3.
(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
		   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
		     CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")]
		     UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
		     UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

;; Three-way XOR, expressed in plain RTL so combine can form it.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; Rotate left by one and XOR.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XOR and rotate right by immediate.
(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

;; Bit clear and XOR: operand 1 ^ (operand 2 & ~operand 3).
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)


;; SM3 compression rounds; operand 4 is a 2-bit immediate lane selector.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
		     CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
		     CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
		     UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

;; Widening half-float multiply-accumulate on the low halves of the
;; half-float inputs; the lane-selecting PARALLELs are built here and
;; matched by the vect_par_cnst_lo_half insns below.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;

})

;; As above, operating on the high halves of the half-float inputs.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})

;; fmlal: widen the low halves of operands 2 and 3 to single precision and
;; fuse-multiply-accumulate into operand 1 (tied to the output).
(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; fmlsl: as fmlal_low but the first factor is negated.
(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; High-half counterpart of fmlal_low; emitted as fmlal2.
(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; High-half counterpart of fmlsl_low; emitted as fmlsl2.
(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; By-lane forms: operand 4 selects a single half-float lane of operand 3.
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
		     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							  operands[1],
							  operands[2],
							  operands[3],
							  p1, lane));
  DONE;
}
)

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
		     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Lane form of FMLSL: low half of operand 2, negated, times a broadcast
;; element of operand 3.
(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Lane form of FMLAL2: high half of operand 2 times a broadcast element
;; of operand 3.
(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Lane form of FMLSL2: high half of operand 2, negated.
(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Q-register "laneq" expanders: V4SF result, V8HF inputs, lane 0-7
;; via aarch64_lane_imm3.  The lane is endian-adjusted in V8HFmode since
;; operand 3 is a full 128-bit vector.
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})

;; Q-register laneq insns: low/high half of V8HF operand 2 times a
;; broadcast element (index 0-7, constraint Ui7) of V8HF operand 3.
(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Mixed "laneq" expanders: V2SF result, V4HF multiplicand but V8HF
;; by-element operand; half selector built in V4HFmode, lane adjusted in
;; V8HFmode to match operand 3's mode.
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;

})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;

})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; Mixed "lane" expanders: V4SF result, V8HF multiplicand but V4HF
;; by-element operand (lane 0-3); lane adjusted in V4HFmode.
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

;; 64x64 -> 128-bit polynomial multiply (low D-register elements).
;; The result is opaque to the optimizers, hence the unspec.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
	 UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

;; As above, but multiplying the upper 64-bit elements of the V2DI inputs.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
	 UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)

;; BFloat16 dot product, accumulating into SF vector operand 1.
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; By-element BFDOT.  The lane indexes a 2h pair, so the user-visible
;; index is remapped over nunits/2 pairs (endian-adjusted).
(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)

;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:V8BF 3 "register_operand" "w")]
		     BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; By-element BFMLALB/BFMLALT; lane is endian-adjusted at output time.
(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:VBF 3 "register_operand" "x")
				  (match_operand:SI 4 "const_int_operand" "n")]
		     BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Narrow into the upper half of the V8BF destination, preserving the
;; lower half (operand 1 tied to the output).
(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
	 UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

;; Scalar SF -> BF conversion.
(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
	 UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
;; Widen the low BF elements to SF by shifting left 16 bits (BF is the
;; upper half of the SF bit pattern, so a plain shift is an exact
;; conversion).
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; As above, but widening the high four BF elements of a V8BF input.
(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
	 UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Scalar BF -> SF conversion via a 64-bit left shift of the D register.
(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
	 UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)