;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
	(match_operand:VALL_F16MOV 1 "general_operand"))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
	   && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
		&& aarch64_mem_pair_operand (operands[0], DImode))
	       || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
      operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
  "
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand")
        (match_operand:VALL 1 "general_operand"))]
  "TARGET_SIMD && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
	(vec_duplicate:VDQ_I
	  (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
	(vec_duplicate:VDQF_F16
	  (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16_NO_V2Q
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
		"=w, m,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VDMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
		     neon_logic<q>, neon_to_gp<q>, f_mcr,\
		     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w")
	(match_operand:VQMOV 1 "general_operand"
		"m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
	return "ldr\t%q0, %1";
    case 1:
	return "stp\txzr, xzr, %0";
    case 2:
	return "str\t%q1, %0";
    case 3:
	return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
	return "#";
    case 7:
	return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
	gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
		     neon_logic<q>, multiple, multiple,\
		     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
	(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
	(match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
		   plus_constant (Pmode,
				  XEXP (operands[1], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
	(match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
		   plus_constant (Pmode,
				  XEXP (operands[0], 0),
				  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
	(match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
    && rtx_equal_p (XEXP (operands[3], 0),
		    plus_constant (Pmode,
			       XEXP (operands[1], 0),
			       GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
	(match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
	(match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
		plus_constant (Pmode,
			       XEXP (operands[0], 0),
			       GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
      (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQMOV 0 "register_operand" "")
        (match_operand:VQMOV 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
        (match_operand:VQMOV 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
	rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
	rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
        emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
        emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV 1 "register_operand")
          (match_operand 2 "ascending_int_parallel")))]
  "TARGET_SIMD"
)

(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD"
  "@
   #
   umov\t%0, %1.d[0]"
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "type" "mov_reg,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD"
  "@
   dup\\t%d0, %1.d[1]
   umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
   (set_attr "length" "4")]
)

(define_insn "orn<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
		(match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		  (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		   (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "w")]
		   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
					      <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)
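
;; A scalar sketch of the identity used by the ctz expansion above: BSWAP
;; reverses the bytes of each element and RBIT reverses the bits within
;; each byte, so together they reverse all of the element's bits, at which
;; point CLZ counts the original trailing zeroes.  Illustrative only;
;; rbit_per_byte is a hand-written stand-in for what RBIT does per lane.
;;
;;   static unsigned rbit_per_byte (unsigned x)
;;   {
;;     x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
;;     x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
;;     x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);
;;     return x;
;;   }
;;
;;   unsigned ctz32 (unsigned x)   /* x != 0 */
;;   {
;;     x = __builtin_bswap32 (x);  /* bswap<mode>2: swap the bytes.  */
;;     x = rbit_per_byte (x);      /* aarch64_rbit: bits within bytes.  */
;;     return __builtin_clz (x);   /* clz of the fully reversed value.  */
;;   }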

(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
		  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)
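
;; A scalar model of the xorsign expansion above (illustrative only):
;; AND the second operand with the sign mask, then XOR the result into
;; the first operand, flipping a's sign exactly where b is negative.
;;
;;   #include <string.h>
;;
;;   float xorsign (float a, float b)
;;   {
;;     unsigned ua, ub;
;;     memcpy (&ua, &a, sizeof ua);
;;     memcpy (&ub, &b, sizeof ub);
;;     ua ^= ub & 0x80000000u;   /* v_bitmask = HOST_WIDE_INT_M1U << 31.  */
;;     memcpy (&a, &ua, sizeof a);
;;     return a;
;;   }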

;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; use needs to guarantee that the source vectors are contiguous.  It would
;; be wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that is done the permute
;; would have been created as a LOAD_LANES, which means the values in the
;; registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")]
				   FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
		    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
				   (match_operand:VHSDF 3 "register_operand" "w")
				   (match_operand:SI 4 "const_int_operand" "n")]
				   FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
	(plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
		   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
				 (match_operand:V8HF 3 "register_operand" "w")
				 (match_operand:SI 4 "const_int_operand" "n")]
				 FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
	(plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
		     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
				     (match_operand:<VHALF> 3 "register_operand" "w")
				     (match_operand:SI 4 "const_int_operand" "n")]
				     FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:<VSI2QI> 3 "register_operand" "w")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot
;; (vector) Dot Product operation.
(define_insn "aarch64_usdot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
		      (match_operand:<VSI2QI> 3 "register_operand" "w")]
	  UNSPEC_USDOT)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;; }
;; return r;
;;
;; This can be auto-vectorized to
;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling the loop:
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
	(plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
			    (match_operand:<VSI2QI> 2 "register_operand")]
		 DOTPROD)
		(match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
				    operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})
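
;; As a scalar model of what each [us]dot instruction computes, every
;; 32-bit lane of the accumulator receives the dot product of the four
;; corresponding bytes of the two input vectors (unsigned shown here;
;; illustrative only):
;;
;;   #include <stdint.h>
;;
;;   uint32_t udot_lane (uint32_t acc, const uint8_t a[4], const uint8_t b[4])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       acc += (uint32_t) a[i] * b[i];
;;     return acc;
;;   }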

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V8QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS (match_operand:VS 1 "register_operand" "0")
		(unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
			    (match_operand:V16QI 3 "register_operand" "<h_con>")
			    (match_operand:SI 4 "immediate_operand" "i")]
		DOTPROD)))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
	(plus:VS
	  (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")
		      (match_operand:SI 4 "immediate_operand" "i")]
	  DOTPROD_I8MM)
	  (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
    int lane = INTVAL (operands[4]);
    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<VS:q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
		  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
					 operands[2], operands[1]));
  DONE;
}
)
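
;; A scalar model of the BSL-based copysign expansion above (illustrative
;; only): the bit-select mask covers just the sign bit, so the result
;; takes its sign from operand 2 and everything else from operand 1.
;;
;;   #include <string.h>
;;
;;   float copysign_bsl (float a, float b)
;;   {
;;     unsigned ua, ub, mask = 0x80000000u;  /* HOST_WIDE_INT_M1U << 31 */
;;     memcpy (&ua, &a, sizeof ua);
;;     memcpy (&ub, &b, sizeof ub);
;;     ua = (ub & mask) | (ua & ~mask);      /* bsl on the sign mask.  */
;;     memcpy (&a, &ua, sizeof a);
;;     return a;
;;   }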

(define_insn "*aarch64_mul3_elt<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	  (vec_select:<VEL>
	    (match_operand:VMUL 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
     (mult:VMUL_CHANGE_NLANES
       (vec_duplicate:VMUL_CHANGE_NLANES
	  (vec_select:<VEL>
	    (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
	    (parallel [(match_operand:SI 2 "immediate_operand")])))
      (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
 [(set (match_operand:VMUL 0 "register_operand" "=w")
    (mult:VMUL
      (vec_duplicate:VMUL
	    (match_operand:<VEL> 1 "register_operand" "<h_con>"))
      (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
		     UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
			    (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	 UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
	(unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
		     UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
     (mult:DF
       (vec_select:DF
	 (match_operand:V2DF 1 "register_operand" "w")
	 (parallel [(match_operand:SI 2 "immediate_operand")]))
       (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	  (unspec:VSDQ_I_DI
	    [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
	   UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate, as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) in the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(minus:VDQ_BHSI
	  (USMAX:VDQ_BHSI
	    (match_operand:VDQ_BHSI 1 "register_operand" "w")
	    (match_operand:VDQ_BHSI 2 "register_operand" "w"))
	  (<max_opp>:VDQ_BHSI
	    (match_dup 1)
	    (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)
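
;; A scalar sketch of the worked example in the comment above, for int8_t
;; (illustrative only).  With a = 64 and b = -128, a - b is 192, which
;; wraps to -64 in QImode, so ABS (MINUS) yields 64; MAX - MIN instead
;; yields the 192 (-64 signed) that SABD produces.
;;
;;   #include <stdint.h>
;;
;;   int8_t sabd (int8_t a, int8_t b)
;;   {
;;     int8_t mx = a > b ? a : b;
;;     int8_t mn = a < b ? a : b;
;;     return mx - mn;   /* MINUS (smax (a, b), smin (a, b)).  */
;;   }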

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")]
	ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:VDQV_S 2 "register_operand" "w")
			 (match_operand:<VDBLW> 3 "register_operand" "0")]
	ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
	(unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
			  (match_operand:<VDBLW> 2 "register_operand" "0")]
	ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; unsigned.

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
		  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    if (TARGET_DOTPROD)
      {
	rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
	rtx abd = gen_reg_rtx (V16QImode);
	emit_insn (gen_aarch64_<sur>abdv16qi_3 (abd, operands[1], operands[2]));
	emit_insn (gen_aarch64_udotv16qi (operands[0], operands[3],
					  abd, ones));
	DONE;
      }
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
					       operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
					      operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
					      operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)
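
;; A scalar model of what <sur>sadv16qi computes for the unsigned case
;; (illustrative only).  Which 32-bit lane each widened difference lands
;; in differs between the UABDL2/UABAL/UADALP and the UABD/UDOT sequences;
;; the vectorizer only relies on the sum over all lanes being correct.
;;
;;   #include <stdint.h>
;;
;;   void usadv16qi (uint32_t acc[4], const uint8_t a[16], const uint8_t b[16])
;;   {
;;     for (int i = 0; i < 16; i++)
;;       acc[i / 4] += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;   }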

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
	(plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
			 (match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
		       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(abs:VHSDF_HSDF
	  (minus:VHSDF_HSDF
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w")
	    (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_BIC);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
	return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
						  AARCH64_CHECK_ORR);
      default:
	gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		 (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
	return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
	return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
	gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_merge:VALL_F16
	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
	(vec_merge:VALL_F16_NO_V2Q
	    (vec_duplicate:VALL_F16_NO_V2Q
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
	    (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
					   INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})
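
;; A scalar model of the signbit expansion above (illustrative only):
;; reinterpret the float as an integer and move the sign bit down to
;; bit 0 with a logical shift right by (element width - 1).
;;
;;   #include <string.h>
;;
;;   unsigned signbit32 (float x)
;;   {
;;     unsigned u;
;;     memcpy (&u, &x, sizeof u);
;;     return u >> 31;   /* aarch64_simd_lshr by GET_MODE_UNIT_BITSIZE - 1.  */
;;   }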

(define_insn "aarch64_simd_lshr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		     (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
 "TARGET_SIMD"
 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "*aarch64_simd_sra<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
	(plus:VDQ_I
	   (SHIFTRT:VDQ_I
		(match_operand:VDQ_I 1 "register_operand" "w")
		(match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr"))
	   (match_operand:VDQ_I 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm" "Dl")))]
 "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
		   (match_operand:VDQ_I 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_UNSIGNED))]
 "TARGET_SIMD"
 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
       (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
		    (match_operand:VDQ_I 2 "register_operand" "w")]
		   UNSPEC_ASHIFT_SIGNED))]
 "TARGET_SIMD"
 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
	  emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
						     operands[1],
						     tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
							       operands[2],
							       0)));
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
  DONE;
})

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
						  operands[1],
						  tmp));
	  DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
					 convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      tmp1));
  DONE;
})
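
;; A scalar model of the negation trick used above (illustrative only, for
;; 0 < n < element width): USHL/SSHL shift left for positive per-lane shift
;; counts and right for negative ones, so a variable right shift becomes a
;; USHL/SSHL by the negated amount.
;;
;;   unsigned lshr_var (unsigned x, int n)
;;   {
;;     int shift = -n;                                  /* gen_negsi2.  */
;;     return shift >= 0 ? x << shift : x >> -shift;    /* ushl semantics.  */
;;   }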

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI  2 "general_operand")]
 "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
	  rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
						       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
						  operands[1],
						  tmp));
          DONE;
	}
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
								tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    tmp1));
  DONE;
})

(define_expand "vashl<mode>3"
 [(match_operand:VDQ_I 0 "register_operand")
  (match_operand:VDQ_I 1 "register_operand")
  (match_operand:VDQ_I 2 "register_operand")]
 "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
					      operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes would most certainly offset the
;; gain from vectorization.
(define_expand "vashr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand")
  (match_operand:VDQ_BHSI 1 "register_operand")
  (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
						    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the
       sign bit, just like an asr by 63 does; however, the standard pattern
       does not handle a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vlshr<mode>3"
 [(match_operand:VDQ_BHSI 0 "register_operand")
  (match_operand:VDQ_BHSI 1 "register_operand")
  (match_operand:VDQ_BHSI 2 "register_operand")]
 "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
						      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
					  GEN_INT (elem), operands[0]));
    DONE;
  }
)


(define_insn "aarch64_mla<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (plus:VDQ_BHSI (mult:VDQ_BHSI
			(match_operand:VDQ_BHSI 2 "register_operand" "w")
			(match_operand:VDQ_BHSI 3 "register_operand" "w"))
		      (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (plus:VDQHS
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))
	 (match_operand:VDQHS 4 "register_operand" "0")))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(plus:VDQHS
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
		(match_operand:VDQHS 2 "register_operand" "w"))
	  (match_operand:VDQHS 3 "register_operand" "0")))]
 "TARGET_SIMD"
 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
		   (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
			      (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
 "TARGET_SIMD"
 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:VDQHS 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
 [(set (match_operand:VDQHS 0 "register_operand" "=w")
       (minus:VDQHS
	 (match_operand:VDQHS 4 "register_operand" "0")
	 (mult:VDQHS
	   (vec_duplicate:VDQHS
	      (vec_select:<VEL>
		(match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
		  (parallel [(match_operand:SI 2 "immediate_operand")])))
	   (match_operand:VDQHS 3 "register_operand" "w"))))]
 "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
	(minus:VDQHS
	  (match_operand:VDQHS 1 "register_operand" "0")
	  (mult:VDQHS (vec_duplicate:VDQHS
		  (match_operand:<VEL> 2 "register_operand" "<h_con>"))
		(match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
		    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 "TARGET_SIMD"
 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
 [(set (match_operand:V2DI 0 "register_operand")
       (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
                    (match_operand:V2DI 2 "register_operand")))]
 "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
              operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
       (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
			 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
			MAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
 [(set (match_operand:VHSDF 0 "register_operand" "=w")
       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		      (match_operand:VHSDF 2 "register_operand" "w")]
		      FMAXMINV))]
 "TARGET_SIMD"
 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1 and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; and op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means is that the RTL descriptions of the below patterns
;; need to change depending on endianness.
1489
1490;; Move to the low architectural bits of the register.
1491;; On little-endian this is { operand, zeroes }
1492;; On big-endian this is { zeroes, operand }
1493
1494(define_insn "move_lo_quad_internal_<mode>"
1495  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1496	(vec_concat:VQMOV
1497	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1498	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))]
1499  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1500  "@
1501   dup\\t%d0, %1.d[0]
1502   fmov\\t%d0, %1
1503   dup\\t%d0, %1"
1504  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1505   (set_attr "length" "4")
1506   (set_attr "arch" "simd,fp,simd")]
1507)
1508
1509(define_insn "move_lo_quad_internal_be_<mode>"
1510  [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w")
1511	(vec_concat:VQMOV
1512	  (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")
1513	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1514  "TARGET_SIMD && BYTES_BIG_ENDIAN"
1515  "@
1516   dup\\t%d0, %1.d[0]
1517   fmov\\t%d0, %1
1518   dup\\t%d0, %1"
1519  [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1520   (set_attr "length" "4")
1521   (set_attr "arch" "simd,fp,simd")]
1522)
1523
1524(define_expand "move_lo_quad_<mode>"
1525  [(match_operand:VQMOV 0 "register_operand")
1526   (match_operand:<VHALF> 1 "register_operand")]
1527  "TARGET_SIMD"
1528{
1529  rtx zs = CONST0_RTX (<VHALF>mode);
1530  if (BYTES_BIG_ENDIAN)
1531    emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs));
1532  else
1533    emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs));
1534  DONE;
1535}
1536)
1537
1538;; Move operand1 to the high architectural bits of the register, keeping
1539;; the low architectural bits of operand2.
1540;; For little-endian this is { operand2, operand1 }
1541;; For big-endian this is { operand1, operand2 }
1542
1543(define_insn "aarch64_simd_move_hi_quad_<mode>"
1544  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1545        (vec_concat:VQMOV
1546          (vec_select:<VHALF>
1547                (match_dup 0)
1548                (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
1549	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1550  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1551  "@
1552   ins\\t%0.d[1], %1.d[0]
1553   ins\\t%0.d[1], %1"
1554  [(set_attr "type" "neon_ins")]
1555)
1556
1557(define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1558  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
1559        (vec_concat:VQMOV
1560	  (match_operand:<VHALF> 1 "register_operand" "w,r")
1561          (vec_select:<VHALF>
1562                (match_dup 0)
1563                (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
1564  "TARGET_SIMD && BYTES_BIG_ENDIAN"
1565  "@
1566   ins\\t%0.d[1], %1.d[0]
1567   ins\\t%0.d[1], %1"
1568  [(set_attr "type" "neon_ins")]
1569)
1570
1571(define_expand "move_hi_quad_<mode>"
1572 [(match_operand:VQMOV 0 "register_operand")
1573  (match_operand:<VHALF> 1 "register_operand")]
1574 "TARGET_SIMD"
1575{
1576  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1577  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
							operands[1], p));
  else
    emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
						     operands[1], p));
1583  DONE;
1584})
1585
1586;; Narrowing operations.
1587
1588;; For doubles.
1589(define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1590 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1591       (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1592 "TARGET_SIMD"
1593 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1594  [(set_attr "type" "neon_shift_imm_narrow_q")]
1595)
1596
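;; For 64-bit inputs we first build the 128-bit vector
;; { operands[1], operands[2] } (with the halves swapped on big-endian)
;; in a temporary and then narrow it with a single XTN; e.g. two V4HIs
;; combine into a V8HI, which narrows to a V8QI.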
1597(define_expand "vec_pack_trunc_<mode>"
1598 [(match_operand:<VNARROWD> 0 "register_operand")
1599  (match_operand:VDN 1 "register_operand")
1600  (match_operand:VDN 2 "register_operand")]
1601 "TARGET_SIMD"
1602{
1603  rtx tempreg = gen_reg_rtx (<VDBL>mode);
1604  int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1605  int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1606
1607  emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1608  emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1609  emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1610  DONE;
1611})
1612
1613;; For quads.
1614
1615(define_insn "vec_pack_trunc_<mode>"
1616 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1617       (vec_concat:<VNARROWQ2>
1618	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1619	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1620 "TARGET_SIMD"
1621 {
1622   if (BYTES_BIG_ENDIAN)
1623     return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1624   else
1625     return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1626 }
1627  [(set_attr "type" "multiple")
1628   (set_attr "length" "8")]
1629)
1630
1631;; Widening operations.
1632
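;; vec_unpack<su>_lo/_hi select one half of a 128-bit vector and widen
;; each element to twice its width; e.g. a V16QI unpacks into two V8HIs
;; via SXTL/SXTL2 (signed) or UXTL/UXTL2 (unsigned).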
1633(define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1634  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1635        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1636			       (match_operand:VQW 1 "register_operand" "w")
1637			       (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1638			    )))]
1639  "TARGET_SIMD"
1640  "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1641  [(set_attr "type" "neon_shift_imm_long")]
1642)
1643
1644(define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1645  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1646        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1647			       (match_operand:VQW 1 "register_operand" "w")
1648			       (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1649			    )))]
1650  "TARGET_SIMD"
1651  "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1652  [(set_attr "type" "neon_shift_imm_long")]
1653)
1654
1655(define_expand "vec_unpack<su>_hi_<mode>"
1656  [(match_operand:<VWIDE> 0 "register_operand")
1657   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1658  "TARGET_SIMD"
1659  {
1660    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1661    emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1662							  operands[1], p));
1663    DONE;
1664  }
1665)
1666
1667(define_expand "vec_unpack<su>_lo_<mode>"
1668  [(match_operand:<VWIDE> 0 "register_operand")
1669   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1670  "TARGET_SIMD"
1671  {
1672    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1673    emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1674							  operands[1], p));
1675    DONE;
1676  }
1677)
1678
1679;; Widening arithmetic.
1680
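;; The multiply-accumulate patterns below match, per lane,
;;   acc[i] +/-= (WIDE) a[i] * (WIDE) b[i]
;; over the low or high half of the inputs, producing a single
;; [US]MLAL{2} or [US]MLSL{2} instruction.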
1681(define_insn "*aarch64_<su>mlal_lo<mode>"
1682  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1683        (plus:<VWIDE>
1684          (mult:<VWIDE>
1685              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1686                 (match_operand:VQW 2 "register_operand" "w")
1687                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1688              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1689                 (match_operand:VQW 4 "register_operand" "w")
1690                 (match_dup 3))))
1691          (match_operand:<VWIDE> 1 "register_operand" "0")))]
1692  "TARGET_SIMD"
1693  "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1694  [(set_attr "type" "neon_mla_<Vetype>_long")]
1695)
1696
1697(define_insn "*aarch64_<su>mlal_hi<mode>"
1698  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1699        (plus:<VWIDE>
1700          (mult:<VWIDE>
1701              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1702                 (match_operand:VQW 2 "register_operand" "w")
1703                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1704              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1705                 (match_operand:VQW 4 "register_operand" "w")
1706                 (match_dup 3))))
1707          (match_operand:<VWIDE> 1 "register_operand" "0")))]
1708  "TARGET_SIMD"
1709  "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1710  [(set_attr "type" "neon_mla_<Vetype>_long")]
1711)
1712
1713(define_insn "*aarch64_<su>mlsl_lo<mode>"
1714  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1715        (minus:<VWIDE>
1716          (match_operand:<VWIDE> 1 "register_operand" "0")
1717          (mult:<VWIDE>
1718              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1719                 (match_operand:VQW 2 "register_operand" "w")
1720                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1721              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1722                 (match_operand:VQW 4 "register_operand" "w")
1723                 (match_dup 3))))))]
1724  "TARGET_SIMD"
1725  "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1726  [(set_attr "type" "neon_mla_<Vetype>_long")]
1727)
1728
1729(define_insn "*aarch64_<su>mlsl_hi<mode>"
1730  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1731        (minus:<VWIDE>
1732          (match_operand:<VWIDE> 1 "register_operand" "0")
1733          (mult:<VWIDE>
1734              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1735                 (match_operand:VQW 2 "register_operand" "w")
1736                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1737              (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1738                 (match_operand:VQW 4 "register_operand" "w")
1739                 (match_dup 3))))))]
1740  "TARGET_SIMD"
1741  "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1742  [(set_attr "type" "neon_mla_<Vetype>_long")]
1743)
1744
1745(define_insn "*aarch64_<su>mlal<mode>"
1746  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1747        (plus:<VWIDE>
1748          (mult:<VWIDE>
1749            (ANY_EXTEND:<VWIDE>
1750              (match_operand:VD_BHSI 1 "register_operand" "w"))
1751            (ANY_EXTEND:<VWIDE>
1752              (match_operand:VD_BHSI 2 "register_operand" "w")))
1753          (match_operand:<VWIDE> 3 "register_operand" "0")))]
1754  "TARGET_SIMD"
1755  "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1756  [(set_attr "type" "neon_mla_<Vetype>_long")]
1757)
1758
1759(define_insn "*aarch64_<su>mlsl<mode>"
1760  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1761        (minus:<VWIDE>
1762          (match_operand:<VWIDE> 1 "register_operand" "0")
1763          (mult:<VWIDE>
1764            (ANY_EXTEND:<VWIDE>
1765              (match_operand:VD_BHSI 2 "register_operand" "w"))
1766            (ANY_EXTEND:<VWIDE>
1767              (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1768  "TARGET_SIMD"
1769  "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1770  [(set_attr "type" "neon_mla_<Vetype>_long")]
1771)
1772
1773(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1774 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1775       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1776			   (match_operand:VQW 1 "register_operand" "w")
1777                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1778		     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1779                           (match_operand:VQW 2 "register_operand" "w")
1780                           (match_dup 3)))))]
1781  "TARGET_SIMD"
1782  "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1783  [(set_attr "type" "neon_mul_<Vetype>_long")]
1784)
1785
1786(define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
1787  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1788	(mult:<VWIDE> (ANY_EXTEND:<VWIDE>
1789			 (match_operand:VD_BHSI 1 "register_operand" "w"))
1790		      (ANY_EXTEND:<VWIDE>
1791			 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
1792  "TARGET_SIMD"
1793  "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1794  [(set_attr "type" "neon_mul_<Vetype>_long")]
1795)
1796
1797(define_expand "vec_widen_<su>mult_lo_<mode>"
1798  [(match_operand:<VWIDE> 0 "register_operand")
1799   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1800   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1801 "TARGET_SIMD"
1802 {
1803   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1804   emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1805						       operands[1],
1806						       operands[2], p));
1807   DONE;
1808 }
1809)
1810
1811(define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1812 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1813      (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1814			    (match_operand:VQW 1 "register_operand" "w")
1815			    (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1816		    (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1817			    (match_operand:VQW 2 "register_operand" "w")
1818			    (match_dup 3)))))]
1819  "TARGET_SIMD"
1820  "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1821  [(set_attr "type" "neon_mul_<Vetype>_long")]
1822)
1823
1824(define_expand "vec_widen_<su>mult_hi_<mode>"
1825  [(match_operand:<VWIDE> 0 "register_operand")
1826   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
1827   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
1828 "TARGET_SIMD"
1829 {
1830   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1831   emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1832						       operands[1],
1833						       operands[2], p));
   DONE;
 }
1837)
1838
1839;; vmull_lane_s16 intrinsics
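;; e.g. (illustrative) vmull_lane_s16 (a, b, 2) multiplies every element
;; of a by b[2] and widens the result:
;;   smull	v0.4s, v1.4h, v2.h[2]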
1840(define_insn "aarch64_vec_<su>mult_lane<Qlane>"
1841  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1842	(mult:<VWIDE>
1843	  (ANY_EXTEND:<VWIDE>
1844	    (match_operand:<VCOND> 1 "register_operand" "w"))
1845	  (ANY_EXTEND:<VWIDE>
1846	    (vec_duplicate:<VCOND>
1847	      (vec_select:<VEL>
1848		(match_operand:VDQHS 2 "register_operand" "<vwx>")
1849		(parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
1850  "TARGET_SIMD"
1851  {
1852    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
1853    return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
1854  }
1855  [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
1856)
1857
1858;; vmlal_lane_s16 intrinsics
1859(define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
1860  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1861	(plus:<VWIDE>
1862	  (mult:<VWIDE>
1863	    (ANY_EXTEND:<VWIDE>
1864	      (match_operand:<VCOND> 2 "register_operand" "w"))
1865	    (ANY_EXTEND:<VWIDE>
1866	      (vec_duplicate:<VCOND>
1867		(vec_select:<VEL>
1868		  (match_operand:VDQHS 3 "register_operand" "<vwx>")
1869		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
1870	  (match_operand:<VWIDE> 1 "register_operand" "0")))]
1871  "TARGET_SIMD"
1872  {
1873    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1874    return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
1875  }
1876  [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
1877)
1878
1879;; FP vector operations.
1880;; AArch64 AdvSIMD supports single-precision (32-bit) and 
1881;; double-precision (64-bit) floating-point data types and arithmetic as
1882;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
1883;; without the need for -ffast-math or -funsafe-math-optimizations.
1884;;
1885;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.
1887;;
1888;; ARMv8 permits an extension to enable trapped floating-point
1889;; exception handling, however this is an optional feature.  In the
1890;; event of a floating-point exception being raised by vectorised
1891;; code then:
1892;; 1.  If trapped floating-point exceptions are available, then a trap
1893;;     will be taken when any lane raises an enabled exception.  A trap
1894;;     handler may determine which lane raised the exception.
;; 2.  Alternatively, a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests either inhibit
;;     vectorisation (allowing precise identification of the failing
;;     operation) or sit outside vectorisable regions, where the
;;     specific operation and lane are not of interest.
1901
1902;; FP arithmetic operations.
1903
1904(define_insn "add<mode>3"
1905 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1906       (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1907		   (match_operand:VHSDF 2 "register_operand" "w")))]
1908 "TARGET_SIMD"
1909 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1910  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1911)
1912
1913(define_insn "sub<mode>3"
1914 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1915       (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1916		    (match_operand:VHSDF 2 "register_operand" "w")))]
1917 "TARGET_SIMD"
1918 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1919  [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1920)
1921
1922(define_insn "mul<mode>3"
1923 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1924       (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1925		   (match_operand:VHSDF 2 "register_operand" "w")))]
1926 "TARGET_SIMD"
1927 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1928  [(set_attr "type" "neon_fp_mul_<stype><q>")]
1929)
1930
1931(define_expand "div<mode>3"
1932 [(set (match_operand:VHSDF 0 "register_operand")
1933       (div:VHSDF (match_operand:VHSDF 1 "register_operand")
1934		  (match_operand:VHSDF 2 "register_operand")))]
1935 "TARGET_SIMD"
1936{
1937  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1938    DONE;
1939
1940  operands[1] = force_reg (<MODE>mode, operands[1]);
1941})
1942
1943(define_insn "*div<mode>3"
1944 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1945       (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1946		 (match_operand:VHSDF 2 "register_operand" "w")))]
1947 "TARGET_SIMD"
1948 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1949  [(set_attr "type" "neon_fp_div_<stype><q>")]
1950)
1951
1952(define_insn "neg<mode>2"
1953 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1954       (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1955 "TARGET_SIMD"
1956 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1957  [(set_attr "type" "neon_fp_neg_<stype><q>")]
1958)
1959
1960(define_insn "abs<mode>2"
1961 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1962       (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1963 "TARGET_SIMD"
1964 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1965  [(set_attr "type" "neon_fp_abs_<stype><q>")]
1966)
1967
1968(define_insn "fma<mode>4"
1969  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1970       (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1971		  (match_operand:VHSDF 2 "register_operand" "w")
1972		  (match_operand:VHSDF 3 "register_operand" "0")))]
1973  "TARGET_SIMD"
1974 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1975  [(set_attr "type" "neon_fp_mla_<stype><q>")]
1976)
1977
1978(define_insn "*aarch64_fma4_elt<mode>"
1979  [(set (match_operand:VDQF 0 "register_operand" "=w")
1980    (fma:VDQF
1981      (vec_duplicate:VDQF
1982	(vec_select:<VEL>
1983	  (match_operand:VDQF 1 "register_operand" "<h_con>")
1984	  (parallel [(match_operand:SI 2 "immediate_operand")])))
1985      (match_operand:VDQF 3 "register_operand" "w")
1986      (match_operand:VDQF 4 "register_operand" "0")))]
1987  "TARGET_SIMD"
1988  {
1989    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1990    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1991  }
1992  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1993)
1994
1995(define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1996  [(set (match_operand:VDQSF 0 "register_operand" "=w")
1997    (fma:VDQSF
1998      (vec_duplicate:VDQSF
1999	(vec_select:<VEL>
2000	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2001	  (parallel [(match_operand:SI 2 "immediate_operand")])))
2002      (match_operand:VDQSF 3 "register_operand" "w")
2003      (match_operand:VDQSF 4 "register_operand" "0")))]
2004  "TARGET_SIMD"
2005  {
2006    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2007    return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2008  }
2009  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2010)
2011
2012(define_insn "*aarch64_fma4_elt_from_dup<mode>"
2013  [(set (match_operand:VMUL 0 "register_operand" "=w")
2014    (fma:VMUL
2015      (vec_duplicate:VMUL
2016	  (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2017      (match_operand:VMUL 2 "register_operand" "w")
2018      (match_operand:VMUL 3 "register_operand" "0")))]
2019  "TARGET_SIMD"
2020  "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2021  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2022)
2023
2024(define_insn "*aarch64_fma4_elt_to_64v2df"
2025  [(set (match_operand:DF 0 "register_operand" "=w")
2026    (fma:DF
2027	(vec_select:DF
2028	  (match_operand:V2DF 1 "register_operand" "w")
2029	  (parallel [(match_operand:SI 2 "immediate_operand")]))
2030      (match_operand:DF 3 "register_operand" "w")
2031      (match_operand:DF 4 "register_operand" "0")))]
2032  "TARGET_SIMD"
2033  {
2034    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2035    return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2036  }
2037  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2038)
2039
2040(define_insn "fnma<mode>4"
2041  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2042	(fma:VHSDF
2043	  (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2044	  (match_operand:VHSDF 2 "register_operand" "w")
2045	  (match_operand:VHSDF 3 "register_operand" "0")))]
2046  "TARGET_SIMD"
2047  "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2048  [(set_attr "type" "neon_fp_mla_<stype><q>")]
2049)
2050
2051(define_insn "*aarch64_fnma4_elt<mode>"
2052  [(set (match_operand:VDQF 0 "register_operand" "=w")
2053    (fma:VDQF
2054      (neg:VDQF
2055        (match_operand:VDQF 3 "register_operand" "w"))
2056      (vec_duplicate:VDQF
2057	(vec_select:<VEL>
2058	  (match_operand:VDQF 1 "register_operand" "<h_con>")
2059	  (parallel [(match_operand:SI 2 "immediate_operand")])))
2060      (match_operand:VDQF 4 "register_operand" "0")))]
2061  "TARGET_SIMD"
2062  {
2063    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2064    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2065  }
2066  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2067)
2068
2069(define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
2070  [(set (match_operand:VDQSF 0 "register_operand" "=w")
2071    (fma:VDQSF
2072      (neg:VDQSF
2073        (match_operand:VDQSF 3 "register_operand" "w"))
2074      (vec_duplicate:VDQSF
2075	(vec_select:<VEL>
2076	  (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2077	  (parallel [(match_operand:SI 2 "immediate_operand")])))
2078      (match_operand:VDQSF 4 "register_operand" "0")))]
2079  "TARGET_SIMD"
2080  {
2081    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2082    return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2083  }
2084  [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2085)
2086
2087(define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2088  [(set (match_operand:VMUL 0 "register_operand" "=w")
2089    (fma:VMUL
2090      (neg:VMUL
2091        (match_operand:VMUL 2 "register_operand" "w"))
2092      (vec_duplicate:VMUL
2093	(match_operand:<VEL> 1 "register_operand" "<h_con>"))
2094      (match_operand:VMUL 3 "register_operand" "0")))]
2095  "TARGET_SIMD"
2096  "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2097  [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2098)
2099
2100(define_insn "*aarch64_fnma4_elt_to_64v2df"
2101  [(set (match_operand:DF 0 "register_operand" "=w")
2102    (fma:DF
2103      (vec_select:DF
2104	(match_operand:V2DF 1 "register_operand" "w")
2105	(parallel [(match_operand:SI 2 "immediate_operand")]))
2106      (neg:DF
2107        (match_operand:DF 3 "register_operand" "w"))
2108      (match_operand:DF 4 "register_operand" "0")))]
2109  "TARGET_SIMD"
2110  {
2111    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2112    return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2113  }
2114  [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2115)
2116
2117;; Vector versions of the floating-point frint patterns.
2118;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
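;; The instruction mapping is: btrunc -> frintz, ceil -> frintp,
;; floor -> frintm, nearbyint -> frinti, rint -> frintx, round -> frinta
;; and frintn -> frintn.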
2119(define_insn "<frint_pattern><mode>2"
2120  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2121	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2122		       FRINT))]
2123  "TARGET_SIMD"
2124  "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2125  [(set_attr "type" "neon_fp_round_<stype><q>")]
2126)
2127
2128;; Vector versions of the fcvt standard patterns.
2129;; Expands to lbtrunc, lround, lceil, lfloor
2130(define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2131  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2132	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2133			       [(match_operand:VHSDF 1 "register_operand" "w")]
2134			       FCVT)))]
2135  "TARGET_SIMD"
2136  "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2137  [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2138)
2139
2140;; HF Scalar variants of related SIMD instructions.
2141(define_insn "l<fcvt_pattern><su_optab>hfhi2"
2142  [(set (match_operand:HI 0 "register_operand" "=w")
2143	(FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2144		      FCVT)))]
2145  "TARGET_SIMD_F16INST"
2146  "fcvt<frint_suffix><su>\t%h0, %h1"
2147  [(set_attr "type" "neon_fp_to_int_s")]
2148)
2149
2150(define_insn "<optab>_trunchfhi2"
2151  [(set (match_operand:HI 0 "register_operand" "=w")
2152	(FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2153  "TARGET_SIMD_F16INST"
2154  "fcvtz<su>\t%h0, %h1"
2155  [(set_attr "type" "neon_fp_to_int_s")]
2156)
2157
2158(define_insn "<optab>hihf2"
2159  [(set (match_operand:HF 0 "register_operand" "=w")
2160	(FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2161  "TARGET_SIMD_F16INST"
2162  "<su_optab>cvtf\t%h0, %h1"
2163  [(set_attr "type" "neon_int_to_fp_s")]
2164)
2165
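;; A float-to-int conversion whose input is scaled by a power of two,
;; e.g. (int) (x * 65536.0f), can be performed as a single fixed-point
;; FCVTZ[SU] with an fbits immediate (#16 here) instead of FMUL + FCVTZ.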
2166(define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2167  [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2168	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2169			       [(mult:VDQF
2170	 (match_operand:VDQF 1 "register_operand" "w")
2171	 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2172			       UNSPEC_FRINTZ)))]
2173  "TARGET_SIMD
2174   && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2175		GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2176  {
2177    int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2178    char buf[64];
2179    snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2180    output_asm_insn (buf, operands);
2181    return "";
2182  }
2183  [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2184)
2185
2186(define_expand "<optab><VHSDF:mode><fcvt_target>2"
2187  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2188	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2189			       [(match_operand:VHSDF 1 "register_operand")]
2190				UNSPEC_FRINTZ)))]
2191  "TARGET_SIMD"
2192  {})
2193
2194(define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2195  [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2196	(FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2197			       [(match_operand:VHSDF 1 "register_operand")]
2198				UNSPEC_FRINTZ)))]
2199  "TARGET_SIMD"
2200  {})
2201
2202(define_expand "ftrunc<VHSDF:mode>2"
2203  [(set (match_operand:VHSDF 0 "register_operand")
2204	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2205		       UNSPEC_FRINTZ))]
2206  "TARGET_SIMD"
2207  {})
2208
2209(define_insn "<optab><fcvt_target><VHSDF:mode>2"
2210  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2211	(FLOATUORS:VHSDF
2212	  (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2213  "TARGET_SIMD"
2214  "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2215  [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2216)
2217
2218;; Conversions between vectors of floats and doubles.
2219;; Contains a mix of patterns to match standard pattern names
2220;; and those for intrinsics.
2221
2222;; Float widening operations.
2223
2224(define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2225  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2226        (float_extend:<VWIDE> (vec_select:<VHALF>
2227			       (match_operand:VQ_HSF 1 "register_operand" "w")
2228			       (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2229			    )))]
2230  "TARGET_SIMD"
2231  "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2232  [(set_attr "type" "neon_fp_cvt_widen_s")]
2233)
2234
2235;; Convert between fixed-point and floating-point (vector modes)
2236
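;; e.g. FCVTZS v0.4s, v1.4s, #16 multiplies by 2^16 before converting
;; to integer (i.e. the result has 16 fraction bits), while
;; SCVTF v0.4s, v1.4s, #16 divides by 2^16 after converting from integer.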
2237(define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2238  [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2239	(unspec:<VHSDF:FCVT_TARGET>
2240	  [(match_operand:VHSDF 1 "register_operand" "w")
2241	   (match_operand:SI 2 "immediate_operand" "i")]
2242	 FCVT_F2FIXED))]
2243  "TARGET_SIMD"
2244  "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2245  [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2246)
2247
2248(define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2249  [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2250	(unspec:<VDQ_HSDI:FCVT_TARGET>
2251	  [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2252	   (match_operand:SI 2 "immediate_operand" "i")]
2253	 FCVT_FIXED2F))]
2254  "TARGET_SIMD"
2255  "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2256  [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2257)
2258
2259;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2260;; is inconsistent with vector ordering elsewhere in the compiler, in that
2261;; the meaning of HI and LO changes depending on the target endianness.
2262;; While elsewhere we map the higher numbered elements of a vector to
2263;; the lower architectural lanes of the vector, for these patterns we want
2264;; to always treat "hi" as referring to the higher architectural lanes.
2265;; Consequently, while the patterns below look inconsistent with our
;; other big-endian patterns, their behavior is as required.
2267
2268(define_expand "vec_unpacks_lo_<mode>"
2269  [(match_operand:<VWIDE> 0 "register_operand")
2270   (match_operand:VQ_HSF 1 "register_operand")]
2271  "TARGET_SIMD"
2272  {
2273    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2274    emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2275						       operands[1], p));
2276    DONE;
2277  }
2278)
2279
2280(define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2281  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2282        (float_extend:<VWIDE> (vec_select:<VHALF>
2283			       (match_operand:VQ_HSF 1 "register_operand" "w")
2284			       (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2285			    )))]
2286  "TARGET_SIMD"
2287  "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2288  [(set_attr "type" "neon_fp_cvt_widen_s")]
2289)
2290
2291(define_expand "vec_unpacks_hi_<mode>"
2292  [(match_operand:<VWIDE> 0 "register_operand")
2293   (match_operand:VQ_HSF 1 "register_operand")]
2294  "TARGET_SIMD"
2295  {
2296    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
						       operands[1], p));
2299    DONE;
2300  }
2301)

(define_insn "aarch64_float_extend_lo_<Vwide>"
2303  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2304	(float_extend:<VWIDE>
2305	  (match_operand:VDF 1 "register_operand" "w")))]
2306  "TARGET_SIMD"
2307  "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2308  [(set_attr "type" "neon_fp_cvt_widen_s")]
2309)
2310
2311;; Float narrowing operations.
2312
2313(define_insn "aarch64_float_truncate_lo_<mode>"
2314  [(set (match_operand:VDF 0 "register_operand" "=w")
2315      (float_truncate:VDF
2316	(match_operand:<VWIDE> 1 "register_operand" "w")))]
2317  "TARGET_SIMD"
2318  "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2319  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2320)
2321
2322(define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2323  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2324    (vec_concat:<VDBL>
2325      (match_operand:VDF 1 "register_operand" "0")
2326      (float_truncate:VDF
2327	(match_operand:<VWIDE> 2 "register_operand" "w"))))]
2328  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2329  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2330  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2331)
2332
2333(define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2334  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2335    (vec_concat:<VDBL>
2336      (float_truncate:VDF
2337	(match_operand:<VWIDE> 2 "register_operand" "w"))
2338      (match_operand:VDF 1 "register_operand" "0")))]
2339  "TARGET_SIMD && BYTES_BIG_ENDIAN"
2340  "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2341  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2342)
2343
2344(define_expand "aarch64_float_truncate_hi_<Vdbl>"
2345  [(match_operand:<VDBL> 0 "register_operand")
2346   (match_operand:VDF 1 "register_operand")
2347   (match_operand:<VWIDE> 2 "register_operand")]
2348  "TARGET_SIMD"
2349{
2350  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2351			     ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2352			     : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2353  emit_insn (gen (operands[0], operands[1], operands[2]));
2354  DONE;
2355}
2356)
2357
2358(define_expand "vec_pack_trunc_v2df"
2359  [(set (match_operand:V4SF 0 "register_operand")
2360      (vec_concat:V4SF
2361	(float_truncate:V2SF
2362	    (match_operand:V2DF 1 "register_operand"))
2363	(float_truncate:V2SF
2364	    (match_operand:V2DF 2 "register_operand"))
2365	  ))]
2366  "TARGET_SIMD"
2367  {
2368    rtx tmp = gen_reg_rtx (V2SFmode);
2369    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2370    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2371
2372    emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2373    emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2374						   tmp, operands[hi]));
2375    DONE;
2376  }
2377)
2378
2379(define_expand "vec_pack_trunc_df"
2380  [(set (match_operand:V2SF 0 "register_operand")
2381      (vec_concat:V2SF
2382	(float_truncate:SF
2383	    (match_operand:DF 1 "register_operand"))
2384	(float_truncate:SF
2385	    (match_operand:DF 2 "register_operand"))
2386	  ))]
2387  "TARGET_SIMD"
2388  {
2389    rtx tmp = gen_reg_rtx (V2SFmode);
2390    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2391    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2392
2393    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2394    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2395    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2396    DONE;
2397  }
2398)
2399
2400;; FP Max/Min
;; Max/Min are introduced by idiom recognition in GCC's mid-end.  An
2402;; expression like:
2403;;      a = (b < c) ? b : c;
2404;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2405;; -fno-signed-zeros are enabled either explicitly or indirectly via
2406;; -ffast-math.
2407;;
2408;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2409;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2410;; operand will be returned when both operands are zero (i.e. they may not
2411;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
2412;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2413;; NaNs.
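;;
;; For example (illustrative C), with -ffast-math the loop
;;   for (i = 0; i < n; i++)
;;     a[i] = (b[i] < c[i]) ? b[i] : c[i];
;; is recognized as MIN_EXPR and vectorizes to FMINNM on the vector
;; modes below.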
2414
2415(define_insn "<su><maxmin><mode>3"
2416  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2417	(FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2418		       (match_operand:VHSDF 2 "register_operand" "w")))]
2419  "TARGET_SIMD"
2420  "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2421  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2422)
2423
2424;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2425;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2426;; which implement the IEEE fmax ()/fmin () functions.
2427(define_insn "<maxmin_uns><mode>3"
2428  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2429       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2430		      (match_operand:VHSDF 2 "register_operand" "w")]
2431		      FMAXMIN_UNS))]
2432  "TARGET_SIMD"
2433  "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2434  [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2435)
2436
2437;; 'across lanes' add.
2438
2439(define_expand "reduc_plus_scal_<mode>"
2440  [(match_operand:<VEL> 0 "register_operand")
2441   (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand")]
2442	       UNSPEC_ADDV)]
2443  "TARGET_SIMD"
2444  {
2445    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2446    rtx scratch = gen_reg_rtx (<MODE>mode);
2447    emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2448    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2449    DONE;
2450  }
2451)
2452
2453(define_insn "aarch64_faddp<mode>"
2454 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2455       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2456		      (match_operand:VHSDF 2 "register_operand" "w")]
2457	UNSPEC_FADDV))]
2458 "TARGET_SIMD"
2459 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2460  [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2461)
2462
2463(define_insn "aarch64_reduc_plus_internal<mode>"
2464 [(set (match_operand:VDQV 0 "register_operand" "=w")
2465       (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2466		    UNSPEC_ADDV))]
2467 "TARGET_SIMD"
2468 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2469  [(set_attr "type" "neon_reduc_add<q>")]
2470)
2471
2472;; ADDV with result zero-extended to SI/DImode (for popcount).
2473(define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
2474 [(set (match_operand:GPI 0 "register_operand" "=w")
2475       (zero_extend:GPI
2476	(unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
2477			     UNSPEC_ADDV)))]
2478 "TARGET_SIMD"
2479 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
2480  [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
2481)
2482
2483(define_insn "aarch64_reduc_plus_internalv2si"
2484 [(set (match_operand:V2SI 0 "register_operand" "=w")
2485       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2486		    UNSPEC_ADDV))]
2487 "TARGET_SIMD"
2488 "addp\\t%0.2s, %1.2s, %1.2s"
2489  [(set_attr "type" "neon_reduc_add")]
2490)
2491
2492(define_insn "reduc_plus_scal_<mode>"
2493 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2494       (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2495		   UNSPEC_FADDV))]
2496 "TARGET_SIMD"
2497 "faddp\\t%<Vetype>0, %1.<Vtype>"
2498  [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2499)
2500
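;; For V4SF { a, b, c, d } the first FADDP below computes
;; { a+b, c+d, a+b, c+d } and the second { a+b+c+d, ... }, after which
;; the (endian-corrected) lane 0 holds the scalar sum.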
2501(define_expand "reduc_plus_scal_v4sf"
2502 [(set (match_operand:SF 0 "register_operand")
2503       (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2504		    UNSPEC_FADDV))]
2505 "TARGET_SIMD"
2506{
2507  rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2508  rtx scratch = gen_reg_rtx (V4SFmode);
2509  emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2510  emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2511  emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2512  DONE;
2513})
2514
2515(define_insn "clrsb<mode>2"
2516  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2517        (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2518  "TARGET_SIMD"
2519  "cls\\t%0.<Vtype>, %1.<Vtype>"
2520  [(set_attr "type" "neon_cls<q>")]
2521)
2522
2523(define_insn "clz<mode>2"
2524 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2525       (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2526 "TARGET_SIMD"
2527 "clz\\t%0.<Vtype>, %1.<Vtype>"
2528  [(set_attr "type" "neon_cls<q>")]
2529)
2530
2531(define_insn "popcount<mode>2"
2532  [(set (match_operand:VB 0 "register_operand" "=w")
2533        (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2534  "TARGET_SIMD"
2535  "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2536  [(set_attr "type" "neon_cnt<q>")]
2537)
2538
2539;; 'across lanes' max and min ops.
2540
2541;; Template for outputting a scalar, so we can create __builtins which can be
2542;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
2543(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2544  [(match_operand:<VEL> 0 "register_operand")
2545   (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2546		  FMAXMINV)]
2547  "TARGET_SIMD"
2548  {
2549    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2550    rtx scratch = gen_reg_rtx (<MODE>mode);
2551    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2552							      operands[1]));
2553    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2554    DONE;
2555  }
2556)
2557
2558;; Likewise for integer cases, signed and unsigned.
2559(define_expand "reduc_<maxmin_uns>_scal_<mode>"
2560  [(match_operand:<VEL> 0 "register_operand")
2561   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2562		    MAXMINV)]
2563  "TARGET_SIMD"
2564  {
2565    rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2566    rtx scratch = gen_reg_rtx (<MODE>mode);
2567    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2568							      operands[1]));
2569    emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2570    DONE;
2571  }
2572)
2573
2574(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2575 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2576       (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2577		    MAXMINV))]
2578 "TARGET_SIMD"
2579 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2580  [(set_attr "type" "neon_reduc_minmax<q>")]
2581)
2582
2583(define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2584 [(set (match_operand:V2SI 0 "register_operand" "=w")
2585       (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2586		    MAXMINV))]
2587 "TARGET_SIMD"
2588 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2589  [(set_attr "type" "neon_reduc_minmax")]
2590)
2591
2592(define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2593 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2594       (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2595		      FMAXMINV))]
2596 "TARGET_SIMD"
2597 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2598  [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2599)
2600
2601;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2602;; allocation.
2603;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2604;; to select.
2605;;
;; Thus our BSL is of the form:
;;   op0 = bsl (op1, op2, op3)
;; and we can use any of:
;;
;;   if (op0 = op1) (the mask is also the destination)
;;     bsl op0, op2, op3
;;   if (op0 = op3) (1-bits in the mask choose bits from op2, else keep op0)
;;     bit op0, op2, op1
;;   if (op0 = op2) (0-bits in the mask choose bits from op3, else keep op0)
;;     bif op0, op3, op1
;;
;; This pattern is what the aarch64_simd_bsl<mode> expander expands to.
2618;; Some forms of straight-line code may generate the equivalent form
2619;; in *aarch64_simd_bsl<mode>_alt.
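;;
;; As a worked identity, the RTL form matched below,
;;   op0 = ((op2 ^ op3) & op1) ^ op3
;; simplifies bitwise to
;;   op0 = (op1 & op2) | (~op1 & op3)
;; i.e. a BSL with op1 as the mask: 1-bits select op2, 0-bits select op3.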
2620
2621(define_insn "aarch64_simd_bsl<mode>_internal"
2622  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2623	(xor:VDQ_I
2624	   (and:VDQ_I
2625	     (xor:VDQ_I
2626	       (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2627	       (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2628	     (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2629	  (match_dup:<V_INT_EQUIV> 3)
2630	))]
2631  "TARGET_SIMD"
2632  "@
2633  bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2634  bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2635  bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2636  [(set_attr "type" "neon_bsl<q>")]
2637)
2638
2639;; We need this form in addition to the above pattern to match the case
2640;; when combine tries merging three insns such that the second operand of
2641;; the outer XOR matches the second operand of the inner XOR rather than
2642;; the first.  The two are equivalent but since recog doesn't try all
2643;; permutations of commutative operations, we have to have a separate pattern.
2644
2645(define_insn "*aarch64_simd_bsl<mode>_alt"
2646  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2647	(xor:VDQ_I
2648	   (and:VDQ_I
2649	     (xor:VDQ_I
2650	       (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2651	       (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2652	      (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2653	  (match_dup:<V_INT_EQUIV> 2)))]
2654  "TARGET_SIMD"
2655  "@
2656  bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2657  bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2658  bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2659  [(set_attr "type" "neon_bsl<q>")]
2660)
2661
;; DImode is special: we want to avoid computing, in the vector
;; registers, operations that are more naturally computed in general
;; purpose registers, since that requires moving all three operands
;; from general purpose registers to vector registers and back.  However, we
2666;; don't want to make this pattern an UNSPEC as we'd lose scope for
2667;; optimizations based on the component operations of a BSL.
2668;;
2669;; That means we need a splitter back to the individual operations, if they
2670;; would be better calculated on the integer side.
2671
2672(define_insn_and_split "aarch64_simd_bsldi_internal"
2673  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2674	(xor:DI
2675	   (and:DI
2676	     (xor:DI
2677	       (match_operand:DI 3 "register_operand" "w,0,w,r")
2678	       (match_operand:DI 2 "register_operand" "w,w,0,r"))
2679	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
2680	  (match_dup:DI 3)
2681	))]
2682  "TARGET_SIMD"
2683  "@
2684  bsl\\t%0.8b, %2.8b, %3.8b
2685  bit\\t%0.8b, %2.8b, %1.8b
2686  bif\\t%0.8b, %3.8b, %1.8b
2687  #"
2688  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2689  [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2690{
2691  /* Split back to individual operations.  If we're before reload, and
2692     able to create a temporary register, do so.  If we're after reload,
2693     we've got an early-clobber destination register, so use that.
2694     Otherwise, we can't create pseudos and we can't yet guarantee that
2695     operands[0] is safe to write, so FAIL to split.  */
2696
2697  rtx scratch;
2698  if (reload_completed)
2699    scratch = operands[0];
2700  else if (can_create_pseudo_p ())
2701    scratch = gen_reg_rtx (DImode);
2702  else
2703    FAIL;
2704
2705  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2706  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2707  emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2708  DONE;
2709}
2710  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2711   (set_attr "length" "4,4,4,12")]
2712)
2713
2714(define_insn_and_split "aarch64_simd_bsldi_alt"
2715  [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2716	(xor:DI
2717	   (and:DI
2718	     (xor:DI
2719	       (match_operand:DI 3 "register_operand" "w,w,0,r")
2720	       (match_operand:DI 2 "register_operand" "w,0,w,r"))
2721	     (match_operand:DI 1 "register_operand" "0,w,w,r"))
2722	  (match_dup:DI 2)
2723	))]
2724  "TARGET_SIMD"
2725  "@
2726  bsl\\t%0.8b, %3.8b, %2.8b
2727  bit\\t%0.8b, %3.8b, %1.8b
2728  bif\\t%0.8b, %2.8b, %1.8b
2729  #"
2730  "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2731  [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2732{
2733  /* Split back to individual operations.  If we're before reload, and
2734     able to create a temporary register, do so.  If we're after reload,
2735     we've got an early-clobber destination register, so use that.
2736     Otherwise, we can't create pseudos and we can't yet guarantee that
2737     operands[0] is safe to write, so FAIL to split.  */
2738
2739  rtx scratch;
2740  if (reload_completed)
2741    scratch = operands[0];
2742  else if (can_create_pseudo_p ())
2743    scratch = gen_reg_rtx (DImode);
2744  else
2745    FAIL;
2746
2747  emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2748  emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2749  emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2750  DONE;
2751}
2752  [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2753   (set_attr "length" "4,4,4,12")]
2754)
2755
2756(define_expand "aarch64_simd_bsl<mode>"
2757  [(match_operand:VALLDIF 0 "register_operand")
2758   (match_operand:<V_INT_EQUIV> 1 "register_operand")
2759   (match_operand:VALLDIF 2 "register_operand")
2760   (match_operand:VALLDIF 3 "register_operand")]
2761 "TARGET_SIMD"
2762{
2763  /* We can't alias operands together if they have different modes.  */
2764  rtx tmp = operands[0];
2765  if (FLOAT_MODE_P (<MODE>mode))
2766    {
2767      operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2768      operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2769      tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2770    }
2771  operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2772  emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2773							 operands[1],
2774							 operands[2],
2775							 operands[3]));
2776  if (tmp != operands[0])
2777    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2778
2779  DONE;
2780})
2781
2782(define_expand "vcond_mask_<mode><v_int_equiv>"
2783  [(match_operand:VALLDI 0 "register_operand")
2784   (match_operand:VALLDI 1 "nonmemory_operand")
2785   (match_operand:VALLDI 2 "nonmemory_operand")
2786   (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2787  "TARGET_SIMD"
2788{
  /* If we have (a = (P) ? -1 : 0), then we can simply move the
     generated mask (the result must be an integer vector).  */
2791  if (operands[1] == CONSTM1_RTX (<MODE>mode)
2792      && operands[2] == CONST0_RTX (<MODE>mode))
2793    emit_move_insn (operands[0], operands[3]);
2794  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
2795  else if (operands[1] == CONST0_RTX (<MODE>mode)
2796	   && operands[2] == CONSTM1_RTX (<MODE>mode))
2797    emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2798  else
2799    {
2800      if (!REG_P (operands[1]))
2801	operands[1] = force_reg (<MODE>mode, operands[1]);
2802      if (!REG_P (operands[2]))
2803	operands[2] = force_reg (<MODE>mode, operands[2]);
2804      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2805					     operands[1], operands[2]));
2806    }
2807
2808  DONE;
2809})
2810
2811;; Patterns comparing two vectors to produce a mask.
2812
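;; AdvSIMD has no unsigned less-than/less-or-equal compares with a
;; register operand, so below a LTU b and a LEU b are emitted as
;; b GTU a (CMHI) and b GEU a (CMHS) with the operands swapped, and
;; NE as NOT (a EQ b).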
2813(define_expand "vec_cmp<mode><mode>"
2814  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2815	  (match_operator 1 "comparison_operator"
2816	    [(match_operand:VSDQ_I_DI 2 "register_operand")
2817	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2818  "TARGET_SIMD"
2819{
2820  rtx mask = operands[0];
2821  enum rtx_code code = GET_CODE (operands[1]);
2822
2823  switch (code)
2824    {
2825    case NE:
2826    case LE:
2827    case LT:
2828    case GE:
2829    case GT:
2830    case EQ:
2831      if (operands[3] == CONST0_RTX (<MODE>mode))
2832	break;
2833
2834      /* Fall through.  */
2835    default:
2836      if (!REG_P (operands[3]))
2837	operands[3] = force_reg (<MODE>mode, operands[3]);
2838
2839      break;
2840    }
2841
2842  switch (code)
2843    {
2844    case LT:
2845      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2846      break;
2847
2848    case GE:
2849      emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2850      break;
2851
2852    case LE:
2853      emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2854      break;
2855
2856    case GT:
2857      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2858      break;
2859
2860    case LTU:
2861      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2862      break;
2863
2864    case GEU:
2865      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2866      break;
2867
2868    case LEU:
2869      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2870      break;
2871
2872    case GTU:
2873      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2874      break;
2875
2876    case NE:
2877      /* Handle NE as !EQ.  */
2878      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2879      emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2880      break;
2881
2882    case EQ:
2883      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2884      break;
2885
2886    default:
2887      gcc_unreachable ();
2888    }
2889
2890  DONE;
2891})
2892
2893(define_expand "vec_cmp<mode><v_int_equiv>"
2894  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2895	(match_operator 1 "comparison_operator"
2896	    [(match_operand:VDQF 2 "register_operand")
2897	     (match_operand:VDQF 3 "nonmemory_operand")]))]
2898  "TARGET_SIMD"
2899{
2900  int use_zero_form = 0;
2901  enum rtx_code code = GET_CODE (operands[1]);
2902  rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2903
2904  rtx (*comparison) (rtx, rtx, rtx) = NULL;
2905
2906  switch (code)
2907    {
2908    case LE:
2909    case LT:
2910    case GE:
2911    case GT:
2912    case EQ:
2913      if (operands[3] == CONST0_RTX (<MODE>mode))
2914	{
2915	  use_zero_form = 1;
2916	  break;
2917	}
2918      /* Fall through.  */
2919    default:
2920      if (!REG_P (operands[3]))
2921	operands[3] = force_reg (<MODE>mode, operands[3]);
2922
2923      break;
2924    }
2925
2926  switch (code)
2927    {
2928    case LT:
2929      if (use_zero_form)
2930	{
2931	  comparison = gen_aarch64_cmlt<mode>;
2932	  break;
2933	}
2934      /* Fall through.  */
2935    case UNLT:
2936      std::swap (operands[2], operands[3]);
2937      /* Fall through.  */
2938    case UNGT:
2939    case GT:
2940      comparison = gen_aarch64_cmgt<mode>;
2941      break;
2942    case LE:
2943      if (use_zero_form)
2944	{
2945	  comparison = gen_aarch64_cmle<mode>;
2946	  break;
2947	}
2948      /* Fall through.  */
2949    case UNLE:
2950      std::swap (operands[2], operands[3]);
2951      /* Fall through.  */
2952    case UNGE:
2953    case GE:
2954      comparison = gen_aarch64_cmge<mode>;
2955      break;
2956    case NE:
2957    case EQ:
2958      comparison = gen_aarch64_cmeq<mode>;
2959      break;
2960    case UNEQ:
2961    case ORDERED:
2962    case UNORDERED:
2963    case LTGT:
2964      break;
2965    default:
2966      gcc_unreachable ();
2967    }
2968
2969  switch (code)
2970    {
2971    case UNGE:
2972    case UNGT:
2973    case UNLE:
2974    case UNLT:
2975      {
2976	/* All of the above must not raise any FP exceptions.  Thus we first
2977	   check each operand for NaNs and force any elements containing NaN to
2978	   zero before using them in the compare.
2979	   Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2980				     (cm<cc> (isnan (a) ? 0.0 : a,
2981					      isnan (b) ? 0.0 : b))
	   We use the following transformations for doing the comparisons:
2983	   a UNGE b -> a GE b
2984	   a UNGT b -> a GT b
2985	   a UNLE b -> b GE a
2986	   a UNLT b -> b GT a.  */
2987
2988	rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2989	rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2990	rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2991	emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2992	emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2993	emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2994	emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2995					  lowpart_subreg (<V_INT_EQUIV>mode,
2996							  operands[2],
2997							  <MODE>mode)));
2998	emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2999					  lowpart_subreg (<V_INT_EQUIV>mode,
3000							  operands[3],
3001							  <MODE>mode)));
3002	gcc_assert (comparison != NULL);
3003	emit_insn (comparison (operands[0],
3004			       lowpart_subreg (<MODE>mode,
3005					       tmp0, <V_INT_EQUIV>mode),
3006			       lowpart_subreg (<MODE>mode,
3007					       tmp1, <V_INT_EQUIV>mode)));
3008	emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
3009      }
3010      break;
3011
3012    case LT:
3013    case LE:
3014    case GT:
3015    case GE:
3016    case EQ:
3017    case NE:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
	 As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
3020	 a GE b -> a GE b
3021	 a GT b -> a GT b
3022	 a LE b -> b GE a
3023	 a LT b -> b GT a
3024	 a EQ b -> a EQ b
3025	 a NE b -> ~(a EQ b)  */
3026      gcc_assert (comparison != NULL);
3027      emit_insn (comparison (operands[0], operands[2], operands[3]));
3028      if (code == NE)
3029	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3030      break;
3031
3032    case LTGT:
      /* LTGT is not guaranteed not to raise an FP exception, so take
	 the faster route: ((a > b) || (b > a)).  */
3035      emit_insn (gen_aarch64_cmgt<mode> (operands[0],
3036					 operands[2], operands[3]));
3037      emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
3038      emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
3039      break;
3040
3041    case ORDERED:
3042    case UNORDERED:
3043    case UNEQ:
3044      /* cmeq (a, a) & cmeq (b, b).  */
3045      emit_insn (gen_aarch64_cmeq<mode> (operands[0],
3046					 operands[2], operands[2]));
3047      emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
3048      emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
3049
3050      if (code == UNORDERED)
3051	emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
3052      else if (code == UNEQ)
3053	{
3054	  emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
3055	  emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
3056	}
3057      break;
3058
3059    default:
3060      gcc_unreachable ();
3061    }
3062
3063  DONE;
3064})
3065
3066(define_expand "vec_cmpu<mode><mode>"
3067  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3068	  (match_operator 1 "comparison_operator"
3069	    [(match_operand:VSDQ_I_DI 2 "register_operand")
3070	     (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3071  "TARGET_SIMD"
3072{
3073  emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
3074				      operands[2], operands[3]));
3075  DONE;
3076})
3077
3078(define_expand "vcond<mode><mode>"
3079  [(set (match_operand:VALLDI 0 "register_operand")
3080	(if_then_else:VALLDI
3081	  (match_operator 3 "comparison_operator"
3082	    [(match_operand:VALLDI 4 "register_operand")
3083	     (match_operand:VALLDI 5 "nonmemory_operand")])
3084	  (match_operand:VALLDI 1 "nonmemory_operand")
3085	  (match_operand:VALLDI 2 "nonmemory_operand")))]
3086  "TARGET_SIMD"
3087{
3088  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3089  enum rtx_code code = GET_CODE (operands[3]);
3090
  /* NE is handled as !EQ in vec_cmp patterns, so we invert it explicitly
     and swap operands 1/2 in order to avoid the additional NOT
     instruction.  */
3094  if (code == NE)
3095    {
3096      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3097				    operands[4], operands[5]);
3098      std::swap (operands[1], operands[2]);
3099    }
3100  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3101					     operands[4], operands[5]));
3102  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3103						 operands[2], mask));
3104
3105  DONE;
3106})
3107
3108(define_expand "vcond<v_cmp_mixed><mode>"
3109  [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3110	(if_then_else:<V_cmp_mixed>
3111	  (match_operator 3 "comparison_operator"
3112	    [(match_operand:VDQF_COND 4 "register_operand")
3113	     (match_operand:VDQF_COND 5 "nonmemory_operand")])
3114	  (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3115	  (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3116  "TARGET_SIMD"
3117{
3118  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3119  enum rtx_code code = GET_CODE (operands[3]);
3120
  /* NE is handled as !EQ in vec_cmp patterns, so we invert it explicitly
     and swap operands 1/2 in order to avoid the additional NOT
     instruction.  */
3124  if (code == NE)
3125    {
3126      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3127				    operands[4], operands[5]);
3128      std::swap (operands[1], operands[2]);
3129    }
3130  emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3131					     operands[4], operands[5]));
3132  emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3133						operands[0], operands[1],
3134						operands[2], mask));
3135
3136  DONE;
3137})
3138
3139(define_expand "vcondu<mode><mode>"
3140  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3141	(if_then_else:VSDQ_I_DI
3142	  (match_operator 3 "comparison_operator"
3143	    [(match_operand:VSDQ_I_DI 4 "register_operand")
3144	     (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3145	  (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3146	  (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3147  "TARGET_SIMD"
3148{
3149  rtx mask = gen_reg_rtx (<MODE>mode);
3150  enum rtx_code code = GET_CODE (operands[3]);
3151
  /* NE is handled as !EQ in vec_cmp patterns, so we invert it explicitly
     and swap operands 1/2 in order to avoid the additional NOT
     instruction.  */
3155  if (code == NE)
3156    {
3157      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3158				    operands[4], operands[5]);
3159      std::swap (operands[1], operands[2]);
3160    }
3161  emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3162				      operands[4], operands[5]));
3163  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3164						 operands[2], mask));
3165  DONE;
3166})
3167
3168(define_expand "vcondu<mode><v_cmp_mixed>"
3169  [(set (match_operand:VDQF 0 "register_operand")
3170	(if_then_else:VDQF
3171	  (match_operator 3 "comparison_operator"
3172	    [(match_operand:<V_cmp_mixed> 4 "register_operand")
3173	     (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3174	  (match_operand:VDQF 1 "nonmemory_operand")
3175	  (match_operand:VDQF 2 "nonmemory_operand")))]
3176  "TARGET_SIMD"
3177{
3178  rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3179  enum rtx_code code = GET_CODE (operands[3]);
3180
  /* NE is handled as !EQ in vec_cmp patterns, so we invert it explicitly
     and swap operands 1/2 in order to avoid the additional NOT
     instruction.  */
3184  if (code == NE)
3185    {
3186      operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3187				    operands[4], operands[5]);
3188      std::swap (operands[1], operands[2]);
3189    }
3190  emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3191						  mask, operands[3],
3192						  operands[4], operands[5]));
3193  emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3194						 operands[2], mask));
3195  DONE;
3196})
3197
3198;; Patterns for AArch64 SIMD Intrinsics.
3199
3200;; Lane extraction with sign extension to general purpose register.
3201(define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3202  [(set (match_operand:GPI 0 "register_operand" "=r")
3203	(sign_extend:GPI
3204	  (vec_select:<VDQQH:VEL>
3205	    (match_operand:VDQQH 1 "register_operand" "w")
3206	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3207  "TARGET_SIMD"
3208  {
3209    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3210					   INTVAL (operands[2]));
3211    return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3212  }
3213  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3214)
3215
3216(define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3217  [(set (match_operand:GPI 0 "register_operand" "=r")
3218	(zero_extend:GPI
3219	  (vec_select:<VDQQH:VEL>
3220	    (match_operand:VDQQH 1 "register_operand" "w")
3221	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3222  "TARGET_SIMD"
3223  {
3224    operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3225					   INTVAL (operands[2]));
3226    return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
3227  }
3228  [(set_attr "type" "neon_to_gp<VDQQH:q>")]
3229)
3230
;; Lane extraction of a value; neither sign nor zero extension
;; is guaranteed, so the upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout, so flip only for
;; the assembly output.
3234(define_insn "aarch64_get_lane<mode>"
3235  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3236	(vec_select:<VEL>
3237	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3238	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3239  "TARGET_SIMD"
3240  {
3241    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3242    switch (which_alternative)
3243      {
3244	case 0:
3245	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3246	case 1:
3247	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3248	case 2:
3249	  return "st1\\t{%1.<Vetype>}[%2], %0";
3250	default:
3251	  gcc_unreachable ();
3252      }
3253  }
3254  [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3255)
3256
3257(define_insn "load_pair_lanes<mode>"
3258  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3259	(vec_concat:<VDBL>
3260	   (match_operand:VDC 1 "memory_operand" "Utq")
3261	   (match_operand:VDC 2 "memory_operand" "m")))]
3262  "TARGET_SIMD && !STRICT_ALIGNMENT
3263   && rtx_equal_p (XEXP (operands[2], 0),
3264		   plus_constant (Pmode,
3265				  XEXP (operands[1], 0),
3266				  GET_MODE_SIZE (<MODE>mode)))"
3267  "ldr\\t%q0, %1"
3268  [(set_attr "type" "neon_load1_1reg_q")]
3269)
3270
3271(define_insn "store_pair_lanes<mode>"
3272  [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3273	(vec_concat:<VDBL>
3274	   (match_operand:VDC 1 "register_operand" "w, r")
3275	   (match_operand:VDC 2 "register_operand" "w, r")))]
3276  "TARGET_SIMD"
3277  "@
3278   stp\\t%d1, %d2, %y0
3279   stp\\t%x1, %x2, %y0"
3280  [(set_attr "type" "neon_stp, store_16")]
3281)
3282
;; In this insn, operand 1 should be the low part and operand 2 the high
;; part of the destination vector.
3285
3286(define_insn "@aarch64_combinez<mode>"
3287  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3288	(vec_concat:<VDBL>
3289	  (match_operand:VDC 1 "general_operand" "w,?r,m")
3290	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3291  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3292  "@
3293   mov\\t%0.8b, %1.8b
3294   fmov\t%d0, %1
3295   ldr\\t%d0, %1"
3296  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3297   (set_attr "arch" "simd,fp,simd")]
3298)
3299
3300(define_insn "@aarch64_combinez_be<mode>"
3301  [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3302        (vec_concat:<VDBL>
3303	  (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3304	  (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3305  "TARGET_SIMD && BYTES_BIG_ENDIAN"
3306  "@
3307   mov\\t%0.8b, %1.8b
3308   fmov\t%d0, %1
3309   ldr\\t%d0, %1"
3310  [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3311   (set_attr "arch" "simd,fp,simd")]
3312)
3313
3314(define_expand "aarch64_combine<mode>"
3315  [(match_operand:<VDBL> 0 "register_operand")
3316   (match_operand:VDC 1 "register_operand")
3317   (match_operand:VDC 2 "aarch64_simd_reg_or_zero")]
3318  "TARGET_SIMD"
3319{
3320  if (operands[2] == CONST0_RTX (<MODE>mode))
3321    {
3322      if (BYTES_BIG_ENDIAN)
3323	emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1],
3324						  operands[2]));
3325      else
3326	emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1],
3327					       operands[2]));
3328    }
3329  else
3330    aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3331  DONE;
3332}
3333)
3334
3335(define_expand "@aarch64_simd_combine<mode>"
3336  [(match_operand:<VDBL> 0 "register_operand")
3337   (match_operand:VDC 1 "register_operand")
3338   (match_operand:VDC 2 "register_operand")]
3339  "TARGET_SIMD"
3340  {
3341    emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3342    emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3343    DONE;
3344  }
3345[(set_attr "type" "multiple")]
3346)
3347
3348;; <su><addsub>l<q>.
3349
3350(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3352       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3353			   (match_operand:VQW 1 "register_operand" "w")
3354			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3355		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3356			   (match_operand:VQW 2 "register_operand" "w")
3357			   (match_dup 3)))))]
3358  "TARGET_SIMD"
3359  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3360  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3361)
3362
3363(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3364 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3365       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3366                           (match_operand:VQW 1 "register_operand" "w")
3367                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3368                       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3369                           (match_operand:VQW 2 "register_operand" "w")
3370                           (match_dup 3)))))]
3371  "TARGET_SIMD"
3372  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3373  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3374)
3375
3377(define_expand "aarch64_saddl2<mode>"
3378  [(match_operand:<VWIDE> 0 "register_operand")
3379   (match_operand:VQW 1 "register_operand")
3380   (match_operand:VQW 2 "register_operand")]
3381  "TARGET_SIMD"
3382{
3383  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3384  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3385                                                  operands[2], p));
3386  DONE;
3387})
3388
3389(define_expand "aarch64_uaddl2<mode>"
3390  [(match_operand:<VWIDE> 0 "register_operand")
3391   (match_operand:VQW 1 "register_operand")
3392   (match_operand:VQW 2 "register_operand")]
3393  "TARGET_SIMD"
3394{
3395  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3396  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3397                                                  operands[2], p));
3398  DONE;
3399})
3400
3401(define_expand "aarch64_ssubl2<mode>"
3402  [(match_operand:<VWIDE> 0 "register_operand")
3403   (match_operand:VQW 1 "register_operand")
3404   (match_operand:VQW 2 "register_operand")]
3405  "TARGET_SIMD"
3406{
3407  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3408  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3409						operands[2], p));
3410  DONE;
3411})
3412
3413(define_expand "aarch64_usubl2<mode>"
3414  [(match_operand:<VWIDE> 0 "register_operand")
3415   (match_operand:VQW 1 "register_operand")
3416   (match_operand:VQW 2 "register_operand")]
3417  "TARGET_SIMD"
3418{
3419  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3420  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3421						operands[2], p));
3422  DONE;
3423})
3424
3425(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3426 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3427       (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3428			   (match_operand:VD_BHSI 1 "register_operand" "w"))
3429		       (ANY_EXTEND:<VWIDE>
3430			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3431  "TARGET_SIMD"
3432  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3433  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3434)
3435
3436;; <su><addsub>w<q>.
3437
3438(define_expand "widen_ssum<mode>3"
3439  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (sign_extend:<VDBLW>
3441		        (match_operand:VQW 1 "register_operand"))
3442		      (match_operand:<VDBLW> 2 "register_operand")))]
3443  "TARGET_SIMD"
3444  {
3445    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3446    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3447
3448    emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3449						operands[1], p));
3450    emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3451    DONE;
3452  }
3453)
3454
3455(define_expand "widen_ssum<mode>3"
3456  [(set (match_operand:<VWIDE> 0 "register_operand")
3457	(plus:<VWIDE> (sign_extend:<VWIDE>
3458		        (match_operand:VD_BHSI 1 "register_operand"))
3459		      (match_operand:<VWIDE> 2 "register_operand")))]
3460  "TARGET_SIMD"
3461{
3462  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3463  DONE;
3464})
3465
3466(define_expand "widen_usum<mode>3"
3467  [(set (match_operand:<VDBLW> 0 "register_operand")
	(plus:<VDBLW> (zero_extend:<VDBLW>
3469		        (match_operand:VQW 1 "register_operand"))
3470		      (match_operand:<VDBLW> 2 "register_operand")))]
3471  "TARGET_SIMD"
3472  {
3473    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3474    rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3475
3476    emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3477						 operands[1], p));
3478    emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3479    DONE;
3480  }
3481)
3482
3483(define_expand "widen_usum<mode>3"
3484  [(set (match_operand:<VWIDE> 0 "register_operand")
3485	(plus:<VWIDE> (zero_extend:<VWIDE>
3486		        (match_operand:VD_BHSI 1 "register_operand"))
3487		      (match_operand:<VWIDE> 2 "register_operand")))]
3488  "TARGET_SIMD"
3489{
3490  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3491  DONE;
3492})
3493
3494(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3495  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3496	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3497	  (ANY_EXTEND:<VWIDE>
3498	    (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3499  "TARGET_SIMD"
3500  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3501  [(set_attr "type" "neon_sub_widen")]
3502)
3503
3504(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3505  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3506	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3507	  (ANY_EXTEND:<VWIDE>
3508	    (vec_select:<VHALF>
3509	      (match_operand:VQW 2 "register_operand" "w")
3510	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3511  "TARGET_SIMD"
3512  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3513  [(set_attr "type" "neon_sub_widen")]
3514)
3515
3516(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3517  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3518	(minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3519	  (ANY_EXTEND:<VWIDE>
3520	    (vec_select:<VHALF>
3521	      (match_operand:VQW 2 "register_operand" "w")
3522	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3523  "TARGET_SIMD"
3524  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3525  [(set_attr "type" "neon_sub_widen")]
3526)
3527
3528(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3529  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3530	(plus:<VWIDE>
3531	  (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3532	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3533  "TARGET_SIMD"
3534  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3535  [(set_attr "type" "neon_add_widen")]
3536)
3537
3538(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3539  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3540	(plus:<VWIDE>
3541	  (ANY_EXTEND:<VWIDE>
3542	    (vec_select:<VHALF>
3543	      (match_operand:VQW 2 "register_operand" "w")
3544	      (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3545	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3546  "TARGET_SIMD"
3547  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3548  [(set_attr "type" "neon_add_widen")]
3549)
3550
3551(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3552  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3553	(plus:<VWIDE>
3554	  (ANY_EXTEND:<VWIDE>
3555	    (vec_select:<VHALF>
3556	      (match_operand:VQW 2 "register_operand" "w")
3557	      (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3558	  (match_operand:<VWIDE> 1 "register_operand" "w")))]
3559  "TARGET_SIMD"
3560  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3561  [(set_attr "type" "neon_add_widen")]
3562)
3563
3564(define_expand "aarch64_saddw2<mode>"
3565  [(match_operand:<VWIDE> 0 "register_operand")
3566   (match_operand:<VWIDE> 1 "register_operand")
3567   (match_operand:VQW 2 "register_operand")]
3568  "TARGET_SIMD"
3569{
3570  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3571  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3572						operands[2], p));
3573  DONE;
3574})
3575
3576(define_expand "aarch64_uaddw2<mode>"
3577  [(match_operand:<VWIDE> 0 "register_operand")
3578   (match_operand:<VWIDE> 1 "register_operand")
3579   (match_operand:VQW 2 "register_operand")]
3580  "TARGET_SIMD"
3581{
3582  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3583  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3584						operands[2], p));
3585  DONE;
3586})
3587
3589(define_expand "aarch64_ssubw2<mode>"
3590  [(match_operand:<VWIDE> 0 "register_operand")
3591   (match_operand:<VWIDE> 1 "register_operand")
3592   (match_operand:VQW 2 "register_operand")]
3593  "TARGET_SIMD"
3594{
3595  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3596  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3597						operands[2], p));
3598  DONE;
3599})
3600
3601(define_expand "aarch64_usubw2<mode>"
3602  [(match_operand:<VWIDE> 0 "register_operand")
3603   (match_operand:<VWIDE> 1 "register_operand")
3604   (match_operand:VQW 2 "register_operand")]
3605  "TARGET_SIMD"
3606{
3607  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3608  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3609						operands[2], p));
3610  DONE;
3611})
3612
3613;; <su><r>h<addsub>.
3614
3615(define_expand "<u>avg<mode>3_floor"
3616  [(set (match_operand:VDQ_BHSI 0 "register_operand")
3617	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3618			  (match_operand:VDQ_BHSI 2 "register_operand")]
3619			 HADD))]
3620  "TARGET_SIMD"
3621)
3622
3623(define_expand "<u>avg<mode>3_ceil"
3624  [(set (match_operand:VDQ_BHSI 0 "register_operand")
3625	(unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3626			  (match_operand:VDQ_BHSI 2 "register_operand")]
3627			 RHADD))]
3628  "TARGET_SIMD"
3629)
3630
3631(define_insn "aarch64_<sur>h<addsub><mode>"
3632  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3633        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3634		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3635		     HADDSUB))]
3636  "TARGET_SIMD"
3637  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3638  [(set_attr "type" "neon_<addsub>_halve<q>")]
3639)
3640
3641;; <r><addsub>hn<q>.
3642
3643(define_insn "aarch64_<sur><addsub>hn<mode>"
3644  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3645        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3646			    (match_operand:VQN 2 "register_operand" "w")]
3647                           ADDSUBHN))]
3648  "TARGET_SIMD"
3649  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3650  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3651)
3652
3653(define_insn "aarch64_<sur><addsub>hn2<mode>"
3654  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3655        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3656			     (match_operand:VQN 2 "register_operand" "w")
3657			     (match_operand:VQN 3 "register_operand" "w")]
3658                            ADDSUBHN2))]
3659  "TARGET_SIMD"
3660  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3661  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3662)
3663
3664;; pmul.
3665
3666(define_insn "aarch64_pmul<mode>"
3667  [(set (match_operand:VB 0 "register_operand" "=w")
3668        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3669		    (match_operand:VB 2 "register_operand" "w")]
3670		   UNSPEC_PMUL))]
3671 "TARGET_SIMD"
3672 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3673  [(set_attr "type" "neon_mul_<Vetype><q>")]
3674)
3675
3676;; fmulx.
3677
3678(define_insn "aarch64_fmulx<mode>"
3679  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3680	(unspec:VHSDF_HSDF
3681	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3682	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3683	   UNSPEC_FMULX))]
3684 "TARGET_SIMD"
3685 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3686 [(set_attr "type" "neon_fp_mul_<stype>")]
3687)
3688
;; vmulxq_lane_f32 and vmulx_laneq_f32
3690
3691(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3692  [(set (match_operand:VDQSF 0 "register_operand" "=w")
3693	(unspec:VDQSF
3694	 [(match_operand:VDQSF 1 "register_operand" "w")
3695	  (vec_duplicate:VDQSF
3696	   (vec_select:<VEL>
3697	    (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3698	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3699	 UNSPEC_FMULX))]
3700  "TARGET_SIMD"
3701  {
3702    operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3703    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3704  }
3705  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3706)
3707
3708;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3709
3710(define_insn "*aarch64_mulx_elt<mode>"
3711  [(set (match_operand:VDQF 0 "register_operand" "=w")
3712	(unspec:VDQF
3713	 [(match_operand:VDQF 1 "register_operand" "w")
3714	  (vec_duplicate:VDQF
3715	   (vec_select:<VEL>
3716	    (match_operand:VDQF 2 "register_operand" "w")
3717	    (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3718	 UNSPEC_FMULX))]
3719  "TARGET_SIMD"
3720  {
3721    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3722    return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3723  }
3724  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3725)
3726
3727;; vmulxq_lane
3728
3729(define_insn "*aarch64_mulx_elt_from_dup<mode>"
3730  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3731	(unspec:VHSDF
3732	 [(match_operand:VHSDF 1 "register_operand" "w")
3733	  (vec_duplicate:VHSDF
3734	    (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3735	 UNSPEC_FMULX))]
3736  "TARGET_SIMD"
3737  "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3738  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3739)
3740
3741;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
3743;; vmulxd_laneq_f64 == vmulx_laneq_f64
3744
3745(define_insn "*aarch64_vgetfmulx<mode>"
3746  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3747	(unspec:<VEL>
3748	 [(match_operand:<VEL> 1 "register_operand" "w")
3749	  (vec_select:<VEL>
3750	   (match_operand:VDQF 2 "register_operand" "w")
3751	    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3752	 UNSPEC_FMULX))]
3753  "TARGET_SIMD"
3754  {
3755    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3756    return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3757  }
3758  [(set_attr "type" "fmul<Vetype>")]
3759)

;; <su>q<addsub>
3761
3762(define_insn "aarch64_<su_optab>q<addsub><mode>"
3763  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3764	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3765			(match_operand:VSDQ_I 2 "register_operand" "w")))]
3766  "TARGET_SIMD"
3767  "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3768  [(set_attr "type" "neon_q<addsub><q>")]
3769)
3770
3771;; suqadd and usqadd
3772
3773(define_insn "aarch64_<sur>qadd<mode>"
3774  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3775	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3776			(match_operand:VSDQ_I 2 "register_operand" "w")]
3777		       USSUQADD))]
3778  "TARGET_SIMD"
3779  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3780  [(set_attr "type" "neon_qadd<q>")]
3781)
3782
3783;; sqmovun
3784
3785(define_insn "aarch64_sqmovun<mode>"
3786  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3787	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3788                            UNSPEC_SQXTUN))]
3789   "TARGET_SIMD"
3790   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3791   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3792)
3793
3794;; sqmovn and uqmovn
3795
3796(define_insn "aarch64_<sur>qmovn<mode>"
3797  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3798	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3799                            SUQMOVN))]
3800  "TARGET_SIMD"
3801  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3802   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3803)
3804
3805;; <su>q<absneg>
3806
3807(define_insn "aarch64_s<optab><mode>"
3808  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3809	(UNQOPS:VSDQ_I
3810	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
3811  "TARGET_SIMD"
3812  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3813  [(set_attr "type" "neon_<optab><q>")]
3814)
3815
3816;; sq<r>dmulh.
3817
3818(define_insn "aarch64_sq<r>dmulh<mode>"
3819  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3820	(unspec:VSDQ_HSI
3821	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3822	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3823	 VQDMULH))]
3824  "TARGET_SIMD"
3825  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3826  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3827)
3828
3829;; sq<r>dmulh_lane
3830
3831(define_insn "aarch64_sq<r>dmulh_lane<mode>"
3832  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3833        (unspec:VDQHS
3834	  [(match_operand:VDQHS 1 "register_operand" "w")
3835           (vec_select:<VEL>
3836             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3837             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3838	 VQDMULH))]
3839  "TARGET_SIMD"
3840  "*
3841   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3842   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3843  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3844)
3845
3846(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3847  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3848        (unspec:VDQHS
3849	  [(match_operand:VDQHS 1 "register_operand" "w")
3850           (vec_select:<VEL>
3851             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3852             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3853	 VQDMULH))]
3854  "TARGET_SIMD"
3855  "*
3856   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3857   return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3858  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3859)
3860
3861(define_insn "aarch64_sq<r>dmulh_lane<mode>"
3862  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3863        (unspec:SD_HSI
3864	  [(match_operand:SD_HSI 1 "register_operand" "w")
3865           (vec_select:<VEL>
3866             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3867             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3868	 VQDMULH))]
3869  "TARGET_SIMD"
3870  "*
3871   operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3872   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3873  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3874)
3875
3876(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3877  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3878        (unspec:SD_HSI
3879	  [(match_operand:SD_HSI 1 "register_operand" "w")
3880           (vec_select:<VEL>
3881             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3882             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3883	 VQDMULH))]
3884  "TARGET_SIMD"
3885  "*
3886   operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3887   return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3888  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3889)
3890
3891;; sqrdml[as]h.
3892
3893(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3894  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3895	(unspec:VSDQ_HSI
3896	  [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3897	   (match_operand:VSDQ_HSI 2 "register_operand" "w")
3898	   (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3899	  SQRDMLH_AS))]
3900   "TARGET_SIMD_RDMA"
3901   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3902   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3903)
3904
3905;; sqrdml[as]h_lane.
3906
3907(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3908  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3909	(unspec:VDQHS
3910	  [(match_operand:VDQHS 1 "register_operand" "0")
3911	   (match_operand:VDQHS 2 "register_operand" "w")
3912	   (vec_select:<VEL>
3913	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3914	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3915	  SQRDMLH_AS))]
3916   "TARGET_SIMD_RDMA"
3917   {
3918     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3919     return
3920      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3921   }
3922   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3923)
3924
3925(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3926  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3927	(unspec:SD_HSI
3928	  [(match_operand:SD_HSI 1 "register_operand" "0")
3929	   (match_operand:SD_HSI 2 "register_operand" "w")
3930	   (vec_select:<VEL>
3931	     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3932	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3933	  SQRDMLH_AS))]
3934   "TARGET_SIMD_RDMA"
3935   {
3936     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3937     return
3938      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3939   }
3940   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3941)
3942
3943;; sqrdml[as]h_laneq.
3944
3945(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3946  [(set (match_operand:VDQHS 0 "register_operand" "=w")
3947	(unspec:VDQHS
3948	  [(match_operand:VDQHS 1 "register_operand" "0")
3949	   (match_operand:VDQHS 2 "register_operand" "w")
3950	   (vec_select:<VEL>
3951	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3952	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3953	  SQRDMLH_AS))]
3954   "TARGET_SIMD_RDMA"
3955   {
3956     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3957     return
3958      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3959   }
3960   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3961)
3962
3963(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3964  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3965	(unspec:SD_HSI
3966	  [(match_operand:SD_HSI 1 "register_operand" "0")
3967	   (match_operand:SD_HSI 2 "register_operand" "w")
3968	   (vec_select:<VEL>
3969	     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3970	     (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3971	  SQRDMLH_AS))]
3972   "TARGET_SIMD_RDMA"
3973   {
3974     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3975     return
3976      "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3977   }
3978   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3979)
3980
3981;; vqdml[sa]l
3982
3983(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3984  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3985        (SBINQOPS:<VWIDE>
3986	  (match_operand:<VWIDE> 1 "register_operand" "0")
3987	  (ss_ashift:<VWIDE>
3988	      (mult:<VWIDE>
3989		(sign_extend:<VWIDE>
3990		      (match_operand:VSD_HSI 2 "register_operand" "w"))
3991		(sign_extend:<VWIDE>
3992		      (match_operand:VSD_HSI 3 "register_operand" "w")))
3993	      (const_int 1))))]
3994  "TARGET_SIMD"
3995  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3996  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3997)
3998
3999;; vqdml[sa]l_lane
4000
4001(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4002  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4003        (SBINQOPS:<VWIDE>
4004	  (match_operand:<VWIDE> 1 "register_operand" "0")
4005	  (ss_ashift:<VWIDE>
4006	    (mult:<VWIDE>
4007	      (sign_extend:<VWIDE>
4008		(match_operand:VD_HSI 2 "register_operand" "w"))
4009	      (sign_extend:<VWIDE>
4010		(vec_duplicate:VD_HSI
4011		  (vec_select:<VEL>
4012		    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4013		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4014              ))
4015	    (const_int 1))))]
4016  "TARGET_SIMD"
4017  {
4018    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4019    return
4020      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4021  }
4022  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4023)
4024
4025(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4026  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4027        (SBINQOPS:<VWIDE>
4028	  (match_operand:<VWIDE> 1 "register_operand" "0")
4029	  (ss_ashift:<VWIDE>
4030	    (mult:<VWIDE>
4031	      (sign_extend:<VWIDE>
4032		(match_operand:VD_HSI 2 "register_operand" "w"))
4033	      (sign_extend:<VWIDE>
4034		(vec_duplicate:VD_HSI
4035		  (vec_select:<VEL>
4036		    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4037		    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4038              ))
4039	    (const_int 1))))]
4040  "TARGET_SIMD"
4041  {
4042    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4043    return
4044      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4045  }
4046  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4047)
4048
4049(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
4050  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4051        (SBINQOPS:<VWIDE>
4052	  (match_operand:<VWIDE> 1 "register_operand" "0")
4053	  (ss_ashift:<VWIDE>
4054	    (mult:<VWIDE>
4055	      (sign_extend:<VWIDE>
4056		(match_operand:SD_HSI 2 "register_operand" "w"))
4057	      (sign_extend:<VWIDE>
4058		(vec_select:<VEL>
4059		  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4060		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4061              )
4062	    (const_int 1))))]
4063  "TARGET_SIMD"
4064  {
4065    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4066    return
4067      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4068  }
4069  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4070)
4071
4072(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
4073  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074        (SBINQOPS:<VWIDE>
4075	  (match_operand:<VWIDE> 1 "register_operand" "0")
4076	  (ss_ashift:<VWIDE>
4077	    (mult:<VWIDE>
4078	      (sign_extend:<VWIDE>
4079		(match_operand:SD_HSI 2 "register_operand" "w"))
4080	      (sign_extend:<VWIDE>
4081		(vec_select:<VEL>
4082		  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4083		  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
4084              )
4085	    (const_int 1))))]
4086  "TARGET_SIMD"
4087  {
4088    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4089    return
4090      "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4091  }
4092  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4093)
4094
4095;; vqdml[sa]l_n
4096
4097(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
4098  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4099        (SBINQOPS:<VWIDE>
4100	  (match_operand:<VWIDE> 1 "register_operand" "0")
4101	  (ss_ashift:<VWIDE>
4102	      (mult:<VWIDE>
4103		(sign_extend:<VWIDE>
4104		      (match_operand:VD_HSI 2 "register_operand" "w"))
4105		(sign_extend:<VWIDE>
4106		  (vec_duplicate:VD_HSI
4107		    (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4108	      (const_int 1))))]
4109  "TARGET_SIMD"
4110  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4111  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4112)
4113
4114;; sqdml[as]l2
4115
4116(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
4117  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4118        (SBINQOPS:<VWIDE>
4119         (match_operand:<VWIDE> 1 "register_operand" "0")
4120         (ss_ashift:<VWIDE>
4121             (mult:<VWIDE>
4122               (sign_extend:<VWIDE>
4123                 (vec_select:<VHALF>
4124                     (match_operand:VQ_HSI 2 "register_operand" "w")
4125                     (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4126               (sign_extend:<VWIDE>
4127                 (vec_select:<VHALF>
4128                     (match_operand:VQ_HSI 3 "register_operand" "w")
4129                     (match_dup 4))))
4130             (const_int 1))))]
4131  "TARGET_SIMD"
4132  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
4133  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4134)
4135
4136(define_expand "aarch64_sqdmlal2<mode>"
4137  [(match_operand:<VWIDE> 0 "register_operand")
4138   (match_operand:<VWIDE> 1 "register_operand")
4139   (match_operand:VQ_HSI 2 "register_operand")
4140   (match_operand:VQ_HSI 3 "register_operand")]
4141  "TARGET_SIMD"
4142{
4143  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4144  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
4145						  operands[2], operands[3], p));
4146  DONE;
4147})
4148
4149(define_expand "aarch64_sqdmlsl2<mode>"
4150  [(match_operand:<VWIDE> 0 "register_operand")
4151   (match_operand:<VWIDE> 1 "register_operand")
4152   (match_operand:VQ_HSI 2 "register_operand")
4153   (match_operand:VQ_HSI 3 "register_operand")]
4154  "TARGET_SIMD"
4155{
4156  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4157  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
4158						  operands[2], operands[3], p));
4159  DONE;
4160})
4161
4162;; vqdml[sa]l2_lane
4163
4164(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
4165  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4166        (SBINQOPS:<VWIDE>
4167	  (match_operand:<VWIDE> 1 "register_operand" "0")
4168	  (ss_ashift:<VWIDE>
4169	      (mult:<VWIDE>
4170		(sign_extend:<VWIDE>
4171                  (vec_select:<VHALF>
4172                    (match_operand:VQ_HSI 2 "register_operand" "w")
4173                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4174		(sign_extend:<VWIDE>
4175                  (vec_duplicate:<VHALF>
4176		    (vec_select:<VEL>
4177		      (match_operand:<VCOND> 3 "register_operand" "<vwx>")
4178		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4179		    ))))
4180	      (const_int 1))))]
4181  "TARGET_SIMD"
4182  {
4183    operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
4184    return
4185     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4186  }
4187  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4188)
4189
4190(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
4191  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4192        (SBINQOPS:<VWIDE>
4193	  (match_operand:<VWIDE> 1 "register_operand" "0")
4194	  (ss_ashift:<VWIDE>
4195	      (mult:<VWIDE>
4196		(sign_extend:<VWIDE>
4197                  (vec_select:<VHALF>
4198                    (match_operand:VQ_HSI 2 "register_operand" "w")
4199                    (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
4200		(sign_extend:<VWIDE>
4201                  (vec_duplicate:<VHALF>
4202		    (vec_select:<VEL>
4203		      (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
4204		      (parallel [(match_operand:SI 4 "immediate_operand" "i")])
4205		    ))))
4206	      (const_int 1))))]
4207  "TARGET_SIMD"
4208  {
4209    operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
4210    return
4211     "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
4212  }
4213  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4214)
4215
4216(define_expand "aarch64_sqdmlal2_lane<mode>"
4217  [(match_operand:<VWIDE> 0 "register_operand")
4218   (match_operand:<VWIDE> 1 "register_operand")
4219   (match_operand:VQ_HSI 2 "register_operand")
4220   (match_operand:<VCOND> 3 "register_operand")
4221   (match_operand:SI 4 "immediate_operand")]
4222  "TARGET_SIMD"
4223{
4224  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4225  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
4226						       operands[2], operands[3],
4227						       operands[4], p));
4228  DONE;
4229})
4230
4231(define_expand "aarch64_sqdmlal2_laneq<mode>"
4232  [(match_operand:<VWIDE> 0 "register_operand")
4233   (match_operand:<VWIDE> 1 "register_operand")
4234   (match_operand:VQ_HSI 2 "register_operand")
4235   (match_operand:<VCONQ> 3 "register_operand")
4236   (match_operand:SI 4 "immediate_operand")]
4237  "TARGET_SIMD"
4238{
4239  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4240  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
4241						       operands[2], operands[3],
4242						       operands[4], p));
4243  DONE;
4244})
4245
4246(define_expand "aarch64_sqdmlsl2_lane<mode>"
4247  [(match_operand:<VWIDE> 0 "register_operand")
4248   (match_operand:<VWIDE> 1 "register_operand")
4249   (match_operand:VQ_HSI 2 "register_operand")
4250   (match_operand:<VCOND> 3 "register_operand")
4251   (match_operand:SI 4 "immediate_operand")]
4252  "TARGET_SIMD"
4253{
4254  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4255  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
4256						       operands[2], operands[3],
4257						       operands[4], p));
4258  DONE;
4259})
4260
4261(define_expand "aarch64_sqdmlsl2_laneq<mode>"
4262  [(match_operand:<VWIDE> 0 "register_operand")
4263   (match_operand:<VWIDE> 1 "register_operand")
4264   (match_operand:VQ_HSI 2 "register_operand")
4265   (match_operand:<VCONQ> 3 "register_operand")
4266   (match_operand:SI 4 "immediate_operand")]
4267  "TARGET_SIMD"
4268{
4269  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4270  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4271						       operands[2], operands[3],
4272						       operands[4], p));
4273  DONE;
4274})
4275
4276(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4277  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4278        (SBINQOPS:<VWIDE>
4279	  (match_operand:<VWIDE> 1 "register_operand" "0")
4280	  (ss_ashift:<VWIDE>
4281	    (mult:<VWIDE>
4282	      (sign_extend:<VWIDE>
4283                (vec_select:<VHALF>
4284                  (match_operand:VQ_HSI 2 "register_operand" "w")
4285                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4286	      (sign_extend:<VWIDE>
4287                (vec_duplicate:<VHALF>
4288		  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4289	    (const_int 1))))]
4290  "TARGET_SIMD"
4291  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4292  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4293)
4294
4295(define_expand "aarch64_sqdmlal2_n<mode>"
4296  [(match_operand:<VWIDE> 0 "register_operand")
4297   (match_operand:<VWIDE> 1 "register_operand")
4298   (match_operand:VQ_HSI 2 "register_operand")
4299   (match_operand:<VEL> 3 "register_operand")]
4300  "TARGET_SIMD"
4301{
4302  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4303  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4304						    operands[2], operands[3],
4305						    p));
4306  DONE;
4307})
4308
4309(define_expand "aarch64_sqdmlsl2_n<mode>"
4310  [(match_operand:<VWIDE> 0 "register_operand")
4311   (match_operand:<VWIDE> 1 "register_operand")
4312   (match_operand:VQ_HSI 2 "register_operand")
4313   (match_operand:<VEL> 3 "register_operand")]
4314  "TARGET_SIMD"
4315{
4316  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4317  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4318						    operands[2], operands[3],
4319						    p));
4320  DONE;
4321})
4322
4323;; vqdmull
4324
4325(define_insn "aarch64_sqdmull<mode>"
4326  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4327        (ss_ashift:<VWIDE>
4328	     (mult:<VWIDE>
4329	       (sign_extend:<VWIDE>
4330		     (match_operand:VSD_HSI 1 "register_operand" "w"))
4331	       (sign_extend:<VWIDE>
4332		     (match_operand:VSD_HSI 2 "register_operand" "w")))
4333	     (const_int 1)))]
4334  "TARGET_SIMD"
4335  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4336  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4337)
4338
4339;; vqdmull_lane
4340
4341(define_insn "aarch64_sqdmull_lane<mode>"
4342  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4343        (ss_ashift:<VWIDE>
4344	     (mult:<VWIDE>
4345	       (sign_extend:<VWIDE>
4346		 (match_operand:VD_HSI 1 "register_operand" "w"))
4347	       (sign_extend:<VWIDE>
4348                 (vec_duplicate:VD_HSI
4349                   (vec_select:<VEL>
4350		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4351		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4352	       ))
4353	     (const_int 1)))]
4354  "TARGET_SIMD"
4355  {
4356    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4357    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4358  }
4359  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4360)
4361
4362(define_insn "aarch64_sqdmull_laneq<mode>"
4363  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364        (ss_ashift:<VWIDE>
4365	     (mult:<VWIDE>
4366	       (sign_extend:<VWIDE>
4367		 (match_operand:VD_HSI 1 "register_operand" "w"))
4368	       (sign_extend:<VWIDE>
4369                 (vec_duplicate:VD_HSI
4370                   (vec_select:<VEL>
4371		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4372		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4373	       ))
4374	     (const_int 1)))]
4375  "TARGET_SIMD"
4376  {
4377    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4378    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4379  }
4380  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4381)
4382
4383(define_insn "aarch64_sqdmull_lane<mode>"
4384  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4385        (ss_ashift:<VWIDE>
4386	     (mult:<VWIDE>
4387	       (sign_extend:<VWIDE>
4388		 (match_operand:SD_HSI 1 "register_operand" "w"))
4389	       (sign_extend:<VWIDE>
4390                 (vec_select:<VEL>
4391		   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4392		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4393	       ))
4394	     (const_int 1)))]
4395  "TARGET_SIMD"
4396  {
4397    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4398    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4399  }
4400  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4401)
4402
4403(define_insn "aarch64_sqdmull_laneq<mode>"
4404  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4405        (ss_ashift:<VWIDE>
4406	     (mult:<VWIDE>
4407	       (sign_extend:<VWIDE>
4408		 (match_operand:SD_HSI 1 "register_operand" "w"))
4409	       (sign_extend:<VWIDE>
4410                 (vec_select:<VEL>
4411		   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4412		   (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4413	       ))
4414	     (const_int 1)))]
4415  "TARGET_SIMD"
4416  {
4417    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4418    return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4419  }
4420  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4421)
4422
4423;; vqdmull_n
4424
4425(define_insn "aarch64_sqdmull_n<mode>"
4426  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4427        (ss_ashift:<VWIDE>
4428	     (mult:<VWIDE>
4429	       (sign_extend:<VWIDE>
4430		 (match_operand:VD_HSI 1 "register_operand" "w"))
4431	       (sign_extend:<VWIDE>
4432                 (vec_duplicate:VD_HSI
4433                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4434	       )
4435	     (const_int 1)))]
4436  "TARGET_SIMD"
4437  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4438  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4439)
4440
;; vqdmull2

4445(define_insn "aarch64_sqdmull2<mode>_internal"
4446  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4447        (ss_ashift:<VWIDE>
4448	     (mult:<VWIDE>
4449	       (sign_extend:<VWIDE>
4450		 (vec_select:<VHALF>
4451                   (match_operand:VQ_HSI 1 "register_operand" "w")
4452                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4453	       (sign_extend:<VWIDE>
4454		 (vec_select:<VHALF>
4455                   (match_operand:VQ_HSI 2 "register_operand" "w")
4456                   (match_dup 3)))
4457	       )
4458	     (const_int 1)))]
4459  "TARGET_SIMD"
4460  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4461  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4462)
4463
4464(define_expand "aarch64_sqdmull2<mode>"
4465  [(match_operand:<VWIDE> 0 "register_operand")
4466   (match_operand:VQ_HSI 1 "register_operand")
4467   (match_operand:VQ_HSI 2 "register_operand")]
4468  "TARGET_SIMD"
4469{
4470  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4471  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4472						  operands[2], p));
4473  DONE;
4474})

;; vqdmull2_lane

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCOND> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (vec_select:<VEL>
		     (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
		     (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
	       ))
	     (const_int 1)))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCOND> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
						       operands[2], operands[3],
						       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VCONQ> 2 "register_operand")
   (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
							operands[2], operands[3],
							p));
  DONE;
})

;; vqdmull2_n

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
	     (mult:<VWIDE>
	       (sign_extend:<VWIDE>
		 (vec_select:<VHALF>
                   (match_operand:VQ_HSI 1 "register_operand" "w")
                   (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
	       (sign_extend:<VWIDE>
                 (vec_duplicate:<VHALF>
                   (match_operand:<VEL> 2 "register_operand" "<vwx>")))
	       )
	     (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (match_operand:VQ_HSI 1 "register_operand")
   (match_operand:<VEL> 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
						    operands[2], p));
  DONE;
})
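
;; Illustrative semantics sketch for the vqdmull2_n patterns above, in
;; C-like pseudocode (lane counts are for the V4SI variant; ssat64 is a
;; hypothetical saturate-to-int64 helper, not a real function):
;;   for (i = 0; i < 2; i++)
;;     res[i] = ssat64 (2 * (int64_t) a[i + 2] * (int64_t) b);
;; i.e. the high half widened, multiplied by the scalar, doubled and
;; saturated, matching the ss_ashift-by-1 of the mult in the RTL.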

;; vshl

(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
	  [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
         VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)
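
;; Illustrative output for the V4SI variant of the pattern above:
;;   sshl	v0.4s, v1.4s, v2.4s
;; Each lane of operand 2 holds a signed shift count and negative counts
;; shift right, which is presumably why this stays an unspec rather than
;; a plain ashift.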


;; vqshl

(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
	  [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
         VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n

(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
			 (match_operand:SI 2
			   "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)
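
;; SHLL only encodes a shift equal to the element width, the one value
;; the USHLL/SSHLL immediate range excludes, so the two branches above
;; cover the whole range.  Illustrative output for the V8QI variant:
;;   shll	v0.8h, v1.8b, #8	// shift == element size
;;   sshll	v0.8h, v1.8b, #3	// smaller shifts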

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
			 (match_operand:SI 2 "immediate_operand" "i")]
                         VSHLL))]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
      return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
    else
      return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vrshr_n

(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
			   (match_operand:SI 2
			     "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			  VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                      VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vs<lr>i_n

(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
		       (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                       (match_operand:SI 3
			 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                      VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u)

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
		       (match_operand:SI 2
			 "aarch64_simd_shift_imm_<ve_mode>" "i")]
                      VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)


;; vq(r)shr(u)n_n

(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
			    (match_operand:SI 2
			      "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
			   VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)


;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.
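;; As an illustration, the two alternatives of the pattern below give,
;; for an equality comparison on the V4SI variant:
;;   cmeq	v0.4s, v1.4s, v2.4s	// register form
;;   cmeq	v0.4s, v1.4s, #0	// zero form, operand 2 a zero vector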

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w,w")
	    (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
	  )))
     (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
	(neg:DI
	  (COMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
	  )))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)

;; cm(hs|hi)

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (UCOMPARISONS:<V_INT_EQUIV>
	    (match_operand:VDQ_I 1 "register_operand" "w")
	    (match_operand:VDQ_I 2 "register_operand" "w")
	  )))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w,r")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
	  )))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero")
	  )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	machine_mode mode = CCmode;
	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (UCOMPARISONS:DI
	    (match_operand:DI 1 "register_operand" "w")
	    (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
	  )))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
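;; A worked per-lane example, treating the comparison as yielding 0/1
;; (the convention the neg/plus idiom here relies on):
;;   x & y != 0:  neg (ne (and x y) 0) = -(1) = -1 (all ones)
;;                plus (eq (and x y) 0) -1 = 0 - 1 = -1
;;   x & y == 0:  neg (0) = 0,  and  1 - 1 = 0
;; so the rewritten form below computes exactly what cmtst does.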

(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(plus:<V_INT_EQUIV>
	  (eq:<V_INT_EQUIV>
	    (and:VDQ_I
	      (match_operand:VDQ_I 1 "register_operand" "w")
	      (match_operand:VDQ_I 2 "register_operand" "w"))
	    (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
	  (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
  ]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w,r")
	      (match_operand:DI 2 "register_operand" "w,r"))
	    (const_int 0))))
    (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand")
	      (match_operand:DI 2 "register_operand"))
	    (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
	&& GP_REGNUM_P (REGNO (operands[1])))
      {
	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
	DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr "type" "neon_tst,multiple")]
)

(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(neg:DI
	  (ne:DI
	    (and:DI
	      (match_operand:DI 1 "register_operand" "w")
	      (match_operand:DI 2 "register_operand" "w"))
	    (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
	(neg:<V_INT_EQUIV>
	  (COMPARISONS:<V_INT_EQUIV>
	    (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
	    (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
	  )))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
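;; E.g. a facle is emitted as facge with the operand order swapped; the
;; <cmp_1>/<cmp_2> attributes below pick the order, so illustratively a
;; facle on the V4SF variant becomes:
;;   facge	v0.4s, v2.4s, v1.4s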

(define_insn "aarch64_fac<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
	(neg:<V_INT_EQUIV>
	  (FAC_COMPARISONS:<V_INT_EQUIV>
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
	    (abs:VHSDF_HSDF
	      (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
  )))]
  "TARGET_SIMD"
  "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; addp

(define_insn "aarch64_addp<mode>"
  [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
        (unspec:VD_BHSI
          [(match_operand:VD_BHSI 1 "register_operand" "w")
	   (match_operand:VD_BHSI 2 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

(define_insn "aarch64_addpdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (unspec:DI
          [(match_operand:V2DI 1 "register_operand" "w")]
          UNSPEC_ADDP))]
  "TARGET_SIMD"
  "addp\t%d0, %1.2d"
  [(set_attr "type" "neon_reduc_add")]
)

;; sqrt

(define_expand "sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
  "TARGET_SIMD"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
})

(define_insn "*sqrt<mode>2"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
)

;; Patterns for vector struct loads and stores.

(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
       (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:OI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
		   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
)

(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})
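
;; The big-endian path above shows the scheme used by all the
;; lanes expanders below: the architectural register-list layout and
;; GCC's element numbering disagree on big-endian, so each vector is
;; permuted with tbl using the constant from aarch64_reverse_mask.
;; Illustratively, for the V4SI variant that mask is the byte sequence
;;   12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3
;; i.e. the elements of each vector reversed in place.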

(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		    (match_operand:SI 2 "immediate_operand" "i")]
		   UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane<q>")]
)
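
;; Lane-flip example for the pattern above: aarch64_endian_lane_rtx
;; keeps the index on little-endian and maps it to nunits - 1 - n on
;; big-endian, so storing GCC lane 0 of a V4SI pair on big-endian
;; prints, illustratively, as:
;;   st2	{v0.s - v1.s}[3], [x0]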

(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand")
	(unspec:OI [(match_operand:OI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
       (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:CI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load3_one_lane")]
)

(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand")
	(unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand")
	(unspec:CI [(match_operand:CI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
       (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
                  UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (match_operand:XI 2 "register_operand" "0")
		    (match_operand:SI 3 "immediate_operand" "i")
		    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load4_one_lane")]
)

(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand")
	(unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
		     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
		     (match_operand:SI 2 "immediate_operand" "i")]
		    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand")
	(unspec:XI [(match_operand:XI 1 "register_operand")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
	(unspec:VSTRUCT
	           [(match_operand:VSTRUCT 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
	(match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
	operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})


(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI
	  [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_expand "aarch64_ld1x4<VALLDIF:mode>"
  [(match_operand:XI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x4_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x4_<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI
	  [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:OI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x2_<mode>"
   [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
	 (unspec:OI
	  [(match_operand:OI 1 "register_operand" "w")
          (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)

(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:CI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x3_<mode>"
   [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:CI
         [(match_operand:CI 1 "register_operand" "w")
	  (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)

(define_expand "aarch64_st1x4<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:XI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (XImode, operands[0]);
  emit_insn (gen_aarch64_st1_x4_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x4_<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:XI
	   [(match_operand:XI 1 "register_operand" "w")
	   (unspec:VALLDIF [(const_int 4)] UNSPEC_VSTRUCTDUMMY)]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_4reg<q>")]
)

(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
		     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
			     "aarch64_simd_struct_operand" "Utv")]
	UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
	UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
	(match_operand:OI 1 "general_operand"      " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
	(match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

(define_split
  [(set (match_operand:OI 0 "register_operand")
	(match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
	(match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[0],
							CImode, 32)),
		      gen_lowpart (V16QImode,
				   simplify_gen_subreg (TImode, operands[1],
							CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
	(match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})

(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
								mem));
  DONE;
})
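
;; Example of the set_mem_size computation above: an ld3r on V4HI reads
;; one element per register, so the BLKmode MEM covers
;; GET_MODE_SIZE (HImode) * 3 == 6 bytes, which each register then
;; broadcasts to all of its lanes.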

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
	(unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
	(unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1x2<VQ:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1x2<VDC:mode>"
 [(match_operand:OI 0 "register_operand")
  (match_operand:DI 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})


(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand")
	(match_operand:DI 1 "register_operand")
	(match_operand:VSTRUCT 2 "register_operand")
	(match_operand:SI 3 "immediate_operand")
	(unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
	operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
 [(match_operand:VDC 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})
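
;; Illustrative walk-through of the extraction above: each D value sits
;; zero-extended in a 128-bit slot of the opaque mode, so part 1 is the
;; <VDC:VDBL> subreg at byte offset 1 * 16, and the lowpart of that
;; Q-sized temporary is the requested D register.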

;; Q-register list.

(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VQ 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
		  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
			   operands[2], operands[3], <nunits>);
  DONE;
})

(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:VB 2 "register_operand" "w")]
		   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
		       (match_operand:V16QI 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:OI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.

(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:CI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:CI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:XI 1 "register_operand" "w")
		      (match_operand:VB 2 "register_operand" "w")]
		      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
		      (match_operand:XI 2 "register_operand" "w")
		      (match_operand:VB 3 "register_operand" "w")]
		      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
		    (match_operand:V16QI 2 "register_operand" "w")]
		   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
		     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
		mem, operands[1], operands[2]));
  DONE;
})

(define_expand "aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
 [(match_operand:VSTRUCT 0 "register_operand")
  (match_operand:VSTRUCT 1 "register_operand")
  (match_operand:VQ 2 "register_operand")
  (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
		  operands[2]);
  DONE;
})
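
;; E.g. inserting Q-register 2 of an XI (four-register) list above
;; copies the whole list and then writes the <VQ:MODE> subreg at byte
;; offset 2 * 16 == 32, leaving the other three registers untouched.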
6070
6071;; Standard pattern name vec_init<mode><Vel>.
6072
6073(define_expand "vec_init<mode><Vel>"
6074  [(match_operand:VALL_F16 0 "register_operand")
6075   (match_operand 1 "" "")]
6076  "TARGET_SIMD"
6077{
6078  aarch64_expand_vector_init (operands[0], operands[1]);
6079  DONE;
6080})
6081
6082(define_expand "vec_init<mode><Vhalf>"
6083  [(match_operand:VQ_NO2E 0 "register_operand")
6084   (match_operand 1 "" "")]
6085  "TARGET_SIMD"
6086{
6087  aarch64_expand_vector_init (operands[0], operands[1]);
6088  DONE;
6089})
6090
6091(define_insn "*aarch64_simd_ld1r<mode>"
6092  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
6093	(vec_duplicate:VALL_F16
6094	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
6095  "TARGET_SIMD"
6096  "ld1r\\t{%0.<Vtype>}, %1"
6097  [(set_attr "type" "neon_load1_all_lanes")]
6098)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
	(unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
		    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
		   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	   (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  int start = INTVAL (operands[2]);
  if (start != 0 && start != <nunits> / 2)
    FAIL;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})
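
;; As an illustration, vget_low_*/vget_high_* style extractions come
;; through here: for a V4SI source the start index must be 0 or 2, and
;; start == 2 builds the stepped PARALLEL [(const_int 2) (const_int 3)]
;; that aarch64_get_half then matches.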

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extractv2dfv1df"
  [(match_operand:V1DF 0 "register_operand")
   (match_operand:V2DF 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DF is rarely used by other patterns, so it is better to hide it
     in a subreg destination of a normal DF op.  */
  rtx scalar0 = gen_lowpart (DFmode, operands[0]);
  emit_insn (gen_vec_extractv2dfdf (scalar0, operands[1], operands[2]));
  DONE;
})

;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
		[(xor:V16QI
		 (match_operand:V16QI 1 "register_operand" "%0")
		 (match_operand:V16QI 2 "register_operand" "w"))]
	 CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
	 CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
	   [(xor:V16QI
		(match_operand:V16QI 1 "register_operand" "%0")
		(match_operand:V16QI 2 "register_operand" "w"))]
	     UNSPEC_AESE)]
	UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
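
;; Illustrative C source for the fused pattern above, using the usual
;; arm_neon.h spellings of the two unspecs:
;;
;;   uint8x16_t round (uint8x16_t d, uint8x16_t k)
;;   {
;;     return vaesmcq_u8 (vaeseq_u8 (d, k));
;;   }
;;
;; When AARCH64_FUSE_AES_AESMC is enabled, combine merges the two
;; unspecs so the aese/aesmc pair is emitted back to back.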

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
		    [(xor:V16QI
			(match_operand:V16QI 1 "register_operand" "%0")
			(match_operand:V16QI 2 "register_operand" "w"))]
		UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; sha1

(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
         UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
         UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
         UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
         UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
         UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotatert:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
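
;; Semantics of the four SHA3 logical ops above as C-style pseudocode
;; (bitwise throughout; rax1/xar rotate within 64-bit lanes):
;;
;;   eor3 r, a, b, c     =>  r = a ^ b ^ c
;;   rax1 r, a, b        =>  r = a ^ rol64 (b, 1)
;;   xar  r, a, b, #imm  =>  r = ror64 (a ^ b, imm)
;;   bcax r, a, b, c     =>  r = a ^ (b & ~c)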

;; SM3

(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})

(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
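
;; Lane-wise semantics of the four insns above, as C-style pseudocode
;; with n SF lanes in the destination:
;;
;;   fmlal:   r.s[i] += (float) a.h[i]     * (float) b.h[i]
;;   fmlsl:   r.s[i] -= (float) a.h[i]     * (float) b.h[i]
;;   fmlal2:  r.s[i] += (float) a.h[i + n] * (float) b.h[i + n]
;;   fmlsl2:  r.s[i] -= (float) a.h[i + n] * (float) b.h[i + n]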

(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
    DONE;
})

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (neg:V2HF
	    (vec_select:V2HF
	     (match_operand:V4HF 2 "register_operand" "w")
	     (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	   (vec_duplicate:V2HF
	    (vec_select:HF
	     (match_operand:V4HF 3 "register_operand" "x")
	     (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							       operands[1],
							       operands[2],
							       operands[3],
							       p1, lane));
    DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	   (neg:V4HF
	    (vec_select:V4HF
	     (match_operand:V8HF 2 "register_operand" "w")
	     (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
    rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
    DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
    rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
    rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

    emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							      operands[1],
							      operands[2],
							      operands[3],
							      p1, lane));
    DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
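
;; PMULL/PMULL2 are carry-less (polynomial) multiplies, each forming
;; the 128-bit GF(2)[x] product of two 64-bit values: PMULL from two
;; DI scalars, PMULL2 from the high halves of two V2DI registers.  In
;; arm_neon.h terms these correspond to vmull_p64 and vmull_high_p64.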

;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  [(set_attr "type" "neon_shift_imm_long")]
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_shift_imm_narrow_q")]
)
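
;; For example (illustrative intrinsics and registers):
;;   int32x4_t vmovl_s16 (int16x4_t a)  ->  sxtl  v0.4s, v0.4h
;;   int16x4_t vmovn_s32 (int32x4_t a)  ->  xtn   v0.4h, v0.4s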

(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	    UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
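  /* The by-element BFDOT indexes a pair of BF16 elements (one "2h"
     chunk), so only nunits / 2 lanes are addressable; remap the lane
     number for big-endian here.  */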
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)

;; vget_low/high_bf16
(define_expand "aarch64_vget_lo_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, false);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

(define_expand "aarch64_vget_hi_halfv8bf"
  [(match_operand:V4BF 0 "register_operand")
   (match_operand:V8BF 1 "register_operand")]
  "TARGET_BF16_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (V8BFmode, 8, true);
  emit_insn (gen_aarch64_get_halfv8bf (operands[0], operands[1], p));
  DONE;
})

;; bfmmla
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

;; bfmlal<bt>
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:V8BF 3 "register_operand" "w")]
                    BF_MLA)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
                   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
                                 (match_operand:VBF 3 "register_operand" "x")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                    BF_MLA)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

;; bfcvtn
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
        (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
                            UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
        (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
                      (match_operand:V4SF 2 "register_operand" "w")]
                      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
        (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
                    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
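;; This works because BFmode is the high half of the corresponding
;; IEEE SFmode bit pattern, so shifting it left by 16 reconstructs the
;; single-precision value exactly.  For example, BF16 1.0 is 0x3f80,
;; and 0x3f80 << 16 == 0x3f800000, which is 1.0f.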
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)
