;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; ---- Note on FFR handling
;;
;; == Moves
;; ---- Moves of single vectors
;; ---- Moves of multiple vectors
;; ---- Moves of predicates
;; ---- Moves relating to the FFR
;;
;; == Loads
;; ---- Normal contiguous loads
;; ---- Extending contiguous loads
;; ---- First-faulting contiguous loads
;; ---- First-faulting extending contiguous loads
;; ---- Non-temporal contiguous loads
;; ---- Normal gather loads
;; ---- Extending gather loads
;; ---- First-faulting gather loads
;; ---- First-faulting extending gather loads
;;
;; == Prefetches
;; ---- Contiguous prefetches
;; ---- Gather prefetches
;;
;; == Stores
;; ---- Normal contiguous stores
;; ---- Truncating contiguous stores
;; ---- Non-temporal contiguous stores
;; ---- Normal scatter stores
;; ---- Truncating scatter stores
;;
;; == Vector creation
;; ---- [INT,FP] Duplicate element
;; ---- [INT,FP] Initialize from individual elements
;; ---- [INT] Linear series
;; ---- [PRED] Duplicate element
;;
;; == Vector decomposition
;; ---- [INT,FP] Extract index
;; ---- [INT,FP] Extract active element
;; ---- [PRED] Extract index
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Sign and zero extension
;; ---- [INT] Truncation
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; ---- [FP] Square root
;; ---- [FP] Reciprocal square root
;; ---- [PRED] Inverse
;;
;; == Binary arithmetic
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; ---- [INT] Addition
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
;; ---- [INT] Saturating addition and subtraction
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts (rounding towards -Inf)
;; ---- [INT] Shifts (rounding towards 0)
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; ---- [FP] Complex addition
;; ---- [FP] Subtraction
;; ---- [FP] Absolute difference
;; ---- [FP] Multiplication
;; ---- [FP] Division
;; ---- [FP] Binary logical operations
;; ---- [FP] Sign copying
;; ---- [FP] Maximum and minimum
;; ---- [PRED] Binary logical operations
;; ---- [PRED] Binary logical operations (inverted second input)
;; ---- [PRED] Binary logical operations (inverted result)
;;
;; == Ternary arithmetic
;; ---- [INT] MLA and MAD
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
;; ---- [INT] Matrix multiply-accumulate
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; ---- [INT,FP] Compare and select
;; ---- [INT] Comparisons
;; ---- [INT] While tests
;; ---- [FP] Direct comparisons
;; ---- [FP] Absolute comparisons
;; ---- [PRED] Select
;; ---- [PRED] Test bits
;;
;; == Reductions
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Special-purpose unary permutes
;; ---- [INT,FP] Special-purpose binary permutes
;; ---- [PRED] Special-purpose unary permutes
;; ---- [PRED] Special-purpose binary permutes
;;
;; == Conversions
;; ---- [INT<-INT] Packs
;; ---- [INT<-INT] Unpacks
;; ---- [INT<-FP] Conversions
;; ---- [INT<-FP] Packs
;; ---- [INT<-FP] Unpacks
;; ---- [FP<-INT] Conversions
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
;; == Vector partitioning
;; ---- [PRED] Unary partitioning
;; ---- [PRED] Binary partitioning
;; ---- [PRED] Scalarization
;;
;; == Counting elements
;; ---- [INT] Count elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; ---- [INT] Count elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; ---- [INT] Decrement by the number of elements in a predicate (vector)

;; =========================================================================
;; == General notes
;; =========================================================================
;;
;; -------------------------------------------------------------------------
;; ---- Note on the handling of big-endian SVE
;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
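;;
;; For example, consider a big-endian target with a 128-bit vector
;; length and a vector of eight 16-bit elements e0...e7, where e0 is
;; at the lowest address (an illustrative sketch only):
;;
;; - LDR would place the first byte of e0 in the most significant
;;   byte of the register, so e0 would occupy the most significant
;;   halfword.
;;
;; - LD1H instead places e0 in architectural lane 0, at the least
;;   significant end of the register, with the bytes of each element
;;   in their natural order.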
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PTEST
;; -------------------------------------------------------------------------
;;
;; SVE provides a PTEST instruction for testing the active lanes of a
;; predicate and setting the flags based on the result.  The associated
;; condition code tests are:
;;
;; - any   (= ne): at least one active bit is set
;; - none  (= eq): all active bits are clear (*)
;; - first (= mi): the first active bit is set
;; - nfrst (= pl): the first active bit is clear (*)
;; - last  (= cc): the last active bit is set
;; - nlast (= cs): the last active bit is clear (*)
;;
;; where the conditions marked (*) are also true when there are no active
;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
;; of a PTEST use the condition code mode CC_NZC.
;;
;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
;; This means that for other predicate modes, we need a governing predicate
;; in which all bits are defined.
;;
;; For example, most predicated .H operations ignore the odd bits of the
;; governing predicate, so that an active lane is represented by the
;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
;; any value.  To test a .H predicate, we instead need "10" and "00"
;; respectively, so that the condition only tests the even bits of the
;; predicate.
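;;
;; For instance, a .H predicate in which the first lane is active and
;; the second lane is inactive could be "1x0x..." for most .H
;; operations, but must be "1000..." for a PTEST, since PTEST would
;; otherwise also test the undefined "x" bits.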
;;
;; Several instructions set the flags as a side-effect, in the same way
;; that a separate PTEST would.  It's important for code quality that we
;; use these flags results as often as possible, particularly in the case
;; of WHILE* and RDFFR.
;;
;; Also, some of the instructions that set the flags are unpredicated
;; and instead implicitly test all .B, .H, .S or .D elements, as though
;; they were predicated on a PTRUE of that size.  For example, a .S
;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
;; would.
;;
;; We therefore need to represent PTEST operations in a way that
;; makes it easy to combine them with both predicated and unpredicated
;; operations, while using a VNx16BI governing predicate for all
;; predicate modes.  We do this using:
;;
;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
;;
;; where:
;;
;; - GP is the real VNx16BI governing predicate
;;
;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
;;   GP to CAST_GP are guaranteed to be clear in GP.
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
;;   SVE_MAYBE_NOT_PTRUE otherwise.
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
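;;
;; For example, a PTEST of a VNx8BI (.H) predicate OP whose governing
;; predicate is known to be all-true might look like this (a schematic
;; sketch, following the conventions above):
;;
;;   (unspec:CC_NZC [gp
;;                   (subreg:VNx8BI gp 0)
;;                   (const_int SVE_KNOWN_PTRUE)
;;                   op]
;;                  UNSPEC_PTEST)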
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PRED_Z
;; -------------------------------------------------------------------------
;;
;; SVE integer comparisons are predicated and return zero for inactive
;; lanes.  Sometimes we use them with predicates that are all-true and
;; sometimes we use them with general predicates.
;;
;; The integer comparisons also set the flags and so build in the effect
;; of a PTEST.  We therefore want to be able to combine integer comparison
;; patterns with PTESTs of the result.  One difficulty with doing this is
;; that (as noted above) the PTEST is always a .B operation and so can place
;; stronger requirements on the governing predicate than the comparison does.
;;
;; For example, when applying a separate PTEST to the result of a full-vector
;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
;; .B PTRUE.  In contrast, the comparison might be predicated on either
;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
;; bits don't matter for .H operations.
;;
;; We therefore can't rely on a full-vector comparison using the same
;; predicate register as a following PTEST.  We instead need to remember
;; whether a comparison is known to be a full-vector comparison and use
;; this information in addition to a check for equal predicate registers.
;; At the same time, it's useful to have a common representation for all
;; integer comparisons, so that they can be handled by a single set of
;; patterns.
;;
;; We therefore take a similar approach to UNSPEC_PTEST above and use:
;;
;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
;;
;; where:
;;
;; - GP is the governing predicate, of mode <M:VPRED>
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
;;   otherwise
;;
;; - CODE is the comparison code
;;
;; - OP0 and OP1 are the values being compared, of mode M
;;
;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
;;
;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
;; Many SVE integer operations are predicated.  We can generate them
;; from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;     We can make extra lanes active without changing the behavior
;;     (although for code-quality reasons we should avoid doing so
;;     needlessly).
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; For (1) and (2) we need a way of attaching the predicate to a normal
;; unpredicated integer operation.  We do this using:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
;;
;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
;; it always is for (1), but might not be for (2).
;;
;; The unspec as a whole has the same value as (code:M ...) when PRED is
;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
;; but as noted above, we should only do so if there's a specific benefit.
;;
;; (The "_X" in the unspec is named after the ACLE functions in (2).)
;;
;; For (3) and (4) we can simply use the SVE port's normal representation
;; of a predicate-based select:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
;;
;; where INACTIVE specifies the values of inactive lanes.
;;
;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
;; than inserting the integer operation directly.  This is mostly useful
;; if we want the combine pass to merge an integer operation with an explicit
;; vcond_mask (in other words, with a following SEL instruction).  However,
;; it's generally better to merge such operations at the gimple level
;; using (3).
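;;
;; For example, expanding an unpredicated VNx4SI addition via (1) gives,
;; schematically:
;;
;;   (unspec:VNx4SI [ptrue (plus:VNx4SI op0 op1)] UNSPEC_PRED_X)
;;
;; where PTRUE is an all-true VNx4BI predicate.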
;;
;; -------------------------------------------------------------------------
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; -------------------------------------------------------------------------
;;
;; Most SVE floating-point operations are predicated.  We can generate
;; them from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;
;;     The instruction must have the same exception behavior as the
;;     function call unless things like command-line flags specifically
;;     allow otherwise.  For example, with -ffast-math, it is OK to
;;     raise exceptions for inactive lanes, but normally it isn't.
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; So:
;;
;; - In (1), the predicate is known to be all true and the pattern can use
;;   unpredicated operations where available.
;;
;; - In (2), the predicate might or might not be all true.  The pattern can
;;   use unpredicated instructions if the predicate is all-true or if things
;;   like command-line flags allow exceptions for inactive lanes.
;;
;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
;;   might be inactive and inactive lanes of the result must have specific
;;   values.  There is no scope for using unpredicated instructions (and no
;;   reason to want to), so the question about command-line flags doesn't
;;   arise.
;;
;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
;; in combination with a separate predicate operand, e.g.
;;
;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
;;	      (sqrt:SVE_FULL_F
;;		(match_operand:SVE_FULL_F 2 "register_operand" "w"))]
;;	     ....)
;;
;; because (sqrt ...) can raise an exception for any lane, including
;; inactive ones.  We therefore need to use an unspec instead.
;;
;; Also, (2) requires some way of distinguishing the case in which the
;; predicate might have inactive lanes and cannot be changed from the
;; case in which the predicate has no inactive lanes or can be changed.
;; This information is also useful when matching combined FP patterns
;; in which the predicates might not be equal.
;;
;; We therefore model FP operations as an unspec of the form:
;;
;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
;;
;; where:
;;
;; - PRED is the governing predicate.
;;
;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
;;
;; - OP0 OP1 ... are the normal input operands to the operation.
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
;;
;; For (3) and (4), we combine these operations with an UNSPEC_SEL
;; that selects between the result of the FP operation and the "else"
;; value.  (This else value is a merge input for _m ACLE functions
;; and zero for _z ACLE functions.)  The outer pattern then has the form:
;;
;;   (unspec [pred fp_operation else_value] UNSPEC_SEL)
;;
;; This means that the patterns for (3) and (4) have two predicates:
;; one for the FP operation itself and one for the UNSPEC_SEL.
;; This pattern is equivalent to the result of combining an instance
;; of (1) or (2) with a separate vcond instruction, so these patterns
;; are useful as combine targets too.
;;
;; However, in the combine case, the instructions that we want to
;; combine might use different predicates.  Then:
;;
;; - Some of the active lanes of the FP operation might be discarded
;;   by the UNSPEC_SEL.  It's OK to drop the FP operation on those lanes,
;;   even for SVE_STRICT_GP, since the operations on those lanes are
;;   effectively dead code.
;;
;; - Some of the inactive lanes of the FP operation might be selected
;;   by the UNSPEC_SEL, giving unspecified values for those lanes.
;;   SVE_RELAXED_GP lets us extend the FP operation to cover these
;;   extra lanes, but SVE_STRICT_GP does not.
;;
;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
;; This typically leads to patterns like:
;;
;;    (unspec [(match_operand 1 "register_operand" "Upl")
;;             (unspec [(match_operand N)
;;                      (const_int SVE_RELAXED_GP)
;;                      ...]
;;                     UNSPEC_COND_<MNEMONIC>)
;;             ...])
;;
;; where operand N is allowed to be anything.  These instructions then
;; have rewrite rules to replace operand N with operand 1, which gives the
;; instructions a canonical form and means that the original operand N is
;; not kept live unnecessarily.
;;
;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
;; a subset of the FP operation predicate.  This case isn't interesting
;; for FP operations that have an all-true predicate, since such operations
;; use SVE_RELAXED_GP instead.  And it is not possible for instruction
;; conditions to track the subset relationship for arbitrary registers.
;; So in practice, the only useful case for SVE_STRICT_GP is the one
;; in which the predicates match:
;;
;;    (unspec [(match_operand 1 "register_operand" "Upl")
;;             (unspec [(match_dup 1)
;;                      (const_int SVE_STRICT_GP)
;;                      ...]
;;                     UNSPEC_COND_<MNEMONIC>)
;;             ...])
;;
;; This pattern would also be correct for SVE_RELAXED_GP, but it would
;; be redundant with the one above.  However, if the combine pattern
;; has multiple FP operations, using a match_operand allows combinations
;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
;; that the predicates are the same:
;;
;;    (unspec [(match_operand 1 "register_operand" "Upl")
;;             (...
;;                (unspec [(match_dup 1)
;;                         (match_operand:SI N "aarch64_sve_gp_strictness")
;;                         ...]
;;                        UNSPEC_COND_<MNEMONIC1>)
;;                (unspec [(match_dup 1)
;;                         (match_operand:SI M "aarch64_sve_gp_strictness")
;;                         ...]
;;                        UNSPEC_COND_<MNEMONIC2>) ...)
;;             ...])
;;
;; The fully-relaxed version of this pattern is:
;;
;;    (unspec [(match_operand 1 "register_operand" "Upl")
;;             (...
;;                (unspec [(match_operand N)
;;                         (const_int SVE_RELAXED_GP)
;;                         ...]
;;                        UNSPEC_COND_<MNEMONIC1>)
;;                (unspec [(match_operand M)
;;                         (const_int SVE_RELAXED_GP)
;;                         ...]
;;                        UNSPEC_COND_<MNEMONIC2>) ...)
;;             ...])
;;
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
;;
;; Logically we want to divide FFR-related instructions into regions
;; that contain exactly one of:
;;
;; - a single write to the FFR
;; - any number of reads from the FFR (but only one read is likely)
;; - any number of LDFF1 and LDNF1 instructions
;;
;; However, LDFF1 and LDNF1 instructions should otherwise behave like
;; normal loads as far as possible.  This means that they should be
;; schedulable within a region in the same way that LD1 would be,
;; and they should be deleted as dead if the result is unused.  The loads
;; should therefore not write to the FFR, since that would both serialize
;; the loads with respect to each other and keep the loads live for any
;; later RDFFR.
;;
;; We get around this by using a fake "FFR token" (FFRT) to help describe
;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
;; while using the FFRT keeps the instruction within its region.
;; Specifically:
;;
;; - Writes start a new FFRT region as well as setting the FFR:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;
;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
;;   loads stay within the same FFRT region:
;;
;;       L1: load data while using the FFRT
;;
;;   In addition, any FFRT region that includes a load also has at least one
;;   instance of:
;;
;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
;;
;;   to make it clear that the region both reads from and writes to the FFR.
;;
;; - Reads do the following:
;;
;;       R1: FFRT = FFR               [type == no_insn]
;;       R2: read from the FFRT
;;       R3: FFRT = update(FFRT)      [type == no_insn]
;;
;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
;;   cannot move backwards across R3.
;;
;; This way, writes are only kept alive by later loads or reads,
;; and write/read pairs fold normally.  For two consecutive reads,
;; the first R3 is made dead by the second R1, which in turn becomes
;; redundant with the first R1.  We then have:
;;
;;     first R1: FFRT = FFR
;;     first read from the FFRT
;;     second read from the FFRT
;;     second R3: FFRT = update(FFRT)
;;
;; i.e. the two FFRT regions collapse into a single one with two
;; independent reads.
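;;
;; For example, a single LDFF1 preceded by a SETFFR and followed by a
;; read of the FFR gives the following schematic sequence, using the
;; labels above:
;;
;;     W1: parallel (FFRT = <new value>, FFR = <all true>)
;;     L1: load data while using the FFRT
;;     L2: FFR = update(FFR, FFRT)
;;     R1: FFRT = FFR
;;     R2: read from the FFRT
;;     R3: FFRT = update(FFRT)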
;;
;; The model still prevents some valid optimizations though.  For example,
;; if all loads in an FFRT region are deleted as dead, nothing would remove
;; the L2 instructions.

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Moves of single vectors
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV  (including aliases)
;; - LD1B (contiguous form)
;; - LD1D (    "    "     )
;; - LD1H (    "    "     )
;; - LD1W (    "    "     )
;; - LDR
;; - ST1B (contiguous form)
;; - ST1D (    "    "     )
;; - ST1H (    "    "     )
;; - ST1W (    "    "     )
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
	&& can_create_pseudo_p ())
      {
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
	&& aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
;; little-endian ordering is acceptable.  Only allow memory operations during
;; and after RA; before RA we want the predicated load and store patterns to
;; be used instead.
(define_insn "*aarch64_sve_mov<mode>_ldr_str"
  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
	(match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
	   && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable.  Memory
;; accesses require secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
	(match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE
   && <MODE>mode != VNx16QImode
   && (BYTES_BIG_ENDIAN
       || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle memory reloads for modes that can't use LDR and STR.  We use
;; byte PTRUE for all modes to try to encourage reuse.  This pattern
;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
(define_expand "aarch64_sve_reload_mem"
  [(parallel
     [(set (match_operand 0)
	   (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)

;; A predicated move in which the predicate is known to be all-true.
;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   #
   ld1<Vesize>\t%0.<Vctype>, %1/z, %2
   st1<Vesize>\t%2.<Vctype>, %1, %0"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 2 "aarch64_any_register_operand" "w")]
	  UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)

;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
;; This is equivalent to a subreg on little-endian targets but not for
;; big-endian; see the comment at the head of the file for details.
(define_expand "@aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
	(unspec:SVE_ALL
	  [(match_operand 1 "aarch64_any_register_operand")]
	  UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
      {
	emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
	DONE;
      }
  }
)

;; A pattern for handling type punning on big-endian targets.  We use a
;; special predicate for operand 1 to reduce the number of patterns.
(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand 1 "aarch64_any_register_operand" "w")]
	  UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves of multiple vectors
;; -------------------------------------------------------------------------
;; All patterns in this section are synthetic and split to real
;; instructions after reload.
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
	&& BYTES_BIG_ENDIAN)
      {
	gcc_assert (can_create_pseudo_p ());
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
	{
	  rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
					     i * BYTES_PER_SVE_VECTOR);
	  rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
					    i * BYTES_PER_SVE_VECTOR);
	  emit_insn (gen_rtx_SET (subdest, subsrc));
	}
    DONE;
  }
)

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
	rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
					   <MODE>mode,
					   i * BYTES_PER_SVE_VECTOR);
	rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
					  <MODE>mode,
					  i * BYTES_PER_SVE_VECTOR);
	aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)

;; -------------------------------------------------------------------------
;; ---- Moves of predicates
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV
;; - LDR
;; - PFALSE
;; - PTRUE
;; - PTRUES
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
	(match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
	(match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Match PTRUES Pn.B when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	     [(unspec:VNx16BI
		[(match_operand:SI 4 "const_int_operand")
		 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
		UNSPEC_PTRUE)])]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)

;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (subreg:PRED_HSD
	     (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	       [(unspec:VNx16BI
		  [(match_operand:SI 4 "const_int_operand")
		   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
		  UNSPEC_PTRUE)]) 0)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Match PTRUES Pn.B when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	     [(unspec:VNx16BI
		[(match_operand:SI 4 "const_int_operand")
		 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
		UNSPEC_PTRUE)])]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)

;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (subreg:PRED_HSD
	     (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	       [(unspec:VNx16BI
		  [(match_operand:SI 4 "const_int_operand")
		   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
		  UNSPEC_PTRUE)]) 0)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves relating to the FFR
;; -------------------------------------------------------------------------
;; Includes:
;; - RDFFR
;; - RDFFRS
;; - SETFFR
;; - WRFFR
;; -------------------------------------------------------------------------

;; [W1 in the block comment above about FFR handling]
;;
;; Write to the FFR and start a new FFRT scheduling region.
(define_insn "aarch64_wrffr"
  [(set (reg:VNx16BI FFR_REGNUM)
	(match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
   (set (reg:VNx16BI FFRT_REGNUM)
	(unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
  "TARGET_SVE"
  "@
   setffr
   wrffr\t%0.b"
)

;; [L2 in the block comment above about FFR handling]
;;
;; Introduce a read from and write to the FFR in the current FFRT region,
;; so that the FFR value is live on entry to the region and so that the FFR
;; value visibly changes within the region.  This is used (possibly multiple
;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
(define_insn "aarch64_update_ffr_for_load"
  [(set (reg:VNx16BI FFR_REGNUM)
	(unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
			 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R1 in the block comment above about FFR handling]
;;
;; Notionally copy the FFR to the FFRT, so that the current FFR value
;; can be read from there by the RDFFR instructions below.  This acts
;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
;; creates a natural dependency with earlier writes.
(define_insn "aarch64_copy_ffr_to_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
	(reg:VNx16BI FFR_REGNUM))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R2 in the block comment above about FFR handling]
;;
;; Read the FFR via the FFRT.
(define_insn "aarch64_rdffr"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffr\t%0.b"
)

;; Likewise with zero predication.
(define_insn "aarch64_rdffr_z"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(and:VNx16BI
	  (reg:VNx16BI FFRT_REGNUM)
	  (match_operand:VNx16BI 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "rdffr\t%0.b, %1/z"
)

;; Read the FFR to test for a fault, without using the predicate result.
(define_insn "*aarch64_rdffr_z_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (and:VNx16BI
	     (reg:VNx16BI FFRT_REGNUM)
	     (match_dup 1))]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Read the FFR with zero predication and test the result.
(define_insn "*aarch64_rdffr_z_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (and:VNx16BI
	     (reg:VNx16BI FFRT_REGNUM)
	     (match_dup 1))]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(and:VNx16BI
	  (reg:VNx16BI FFRT_REGNUM)
	  (match_dup 1)))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; [R3 in the block comment above about FFR handling]
;;
;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
(define_insn "aarch64_update_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
	(unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; =========================================================================
;; == Loads
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LD2B
;; - LD2D
;; - LD2H
;; - LD2W
;; - LD3B
;; - LD3D
;; - LD3H
;; - LD3W
;; - LD4B
;; - LD4D
;; - LD4H
;; - LD4W
;; -------------------------------------------------------------------------

;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_ALL 1 "memory_operand" "m")]
	  UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)
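
;; For example, for VNx4SI the pattern above emits an instruction of the
;; form (an illustrative sketch; the registers and addressing mode depend
;; on the operands):
;;
;;    ld1w    z0.s, p0/z, [x0]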

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "memory_operand")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
1287;; -------------------------------------------------------------------------
1288
1289;; Predicated load and extend, with 8 elements per 128-bit block.
1290(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1291  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1292	(unspec:SVE_HSDI
1293	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1294	   (ANY_EXTEND:SVE_HSDI
1295	     (unspec:SVE_PARTIAL_I
1296	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1297		(match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1298	       UNSPEC_LD1_SVE))]
1299	  UNSPEC_PRED_X))]
1300  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1301  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1302  "&& !CONSTANT_P (operands[3])"
1303  {
1304    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1305  }
1306)
1307
1308;; -------------------------------------------------------------------------
1309;; ---- First-faulting contiguous loads
1310;; -------------------------------------------------------------------------
1311;; Includes contiguous forms of:
1312;; - LDFF1B
1313;; - LDFF1D
1314;; - LDFF1H
1315;; - LDFF1W
1316;; - LDNF1B
1317;; - LDNF1D
1318;; - LDNF1H
1319;; - LDNF1W
1320;; -------------------------------------------------------------------------
1321
1322;; Contiguous non-extending first-faulting or non-faulting loads.
1323(define_insn "@aarch64_ld<fn>f1<mode>"
1324  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1325	(unspec:SVE_FULL
1326	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
1327	   (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
	   (reg:VNx16BI FFRT_REGNUM)]
	  SVE_LDFF1_LDNF1))]
  "TARGET_SVE"
  "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; - LDNF1B
;; - LDNF1H
;; - LDNF1SB
;; - LDNF1SH
;; - LDNF1SW
;; - LDNF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting or non-faulting load and extend.
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
	   (ANY_EXTEND:SVE_HSDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
		(match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
		(reg:VNx16BI FFRT_REGNUM)]
	       SVE_LDFF1_LDNF1))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Predicated contiguous non-temporal load.
(define_insn "@aarch64_ldnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "memory_operand" "m")]
	  UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Normal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1D
;; - LD1W
;; -------------------------------------------------------------------------

;; Unpredicated gather loads.
(define_expand "gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_24 0 "register_operand")
	(unspec:SVE_24
	  [(match_dup 5)
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:<V_INT_CONTAINER> 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_4
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.s, %5/z, [%2.s]
   ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)
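
;; As an illustrative (non-normative) example, a loop such as
;;
;;   void f (float *dst, float *src, int *idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = src[idx[i]];
;;   }
;;
;; can be vectorized into the "sxtw %p4" alternative above, with the
;; index vector sign-extended and scaled by the element size:
;;
;;   ld1w	z0.s, p0/z, [x1, z1.s, sxtw 2]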

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%2.d]
   ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (ANY_EXTEND:VNx2DI
		(match_operand:VNx2SI 2 "register_operand" "w, w"))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)
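
;; The (and ...) above is the canonical RTL form of this zero-extension:
;; (zero_extend (truncate x)) simplifies to (and x 0xffffffff).  Since
;; AND is an unpredicated SVE operation, no UNSPEC_PRED_X wrapper is
;; needed, unlike in the sign-extending pattern above.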

;; -------------------------------------------------------------------------
;; ---- Extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
;; true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_4HSI
	  [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_4HSI
	     (unspec:SVE_4BHI
	       [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending gather loads for 64-bit elements.  The value of
;; operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_2HSDI
	  [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
		(match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 6)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (ANY_EXTEND2:VNx2DI
		     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 6)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
	(unspec:SVE_2HSDI
	  [(match_operand 7)
	   (ANY_EXTEND:SVE_2HSDI
	     (unspec:SVE_2BHSI
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(and:VNx2DI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
		(mem:BLK (scratch))]
	       UNSPEC_LD1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; -------------------------------------------------------------------------
;; ---- First-faulting gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1D
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting gather loads for 32-bit elements.  Operand
;; 3 is true for unsigned extension and false for signed extension.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:SVE_FULL_S
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1w\t%0.s, %5/z, [%2.s]
   ldff1w\t%0.s, %5/z, [%2.s, #%1]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)

;; Predicated first-faulting gather loads for 64-bit elements.  The value
;; of operand 3 doesn't matter in this case.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%2.d]
   ldff1d\t%0.d, %5/z, [%2.d, #%1]
   ldff1d\t%0.d, %5/z, [%1, %2.d]
   ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
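
;; As an illustrative (non-normative) example, an ACLE sequence such as
;; the following (intrinsic names abbreviated, using the overloaded
;; forms):
;;
;;   svsetffr ();
;;   svuint32_t data = svldff1_gather_index (pg, base, idx);
;;   svbool_t ok = svrdffr_z (pg);
;;
;; reads the FFR back to discover which elements loaded successfully;
;; the use of FFRT_REGNUM above models that dependency.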

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_ldff1_gather<mode>_uxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
	(unspec:SVE_FULL_D
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 1 "register_operand" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
	   (mem:BLK (scratch))
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated extending first-faulting gather loads for 32-bit elements.
;; Operand 3 is true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
  [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
	(unspec:VNx4_WIDE
	  [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:VNx4_WIDE
	     (unspec:VNx4_NARROW
	       [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending first-faulting gather loads for 64-bit elements.
;; The value of operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
	(unspec:VNx2_WIDE
	  [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
		(match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
	(unspec:VNx2_WIDE
	  [(match_operand 6)
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(unspec:VNx2DI
		  [(match_operand 7)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
	(unspec:VNx2_WIDE
	  [(match_operand 7)
	   (ANY_EXTEND:VNx2_WIDE
	     (unspec:VNx2_NARROW
	       [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
		(and:VNx2DI
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
		(mem:BLK (scratch))
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LDFF1_GATHER))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; =========================================================================
;; == Prefetches
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Contiguous prefetches
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
;; operation (as an svprfop), with operands 3 and 4 providing distilled
;; information.
(define_insn "@aarch64_sve_prefetch<mode>"
  [(prefetch (unspec:DI
	       [(match_operand:<VPRED> 0 "register_operand" "Upl")
		(match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
		(match_operand:DI 2 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH)
	     (match_operand:DI 3 "const_int_operand")
	     (match_operand:DI 4 "const_int_operand"))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
    return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
  }
)
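
;; For example (illustrative only), the ACLE call
;;
;;   svprfw (pg, base, SV_PLDL1KEEP);
;;
;; matches this pattern, with operand 2 carrying the svprfop value and
;; operands 3 and 4 carrying the rw and locality values that the
;; middle-end prefetch interface expects.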

;; -------------------------------------------------------------------------
;; ---- Gather prefetches
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
;; are:
;; 0: the governing predicate
;; 1: the scalar component of the address
;; 2: the vector component of the address
;; 3: 1 for zero extension, 0 for sign extension
;; 4: the scale multiplier
;; 5: a vector zero that identifies the mode of data being accessed
;; 6: the prefetch operator (an svprfop)
;; 7: the normal RTL prefetch rw flag
;; 8: the normal RTL prefetch locality value
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
  [(prefetch (unspec:DI
	       [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
		(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
		(match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
      "prfb", "%0, [%1, %2.s, sxtw]",
      "prfb", "%0, [%1, %2.s, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
		(match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
		(match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
      "prfb", "%0, [%1, %2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "register_operand" "rk, rk")
		(unspec:VNx2DI_ONLY
		  [(match_operand 9)
		   (sign_extend:VNx2DI
		     (truncate:VNx2SI
		       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
		  UNSPEC_PRED_X)
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, sxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
  "&& !rtx_equal_p (operands[0], operands[9])"
  {
    operands[9] = copy_rtx (operands[0]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
  [(prefetch (unspec:DI
	       [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
		(match_operand:DI 1 "register_operand" "rk, rk")
		(and:VNx2DI_ONLY
		  (match_operand:VNx2DI 2 "register_operand" "w, w")
		  (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
		(match_operand:DI 3 "const_int_operand")
		(match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
		(match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
		(match_operand:DI 6 "const_int_operand")]
	       UNSPEC_SVE_PREFETCH_GATHER)
	     (match_operand:DI 7 "const_int_operand")
	     (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; =========================================================================
;; == Stores
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous stores
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - ST1B
;; - ST1D
;; - ST1H
;; - ST1W
;; - ST2B
;; - ST2D
;; - ST2H
;; - ST2W
;; - ST3B
;; - ST3D
;; - ST3H
;; - ST3W
;; - ST4B
;; - ST4D
;; - ST4H
;; - ST4W
;; -------------------------------------------------------------------------

;; Predicated ST1.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_ALL 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vctype>, %2, %0"
)
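
;; As an illustrative (non-normative) example, an if-converted loop:
;;
;;   for (int i = 0; i < n; ++i)
;;     if (c[i]) x[i] = y[i];
;;
;; stores through this pattern under the predicate computed from c[i]:
;;
;;   st1w	z0.s, p0, [x0, x2, lsl 2]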

;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "register_operand")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)
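
;; For example (illustrative only), storing interleaved pairs:
;;
;;   for (int i = 0; i < n; ++i)
;;     { out[2 * i] = a[i]; out[2 * i + 1] = b[i]; }
;;
;; goes through vec_store_lanes and emits something like:
;;
;;   st2w	{z0.s - z1.s}, p0, [x0]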

;; -------------------------------------------------------------------------
;; ---- Truncating contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncate and store, with 8 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
  [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
	(unspec:VNx8_NARROW
	  [(match_operand:VNx8BI 2 "register_operand" "Upl")
	   (truncate:VNx8_NARROW
	     (match_operand:VNx8_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
)

;; Predicated truncate and store, with 4 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
	(unspec:VNx4_NARROW
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (truncate:VNx4_NARROW
	     (match_operand:VNx4_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
)
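
;; Illustrative (non-normative) example: a narrowing store such as
;;
;;   for (int i = 0; i < n; ++i)
;;     dst[i] = (unsigned char) src[i];	/* src is int *  */
;;
;; uses the 4-elements-per-128-bit pattern above and emits:
;;
;;   st1b	z0.s, p0, [x0, x2]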

;; Predicated truncate and store, with 2 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
	(unspec:VNx2_NARROW
	  [(match_operand:VNx2BI 2 "register_operand" "Upl")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 1 "register_operand" "w"))
	   (match_dup 0)]
	  UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

(define_insn "@aarch64_stnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_dup 0)]
	  UNSPEC_STNT1_SVE))]
  "TARGET_SVE"
  "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Normal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1D
;; - ST1W
;; -------------------------------------------------------------------------

;; Unpredicated scatter stores.
(define_expand "scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 5)
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:<V_INT_CONTAINER> 1 "register_operand")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_24 4 "register_operand")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
	   (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.s, %5, [%1.s]
   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
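
;; As an illustrative (non-normative) example:
;;
;;   for (int i = 0; i < n; ++i)
;;     dst[idx[i]] = src[i];
;;
;; becomes a scatter store such as:
;;
;;   st1w	z0.s, p0, [x0, z1.s, sxtw 2]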

;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
	   (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%1.d]
   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<Vesize>\t%4.d, %5, [%0, %1.d]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (ANY_EXTEND:VNx2DI
		(match_operand:VNx2SI 1 "register_operand" "w, w"))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 1 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
	   (match_operand:SVE_2 4 "register_operand" "w, w")]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; -------------------------------------------------------------------------
;; ---- Truncating scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
;; true for unsigned extension and false for signed extension.
(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
	   (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
	   (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
	   (truncate:VNx4_NARROW
	     (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated truncating scatter stores for 64-bit elements.  The value of
;; operand 2 doesn't matter in this case.
(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
	   (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "register_operand" "rk, rk")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !rtx_equal_p (operands[5], operands[6])"
  {
    operands[6] = copy_rtx (operands[5]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
	   (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
	   (and:VNx2DI
	     (match_operand:VNx2DI 1 "register_operand" "w, w")
	     (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
	   (match_operand:DI 2 "const_int_operand")
	   (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
	   (truncate:VNx2_NARROW
	     (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
	  UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; =========================================================================
;; == Vector creation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Duplicate element
;; -------------------------------------------------------------------------
;; Includes:
;; - DUP
;; - MOV
;; - LD1RB
;; - LD1RD
;; - LD1RH
;; - LD1RW
;; - LD1ROB (F64MM)
;; - LD1ROD (F64MM)
;; - LD1ROH (F64MM)
;; - LD1ROW (F64MM)
;; - LD1RQB
;; - LD1RQD
;; - LD1RQH
;; - LD1RQW
;; -------------------------------------------------------------------------

(define_expand "vec_duplicate<mode>"
  [(parallel
    [(set (match_operand:SVE_ALL 0 "register_operand")
	  (vec_duplicate:SVE_ALL
	    (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
     (clobber (scratch:VNx16BI))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
	rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
	emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
				       CONST0_RTX (<MODE>mode)));
	DONE;
      }
  }
)
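
;; Illustrative (non-normative) examples of the two paths: broadcasting
;; a scalar register becomes "mov z0.s, w1", while broadcasting directly
;; from memory goes through sve_ld1r<mode> below and becomes
;; "ld1rw z0.s, p0/z, [x1]".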

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
	(vec_duplicate:SVE_ALL
	  (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (VNx16BImode);
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
    rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
    emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
				   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (match_operand:<V128> 1 "register_operand" "w")))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
;; The SVE register layout puts memory lane N into (architectural)
;; register lane N, whereas the Advanced SIMD layout puts the memory
;; lsb into the register lsb.  We therefore have to describe this in rtl
;; terms as a reverse of the V128 vector followed by a duplicate.
(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (vec_select:<V128>
	    (match_operand:<V128> 1 "register_operand" "w")
	    (match_operand 2 "descending_int_parallel"))))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN
   && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
		GET_MODE_NUNITS (<V128>mode) - 1)"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (vec_duplicate:SVE_ALL
	     (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
	   (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory under predicate control and duplicate to
;; fill a vector.
(define_insn "@aarch64_sve_ld1rq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
	  UNSPEC_LD1RQ))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)
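
;; For example (illustrative only), the ACLE call svld1rq (pg, ptr) on
;; float data maps to:
;;
;;   ld1rqw	z0.s, p0/z, [x0]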

(define_insn "@aarch64_sve_ld1ro<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
			       "UO<Vesize>")]
	  UNSPEC_LD1RO))]
  "TARGET_SVE_F64MM"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Initialize from individual elements
;; -------------------------------------------------------------------------
;; Includes:
;; - INSR
;; -------------------------------------------------------------------------

(define_expand "vec_init<mode><Vel>"
  [(match_operand:SVE_FULL 0 "register_operand")
    (match_operand 1 "")]
  "TARGET_SVE"
  {
    aarch64_sve_expand_vector_init (operands[0], operands[1]);
    DONE;
  }
)

;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
	   (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
	  UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Linear series
;; -------------------------------------------------------------------------
;; Includes:
;; - INDEX
;; -------------------------------------------------------------------------

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
	(vec_series:SVE_I
	  (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
	  (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vctype>, #%1, %<vccore>2
   index\t%0.<Vctype>, %<vccore>1, #%2
   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
)
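
;; Illustrative (non-normative) example: the strided series
;; {0, n, 2*n, 3*n, ...} with n in a register uses the first
;; alternative and emits:
;;
;;   index	z0.s, #0, w1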

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
	(plus:SVE_I
	  (vec_duplicate:SVE_I
	    (match_operand:<VEL> 1 "register_operand" "r"))
	  (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vctype>, %<vccore>1, #%2";
  }
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Duplicate element
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)
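
;; Illustrative (non-normative) expansion for a broadcast of w1:
;;
;;   lsl	x2, x1, 63	// move bit 0 of the input to bit 63
;;   whilelo	p0.s, xzr, x2	// all-true if the bit was set (0 < 2^63
;;				// for every lane index), else all-false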
2700
2701;; =========================================================================
2702;; == Vector decomposition
2703;; =========================================================================
2704
2705;; -------------------------------------------------------------------------
2706;; ---- [INT,FP] Extract index
2707;; -------------------------------------------------------------------------
2708;; Includes:
2709;; - DUP    (Advanced SIMD)
2710;; - DUP    (SVE)
2711;; - EXT    (SVE)
2712;; - ST1    (Advanced SIMD)
2713;; - UMOV   (Advanced SIMD)
2714;; -------------------------------------------------------------------------
2715
2716(define_expand "vec_extract<mode><Vel>"
2717  [(set (match_operand:<VEL> 0 "register_operand")
2718	(vec_select:<VEL>
2719	  (match_operand:SVE_FULL 1 "register_operand")
2720	  (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2721  "TARGET_SVE"
2722  {
2723    poly_int64 val;
2724    if (poly_int_rtx_p (operands[2], &val)
2725	&& known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2726      {
2727	/* The last element can be extracted with a LASTB and a false
2728	   predicate.  */
2729	rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2730	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2731	DONE;
2732      }
2733    if (!CONST_INT_P (operands[2]))
2734      {
2735	/* Create an index with operand[2] as the base and -1 as the step.
2736	   It will then be zero for the element we care about.  */
2737	rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2738	index = force_reg (<VEL_INT>mode, index);
2739	rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2740	emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2741
2742	/* Get a predicate that is true for only that element.  */
2743	rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2744	rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2745	rtx sel = gen_reg_rtx (<VPRED>mode);
2746	emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2747
2748	/* Select the element using LASTB.  */
2749	emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2750	DONE;
2751      }
2752  }
2753)

;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
	  (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
	case 1:
	  return "#";
	case 2:
	  return "st1\\t{%1.<Vetype>}[0], %0";
	default:
	  gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
	case 0:
	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
	case 1:
	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
	case 2:
	  return "st1\\t{%1.<Vetype>}[%2], %0";
	default:
	  gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
	(vec_select:<VEL>
	  (match_operand:SVE_FULL 1 "register_operand" "0, w")
	  (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return (which_alternative == 0
	    ? "ext\t%0.b, %0.b, %0.b, #%2"
	    : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
  }
  [(set_attr "movprfx" "*,yes")]
)
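
;; For example, with -msve-vector-bits=1024, extracting byte element 64
;; rotates it to the bottom of the vector (illustrative only):
;;
;;	ext	z0.b, z0.b, z0.b, #64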

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Extract active element
;; -------------------------------------------------------------------------
;; Includes:
;; - LASTA
;; - LASTB
;; -------------------------------------------------------------------------

;; Extract the last active element of operand 2 into operand 0 (LASTB),
;; or the element after the last active one (LASTA).  If no elements are
;; active, LASTB extracts the last element and LASTA the first.
(define_insn "@extract_<last_op>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
	(unspec:<VEL>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL 2 "register_operand" "w, w")]
	  LAST))]
  "TARGET_SVE"
  "@
   last<ab>\t%<vwcore>0, %1, %2.<Vetype>
   last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Extract index
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Handle extractions from a predicate by converting to an integer vector
;; and extracting from there.
(define_expand "vec_extract<vpred><Vel>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:<VPRED> 1 "register_operand")
   (match_operand:SI 2 "nonmemory_operand")
   ;; Dummy operand to which we can attach the iterator.
   (reg:SVE_FULL_I V0_REGNUM)]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
					     CONST1_RTX (<MODE>mode),
					     CONST0_RTX (<MODE>mode)));
    emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
    DONE;
  }
)
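
;; For example, extracting element 3 of a VNx4BI predicate p0 might
;; expand to something like this (illustrative only):
;;
;;	mov	z0.s, p0/z, #1
;;	umov	w0, v0.s[3]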

;; =========================================================================
;; == Unary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes:
;; - ABS
;; - CLS (= clrsb)
;; - CLZ
;; - CNT (= popcount)
;; - NEG
;; - NOT
;; -------------------------------------------------------------------------

;; Unpredicated integer unary arithmetic.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 2)
	   (SVE_INT_UNARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 1 "register_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Integer unary arithmetic predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_UNARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary arithmetic with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (SVE_INT_UNARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand"))
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated integer unary arithmetic, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_UNARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary arithmetic, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (SVE_INT_UNARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - RBIT
;; - REVB
;; - REVH
;; - REVW
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")]
	     SVE_INT_UNARY)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary operations with merging.
(define_insn "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
	     SVE_INT_UNARY)
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Sign and zero extension
;; -------------------------------------------------------------------------
;; Includes:
;; - SXTB
;; - SXTH
;; - SXTW
;; - UXTB
;; - UXTH
;; - UXTW
;; -------------------------------------------------------------------------

;; Unpredicated sign and zero extension from a narrower mode.
(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand")
	(unspec:SVE_HSDI
	  [(match_dup 2)
	   (ANY_EXTEND:SVE_HSDI
	     (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  {
    operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
  }
)

;; Predicated sign and zero extension from a narrower mode.
(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (ANY_EXTEND:SVE_HSDI
	     (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
   movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated truncate-and-sign-extend operations.
(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (sign_extend:SVE_FULL_HSDI
	     (truncate:SVE_PARTIAL_I
	       (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated truncate-and-sign-extend operations with merging.
(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (sign_extend:SVE_FULL_HSDI
	     (truncate:SVE_PARTIAL_I
	       (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated truncate-and-zero-extend operations, merging with the
;; first input.
;;
;; The canonical form of this operation is an AND of a constant rather
;; than (zero_extend (truncate ...)).
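;;
;; For example, an AND with the mask 0xff on .s elements corresponds to
;; UXTB, 0xffff to UXTH and 0xffffffff to UXTW; the %e3 output modifier
;; prints the element-size suffix implied by the mask (illustrative).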
(define_insn "*cond_uxt<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (and:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated truncate-and-zero-extend operations, merging with an
;; independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 4 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn "*cond_uxt<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (and:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Truncation
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Truncate to a partial SVE vector from either a full vector or a
;; wider partial vector.  This is a no-op, because we can just ignore
;; the unused upper bits of the source.
(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
  [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
	(truncate:SVE_PARTIAL_I
	  (match_operand:SVE_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1],
					    <SVE_PARTIAL_I:MODE>mode);
  }
)
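
;; For example, truncating VNx2DI to a partial VNx2SI leaves each 32-bit
;; element in the low bits of its 64-bit container, so the split above
;; degenerates to a move that register allocation can usually coalesce
;; away.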

;; -------------------------------------------------------------------------
;; ---- [INT] Logical inverse
;; -------------------------------------------------------------------------
;; Includes:
;; - CNOT
;; -------------------------------------------------------------------------

;; Predicated logical inverse.
(define_expand "@aarch64_pred_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(unspec:<VPRED>
	     [(match_operand:<VPRED> 1 "register_operand")
	      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	      (eq:<VPRED>
		(match_operand:SVE_FULL_I 3 "register_operand")
		(match_dup 4))]
	     UNSPEC_PRED_Z)
	   (match_dup 5)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONST0_RTX (<MODE>mode);
    operands[5] = CONST1_RTX (<MODE>mode);
  }
)

(define_insn "*cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(unspec:<VPRED>
	     [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	      (eq:<VPRED>
		(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		(match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
	     UNSPEC_PRED_Z)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated logical inverse with merging.
(define_expand "@cond_cnot<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(unspec:<VPRED>
		[(match_dup 4)
		 (const_int SVE_KNOWN_PTRUE)
		 (eq:<VPRED>
		   (match_operand:SVE_FULL_I 2 "register_operand")
		   (match_dup 5))]
		UNSPEC_PRED_Z)
	      (match_dup 6)
	      (match_dup 5)]
	     UNSPEC_SEL)
	   (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
    operands[5] = CONST0_RTX (<MODE>mode);
    operands[6] = CONST1_RTX (<MODE>mode);
  }
)

;; Predicated logical inverse, merging with the first input.
(define_insn_and_rewrite "*cond_cnot<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   ;; Logical inverse of operand 2 (as above).
	   (unspec:SVE_FULL_I
	     [(unspec:<VPRED>
		[(match_operand 5)
		 (const_int SVE_KNOWN_PTRUE)
		 (eq:<VPRED>
		   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		   (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
		UNSPEC_PRED_Z)
	      (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
	      (match_dup 3)]
	     UNSPEC_SEL)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated logical inverse, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 6 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_cnot<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   ;; Logical inverse of operand 2 (as above).
	   (unspec:SVE_FULL_I
	     [(unspec:<VPRED>
		[(match_operand 5)
		 (const_int SVE_KNOWN_PTRUE)
		 (eq:<VPRED>
		   (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
		   (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
		UNSPEC_PRED_Z)
	      (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
	      (match_dup 3)]
	     UNSPEC_SEL)
	   (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
  "@
   cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FEXPA
;; -------------------------------------------------------------------------

;; Unpredicated unary operations that take an integer and return a float.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
	  SVE_FP_UNARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FABS
;; - FNEG
;; - FRECPE
;; - FRECPX
;; - FRINTA
;; - FRINTI
;; - FRINTM
;; - FRINTN
;; - FRINTP
;; - FRINTX
;; - FRINTZ
;; - FRSQRTE
;; - FSQRT
;; -------------------------------------------------------------------------

;; Unpredicated floating-point unary operations.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
	  SVE_FP_UNARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
)

;; Unpredicated floating-point unary operations.
(define_expand "<optab><mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  SVE_COND_FP_UNARY_OPTAB))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point unary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	  SVE_COND_FP_UNARY))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point unary arithmetic with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated floating-point unary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 3)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	     SVE_COND_FP_UNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[3])"
  {
    operands[3] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	     SVE_COND_FP_UNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here.  Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FP_UNARY)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Square root
;; -------------------------------------------------------------------------

(define_expand "sqrt<mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  UNSPEC_COND_FSQRT))]
  "TARGET_SVE"
{
  if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
    DONE;
  operands[2] = aarch64_ptrue_reg (<VPRED>mode);
})
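
;; A note on the expansion above: when the approximation is allowed (for
;; example with -mlow-precision-sqrt), aarch64_emit_approx_sqrt emits an
;; FRSQRTE estimate refined by FRSQRTS steps instead of the predicated
;; FSQRT form.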

;; -------------------------------------------------------------------------
;; ---- [FP] Reciprocal square root
;; -------------------------------------------------------------------------

(define_expand "rsqrt<mode>2"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")]
	  UNSPEC_RSQRT))]
  "TARGET_SVE"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_expand "@aarch64_rsqrte<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")]
	  UNSPEC_RSQRTE))]
  "TARGET_SVE"
)

(define_expand "@aarch64_rsqrts<mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:SVE_FULL_SDF 1 "register_operand")
	   (match_operand:SVE_FULL_SDF 2 "register_operand")]
	  UNSPEC_RSQRTS))]
  "TARGET_SVE"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Inverse
;; -------------------------------------------------------------------------
;; Includes:
;; - NOT
;; -------------------------------------------------------------------------

;; Unpredicated predicate inverse.
(define_expand "one_cmpl<mode>2"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
	  (match_dup 2)))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<MODE>mode);
  }
)
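
;; For example, inverting p0.b might emit (illustrative only):
;;
;;	ptrue	p1.b
;;	not	p0.b, p1/z, p0.b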

;; Predicated predicate inverse.
(define_insn "*one_cmpl<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "not\t%0.b, %1/z, %2.b"
)

;; =========================================================================
;; == Binary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes:
;; - ADD    (merging form only)
;; - AND    (merging form only)
;; - ASR    (merging form only)
;; - EOR    (merging form only)
;; - LSL    (merging form only)
;; - LSR    (merging form only)
;; - MUL
;; - ORR    (merging form only)
;; - SMAX
;; - SMIN
;; - SQADD  (SVE2 merging form only)
;; - SQSUB  (SVE2 merging form only)
;; - SUB    (merging form only)
;; - UMAX
;; - UMIN
;; - UQADD  (SVE2 merging form only)
;; - UQSUB  (SVE2 merging form only)
;; -------------------------------------------------------------------------

;; Unpredicated integer binary operations that have an immediate form.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (SVE_INT_BINARY_IMM:SVE_FULL_I
	     (match_operand:SVE_FULL_I 1 "register_operand")
	     (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Integer binary operations that have an immediate form, predicated
;; with a PTRUE.  We don't actually need the predicate for the first
;; and third alternatives, but using Upa or X isn't likely to gain much
;; and would make the instruction seem less uniform to the register
;; allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (SVE_INT_BINARY_IMM:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   #
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0)
	(SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,yes,yes")]
)
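
;; For example, an addition of the constant 1 matches the first
;; alternative above and is then split after reload to the unpredicated
;; form handled by the post-RA pattern below, something like
;; (illustrative only):
;;
;;	add	z0.s, z0.s, #1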

;; Unpredicated binary operations with a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(SVE_INT_BINARY_IMM:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
	  (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
  "TARGET_SVE && reload_completed"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (SVE_INT_BINARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand")
	     (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated integer operations, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	     (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer operations, merging with the second input.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (SVE_INT_BINARY:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
	     (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
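    /* For the final alternative, operand 4 can be anything: first emit
       a SEL that chooses between operand 2 and operand 4 under operand 1,
       then do the arithmetic in-place on the result.  */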
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Addition
;; -------------------------------------------------------------------------
;; Includes:
;; - ADD
;; - DECB
;; - DECD
;; - DECH
;; - DECW
;; - INCB
;; - INCD
;; - INCH
;; - INCW
;; - SUB
;; -------------------------------------------------------------------------

(define_insn "add<mode>3"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
	(plus:SVE_I
	  (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
	  (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
  "TARGET_SVE"
  "@
   add\t%0.<Vetype>, %0.<Vetype>, #%D2
   sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
   movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
   add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes,*")]
)

;; Merging forms are handled through SVE_INT_BINARY.

;; -------------------------------------------------------------------------
;; ---- [INT] Subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - SUB
;; - SUBR
;; -------------------------------------------------------------------------

(define_insn "sub<mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
	(minus:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
	  (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
   subr\t%0.<Vetype>, %0.<Vetype>, #%D1
   movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Merging forms are handled through SVE_INT_BINARY.

;; -------------------------------------------------------------------------
;; ---- [INT] Take address
;; -------------------------------------------------------------------------
;; Includes:
;; - ADR
;; -------------------------------------------------------------------------

;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
;; but the svadrb intrinsics should preserve the user's choice.
(define_insn "@aarch64_adr<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
	   (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
)

;; Same, but with the offset being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_sxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(unspec:VNx2DI
	  [(match_operand:VNx2DI 1 "register_operand" "w")
	   (unspec:VNx2DI
	     [(match_operand 3)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand" "w")))]
	     UNSPEC_PRED_X)]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, sxtw]"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Same, but with the offset being zero-extended from the low 32 bits.
(define_insn "*aarch64_adr_uxtw_unspec"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(unspec:VNx2DI
	  [(match_operand:VNx2DI 1 "register_operand" "w")
	   (and:VNx2DI
	     (match_operand:VNx2DI 2 "register_operand" "w")
	     (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
	  UNSPEC_ADR))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw]"
)

;; Same, matching as a PLUS rather than unspec.
(define_insn "*aarch64_adr_uxtw_and"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (and:VNx2DI
	    (match_operand:VNx2DI 2 "register_operand" "w")
	    (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw]"
)

;; ADR with a nonzero shift.
(define_expand "@aarch64_adr<mode>_shift"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_dup 4)
	     (ashift:SVE_FULL_SDI
	       (match_operand:SVE_FULL_SDI 2 "register_operand")
	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_SDI 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_adr<mode>_shift"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand 4)
	     (ashift:SVE_FULL_SDI
	       (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
	       (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Same, but with the index being sign-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (unspec:VNx2DI
	    [(match_operand 4)
	     (ashift:VNx2DI
	       (unspec:VNx2DI
		 [(match_operand 5)
		  (sign_extend:VNx2DI
		    (truncate:VNx2SI
		      (match_operand:VNx2DI 2 "register_operand" "w")))]
		 UNSPEC_PRED_X)
	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
  {
    operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Same, but with the index being zero-extended from the low 32 bits.
(define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
	(plus:VNx2DI
	  (unspec:VNx2DI
	    [(match_operand 5)
	     (ashift:VNx2DI
	       (and:VNx2DI
		 (match_operand:VNx2DI 2 "register_operand" "w")
		 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
	       (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE"
  "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (VNx2BImode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Absolute difference
;; -------------------------------------------------------------------------
;; Includes:
;; - SABD
;; - UABD
;; -------------------------------------------------------------------------

;; Unpredicated integer absolute difference.
(define_expand "<su>abd<mode>_3"
  [(use (match_operand:SVE_FULL_I 0 "register_operand"))
   (USMAX:SVE_FULL_I
     (match_operand:SVE_FULL_I 1 "register_operand")
     (match_operand:SVE_FULL_I 2 "register_operand"))]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
					       operands[2]));
    DONE;
  }
)
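
;; For example, for unsigned elements 3 and 7 this computes
;; max (3, 7) - min (3, 7) = 4, which is |3 - 7|.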

;; Predicated integer absolute difference.
(define_insn "@aarch64_pred_<su>abd<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_FULL_I
	     (USMAX:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	     (<max_opp>:SVE_FULL_I
	       (match_dup 2)
	       (match_dup 3)))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_expand "@aarch64_cond_<su>abd<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (minus:SVE_FULL_I
	     (unspec:SVE_FULL_I
	       [(match_dup 1)
		(USMAX:SVE_FULL_I
		  (match_operand:SVE_FULL_I 2 "register_operand")
		  (match_operand:SVE_FULL_I 3 "register_operand"))]
	       UNSPEC_PRED_X)
	     (unspec:SVE_FULL_I
	       [(match_dup 1)
		(<max_opp>:SVE_FULL_I
		  (match_dup 2)
		  (match_dup 3))]
	       UNSPEC_PRED_X))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
{
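  /* ABD is commutative, so if the merge value matches the second input,
     swap the inputs; the merge value then matches the first input and
     the "_2" pattern below can be used.  */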
  if (rtx_equal_p (operands[3], operands[4]))
    std::swap (operands[2], operands[3]);
})

;; Predicated integer absolute difference, merging with the first input.
(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_FULL_I
	     (unspec:SVE_FULL_I
	       [(match_operand 4)
		(USMAX:SVE_FULL_I
		  (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
		  (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
	       UNSPEC_PRED_X)
	     (unspec:SVE_FULL_I
	       [(match_operand 5)
		(<max_opp>:SVE_FULL_I
		  (match_dup 2)
		  (match_dup 3))]
	       UNSPEC_PRED_X))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
  {
    operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer absolute difference, merging with an independent value.
(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (minus:SVE_FULL_I
	     (unspec:SVE_FULL_I
	       [(match_operand 5)
		(USMAX:SVE_FULL_I
		  (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
		  (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
	       UNSPEC_PRED_X)
	     (unspec:SVE_FULL_I
	       [(match_operand 6)
		(<max_opp>:SVE_FULL_I
		  (match_dup 2)
		  (match_dup 3))]
	       UNSPEC_PRED_X))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
      operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
    else if (reload_completed
	     && register_operand (operands[4], <MODE>mode)
	     && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Saturating addition and subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - SQADD
;; - SQSUB
;; - UQADD
;; - UQSUB
;; -------------------------------------------------------------------------

;; Unpredicated saturating signed addition and subtraction.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
	(SBINQOPS:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
	  (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
  "TARGET_SVE"
  "@
   <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
   <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
   movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
   <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,*,yes,yes,*")]
)

;; Unpredicated saturating unsigned addition and subtraction.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
	(UBINQOPS:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
	  (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
  "TARGET_SVE"
  "@
   <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
   <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,*")]
)
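
;; For example, for .b elements, an sqadd of 127 and 1 saturates to 127
;; instead of wrapping to -128, and a uqsub of 1 and 2 saturates to 0.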

;; -------------------------------------------------------------------------
;; ---- [INT] Highpart multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - SMULH
;; - UMULH
;; -------------------------------------------------------------------------

;; Unpredicated highpart multiplication.
(define_expand "<su>mul<mode>3_highpart"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand:SVE_FULL_I 2 "register_operand")]
	     MUL_HIGHPART)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
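
;; For example, SMULH on .s elements returns bits 63:32 of each signed
;; 64-bit product, i.e. the high half of a widening multiply.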

;; Predicated highpart multiplication.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
	      (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	     MUL_HIGHPART)]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated highpart multiplications with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand")
	      (match_operand:SVE_FULL_I 3 "register_operand")]
	     MUL_HIGHPART)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
{
  /* Only target code is aware of these operations, so we don't need
     to handle the fully-general case.  */
  gcc_assert (rtx_equal_p (operands[2], operands[4])
	      || CONSTANT_P (operands[4]));
})

;; Predicated highpart multiplications, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	     MUL_HIGHPART)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")])

;; Predicated highpart multiplications, merging with zero.
(define_insn "*cond_<optab><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
	      (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
	     MUL_HIGHPART)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "yes")])

;; -------------------------------------------------------------------------
;; ---- [INT] Division
;; -------------------------------------------------------------------------
;; Includes:
;; - SDIV
;; - SDIVR
;; - UDIV
;; - UDIVR
;; -------------------------------------------------------------------------

;; Unpredicated integer division.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
	(unspec:SVE_FULL_SDI
	  [(match_dup 3)
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 1 "register_operand")
	     (match_operand:SVE_FULL_SDI 2 "register_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
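
;; For example (a sketch rather than guaranteed output), a loop such as:
;;
;;   #include <stdint.h>
;;   void
;;   f (uint32_t *restrict a, uint32_t *restrict b, uint32_t *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = b[i] / c[i];
;;   }
;;
;; can be vectorized through this expander as:
;;
;;   udiv	z0.s, p0/m, z0.s, z1.s
;;
;; SVE only provides vector division for .s and .d elements, hence the
;; use of SVE_FULL_SDI rather than SVE_FULL_I throughout this section.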

;; Integer division predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated integer division with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 2 "register_operand")
	     (match_operand:SVE_FULL_SDI 3 "register_operand"))
	   (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated integer division, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer division, merging with the second input.
(define_insn "*cond_<optab><mode>_3"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer division, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_SDI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (SVE_INT_BINARY_SD:SVE_FULL_SDI
	     (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
	     (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
	   (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Binary logical operations
;; -------------------------------------------------------------------------
;; Includes:
;; - AND
;; - EOR
;; - ORR
;; -------------------------------------------------------------------------

;; Unpredicated integer binary logical operations.
(define_insn "<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w")
	(LOGICAL:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w")
	  (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
  "TARGET_SVE"
  "@
   <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
   <logical>\t%0.d, %1.d, %2.d"
  [(set_attr "movprfx" "*,yes,*")]
)
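
;; To illustrate the alternatives above (a sketch, assuming the constant
;; is a valid SVE logical immediate): masking with a constant such as
;;
;;   a[i] &= 0xff;     /* and	z0.s, z0.s, #0xff  */
;;
;; can use the immediate form, while a variable mask uses the register
;; form, which is written with .d suffixes regardless of element size,
;; as for all unpredicated SVE logical instructions:
;;
;;   a[i] &= b[i];     /* and	z0.d, z0.d, z1.d  */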

;; Merging forms are handled through SVE_INT_BINARY.

;; -------------------------------------------------------------------------
;; ---- [INT] Binary logical operations (inverted second input)
;; -------------------------------------------------------------------------
;; Includes:
;; - BIC
;; -------------------------------------------------------------------------

;; Unpredicated BIC.
(define_expand "@aarch64_bic<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(and:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_dup 3)
	     (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated BIC.
(define_insn_and_rewrite "*bic<mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(and:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand 3)
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 1 "register_operand" "w")))]
  "TARGET_SVE"
  "bic\t%0.d, %1.d, %2.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)
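
;; As a small illustration (not guaranteed output), BIC covers an AND
;; with one inverted operand, e.g.:
;;
;;   #include <stdint.h>
;;   void
;;   f (uint32_t *restrict a, uint32_t *restrict b, uint32_t *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = b[i] & ~c[i];
;;   }
;;
;; which maps to the pattern above as:
;;
;;   bic	z0.d, z1.d, z2.d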

;; Predicated BIC with merging.
(define_expand "@cond_bic<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (and:SVE_FULL_I
	     (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
	     (match_operand:SVE_FULL_I 2 "register_operand"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated integer BIC, merging with the first input.
(define_insn "*cond_bic<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (and:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer BIC, merging with an independent value.
(define_insn_and_rewrite "*cond_bic<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (and:SVE_FULL_I
	     (not:SVE_FULL_I
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shifts (rounding towards -Inf)
;; -------------------------------------------------------------------------
;; Includes:
;; - ASR
;; - ASRR
;; - LSL
;; - LSLR
;; - LSR
;; - LSRR
;; -------------------------------------------------------------------------

;; Unpredicated shift by a scalar, which expands into one of the vector
;; shifts below.
(define_expand "<ASHIFT:optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(ASHIFT:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand")
	  (match_operand:<VEL> 2 "general_operand")))]
  "TARGET_SVE"
  {
    rtx amount;
    if (CONST_INT_P (operands[2]))
      {
	amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
	if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
	  amount = force_reg (<MODE>mode, amount);
      }
    else
      {
	amount = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_vec_duplicate<mode> (amount,
					    convert_to_mode (<VEL>mode,
							     operands[2], 0)));
      }
    emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
    DONE;
  }
)
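
;; To make the two paths in the expander concrete (a sketch, not verified
;; output): an in-range constant amount such as
;;
;;   a[i] = b[i] >> 2;     /* asr	z0.s, z1.s, #2  */
;;
;; remains an immediate, while a variable scalar amount such as
;;
;;   a[i] = b[i] << n;     /* mov	z1.s, w0
;;                            lsl	z0.s, p0/m, z0.s, z1.s  */
;;
;; is first duplicated across a vector and then handled by the vector
;; shift patterns below.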

;; Unpredicated shift by a vector.
(define_expand "v<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 1 "register_operand")
	     (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Shift by a vector, predicated with a PTRUE.  We don't actually need
;; the predicate for the first alternative, but using Upa or X isn't
;; likely to gain much and would make the instruction seem less uniform
;; to the register allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(ASHIFT:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "w")
	  (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
  "TARGET_SVE && reload_completed"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)

;; Predicated integer shift by a constant, merging with the first input.
(define_insn "*cond_<optab><mode>_2_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
	   (match_dup 2)]
	 UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer shift by a constant, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	 UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated shifts of narrow elements by 64-bit amounts.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
	   (match_operand:VNx2DI 2 "register_operand" "w")]
	  SVE_SHIFT_WIDE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
)
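
;; These wide shifts are reachable via the ACLE; as a hedged example:
;;
;;   #include <arm_sve.h>
;;   svint32_t
;;   f (svint32_t x, svuint64_t amount)
;;   {
;;     return svasr_wide_s32_x (svptrue_b32 (), x, amount);
;;   }
;;
;; can use the unpredicated instruction above when the predicate is known
;; to be all-true:
;;
;;   asr	z0.s, z0.s, z1.d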

;; Merging predicated shifts of narrow elements by 64-bit amounts.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand")
	      (match_operand:VNx2DI 3 "register_operand")]
	     SVE_SHIFT_WIDE)
	   (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated shifts of narrow elements by 64-bit amounts, merging with
;; the first input.
(define_insn "*cond_<sve_int_op><mode>_m"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
	      (match_operand:VNx2DI 3 "register_operand" "w, w")]
	     SVE_SHIFT_WIDE)
	   (match_dup 2)]
	 UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
  [(set_attr "movprfx" "*,yes")])

;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
(define_insn "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
	      (match_operand:VNx2DI 3 "register_operand" "w, w")]
	     SVE_SHIFT_WIDE)
	   (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
	 UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
  [(set_attr "movprfx" "yes")])

;; -------------------------------------------------------------------------
;; ---- [INT] Shifts (rounding towards 0)
;; -------------------------------------------------------------------------
;; Includes:
;; - ASRD
;; - SQSHLU (SVE2)
;; - SRSHR (SVE2)
;; - URSHR (SVE2)
;; -------------------------------------------------------------------------

;; Unpredicated ASRD.
(define_expand "sdiv_pow2<mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand 2 "aarch64_simd_rshift_imm")]
	     UNSPEC_ASRD)
	   (match_dup 1)]
	 UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
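
;; As a concrete (but unverified) example of what reaches this expander:
;;
;;   #include <stdint.h>
;;   void
;;   f (int32_t *restrict a, int32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = b[i] / 4;
;;   }
;;
;; Signed division by a power of 2 rounds towards zero, which is what
;; ASRD provides (unlike a plain arithmetic shift, which rounds towards
;; -Inf):
;;
;;   asrd	z0.s, p0/m, z0.s, #2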

;; Predicated shift with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated shift, merging with the first input.
(define_insn "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")])

;; Predicated shift, merging with zero.
(define_insn "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "w")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "yes")])

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FSCALE
;; - FTSMUL
;; - FTSSEL
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations that take an integer as
;; their second operand.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  SVE_FP_BINARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Predicated floating-point binary operations that take an integer
;; as their second operand.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	  SVE_COND_FP_BINARY_INT))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
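
;; As an illustration (assuming the usual ACLE mapping), FSCALE computes
;; x * 2^n with a per-lane integer n, e.g.:
;;
;;   #include <arm_sve.h>
;;   svfloat32_t
;;   f (svbool_t pg, svfloat32_t x, svint32_t n)
;;   {
;;     return svscale_f32_x (pg, x, n);
;;   }
;;
;; which is expected to produce:
;;
;;   fscale	z0.s, p0/m, z0.s, z1.s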

;; Predicated floating-point binary operations with merging, taking an
;; integer as their second operand.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated floating-point binary operations that take an integer as their
;; second operand, with inactive lanes coming from the first operand.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point binary operations that take an integer as
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes post-RA forms of:
;; - FADD
;; - FMUL
;; - FSUB
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<sve_fp_op><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(SVE_UNPRED_FP_BINARY:SVE_FULL_F
	  (match_operand:SVE_FULL_F 1 "register_operand" "w")
	  (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
  "TARGET_SVE && reload_completed"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes merging forms of:
;; - FADD    (constant forms handled in the "Addition" section)
;; - FDIV
;; - FDIVR
;; - FMAX
;; - FMAXNM  (including #0.0 and #1.0)
;; - FMIN
;; - FMINNM  (including #0.0 and #1.0)
;; - FMUL    (including #0.5 and #2.0)
;; - FMULX
;; - FRECPS
;; - FRSQRTS
;; - FSUB    (constant forms handled in the "Addition" section)
;; - FSUBR   (constant forms handled in the "Subtraction" section)
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	  SVE_FP_BINARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
	   (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
	  SVE_COND_FP_BINARY_OPTAB))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
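
;; For instance (a sketch; the governing predicate depends on how the
;; loop is vectorized), a loop such as:
;;
;;   void
;;   f (float *restrict a, float *restrict b, float *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = b[i] / c[i];
;;   }
;;
;; goes through this expander, giving for example:
;;
;;   fdiv	z0.s, p0/m, z0.s, z1.s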

;; Predicated floating-point binary operations that have no immediate forms.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
	  SVE_COND_FP_BINARY_REG))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
	     SVE_COND_FP_BINARY)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_3_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
	     SVE_COND_FP_BINARY)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
	     SVE_COND_FP_BINARY)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FADD
;; - FSUB
;; -------------------------------------------------------------------------

;; Predicated floating-point addition.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
	  SVE_COND_FP_ADD))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)
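
;; To illustrate the immediate alternatives (a sketch only; the vsA
;; constraint accepts #0.5 and #1.0, and their negations are handled by
;; the FSUB forms via vsN):
;;
;;   void
;;   f (float *restrict a, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] += 1.0f;
;;   }
;;
;; can keep the predicated immediate form:
;;
;;   fadd	z0.s, p0/m, z0.s, #1.0
;;
;; whereas the register-register alternative with a relaxed predicate is
;; split after reload into an unpredicated FADD, as noted above.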

;; Predicated floating-point addition of a constant, merging with the
;; first input.
(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

(define_insn "*cond_add<mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated floating-point addition of a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Register merging forms are handled through SVE_COND_FP_BINARY.

;; -------------------------------------------------------------------------
;; ---- [FP] Complex addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FCADD
;; -------------------------------------------------------------------------

;; Predicated FCADD.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE_COND_FCADD))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
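
;; As a hedged example of the ACLE route into this pattern:
;;
;;   #include <arm_sve.h>
;;   svfloat32_t
;;   f (svbool_t pg, svfloat32_t a, svfloat32_t b)
;;   {
;;     return svcadd_f32_x (pg, a, b, 90);
;;   }
;;
;; is expected to become:
;;
;;   fcadd	z0.s, p0/m, z0.s, z1.s, #90
;;
;; where rotation #90 adds i*b to a (real -= b.imag, imag += b.real)
;; and #270 subtracts it.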

;; Predicated FCADD with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FCADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FCADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCADD, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - FSUB
;; - FSUBR
;; -------------------------------------------------------------------------

;; Predicated floating-point subtraction.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
	   (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
	  SVE_COND_FP_SUB))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,*,yes,yes")]
)
5607
5608;; Predicated floating-point subtraction from a constant, merging with the
5609;; second input.
5610(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
5611  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5612	(unspec:SVE_FULL_F
5613	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5614	   (unspec:SVE_FULL_F
5615	     [(match_operand 4)
5616	      (const_int SVE_RELAXED_GP)
5617	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5618	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5619	     UNSPEC_COND_FSUB)
5620	   (match_dup 3)]
5621	  UNSPEC_SEL))]
5622  "TARGET_SVE"
5623  "@
5624   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5625   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5626  "&& !rtx_equal_p (operands[1], operands[4])"
5627  {
5628    operands[4] = copy_rtx (operands[1]);
5629  }
5630  [(set_attr "movprfx" "*,yes")]
5631)
5632
5633(define_insn "*cond_sub<mode>_3_const_strict"
5634  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5635	(unspec:SVE_FULL_F
5636	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5637	   (unspec:SVE_FULL_F
5638	     [(match_dup 1)
5639	      (const_int SVE_STRICT_GP)
5640	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5641	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5642	     UNSPEC_COND_FSUB)
5643	   (match_dup 3)]
5644	  UNSPEC_SEL))]
5645  "TARGET_SVE"
5646  "@
5647   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5648   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5649  [(set_attr "movprfx" "*,yes")]
5650)
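
;; FSUBR subtracts the destination from its final operand, which is
;; why the reversed "constant minus vector" operation above fits a
;; single instruction.  aarch64_sve_float_arith_immediate only accepts
;; +0.5 and +1.0, so an illustrative instance is:
;;
;; FSUBR	z0.s, p0/m, z0.s, #1.0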
5651
5652;; Predicated floating-point subtraction from a constant, merging with an
5653;; independent value.
5654(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
5655  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5656	(unspec:SVE_FULL_F
5657	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5658	   (unspec:SVE_FULL_F
5659	     [(match_operand 5)
5660	      (const_int SVE_RELAXED_GP)
5661	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5662	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5663	     UNSPEC_COND_FSUB)
5664	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5665	  UNSPEC_SEL))]
5666  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5667  "@
5668   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5669   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5670   #"
5671  "&& 1"
5672  {
5673    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
5676      {
5677	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5678						 operands[4], operands[1]));
5679	operands[4] = operands[3] = operands[0];
5680      }
5681    else if (!rtx_equal_p (operands[1], operands[5]))
5682      operands[5] = copy_rtx (operands[1]);
5683    else
5684      FAIL;
5685  }
5686  [(set_attr "movprfx" "yes")]
5687)
5688
5689(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
5690  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5691	(unspec:SVE_FULL_F
5692	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5693	   (unspec:SVE_FULL_F
5694	     [(match_dup 1)
5695	      (const_int SVE_STRICT_GP)
5696	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5697	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5698	     UNSPEC_COND_FSUB)
5699	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5700	  UNSPEC_SEL))]
5701  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
5702  "@
5703   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5704   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5705   #"
5706  "&& reload_completed
5707   && register_operand (operands[4], <MODE>mode)
5708   && !rtx_equal_p (operands[0], operands[4])"
5709  {
5710    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
					     operands[4], operands[1]));
5712    operands[4] = operands[3] = operands[0];
5713  }
5714  [(set_attr "movprfx" "yes")]
5715)

;; Register merging forms are handled through SVE_COND_FP_BINARY.
5717
5718;; -------------------------------------------------------------------------
5719;; ---- [FP] Absolute difference
5720;; -------------------------------------------------------------------------
5721;; Includes:
5722;; - FABD
5723;; -------------------------------------------------------------------------
5724
5725;; Predicated floating-point absolute difference.
5726(define_expand "@aarch64_pred_abd<mode>"
5727  [(set (match_operand:SVE_FULL_F 0 "register_operand")
5728	(unspec:SVE_FULL_F
5729	  [(match_operand:<VPRED> 1 "register_operand")
5730	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
5731	   (unspec:SVE_FULL_F
5732	     [(match_dup 1)
5733	      (match_dup 4)
5734	      (match_operand:SVE_FULL_F 2 "register_operand")
5735	      (match_operand:SVE_FULL_F 3 "register_operand")]
5736	     UNSPEC_COND_FSUB)]
5737	  UNSPEC_COND_FABS))]
5738  "TARGET_SVE"
5739)
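
;; The FABS-of-FSUB wrapping above only describes the value; the
;; insns that match it emit a single FABD, e.g. (with illustrative
;; registers):
;;
;; FABD		z0.s, p0/m, z0.s, z1.s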
5740
5741;; Predicated floating-point absolute difference.
5742(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
5743  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5744	(unspec:SVE_FULL_F
5745	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5746	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
5747	   (unspec:SVE_FULL_F
5748	     [(match_operand 5)
5749	      (const_int SVE_RELAXED_GP)
5750	      (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5751	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5752	     UNSPEC_COND_FSUB)]
5753	  UNSPEC_COND_FABS))]
5754  "TARGET_SVE"
5755  "@
5756   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5757   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5758  "&& !rtx_equal_p (operands[1], operands[5])"
5759  {
5760    operands[5] = copy_rtx (operands[1]);
5761  }
5762  [(set_attr "movprfx" "*,yes")]
5763)
5764
5765(define_insn "*aarch64_pred_abd<mode>_strict"
5766  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5767	(unspec:SVE_FULL_F
5768	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5769	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
5770	   (unspec:SVE_FULL_F
5771	     [(match_dup 1)
5772	      (const_int SVE_STRICT_GP)
5773	      (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5774	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5775	     UNSPEC_COND_FSUB)]
5776	  UNSPEC_COND_FABS))]
5777  "TARGET_SVE"
5778  "@
5779   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5780   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5781  [(set_attr "movprfx" "*,yes")]
5782)
5783
5784(define_expand "@aarch64_cond_abd<mode>"
5785  [(set (match_operand:SVE_FULL_F 0 "register_operand")
5786	(unspec:SVE_FULL_F
5787	  [(match_operand:<VPRED> 1 "register_operand")
5788	   (unspec:SVE_FULL_F
5789	     [(match_dup 1)
5790	      (const_int SVE_STRICT_GP)
5791	      (unspec:SVE_FULL_F
5792		[(match_dup 1)
5793		 (const_int SVE_STRICT_GP)
5794		 (match_operand:SVE_FULL_F 2 "register_operand")
5795		 (match_operand:SVE_FULL_F 3 "register_operand")]
5796		UNSPEC_COND_FSUB)]
5797	     UNSPEC_COND_FABS)
5798	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5799	  UNSPEC_SEL))]
5800  "TARGET_SVE"
5801{
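  /* Since |a - b| == |b - a|, swap the inputs if the fallback value is
     the second of them; the "merging with the first input" patterns
     below can then handle the result.  */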
5802  if (rtx_equal_p (operands[3], operands[4]))
5803    std::swap (operands[2], operands[3]);
5804})
5805
5806;; Predicated floating-point absolute difference, merging with the first
5807;; input.
5808(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
5809  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5810	(unspec:SVE_FULL_F
5811	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5812	   (unspec:SVE_FULL_F
5813	     [(match_operand 4)
5814	      (const_int SVE_RELAXED_GP)
5815	      (unspec:SVE_FULL_F
5816		[(match_operand 5)
5817		 (const_int SVE_RELAXED_GP)
5818		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5819		 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5820		UNSPEC_COND_FSUB)]
5821	     UNSPEC_COND_FABS)
5822	   (match_dup 2)]
5823	  UNSPEC_SEL))]
5824  "TARGET_SVE"
5825  "@
5826   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5827   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5828  "&& (!rtx_equal_p (operands[1], operands[4])
5829       || !rtx_equal_p (operands[1], operands[5]))"
5830  {
5831    operands[4] = copy_rtx (operands[1]);
5832    operands[5] = copy_rtx (operands[1]);
5833  }
5834  [(set_attr "movprfx" "*,yes")]
5835)
5836
5837(define_insn "*aarch64_cond_abd<mode>_2_strict"
5838  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5839	(unspec:SVE_FULL_F
5840	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5841	   (unspec:SVE_FULL_F
5842	     [(match_dup 1)
5843	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
5844	      (unspec:SVE_FULL_F
5845		[(match_dup 1)
5846		 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5847		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5848		 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5849		UNSPEC_COND_FSUB)]
5850	     UNSPEC_COND_FABS)
5851	   (match_dup 2)]
5852	  UNSPEC_SEL))]
5853  "TARGET_SVE"
5854  "@
5855   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5856   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5857  [(set_attr "movprfx" "*,yes")]
5858)
5859
5860;; Predicated floating-point absolute difference, merging with the second
5861;; input.
5862(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
5863  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5864	(unspec:SVE_FULL_F
5865	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5866	   (unspec:SVE_FULL_F
5867	     [(match_operand 4)
5868	      (const_int SVE_RELAXED_GP)
5869	      (unspec:SVE_FULL_F
5870		[(match_operand 5)
5871		 (const_int SVE_RELAXED_GP)
5872		 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5873		 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5874		UNSPEC_COND_FSUB)]
5875	     UNSPEC_COND_FABS)
5876	   (match_dup 3)]
5877	  UNSPEC_SEL))]
5878  "TARGET_SVE"
5879  "@
5880   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5881   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5882  "&& (!rtx_equal_p (operands[1], operands[4])
5883       || !rtx_equal_p (operands[1], operands[5]))"
5884  {
5885    operands[4] = copy_rtx (operands[1]);
5886    operands[5] = copy_rtx (operands[1]);
5887  }
5888  [(set_attr "movprfx" "*,yes")]
5889)
5890
5891(define_insn "*aarch64_cond_abd<mode>_3_strict"
5892  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5893	(unspec:SVE_FULL_F
5894	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5895	   (unspec:SVE_FULL_F
5896	     [(match_dup 1)
5897	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
5898	      (unspec:SVE_FULL_F
5899		[(match_dup 1)
5900		 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5901		 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5902		 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5903		UNSPEC_COND_FSUB)]
5904	     UNSPEC_COND_FABS)
5905	   (match_dup 3)]
5906	  UNSPEC_SEL))]
5907  "TARGET_SVE"
5908  "@
5909   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5910   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5911  [(set_attr "movprfx" "*,yes")]
5912)
5913
5914;; Predicated floating-point absolute difference, merging with an
5915;; independent value.
5916(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
5917  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5918	(unspec:SVE_FULL_F
5919	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5920	   (unspec:SVE_FULL_F
5921	     [(match_operand 5)
5922	      (const_int SVE_RELAXED_GP)
5923	      (unspec:SVE_FULL_F
5924		[(match_operand 6)
5925		 (const_int SVE_RELAXED_GP)
5926		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5927		 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5928		UNSPEC_COND_FSUB)]
5929	     UNSPEC_COND_FABS)
5930	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5931	  UNSPEC_SEL))]
5932  "TARGET_SVE
5933   && !rtx_equal_p (operands[2], operands[4])
5934   && !rtx_equal_p (operands[3], operands[4])"
5935  "@
5936   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5937   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5938   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5939   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5940   #"
5941  "&& 1"
5942  {
5943    if (reload_completed
5944	&& register_operand (operands[4], <MODE>mode)
5945	&& !rtx_equal_p (operands[0], operands[4]))
5946      {
5947	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5948						 operands[4], operands[1]));
5949	operands[4] = operands[3] = operands[0];
5950      }
5951    else if (!rtx_equal_p (operands[1], operands[5])
5952	     || !rtx_equal_p (operands[1], operands[6]))
5953      {
5954	operands[5] = copy_rtx (operands[1]);
5955	operands[6] = copy_rtx (operands[1]);
5956      }
5957    else
5958      FAIL;
5959  }
5960  [(set_attr "movprfx" "yes")]
5961)
5962
5963(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
5964  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5965	(unspec:SVE_FULL_F
5966	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5967	   (unspec:SVE_FULL_F
5968	     [(match_dup 1)
5969	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
5970	      (unspec:SVE_FULL_F
5971		[(match_dup 1)
5972		 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5973		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5974		 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5975		UNSPEC_COND_FSUB)]
5976	     UNSPEC_COND_FABS)
5977	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5978	  UNSPEC_SEL))]
5979  "TARGET_SVE
5980   && !rtx_equal_p (operands[2], operands[4])
5981   && !rtx_equal_p (operands[3], operands[4])"
5982  "@
5983   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5984   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5985   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5986   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5987   #"
5988  "&& reload_completed
5989   && register_operand (operands[4], <MODE>mode)
5990   && !rtx_equal_p (operands[0], operands[4])"
5991  {
5992    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5993					     operands[4], operands[1]));
5994    operands[4] = operands[3] = operands[0];
5995  }
5996  [(set_attr "movprfx" "yes")]
5997)
5998
5999;; -------------------------------------------------------------------------
6000;; ---- [FP] Multiplication
6001;; -------------------------------------------------------------------------
6002;; Includes:
6003;; - FMUL
6004;; -------------------------------------------------------------------------
6005
6006;; Predicated floating-point multiplication.
6007(define_insn_and_split "@aarch64_pred_<optab><mode>"
6008  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
6009	(unspec:SVE_FULL_F
6010	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
6011	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
6012	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
6013	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
6014	  SVE_COND_FP_MUL))]
6015  "TARGET_SVE"
6016  "@
6017   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6018   #
6019   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6020   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6021   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6022  ; Split the unpredicated form after reload, so that we don't have
6023  ; the unnecessary PTRUE.
6024  "&& reload_completed
6025   && register_operand (operands[3], <MODE>mode)
6026   && INTVAL (operands[4]) == SVE_RELAXED_GP"
6027  [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
6028  ""
6029  [(set_attr "movprfx" "*,*,*,yes,yes")]
6030)
6031
6032;; Merging forms are handled through SVE_COND_FP_BINARY and
6033;; SVE_COND_FP_BINARY_I1.
6034
6035;; Unpredicated multiplication by selected lanes.
6036(define_insn "@aarch64_mul_lane_<mode>"
6037  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6038	(mult:SVE_FULL_F
6039	  (unspec:SVE_FULL_F
6040	    [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
6041	     (match_operand:SI 3 "const_int_operand")]
6042	    UNSPEC_SVE_LANE_SELECT)
6043	  (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
6044  "TARGET_SVE"
6045  "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6046)
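
;; For example, multiplying by lane 1 of operand 2 assembles to
;; something like:
;;
;; FMUL		z0.s, z1.s, z2.s[1]
;;
;; with operand 2 restricted to the registers that the indexed form
;; can encode (hence <sve_lane_con> rather than plain "w").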
6047
6048;; -------------------------------------------------------------------------
6049;; ---- [FP] Division
6050;; -------------------------------------------------------------------------
6051;; The patterns in this section are synthetic.
6052;; -------------------------------------------------------------------------
6053
6054(define_expand "div<mode>3"
6055  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6056	(unspec:SVE_FULL_F
6057	  [(match_dup 3)
6058	   (const_int SVE_RELAXED_GP)
6059	   (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6060	   (match_operand:SVE_FULL_F 2 "register_operand")]
6061	  UNSPEC_COND_FDIV))]
6062  "TARGET_SVE"
6063  {
6064    if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6065      DONE;
6066
6067    operands[1] = force_reg (<MODE>mode, operands[1]);
6068    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6069  }
6070)
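
;; When the tuning that aarch64_emit_approx_div checks allows it
;; (e.g. under -mlow-precision-div), the division is instead expanded
;; into an FRECPE estimate refined by FRECPS Newton-Raphson steps,
;; built from the two expanders below.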
6071
6072(define_expand "@aarch64_frecpe<mode>"
6073  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6074	(unspec:SVE_FULL_F
6075	  [(match_operand:SVE_FULL_F 1 "register_operand")]
6076	  UNSPEC_FRECPE))]
6077  "TARGET_SVE"
6078)
6079
6080(define_expand "@aarch64_frecps<mode>"
6081  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6082	(unspec:SVE_FULL_F
6083	  [(match_operand:SVE_FULL_F 1 "register_operand")
6084	   (match_operand:SVE_FULL_F 2 "register_operand")]
6085	  UNSPEC_FRECPS))]
6086  "TARGET_SVE"
6087)
6088
6089;; -------------------------------------------------------------------------
6090;; ---- [FP] Binary logical operations
6091;; -------------------------------------------------------------------------
;; Includes:
6093;; - AND
6094;; - EOR
6095;; - ORR
6096;; -------------------------------------------------------------------------
6097
6098;; Binary logical operations on floating-point modes.  We avoid subregs
6099;; by providing this, but we need to use UNSPECs since rtx logical ops
6100;; aren't defined for floating-point modes.
6101(define_insn "*<optab><mode>3"
6102  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
6103	(unspec:SVE_FULL_F
6104	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
6105	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
6106	  LOGICALF))]
6107  "TARGET_SVE"
6108  "<logicalf_op>\t%0.d, %1.d, %2.d"
6109)
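
;; Because the operation is bitwise, the .d form is correct for every
;; element size; an EOR on, say, VNx8HF still assembles to:
;;
;; EOR		z0.d, z1.d, z2.d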
6110
6111;; -------------------------------------------------------------------------
6112;; ---- [FP] Sign copying
6113;; -------------------------------------------------------------------------
6114;; The patterns in this section are synthetic.
6115;; -------------------------------------------------------------------------
6116
6117(define_expand "copysign<mode>3"
6118  [(match_operand:SVE_FULL_F 0 "register_operand")
6119   (match_operand:SVE_FULL_F 1 "register_operand")
6120   (match_operand:SVE_FULL_F 2 "register_operand")]
6121  "TARGET_SVE"
6122  {
6123    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6124    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6125    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6126    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6127
6128    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6129    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6130
6131    emit_insn (gen_and<v_int_equiv>3
6132	       (sign, arg2,
6133		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6134						   HOST_WIDE_INT_M1U
6135						   << bits)));
6136    emit_insn (gen_and<v_int_equiv>3
6137	       (mant, arg1,
6138		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6139						   ~(HOST_WIDE_INT_M1U
6140						     << bits))));
6141    emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6142    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6143    DONE;
6144  }
6145)
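
;; For single precision, bits == 31, so the sequence above amounts to
;; (arg2 & 0x80000000) | (arg1 & 0x7fffffff) on the integer view of
;; the vectors: the sign bit comes from operand 2 and the exponent
;; and mantissa from operand 1.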
6146
6147(define_expand "xorsign<mode>3"
6148  [(match_operand:SVE_FULL_F 0 "register_operand")
6149   (match_operand:SVE_FULL_F 1 "register_operand")
6150   (match_operand:SVE_FULL_F 2 "register_operand")]
6151  "TARGET_SVE"
6152  {
6153    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6154    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6155    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6156
6157    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6158    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6159
6160    emit_insn (gen_and<v_int_equiv>3
6161	       (sign, arg2,
6162		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6163						   HOST_WIDE_INT_M1U
6164						   << bits)));
6165    emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6166    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6167    DONE;
6168  }
6169)
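
;; xorsign (a, b) is simply a ^ (b & sign-mask): the magnitude of
;; operand 1 with its sign flipped wherever operand 2 is negative.
;; That is all that expanding a * copysign (1.0, b) requires, and it
;; needs one fewer logical operation than the full copysign sequence.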
6170
6171;; -------------------------------------------------------------------------
6172;; ---- [FP] Maximum and minimum
6173;; -------------------------------------------------------------------------
6174;; Includes:
6175;; - FMAX
6176;; - FMAXNM
6177;; - FMIN
6178;; - FMINNM
6179;; -------------------------------------------------------------------------
6180
6181;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
6182;; smin/smax rtx codes are handled in the generic section above.
6183(define_expand "<maxmin_uns><mode>3"
6184  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6185	(unspec:SVE_FULL_F
6186	  [(match_dup 3)
6187	   (const_int SVE_RELAXED_GP)
6188	   (match_operand:SVE_FULL_F 1 "register_operand")
6189	   (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
6190	  SVE_COND_FP_MAXMIN_PUBLIC))]
6191  "TARGET_SVE"
6192  {
6193    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6194  }
6195)
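
;; FMAXNM and FMINNM provide the NaN handling that the libm functions
;; require: if exactly one input is a quiet NaN, the result is the
;; other input rather than the NaN.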
6196
6197;; Predicated floating-point maximum/minimum.
6198(define_insn "@aarch64_pred_<optab><mode>"
6199  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
6200	(unspec:SVE_FULL_F
6201	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6202	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
6203	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
6204	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
6205	  SVE_COND_FP_MAXMIN))]
6206  "TARGET_SVE"
6207  "@
6208   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6209   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6210   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6211   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
6212  [(set_attr "movprfx" "*,*,yes,yes")]
6213)
6214
6215;; Merging forms are handled through SVE_COND_FP_BINARY and
6216;; SVE_COND_FP_BINARY_I1.
6217
6218;; -------------------------------------------------------------------------
6219;; ---- [PRED] Binary logical operations
6220;; -------------------------------------------------------------------------
6221;; Includes:
6222;; - AND
6223;; - ANDS
6224;; - EOR
6225;; - EORS
6226;; - ORR
6227;; - ORRS
6228;; -------------------------------------------------------------------------
6229
6230;; Predicate AND.  We can reuse one of the inputs as the GP.
6231;; Doubling the second operand is the preferred implementation
6232;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6233(define_insn "and<mode>3"
6234  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6235	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
6236		      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
6237  "TARGET_SVE"
6238  "and\t%0.b, %1/z, %2.b, %2.b"
6239)
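
;; For example:
;;
;; AND		p0.b, p1/z, p2.b, p2.b
;;
;; is the preferred encoding of "MOV p0.b, p1/z, p2.b".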
6240
6241;; Unpredicated predicate EOR and ORR.
6242(define_expand "<optab><mode>3"
6243  [(set (match_operand:PRED_ALL 0 "register_operand")
6244	(and:PRED_ALL
6245	  (LOGICAL_OR:PRED_ALL
6246	    (match_operand:PRED_ALL 1 "register_operand")
6247	    (match_operand:PRED_ALL 2 "register_operand"))
6248	  (match_dup 3)))]
6249  "TARGET_SVE"
6250  {
6251    operands[3] = aarch64_ptrue_reg (<MODE>mode);
6252  }
6253)
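
;; ANDing with a PTRUE keeps the RTL in the same shape as the
;; genuinely predicated version, so the single insn pattern below
;; matches both.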
6254
6255;; Predicated predicate AND, EOR and ORR.
6256(define_insn "@aarch64_pred_<optab><mode>_z"
6257  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6258	(and:PRED_ALL
6259	  (LOGICAL:PRED_ALL
6260	    (match_operand:PRED_ALL 2 "register_operand" "Upa")
6261	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6262	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6263  "TARGET_SVE"
6264  "<logical>\t%0.b, %1/z, %2.b, %3.b"
6265)
6266
6267;; Perform a logical operation on operands 2 and 3, using operand 1 as
6268;; the GP.  Store the result in operand 0 and set the flags in the same
6269;; way as for PTEST.
6270(define_insn "*<optab><mode>3_cc"
6271  [(set (reg:CC_NZC CC_REGNUM)
6272	(unspec:CC_NZC
6273	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6274	   (match_operand 4)
6275	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6276	   (and:PRED_ALL
6277	     (LOGICAL:PRED_ALL
6278	       (match_operand:PRED_ALL 2 "register_operand" "Upa")
6279	       (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6280	     (match_dup 4))]
6281	  UNSPEC_PTEST))
6282   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6283	(and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6284		      (match_dup 4)))]
6285  "TARGET_SVE"
6286  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6287)
6288
6289;; Same with just the flags result.
6290(define_insn "*<optab><mode>3_ptest"
6291  [(set (reg:CC_NZC CC_REGNUM)
6292	(unspec:CC_NZC
6293	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6294	   (match_operand 4)
6295	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6296	   (and:PRED_ALL
6297	     (LOGICAL:PRED_ALL
6298	       (match_operand:PRED_ALL 2 "register_operand" "Upa")
6299	       (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6300	     (match_dup 4))]
6301	  UNSPEC_PTEST))
6302   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6303  "TARGET_SVE"
6304  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
6305)
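
;; Fusing the logical operation with the PTEST lets a later branch
;; use the flags directly, along the lines of:
;;
;; ANDS		p0.b, p1/z, p2.b, p3.b
;; B.NONE	.Lskip
;;
;; with no separate PTEST of the result.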
6306
6307;; -------------------------------------------------------------------------
6308;; ---- [PRED] Binary logical operations (inverted second input)
6309;; -------------------------------------------------------------------------
6310;; Includes:
6311;; - BIC
6312;; - ORN
6313;; -------------------------------------------------------------------------
6314
6315;; Predicated predicate BIC and ORN.
6316(define_insn "aarch64_pred_<nlogical><mode>_z"
6317  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6318	(and:PRED_ALL
6319	  (NLOGICAL:PRED_ALL
6320	    (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6321	    (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6322	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6323  "TARGET_SVE"
6324  "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
6325)
6326
6327;; Same, but set the flags as a side-effect.
6328(define_insn "*<nlogical><mode>3_cc"
6329  [(set (reg:CC_NZC CC_REGNUM)
6330	(unspec:CC_NZC
6331	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6332	   (match_operand 4)
6333	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6334	   (and:PRED_ALL
6335	     (NLOGICAL:PRED_ALL
6336	       (not:PRED_ALL
6337		 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6338	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6339	     (match_dup 4))]
6340	  UNSPEC_PTEST))
6341   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6342	(and:PRED_ALL (NLOGICAL:PRED_ALL
6343			(not:PRED_ALL (match_dup 3))
6344			(match_dup 2))
6345		      (match_dup 4)))]
6346  "TARGET_SVE"
6347  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6348)
6349
6350;; Same with just the flags result.
6351(define_insn "*<nlogical><mode>3_ptest"
6352  [(set (reg:CC_NZC CC_REGNUM)
6353	(unspec:CC_NZC
6354	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6355	   (match_operand 4)
6356	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6357	   (and:PRED_ALL
6358	     (NLOGICAL:PRED_ALL
6359	       (not:PRED_ALL
6360		 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
6361	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6362	     (match_dup 4))]
6363	  UNSPEC_PTEST))
6364   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6365  "TARGET_SVE"
6366  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
6367)
6368
6369;; -------------------------------------------------------------------------
6370;; ---- [PRED] Binary logical operations (inverted result)
6371;; -------------------------------------------------------------------------
6372;; Includes:
6373;; - NAND
6374;; - NOR
6375;; -------------------------------------------------------------------------
6376
6377;; Predicated predicate NAND and NOR.
6378(define_insn "aarch64_pred_<logical_nn><mode>_z"
6379  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6380	(and:PRED_ALL
6381	  (NLOGICAL:PRED_ALL
6382	    (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6383	    (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6384	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
6385  "TARGET_SVE"
6386  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
6387)
6388
6389;; Same, but set the flags as a side-effect.
6390(define_insn "*<logical_nn><mode>3_cc"
6391  [(set (reg:CC_NZC CC_REGNUM)
6392	(unspec:CC_NZC
6393	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6394	   (match_operand 4)
6395	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6396	   (and:PRED_ALL
6397	     (NLOGICAL:PRED_ALL
6398	       (not:PRED_ALL
6399		 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6400	       (not:PRED_ALL
6401		 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6402	     (match_dup 4))]
6403	  UNSPEC_PTEST))
6404   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
6405	(and:PRED_ALL (NLOGICAL:PRED_ALL
6406			(not:PRED_ALL (match_dup 2))
6407			(not:PRED_ALL (match_dup 3)))
6408		      (match_dup 4)))]
6409  "TARGET_SVE"
6410  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6411)
6412
6413;; Same with just the flags result.
6414(define_insn "*<logical_nn><mode>3_ptest"
6415  [(set (reg:CC_NZC CC_REGNUM)
6416	(unspec:CC_NZC
6417	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
6418	   (match_operand 4)
6419	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6420	   (and:PRED_ALL
6421	     (NLOGICAL:PRED_ALL
6422	       (not:PRED_ALL
6423		 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
6424	       (not:PRED_ALL
6425		 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
6426	     (match_dup 4))]
6427	  UNSPEC_PTEST))
6428   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
6429  "TARGET_SVE"
6430  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
6431)
6432
6433;; =========================================================================
6434;; == Ternary arithmetic
6435;; =========================================================================
6436
6437;; -------------------------------------------------------------------------
6438;; ---- [INT] MLA and MAD
6439;; -------------------------------------------------------------------------
6440;; Includes:
6441;; - MAD
6442;; - MLA
6443;; -------------------------------------------------------------------------
6444
6445;; Unpredicated integer addition of product.
6446(define_expand "fma<mode>4"
6447  [(set (match_operand:SVE_FULL_I 0 "register_operand")
6448	(plus:SVE_FULL_I
6449	  (unspec:SVE_FULL_I
6450	    [(match_dup 4)
6451	     (mult:SVE_FULL_I
6452	       (match_operand:SVE_FULL_I 1 "register_operand")
6453	       (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
6454	    UNSPEC_PRED_X)
6455	  (match_operand:SVE_FULL_I 3 "register_operand")))]
6456  "TARGET_SVE"
6457  {
6458    if (aarch64_prepare_sve_int_fma (operands, PLUS))
6459      DONE;
6460    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6461  }
6462)
6463
6464;; Predicated integer addition of product.
6465(define_insn "@aarch64_pred_fma<mode>"
6466  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
6467	(plus:SVE_FULL_I
6468	  (unspec:SVE_FULL_I
6469	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6470	     (mult:SVE_FULL_I
6471	       (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
6472	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
6473	    UNSPEC_PRED_X)
6474	  (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
6475  "TARGET_SVE"
6476  "@
6477   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6478   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6479   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6480  [(set_attr "movprfx" "*,*,yes")]
6481)
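
;; MAD and MLA compute the same value but tie different operands to
;; the destination: MAD overwrites a multiplicand (z0 = z0 * z3 + z4)
;; while MLA overwrites the accumulator (z0 = z0 + z2 * z3).  The "%"
;; on operand 2 lets either multiplication input be the tied one.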
6482
6483;; Predicated integer addition of product with merging.
6484(define_expand "cond_fma<mode>"
6485  [(set (match_operand:SVE_FULL_I 0 "register_operand")
6486	(unspec:SVE_FULL_I
6487	  [(match_operand:<VPRED> 1 "register_operand")
6488	   (plus:SVE_FULL_I
6489	     (mult:SVE_FULL_I
6490	       (match_operand:SVE_FULL_I 2 "register_operand")
6491	       (match_operand:SVE_FULL_I 3 "general_operand"))
6492	     (match_operand:SVE_FULL_I 4 "register_operand"))
6493	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
6494	  UNSPEC_SEL))]
6495  "TARGET_SVE"
6496  {
6497    if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
6498      DONE;
6499    /* Swap the multiplication operands if the fallback value is the
6500       second of the two.  */
6501    if (rtx_equal_p (operands[3], operands[5]))
6502      std::swap (operands[2], operands[3]);
6503  }
6504)
6505
6506;; Predicated integer addition of product, merging with the first input.
6507(define_insn "*cond_fma<mode>_2"
6508  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6509	(unspec:SVE_FULL_I
6510	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6511	   (plus:SVE_FULL_I
6512	     (mult:SVE_FULL_I
6513	       (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6514	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
6515	     (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
6516	   (match_dup 2)]
6517	  UNSPEC_SEL))]
6518  "TARGET_SVE"
6519  "@
6520   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6521   movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6522  [(set_attr "movprfx" "*,yes")]
6523)
6524
6525;; Predicated integer addition of product, merging with the third input.
6526(define_insn "*cond_fma<mode>_4"
6527  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6528	(unspec:SVE_FULL_I
6529	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6530	   (plus:SVE_FULL_I
6531	     (mult:SVE_FULL_I
6532	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6533	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
6534	     (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
6535	   (match_dup 4)]
6536	  UNSPEC_SEL))]
6537  "TARGET_SVE"
6538  "@
6539   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6540   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6541  [(set_attr "movprfx" "*,yes")]
6542)
6543
6544;; Predicated integer addition of product, merging with an independent value.
6545(define_insn_and_rewrite "*cond_fma<mode>_any"
6546  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6547	(unspec:SVE_FULL_I
6548	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6549	   (plus:SVE_FULL_I
6550	     (mult:SVE_FULL_I
6551	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6552	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
6553	     (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
6554	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6555	  UNSPEC_SEL))]
6556  "TARGET_SVE
6557   && !rtx_equal_p (operands[2], operands[5])
6558   && !rtx_equal_p (operands[3], operands[5])
6559   && !rtx_equal_p (operands[4], operands[5])"
6560  "@
6561   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6562   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6563   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6564   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6565   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6566   #"
6567  "&& reload_completed
6568   && register_operand (operands[5], <MODE>mode)
6569   && !rtx_equal_p (operands[0], operands[5])"
6570  {
6571    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6572					     operands[5], operands[1]));
6573    operands[5] = operands[4] = operands[0];
6574  }
6575  [(set_attr "movprfx" "yes")]
6576)
6577
6578;; -------------------------------------------------------------------------
6579;; ---- [INT] MLS and MSB
6580;; -------------------------------------------------------------------------
6581;; Includes:
6582;; - MLS
6583;; - MSB
6584;; -------------------------------------------------------------------------
6585
6586;; Unpredicated integer subtraction of product.
6587(define_expand "fnma<mode>4"
6588  [(set (match_operand:SVE_FULL_I 0 "register_operand")
6589	(minus:SVE_FULL_I
6590	  (match_operand:SVE_FULL_I 3 "register_operand")
6591	  (unspec:SVE_FULL_I
6592	    [(match_dup 4)
6593	     (mult:SVE_FULL_I
6594	       (match_operand:SVE_FULL_I 1 "register_operand")
6595	       (match_operand:SVE_FULL_I 2 "general_operand"))]
6596	    UNSPEC_PRED_X)))]
6597  "TARGET_SVE"
6598  {
6599    if (aarch64_prepare_sve_int_fma (operands, MINUS))
6600      DONE;
6601    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6602  }
6603)
6604
6605;; Predicated integer subtraction of product.
6606(define_insn "@aarch64_pred_fnma<mode>"
6607  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
6608	(minus:SVE_FULL_I
6609	  (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
6610	  (unspec:SVE_FULL_I
6611	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6612	     (mult:SVE_FULL_I
6613	       (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
6614	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
6615	    UNSPEC_PRED_X)))]
6616  "TARGET_SVE"
6617  "@
6618   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6619   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6620   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6621  [(set_attr "movprfx" "*,*,yes")]
6622)
6623
6624;; Predicated integer subtraction of product with merging.
6625(define_expand "cond_fnma<mode>"
6626  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (minus:SVE_FULL_I
	     (match_operand:SVE_FULL_I 4 "register_operand")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "general_operand")))
	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
6636  "TARGET_SVE"
6637  {
6638    if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6639      DONE;
6640    /* Swap the multiplication operands if the fallback value is the
6641       second of the two.  */
6642    if (rtx_equal_p (operands[3], operands[5]))
6643      std::swap (operands[2], operands[3]);
6644  }
6645)
6646
6647;; Predicated integer subtraction of product, merging with the first input.
6648(define_insn "*cond_fnma<mode>_2"
6649  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6650	(unspec:SVE_FULL_I
6651	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6652	   (minus:SVE_FULL_I
6653	     (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
6654	     (mult:SVE_FULL_I
6655	       (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6656	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6657	   (match_dup 2)]
6658	  UNSPEC_SEL))]
6659  "TARGET_SVE"
6660  "@
6661   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6662   movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6663  [(set_attr "movprfx" "*,yes")]
6664)
6665
6666;; Predicated integer subtraction of product, merging with the third input.
6667(define_insn "*cond_fnma<mode>_4"
6668  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6669	(unspec:SVE_FULL_I
6670	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6671	   (minus:SVE_FULL_I
6672	     (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6673	     (mult:SVE_FULL_I
6674	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6675	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6676	   (match_dup 4)]
6677	  UNSPEC_SEL))]
6678  "TARGET_SVE"
6679  "@
6680   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6681   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6682  [(set_attr "movprfx" "*,yes")]
6683)
6684
6685;; Predicated integer subtraction of product, merging with an
6686;; independent value.
6687(define_insn_and_rewrite "*cond_fnma<mode>_any"
6688  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6689	(unspec:SVE_FULL_I
6690	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6691	   (minus:SVE_FULL_I
6692	     (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6693	     (mult:SVE_FULL_I
6694	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6695	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6696	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6697	  UNSPEC_SEL))]
6698  "TARGET_SVE
6699   && !rtx_equal_p (operands[2], operands[5])
6700   && !rtx_equal_p (operands[3], operands[5])
6701   && !rtx_equal_p (operands[4], operands[5])"
6702  "@
6703   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6704   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6705   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6706   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6707   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6708   #"
6709  "&& reload_completed
6710   && register_operand (operands[5], <MODE>mode)
6711   && !rtx_equal_p (operands[0], operands[5])"
6712  {
6713    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6714					     operands[5], operands[1]));
6715    operands[5] = operands[4] = operands[0];
6716  }
6717  [(set_attr "movprfx" "yes")]
6718)
6719
6720;; -------------------------------------------------------------------------
6721;; ---- [INT] Dot product
6722;; -------------------------------------------------------------------------
6723;; Includes:
6724;; - SDOT
6725;; - SUDOT   (I8MM)
6726;; - UDOT
6727;; - USDOT   (I8MM)
6728;; -------------------------------------------------------------------------
6729
6730;; Four-element integer dot-product with accumulation.
6731(define_insn "<sur>dot_prod<vsi2qi>"
6732  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6733	(plus:SVE_FULL_SDI
6734	  (unspec:SVE_FULL_SDI
6735	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6736	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6737	    DOTPROD)
6738	  (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6739  "TARGET_SVE"
6740  "@
6741   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6742   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6743  [(set_attr "movprfx" "*,yes")]
6744)
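
;; Each lane of the accumulator gains the sum of four products of
;; adjacent quarter-width elements; for example:
;;
;; SDOT		z0.s, z1.b, z2.b
;;
;; adds z1.b[4*i + j] * z2.b[4*i + j], for j = 0..3, into z0.s[i].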
6745
6746;; Four-element integer dot-product by selected lanes with accumulation.
6747(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6748  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6749	(plus:SVE_FULL_SDI
6750	  (unspec:SVE_FULL_SDI
6751	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6752	     (unspec:<VSI2QI>
6753	       [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6754		(match_operand:SI 3 "const_int_operand")]
6755	       UNSPEC_SVE_LANE_SELECT)]
6756	    DOTPROD)
6757	  (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6758  "TARGET_SVE"
6759  "@
6760   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6761   movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6762  [(set_attr "movprfx" "*,yes")]
6763)
6764
6765(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
6766  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(plus:VNx4SI_ONLY
6768	  (unspec:VNx4SI_ONLY
6769	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6770	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6771	    DOTPROD_US_ONLY)
6772	  (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6773  "TARGET_SVE_I8MM"
6774  "@
6775   <sur>dot\\t%0.s, %1.b, %2.b
6776   movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
  [(set_attr "movprfx" "*,yes")]
6778)
6779
6780(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6781  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6782	(plus:VNx4SI_ONLY
6783	  (unspec:VNx4SI_ONLY
6784	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6785	     (unspec:<VSI2QI>
6786	       [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6787		(match_operand:SI 3 "const_int_operand")]
6788	       UNSPEC_SVE_LANE_SELECT)]
6789	    DOTPROD_I8MM)
6790	  (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6791  "TARGET_SVE_I8MM"
6792  "@
6793   <sur>dot\\t%0.s, %1.b, %2.b[%3]
6794   movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6795  [(set_attr "movprfx" "*,yes")]
6796)
6797
6798;; -------------------------------------------------------------------------
6799;; ---- [INT] Sum of absolute differences
6800;; -------------------------------------------------------------------------
6801;; The patterns in this section are synthetic.
6802;; -------------------------------------------------------------------------
6803
6804;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6805;; operands 1 and 2.  The sequence also has to perform a widening reduction of
6806;; the difference into a vector and accumulate that into operand 3 before
6807;; copying that into the result operand 0.
6808;; Perform that with a sequence of:
6809;; MOV		ones.b, #1
6810;; [SU]ABD	diff.b, p0/m, op1.b, op2.b
6811;; MOVPRFX	op0, op3	// If necessary
6812;; UDOT		op0.s, diff.b, ones.b
6813(define_expand "<sur>sad<vsi2qi>"
6814  [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6815   (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6816		    (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6817   (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6818  "TARGET_SVE"
6819  {
6820    rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6821    rtx diff = gen_reg_rtx (<VSI2QI>mode);
6822    emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6823    emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6824    DONE;
6825  }
6826)
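
;; Note that the final dot product is unsigned regardless of <sur>:
;; after the [SU]ABD the byte differences are non-negative, so UDOT
;; with an all-ones vector simply sums each group of four into the
;; corresponding wider lane.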
6827
6828;; -------------------------------------------------------------------------
6829;; ---- [INT] Matrix multiply-accumulate
6830;; -------------------------------------------------------------------------
6831;; Includes:
6832;; - SMMLA (I8MM)
6833;; - UMMLA (I8MM)
6834;; - USMMLA (I8MM)
6835;; -------------------------------------------------------------------------
6836
6837(define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6838  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6839	(plus:VNx4SI_ONLY
6840	  (unspec:VNx4SI_ONLY
6841	    [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6842	     (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6843	    MATMUL)
6844	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6845  "TARGET_SVE_I8MM"
6846  "@
6847   <sur>mmla\\t%0.s, %2.b, %3.b
6848   movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6849  [(set_attr "movprfx" "*,yes")]
6850)
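
;; Within each 128-bit segment, the instruction multiplies a 2x8
;; matrix of bytes from operand 2 by an 8x2 matrix of bytes from
;; operand 3 and accumulates the 2x2 result into the .s lanes of
;; operand 1, e.g.:
;;
;; SMMLA	z0.s, z1.b, z2.b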
6851
6852;; -------------------------------------------------------------------------
6853;; ---- [FP] General ternary arithmetic corresponding to unspecs
6854;; -------------------------------------------------------------------------
6855;; Includes merging patterns for:
6856;; - FMAD
6857;; - FMLA
6858;; - FMLS
6859;; - FMSB
6860;; - FNMAD
6861;; - FNMLA
6862;; - FNMLS
6863;; - FNMSB
6864;; -------------------------------------------------------------------------
6865
6866;; Unpredicated floating-point ternary operations.
6867(define_expand "<optab><mode>4"
6868  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6869	(unspec:SVE_FULL_F
6870	  [(match_dup 4)
6871	   (const_int SVE_RELAXED_GP)
6872	   (match_operand:SVE_FULL_F 1 "register_operand")
6873	   (match_operand:SVE_FULL_F 2 "register_operand")
6874	   (match_operand:SVE_FULL_F 3 "register_operand")]
6875	  SVE_COND_FP_TERNARY))]
6876  "TARGET_SVE"
6877  {
6878    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6879  }
6880)
6881
6882;; Predicated floating-point ternary operations.
6883(define_insn "@aarch64_pred_<optab><mode>"
6884  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6885	(unspec:SVE_FULL_F
6886	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6887	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
6888	   (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6889	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6890	   (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6891	  SVE_COND_FP_TERNARY))]
6892  "TARGET_SVE"
6893  "@
6894   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6895   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6896   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6897  [(set_attr "movprfx" "*,*,yes")]
6898)
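
;; As with the integer MAD/MLA pair, <sve_fmad_op> ties the
;; destination to a multiplicand (z0 = z0 * z3 + z4) while
;; <sve_fmla_op> ties it to the accumulator (z0 = z0 + z2 * z3);
;; the constraint alternatives above select between the two.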
6899
6900;; Predicated floating-point ternary operations with merging.
6901(define_expand "@cond_<optab><mode>"
6902  [(set (match_operand:SVE_FULL_F 0 "register_operand")
6903	(unspec:SVE_FULL_F
6904	  [(match_operand:<VPRED> 1 "register_operand")
6905	   (unspec:SVE_FULL_F
6906	     [(match_dup 1)
6907	      (const_int SVE_STRICT_GP)
6908	      (match_operand:SVE_FULL_F 2 "register_operand")
6909	      (match_operand:SVE_FULL_F 3 "register_operand")
6910	      (match_operand:SVE_FULL_F 4 "register_operand")]
6911	     SVE_COND_FP_TERNARY)
6912	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6913	  UNSPEC_SEL))]
6914  "TARGET_SVE"
6915{
6916  /* Swap the multiplication operands if the fallback value is the
6917     second of the two.  */
6918  if (rtx_equal_p (operands[3], operands[5]))
6919    std::swap (operands[2], operands[3]);
6920})
6921
6922;; Predicated floating-point ternary operations, merging with the
6923;; first input.
6924(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
6925  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6926	(unspec:SVE_FULL_F
6927	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6928	   (unspec:SVE_FULL_F
6929	     [(match_operand 5)
6930	      (const_int SVE_RELAXED_GP)
6931	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6932	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6933	      (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6934	     SVE_COND_FP_TERNARY)
6935	   (match_dup 2)]
6936	  UNSPEC_SEL))]
6937  "TARGET_SVE"
6938  "@
6939   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6940   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6941  "&& !rtx_equal_p (operands[1], operands[5])"
6942  {
6943    operands[5] = copy_rtx (operands[1]);
6944  }
6945  [(set_attr "movprfx" "*,yes")]
6946)
6947
6948(define_insn "*cond_<optab><mode>_2_strict"
6949  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6950	(unspec:SVE_FULL_F
6951	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6952	   (unspec:SVE_FULL_F
6953	     [(match_dup 1)
6954	      (const_int SVE_STRICT_GP)
6955	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6956	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6957	      (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6958	     SVE_COND_FP_TERNARY)
6959	   (match_dup 2)]
6960	  UNSPEC_SEL))]
6961  "TARGET_SVE"
6962  "@
6963   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6964   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6965  [(set_attr "movprfx" "*,yes")]
6966)
6967
6968;; Predicated floating-point ternary operations, merging with the
6969;; third input.
6970(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
6971  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6972	(unspec:SVE_FULL_F
6973	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6974	   (unspec:SVE_FULL_F
6975	     [(match_operand 5)
6976	      (const_int SVE_RELAXED_GP)
6977	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6978	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6979	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6980	     SVE_COND_FP_TERNARY)
6981	   (match_dup 4)]
6982	  UNSPEC_SEL))]
6983  "TARGET_SVE"
6984  "@
6985   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6986   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6987  "&& !rtx_equal_p (operands[1], operands[5])"
6988  {
6989    operands[5] = copy_rtx (operands[1]);
6990  }
6991  [(set_attr "movprfx" "*,yes")]
6992)
6993
6994(define_insn "*cond_<optab><mode>_4_strict"
6995  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6996	(unspec:SVE_FULL_F
6997	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6998	   (unspec:SVE_FULL_F
6999	     [(match_dup 1)
7000	      (const_int SVE_STRICT_GP)
7001	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7002	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7003	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
7004	     SVE_COND_FP_TERNARY)
7005	   (match_dup 4)]
7006	  UNSPEC_SEL))]
7007  "TARGET_SVE"
7008  "@
7009   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7010   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
7011  [(set_attr "movprfx" "*,yes")]
7012)

;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[5], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[5]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
						 operands[5], operands[1]));
	operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
;; (fma ...) since target-independent code won't understand the indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
	   (unspec:SVE_FULL_F
	     [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	  SVE_FP_TERNARY_LANE))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)
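
;; For example (registers and index illustrative), with 32-bit elements:
;;
;;   fmla	z0.s, z1.s, z2.s[2]
;;
;; multiplies each element of z1 by element 2 of the corresponding 128-bit
;; segment of z2 and accumulates the products into z0.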

;; -------------------------------------------------------------------------
;; ---- [FP] Complex multiply-add
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FCMLA
;; -------------------------------------------------------------------------

;; Predicated FCMLA.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	   (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	  SVE_COND_FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)
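
;; The <rot> immediate selects which step of a complex multiplication the
;; instruction performs.  As a rough usage sketch (registers arbitrary),
;; a full complex multiply-add of interleaved (real, imaginary) pairs is
;; normally built from a pair of FCMLAs:
;;
;;   fcmla	z0.s, p0/m, z1.s, z2.s, #0
;;   fcmla	z0.s, p0/m, z1.s, z2.s, #90
;;
;; with the two rotations together accumulating the complete complex
;; product of z1 and z2 into z0.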

;; Predicated FCMLA with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")
	      (match_operand:SVE_FULL_F 4 "register_operand")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCMLA, merging with the third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FCMLA)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_4_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FCMLA)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCMLA, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[5], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[5]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
						 operands[5], operands[1]));
	operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated FCMLA with indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSF
	     [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
	  FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Trigonometric multiply-add
;; -------------------------------------------------------------------------
;; Includes:
;; - FTMAD
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_tmad<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	   (match_operand:DI 3 "const_int_operand")]
	  UNSPEC_FTMAD))]
  "TARGET_SVE"
  "@
   ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
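
;; As an informal note: FTMAD is a building block for vectorized sine and
;; cosine approximations.  The immediate selects an entry in an internal
;; coefficient table, so a descending chain of FTMADs evaluates the
;; polynomial in a Horner-style recurrence.  The exact sequence is chosen
;; by the intrinsics or library code, not by this pattern.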

;; -------------------------------------------------------------------------
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; -------------------------------------------------------------------------
;; Includes:
;; - BFDOT (BF16)
;; - BFMLALB (BF16)
;; - BFMLALT (BF16)
;; - BFMMLA (BF16)
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF
	  [(match_operand:VNx4SF 1 "register_operand" "0, w")
	   (match_operand:VNx8BF 2 "register_operand" "w, w")
	   (match_operand:VNx8BF 3 "register_operand" "w, w")]
	  SVE_BFLOAT_TERNARY_LONG))]
  "TARGET_SVE_BF16"
  "@
   <sve_fp_op>\t%0.s, %2.h, %3.h
   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
  [(set_attr "movprfx" "*,yes")]
)

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF
	  [(match_operand:VNx4SF 1 "register_operand" "0, w")
	   (match_operand:VNx8BF 2 "register_operand" "w, w")
	   (match_operand:VNx8BF 3 "register_operand" "y, y")
	   (match_operand:SI 4 "const_int_operand")]
	  SVE_BFLOAT_TERNARY_LONG_LANE))]
  "TARGET_SVE_BF16"
  "@
   <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
;; - FMMLA (F32MM,F64MM)
;; -------------------------------------------------------------------------

;; The mode iterator enforces the target requirements.
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_MATMULF
	  [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
	   (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
	   (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
	  FMMLA))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
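
;; As an informal sketch of the semantics (registers arbitrary): FMMLA
;; treats each 128-bit (F32MM) or 256-bit (F64MM) segment as a 2x2
;; matrix and accumulates the product of the matrices in operands 2 and 3
;; into operand 1, so:
;;
;;   fmmla	z0.s, z1.s, z2.s
;;
;; computes Z0 += Z1 * Z2 as independent 2x2 blocks per segment.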

;; =========================================================================
;; == Comparisons and selects
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Select based on predicates
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FMOV
;; - MOV
;; - SEL
;; -------------------------------------------------------------------------

;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order:        mask, true, false
(define_expand "@vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand")
	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (register_operand (operands[1], <MODE>mode))
      operands[2] = force_reg (<MODE>mode, operands[2]);
  }
)
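
;; For example, an elementwise select such as:
;;
;;   res = mask ? a : b;   /* per element, illustrative C */
;;
;; reaches this expander with operands (a, b, mask) and, when both data
;; inputs are registers, matches the SEL alternative of the insn below:
;;
;;   sel	z0.s, p3, z1.s, z2.s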

;; Selects between:
;; - two registers
;; - a duplicated immediate and a register
;; - a duplicated immediate and zero
(define_insn "*vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && (!register_operand (operands[1], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
   mov\t%0.<Vetype>, %3/m, #%I1
   mov\t%0.<Vetype>, %3/z, #%I1
   fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
   movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)

;; Optimize selects between a duplicated scalar variable and another vector,
;; the latter of which can be a zero constant or a variable.  Treat duplicates
;; of GPRs as being more expensive than duplicates of FPRs, since they
;; involve a cross-file move.
(define_insn "@aarch64_sel_dup<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (vec_duplicate:SVE_FULL
	     (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %3/m, %<vwcore>1
   mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
  [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Compare and select
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(if_then_else:SVE_FULL
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
	  (match_operand:SVE_FULL 1 "nonmemory_operand")
	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(if_then_else:SVE_FULL
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
	  (match_operand:SVE_FULL 1 "nonmemory_operand")
	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
(define_expand "vcond<mode><v_fp_equiv>"
  [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
	(if_then_else:SVE_FULL_HSD
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_FP_EQUIV> 4 "register_operand")
	     (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
	  (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
	  (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - CMPEQ
;; - CMPGE
;; - CMPGT
;; - CMPHI
;; - CMPHS
;; - CMPLE
;; - CMPLO
;; - CMPLS
;; - CMPLT
;; - CMPNE
;; -------------------------------------------------------------------------

;; Signed integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmp<mode><vpred>"
  [(parallel
    [(set (match_operand:<VPRED> 0 "register_operand")
	  (match_operator:<VPRED> 1 "comparison_operator"
	    [(match_operand:SVE_FULL_I 2 "register_operand")
	     (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
     (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
				    operands[2], operands[3]);
    DONE;
  }
)

;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
(define_expand "vec_cmpu<mode><vpred>"
  [(parallel
    [(set (match_operand:<VPRED> 0 "register_operand")
	  (match_operator:<VPRED> 1 "comparison_operator"
	    [(match_operand:SVE_FULL_I 2 "register_operand")
	     (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
     (clobber (reg:CC_NZC CC_REGNUM))])]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
				    operands[2], operands[3]);
    DONE;
  }
)

;; Predicated integer comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (SVE_INT_CMP:<VPRED>
	     (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
	     (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
	  UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
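
;; For example (registers arbitrary):
;;
;;   cmpeq	p0.s, p1/z, z2.s, #0
;;
;; sets each .s lane of p0 when the corresponding lane of z2 is zero and
;; the lane of p1 is active, and clears it otherwise (zeroing predication).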

;; Predicated integer comparisons in which both the flag and predicate
;; results are interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (SVE_INT_CMP:<VPRED>
		(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		(match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(unspec:<VPRED>
	  [(match_dup 6)
	   (match_dup 7)
	   (SVE_INT_CMP:<VPRED>
	     (match_dup 2)
	     (match_dup 3))]
	  UNSPEC_PRED_Z))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated integer comparisons in which only the flags result is
;; interesting.
(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand 6)
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (SVE_INT_CMP:<VPRED>
		(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
		(match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "@
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
   cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated integer comparisons, formed by combining a PTRUE-predicated
;; comparison with an AND.  Split the instruction into its preferred form
;; at the earliest opportunity, in order to get rid of the redundant
;; operand 4.
(define_insn_and_split "*cmp<cmp_op><mode>_and"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand 4)
	     (const_int SVE_KNOWN_PTRUE)
	     (SVE_INT_CMP:<VPRED>
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
	    UNSPEC_PRED_Z)
	  (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(parallel
     [(set (match_dup 0)
	   (unspec:<VPRED>
	     [(match_dup 1)
	      (const_int SVE_MAYBE_NOT_PTRUE)
	      (SVE_INT_CMP:<VPRED>
		(match_dup 2)
		(match_dup 3))]
	     UNSPEC_PRED_Z))
      (clobber (reg:CC_NZC CC_REGNUM))])]
)

;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
	      (match_operand:VNx2DI 4 "register_operand" "w")]
	     SVE_COND_INT_CMP_WIDE)]
	  UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
)

;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:VNx16BI 6 "register_operand" "Upl")
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
		 (match_operand:VNx2DI 3 "register_operand" "w")]
		SVE_COND_INT_CMP_WIDE)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_dup 6)
	   (match_dup 7)
	   (unspec:<VPRED>
	     [(match_dup 2)
	      (match_dup 3)]
	     SVE_COND_INT_CMP_WIDE)]
	  UNSPEC_PRED_Z))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
)

;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:VNx16BI 6 "register_operand" "Upl")
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
		 (match_operand:VNx2DI 3 "register_operand" "w")]
		SVE_COND_INT_CMP_WIDE)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
)

;; -------------------------------------------------------------------------
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
;; Includes:
;; - WHILEGE (SVE2)
;; - WHILEGT (SVE2)
;; - WHILEHI (SVE2)
;; - WHILEHS (SVE2)
;; - WHILELE
;; - WHILELO
;; - WHILELS
;; - WHILELT
;; - WHILERW (SVE2)
;; - WHILEWR (SVE2)
;; -------------------------------------------------------------------------

;; Set element I of the result if (cmp (plus operand1 J) operand2) is
;; true for all J in [0, I].
(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
			 SVE_WHILE))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
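
;; For example, a vectorized loop over N elements can use:
;;
;;   whilelo	p0.s, x0, x1
;;
;; to set lane I of p0 while x0 + I < x1 (unsigned), giving an all-true
;; predicate for full iterations and a partial predicate for the final
;; tail iteration.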

;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 3)
	   (match_operand 4)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:PRED_ALL
	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
	     SVE_WHILE)]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_dup 1)
			  (match_dup 2)]
			 SVE_WHILE))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 3)
	   (match_operand 4)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:PRED_ALL
	     [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
	     SVE_WHILE)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [FP] Direct comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FCMEQ
;; - FCMGE
;; - FCMGT
;; - FCMLE
;; - FCMLT
;; - FCMNE
;; - FCMUO
;; -------------------------------------------------------------------------

;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(match_operator:<VPRED> 1 "comparison_operator"
	  [(match_operand:SVE_FULL_F 2 "register_operand")
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
				      operands[2], operands[3], false);
    DONE;
  }
)

;; Predicated floating-point comparisons.
(define_insn "@aarch64_pred_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
	  SVE_COND_FP_CMP_I0))]
  "TARGET_SVE"
  "@
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
   fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w")
	   (match_operand:SVE_FULL_F 4 "register_operand" "w")]
	  UNSPEC_COND_FCMUO))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P.  This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
;; since we can drop any potentially-trapping operations whose results
;; are not needed.
;;
;; Split the instruction into its preferred form (below) at the earliest
;; opportunity, in order to get rid of the redundant operand 1.
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (const_int SVE_KNOWN_PTRUE)
	     (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
	    SVE_COND_FP_CMP_I0)
	  (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))]
)

;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (const_int SVE_KNOWN_PTRUE)
	     (match_operand:SVE_FULL_F 2 "register_operand" "w")
	     (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	    UNSPEC_COND_FCMUO)
	  (match_operand:<VPRED> 4 "register_operand" "Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_COND_FCMUO))]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Absolute comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FACGE
;; - FACGT
;; - FACLE
;; - FACLT
;; -------------------------------------------------------------------------

;; Predicated floating-point absolute comparisons.
(define_expand "@aarch64_pred_fac<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_dup 2)
	      (match_operand:SVE_FULL_F 3 "register_operand")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_dup 2)
	      (match_operand:SVE_FULL_F 4 "register_operand")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
)

(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[5])
       || !rtx_equal_p (operands[1], operands[6]))"
  {
    operands[5] = copy_rtx (operands[1]);
    operands[6] = copy_rtx (operands[1]);
  }
)

(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
;; Includes:
;; - SEL
;; -------------------------------------------------------------------------

(define_insn "@vcond_mask_<mode><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(ior:PRED_ALL
	  (and:PRED_ALL
	    (match_operand:PRED_ALL 3 "register_operand" "Upa")
	    (match_operand:PRED_ALL 1 "register_operand" "Upa"))
	  (and:PRED_ALL
	    (not (match_dup 3))
	    (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
  "TARGET_SVE"
  "sel\t%0.b, %3, %1.b, %2.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Test bits
;; -------------------------------------------------------------------------
;; Includes:
;; - PTEST
;; -------------------------------------------------------------------------

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else
	  (match_operator 0 "aarch64_equality_operator"
	    [(match_operand:PRED_ALL 1 "register_operand")
	     (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
	  (label_ref (match_operand 3 ""))
	  (pc)))]
  ""
  {
    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
    rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
	pred = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
						 operands[2]));
      }
    emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
    operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
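
;; A rough sketch of the code this can generate when comparing two .b
;; predicates for inequality (registers and the label name are purely
;; illustrative):
;;
;;   ptrue	p3.b
;;   eor	p2.b, p3/z, p0.b, p1.b
;;   ptest	p3, p2.b
;;   b.any	.Llabel
;;
;; with the EOR omitted when comparing against zero.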

;; See "Description of UNSPEC_PTEST" above for details.
(define_insn "aarch64_ptest<mode>"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
			(match_operand 1)
			(match_operand:SI 2 "aarch64_sve_ptrue_flag")
			(match_operand:PRED_ALL 3 "register_operand" "Upa")]
		       UNSPEC_PTEST))]
  "TARGET_SVE"
  "ptest\t%0, %3.b"
)

;; =========================================================================
;; == Reductions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Conditional reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - CLASTA
;; - CLASTB
;; -------------------------------------------------------------------------

;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "@fold_extract_<last_op>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
	(unspec:<VEL>
	  [(match_operand:<VEL> 1 "register_operand" "0, 0")
	   (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL 3 "register_operand" "w, w")]
	  CLAST))]
  "TARGET_SVE"
  "@
   clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
   clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
)

(define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0, w")
	   (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL 3 "register_operand" "w, w")]
	  CLAST))]
  "TARGET_SVE"
  "@
   clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - ANDV
;; - EORV
;; - ORV
;; - SADDV
;; - SMAXV
;; - SMINV
;; - UADDV
;; - UMAXV
;; - UMINV
;; -------------------------------------------------------------------------

;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:SVE_FULL_I 1 "register_operand")]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
    emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
    if (tmp != operands[0])
      emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
    DONE;
  }
)
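
;; UADDV always writes a D register, which is why the expander above
;; accumulates into a DImode temporary and then takes the lowpart for
;; narrower element types.  For example (registers arbitrary):
;;
;;   uaddv	d0, p0, z1.b
;;
;; sums all active .b lanes of z1 into the 64-bit scalar d0.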

;; Predicated integer add reduction.  The result is always 64 bits.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
		    (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		   SVE_INT_ADDV))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<su>addv\t%d0, %1, %2.<Vetype>"
)

;; Unpredicated integer reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_I 1 "register_operand")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDV
;; - FMAXNMV
;; - FMAXV
;; - FMINNMV
;; - FMINV
;; -------------------------------------------------------------------------

;; Unpredicated floating-point tree reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_F 1 "register_operand")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point tree reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_F 2 "register_operand" "w")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Left-to-right reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDA
;; -------------------------------------------------------------------------

;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 3)
		       (match_operand:<VEL> 1 "register_operand")
		       (match_operand:SVE_FULL_F 2 "register_operand")]
		      UNSPEC_FADDA))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated in-order FP reductions.
(define_insn "mask_fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
		       (match_operand:<VEL> 1 "register_operand" "0")
		       (match_operand:SVE_FULL_F 2 "register_operand" "w")]
		      UNSPEC_FADDA))]
  "TARGET_SVE"
  "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
)
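
;; FADDA performs the additions strictly in lane order, so it is the form
;; used when the source order of a floating-point reduction must be
;; preserved (e.g. without -ffast-math).  For example (registers
;; arbitrary):
;;
;;   fadda	s0, p0, s0, z1.s
;;
;; folds each active lane of z1 into s0 from lowest to highest.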

;; =========================================================================
;; == Permutes
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL
;; -------------------------------------------------------------------------

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_FULL 0 "register_operand")
   (match_operand:SVE_FULL 1 "register_operand")
   (match_operand:SVE_FULL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
				 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "@aarch64_sve_tbl<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL))]
  "TARGET_SVE"
  "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
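
;; For example (registers arbitrary):
;;
;;   tbl	z0.s, z1.s, z2.s
;;
;; places element z2.s[I] of z1 into lane I of z0; indices outside the
;; vector yield zero.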

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Special-purpose unary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - COMPACT
;; - DUP
;; - REV
;; -------------------------------------------------------------------------

;; Compact active elements and pad with zeros.
(define_insn "@aarch64_sve_compact<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
	(unspec:SVE_FULL_SD
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
	  UNSPEC_SVE_COMPACT))]
  "TARGET_SVE"
  "compact\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; Duplicate one element of a vector.
(define_insn "@aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (vec_select:<VEL>
	    (match_operand:SVE_FULL 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
  "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
)

;; Use DUP.Q to duplicate a 128-bit segment of a register.
;;
;; The vec_select:<V128> sets memory lane number N of the V128 to lane
;; number op2 + N of op1.  (We don't need to distinguish between memory
;; and architectural register lane numbering for op1 or op0, since the
;; two numbering schemes are the same for SVE.)
;;
;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
;; of op0.  We therefore get the correct result for both endiannesses.
;;
;; The wrinkle is that for big-endian V128 registers, memory lane numbering
;; is in the opposite order to architectural register lane numbering.
;; Thus if we were to do this operation via a V128 temporary register,
;; the vec_select and vec_duplicate would both involve a reverse operation
;; for big-endian targets.  In this fused pattern the two reverses cancel
;; each other out.
(define_insn "@aarch64_sve_dupq_lane<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (vec_select:<V128>
	    (match_operand:SVE_FULL 1 "register_operand" "w")
	    (match_operand 2 "ascending_int_parallel"))))]
  "TARGET_SVE
   && (INTVAL (XVECEXP (operands[2], 0, 0))
       * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
   && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
		* GET_MODE_SIZE (<VEL>mode), 0, 63)"
  {
    unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
			 * GET_MODE_SIZE (<VEL>mode));
    operands[2] = gen_int_mode (byte / 16, DImode);
    return "dup\t%0.q, %1.q[%2]";
  }
)
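
;; For example (registers arbitrary), with .s elements and a lane
;; parallel starting at element 4 (byte 16), the pattern emits:
;;
;;   dup	z0.q, z1.q[1]
;;
;; which copies the second 128-bit segment of z1 to every segment of z0.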

;; Reverse the order of elements within a full vector.
(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")]
	  UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Special-purpose binary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - SPLICE
;; - TRN1
;; - TRN2
;; - UZP1
;; - UZP2
;; - ZIP1
;; - ZIP2
;; -------------------------------------------------------------------------

;; Like EXT, but start at the first active element.
(define_insn "@aarch64_sve_splice<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SVE_FULL 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL 3 "register_operand" "w, w")]
	  UNSPEC_SVE_SPLICE))]
  "TARGET_SVE"
  "@
   splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*, yes")]
)

;; Permutes that take half the elements from one vector and half the
;; elements from the other.
(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_operand:SVE_FULL 2 "register_operand" "w")]
	  PERMUTE))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Apply PERMUTE to 128-bit sequences.  The behavior of these patterns
;; doesn't depend on the mode.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_operand:SVE_FULL 2 "register_operand" "w")]
	  PERMUTEQ))]
  "TARGET_SVE_F64MM"
  "<perm_insn>\t%0.q, %1.q, %2.q"
)

;; Concatenate two vectors and extract a subvector.  Note that the
;; immediate (third) operand is the lane index, not the byte index.
(define_insn "@aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL 2 "register_operand" "w, w")
	   (match_operand:SI 3 "const_int_operand")]
	  UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
    return (which_alternative == 0
	    ? "ext\\t%0.b, %0.b, %2.b, #%3"
	    : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
  }
  [(set_attr "movprfx" "*,yes")]
)
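
;; For example (registers arbitrary), a lane index of 3 on .s elements
;; becomes byte offset 12:
;;
;;   ext	z0.b, z0.b, z1.b, #12
;;
;; which drops the first three .s elements of z0 and fills the top of the
;; result from the low elements of z1.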

;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose unary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - REV
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
			 UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose binary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TRN1
;; - TRN2
;; - UZP1
;; - UZP2
;; - ZIP1
;; - ZIP2
;; -------------------------------------------------------------------------

;; Permutes that take half the elements from one vector and half the
;; elements from the other.
(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
			 PERMUTE))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Special-purpose permute used by the predicate generation instructions.
;; Unlike the normal permute patterns, these instructions operate on VNx16BI
;; regardless of the element size, so that all input and output bits are
;; well-defined.  Operand 3 then indicates the size of the permute.
(define_insn "@aarch64_sve_trn1_conv<mode>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
			 (match_operand:VNx16BI 2 "register_operand" "Upa")
			 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
			UNSPEC_TRN1_CONV))]
  "TARGET_SVE"
  "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
)

;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT<-INT] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VWIDE> 1 "register_operand" "w")
	   (match_operand:<VWIDE> 2 "register_operand" "w")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
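
;; For example (hypothetical register choices), packing two VNx2DI
;; vectors into a single VNx4SI vector:
;;
;;	uzp1	z0.s, z1.s, z2.s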

;; -------------------------------------------------------------------------
;; ---- [INT<-INT] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - SUNPKHI
;; - SUNPKLO
;; - UUNPKHI
;; - UUNPKLO
;; -------------------------------------------------------------------------

;; Unpack the low or high half of a vector, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE>
     [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
		: gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

(define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE>
	  [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
	  UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
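
;; For example (hypothetical register choices), sign-extending the
;; "high" half of a VNx8HI vector to a VNx4SI vector:
;;
;;	sunpkhi	z0.s, z1.h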

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTZS
;; - FCVTZU
;; -------------------------------------------------------------------------

;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
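
;; For example, a signed SF-to-SI conversion might end up as the
;; following, with the ptrue coming from operand 2 (hypothetical
;; register choices):
;;
;;	ptrue	p0.s, all
;;	fcvtzs	z0.s, p0/m, z1.s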

;; Predicated float-to-integer conversion, either to the same width or wider.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
   movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated narrowing float-to-integer conversion.
(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE"
  "@
   fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
   movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated float-to-integer conversion with merging, either to the same
;; width or wider.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
)

;; The first alternative doesn't need the earlyclobber, but the only case
;; it would help is the uninteresting one in which operands 2 and 3 are
;; the same register (despite having different modes).  Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
   movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
   movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated narrowing float-to-integer conversion with merging.
(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx2DF_ONLY 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

(define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVTI)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
   movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
   movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Packs
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:VNx2DF 1 "register_operand")]
	  SVE_COND_FCVTI))
   (set (match_dup 5)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:VNx2DF 2 "register_operand")]
	  SVE_COND_FCVTI))
   (set (match_operand:VNx4SI 0 "register_operand")
	(unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (VNx2BImode);
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
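
;; For example, the signed case might expand to the following sequence
;; (hypothetical register choices):
;;
;;	ptrue	p0.d, all
;;	fcvtzs	z4.s, p0/m, z1.d
;;	fcvtzs	z5.s, p0/m, z2.d
;;	uzp1	z0.s, z4.s, z5.s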

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Unpacks
;; -------------------------------------------------------------------------
;; No patterns here yet!
;; -------------------------------------------------------------------------

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - SCVTF
;; - UCVTF
;; -------------------------------------------------------------------------

;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<V_INT_EQUIV> 1 "register_operand")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
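
;; For example, a signed SI-to-SF conversion might end up as
;; (hypothetical register choices):
;;
;;	ptrue	p0.s, all
;;	scvtf	z0.s, p0/m, z1.s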

;; Predicated integer-to-float conversion, either to the same width or
;; narrower.
(define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated widening integer-to-float conversion.
(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE"
  "@
   <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
   movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer-to-float conversion with merging, either to the same
;; width or narrower.
(define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
)

;; The first alternative doesn't need the earlyclobber, but the only case
;; it would help is the uninteresting one in which operands 2 and 3 are
;; the same register (despite having different modes).  Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  "@
   <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
   movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated widening integer-to-float conversion with merging.
(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx2DF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SI_ONLY 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

(define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx2DF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
	     SVE_COND_ICVTF)
	   (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
   movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
   movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Packs
;; -------------------------------------------------------------------------
;; No patterns here yet!
;; -------------------------------------------------------------------------

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Unpacks
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(match_operand:VNx2DF 0 "register_operand")
   (FLOATUORS:VNx2DF
     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
		    UNPACK_UNSIGNED))]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (VNx4SImode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2vnx4si
		: gen_aarch64_sve_zip1vnx4si)
	       (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)
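
;; For example, the signed low-half case might expand to (hypothetical
;; register choices, little-endian):
;;
;;	zip1	z2.s, z1.s, z1.s
;;	ptrue	p0.d, all
;;	scvtf	z0.d, p0/m, z2.s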

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
	(unspec:SVE_FULL_HSF
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<VWIDE> 1 "register_operand")]
	  UNSPEC_COND_FCVT))
   (set (match_dup 5)
	(unspec:SVE_FULL_HSF
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<VWIDE> 2 "register_operand")]
	  UNSPEC_COND_FCVT))
   (set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
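
;; For example, the DF-to-SF case might expand to (hypothetical
;; register choices):
;;
;;	ptrue	p0.d, all
;;	fcvt	z4.s, p0/m, z1.d
;;	fcvt	z5.s, p0/m, z2.d
;;	uzp1	z0.s, z4.s, z5.s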

;; Predicated float-to-float truncation.
(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated float-to-float truncation with merging.
(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_SDF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_HSF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs (bfloat16)
;; -------------------------------------------------------------------------
;; Includes:
;; - BFCVT (BF16)
;; - BFCVTNT (BF16)
;; -------------------------------------------------------------------------

;; Predicated BFCVT.
(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated BFCVT with merging.
(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SF_ONLY 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
)

(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; This instruction does not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
	   (match_operand:VNx4SF 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE_BF16"
  "bfcvtnt\t%0.h, %2/m, %3.s"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_FULL_HSF
     [(match_operand:SVE_FULL_HSF 1 "register_operand")]
     UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2<mode>
		: gen_aarch64_sve_zip1<mode>)
		(temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)
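
;; For example, extending the low half of a VNx8HF vector to a VNx4SF
;; vector might expand to (hypothetical register choices, little-endian):
;;
;;	zip1	z2.h, z1.h, z1.h
;;	ptrue	p0.s, all
;;	fcvt	z0.s, p0/m, z2.h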

;; Predicated float-to-float extension.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
	(unspec:PRED_BHS
	  [(match_operand:<VWIDE> 1 "register_operand" "Upa")
	   (match_operand:<VWIDE> 2 "register_operand" "Upa")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
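
;; For example (hypothetical register choices), packing two VNx8BI
;; predicates into a single VNx16BI predicate:
;;
;;	uzp1	p0.b, p1.b, p2.b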

;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - PUNPKHI
;; - PUNPKLO
;; -------------------------------------------------------------------------

;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
		   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
		: gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
	(unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
			UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)

;; =========================================================================
;; == Vector partitioning
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [PRED] Unary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKA
;; - BRKAS
;; - BRKB
;; - BRKBS
;; -------------------------------------------------------------------------

;; Note that unlike most other instructions that have both merging and
;; zeroing forms, these instructions don't operate elementwise and so
;; don't fit the IFN_COND model.
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
	(unspec:VNx16BI
	  [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
	   (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
	   (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
	  SVE_BRK_UNARY))]
  "TARGET_SVE"
  "@
   brk<brk_op>\t%0.b, %1/z, %2.b
   brk<brk_op>\t%0.b, %1/m, %2.b"
)
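
;; As an illustration of the semantics: with all elements active, a BRKA
;; whose operand 2 has its first set bit in element 2 (0 0 1 0 1 ...,
;; lowest element first) produces 1 1 1 0 0 ..., while BRKB produces
;; 1 1 0 0 0 ...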

;; Same, but also producing a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
	     SVE_BRK_UNARY)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_BRK_UNARY))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b"
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
	     SVE_BRK_UNARY)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKN
;; - BRKNS
;; - BRKPA
;; - BRKPAS
;; - BRKPB
;; - BRKPBS
;; -------------------------------------------------------------------------

;; Binary BRKs (BRKN, BRKPA, BRKPB).
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand:VNx16BI 2 "register_operand" "Upa")
	   (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
	  SVE_BRK_BINARY))]
  "TARGET_SVE"
  "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
)

;; BRKN, producing both a predicate and a flags result.  Unlike other
;; flag-setting instructions, these flags are always set with respect
;; to a ptrue.
(define_insn_and_rewrite "*aarch64_brkn_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 4)
	   (match_operand:VNx16BI 5)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:VNx16BI
	     [(match_operand:VNx16BI 1 "register_operand" "Upa")
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "0")]
	     UNSPEC_BRKN)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_BRKN))]
  "TARGET_SVE"
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
       || operands[5] != CONST0_RTX (VNx16BImode))"
  {
    operands[4] = CONST0_RTX (VNx16BImode);
    operands[5] = CONST0_RTX (VNx16BImode);
  }
)

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_brkn_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 4)
	   (match_operand:VNx16BI 5)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:VNx16BI
	     [(match_operand:VNx16BI 1 "register_operand" "Upa")
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "0")]
	     UNSPEC_BRKN)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
       || operands[5] != CONST0_RTX (VNx16BImode))"
  {
    operands[4] = CONST0_RTX (VNx16BImode);
    operands[5] = CONST0_RTX (VNx16BImode);
  }
)

;; BRKPA and BRKPB, producing both a predicate and a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "Upa")]
	     SVE_BRKP)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI
	  [(match_dup 1)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_BRKP))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_dup 1)
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:VNx16BI
	     [(match_dup 1)
	      (match_operand:VNx16BI 2 "register_operand" "Upa")
	      (match_operand:VNx16BI 3 "register_operand" "Upa")]
	     SVE_BRKP)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Scalarization
;; -------------------------------------------------------------------------
;; Includes:
;; - PFIRST
;; - PNEXT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL
	  [(match_operand:PRED_ALL 1 "register_operand" "Upa")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:PRED_ALL 3 "register_operand" "0")]
	  SVE_PITER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)

;; Same, but also producing a flags result.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 2)
	   (match_operand:SI 3 "aarch64_sve_ptrue_flag")
	   (unspec:PRED_ALL
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	      (match_operand:PRED_ALL 6 "register_operand" "0")]
	     SVE_PITER)]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL
	  [(match_dup 4)
	   (match_dup 5)
	   (match_dup 6)]
	  SVE_PITER))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 2)
	   (match_operand:SI 3 "aarch64_sve_ptrue_flag")
	   (unspec:PRED_ALL
	     [(match_operand 4)
	      (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	      (match_operand:PRED_ALL 6 "register_operand" "0")]
	     SVE_PITER)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)

;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTB
;; - CNTD
;; - CNTH
;; - CNTW
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:DI 1 "const_int_operand")
		      (match_operand:DI 2 "const_int_operand")
		      (match_operand:DI 3 "const_int_operand")]
		     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
  }
)
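
;; For example, counting 32-bit elements (operand 2 == 4) in the VL8
;; pattern with a multiplier of 2 gives (hypothetical register choice):
;;
;;	cntw	x0, vl8, mul #2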

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI (zero_extend:DI
		       (unspec:SI [(match_operand:DI 2 "const_int_operand")
				   (match_operand:DI 3 "const_int_operand")
				   (match_operand:DI 4 "const_int_operand")]
				  UNSPEC_SVE_CNT_PAT))
		     (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
			     (match_operand:DI 3 "const_int_operand")
			     (match_operand:DI 4 "const_int_operand")]
			    UNSPEC_SVE_CNT_PAT)
		 (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)
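
;; For example, a signed saturating increment by the number of .d
;; elements in the VL4 pattern takes the 32-bit value in w0 and writes
;; the sign-extended result to x0 (hypothetical register choice):
;;
;;	sqincd	x0, w0, vl4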

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
		      (zero_extend:DI
			(unspec:SI [(match_operand:DI 2 "const_int_operand")
				    (match_operand:DI 3 "const_int_operand")
				    (match_operand:DI 4 "const_int_operand")]
				   UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (unspec:SI [(match_operand:DI 2 "const_int_operand")
			      (match_operand:DI 3 "const_int_operand")
			      (match_operand:DI 4 "const_int_operand")]
			     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP
;; -------------------------------------------------------------------------
9800
;; Count the number of set bits in a predicate.  Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
		      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")
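
;; For illustration only (an editorial sketch): a call such as
;;
;;   #include <arm_sve.h>
;;   uint64_t f (svbool_t pg, svbool_t p) { return svcntp_b64 (pg, p); }
;;
;; should map onto this pattern and assemble to something like
;; "cntp x0, p0, p1.d".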

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
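
;; Sketch of expected use (editorial, not from this file):
;;
;;   #include <arm_sve.h>
;;   uint64_t f (uint64_t x, svbool_t p) { return svqincp_n_u64_b8 (x, p); }
;;
;; should expand through this pattern and assemble to something like
;; "uqincp x0, p0.b".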

;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)
	  (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)
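
;; This pattern has no expander and is instead intended to be matched by
;; combine.  A plausible (editorial) source form:
;;
;;   #include <arm_sve.h>
;;   uint32_t f (uint32_t x, svbool_t p)
;;   {
;;     return x + (uint32_t) svcntp_b32 (svptrue_b32 (), p);
;;   }
;;
;; which should end up as something like "incp x0, p0.s"; only the low
;; 32 bits of the X register are significant here.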

;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand"))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_PLUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
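
;; The two asm forms above differ because of how the 32-bit result is
;; extended to 64 bits: the signed variant (e.g. "sqincp x0, p0.b, w0")
;; reads the input as %w0 and writes the sign-extended result to %x0,
;; while the unsigned variant (e.g. "uqincp w0, p0.b") writes %w0 and
;; relies on the implicit zero-extension of W-register writes.  (The
;; example mnemonics are illustrative.)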

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
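
;; Illustrative use (editorial sketch):
;;
;;   #include <arm_sve.h>
;;   svuint64_t f (svuint64_t x, svbool_t p) { return svqincp_u64 (x, p); }
;;
;; should go through this pattern and assemble to something like
;; "uqincp z0.d, p0", or a movprfx'd variant when the destination is not
;; tied to the input.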

;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
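
;; As with the svpattern case earlier, the HI form matches the truncated
;; SI count via subreg_lowpart_operator.  An editorial example:
;;
;;   #include <arm_sve.h>
;;   svuint16_t f (svuint16_t x, svbool_t p) { return svqincp_u16 (x, p); }
;;
;; should assemble to something like "uqincp z0.h, p0".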

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand")
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand" "0")
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
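
;; Editorial sketch of expected use:
;;
;;   #include <arm_sve.h>
;;   uint64_t f (uint64_t x, svbool_t p) { return svqdecp_n_u64_b16 (x, p); }
;;
;; should expand through this pattern and assemble to something like
;; "uqdecp x0, p0.h".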

;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
;; operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI
	  (match_operand:SI 1 "register_operand" "0")
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand")
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_MINUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
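
;; As in the saturating increment above, the signed form (e.g.
;; "sqdecp x0, p0.b, w0") reads %w0 and writes the sign-extended result
;; to %x0, while the unsigned form (e.g. "uqdecp w0, p0.b") relies on
;; implicit zero-extension.  (Example mnemonics are illustrative.)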

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
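
;; Editorial sketch:
;;
;;   #include <arm_sve.h>
;;   svint64_t f (svint64_t x, svbool_t p) { return svqdecp_s64 (x, p); }
;;
;; should assemble to something like "sqdecp z0.d, p0".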

;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
