;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; ---- Note on FFR handling
;;
;; == Moves
;; ---- Moves of single vectors
;; ---- Moves of multiple vectors
;; ---- Moves of predicates
;; ---- Moves relating to the FFR
;;
;; == Loads
;; ---- Normal contiguous loads
;; ---- Extending contiguous loads
;; ---- First-faulting contiguous loads
;; ---- First-faulting extending contiguous loads
;; ---- Non-temporal contiguous loads
;; ---- Normal gather loads
;; ---- Extending gather loads
;; ---- First-faulting gather loads
;; ---- First-faulting extending gather loads
;;
;; == Prefetches
;; ---- Contiguous prefetches
;; ---- Gather prefetches
;;
;; == Stores
;; ---- Normal contiguous stores
;; ---- Truncating contiguous stores
;; ---- Non-temporal contiguous stores
;; ---- Normal scatter stores
;; ---- Truncating scatter stores
;;
;; == Vector creation
;; ---- [INT,FP] Duplicate element
;; ---- [INT,FP] Initialize from individual elements
;; ---- [INT] Linear series
;; ---- [PRED] Duplicate element
;;
;; == Vector decomposition
;; ---- [INT,FP] Extract index
;; ---- [INT,FP] Extract active element
;; ---- [PRED] Extract index
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Sign and zero extension
;; ---- [INT] Truncation
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; ---- [FP] Square root
;; ---- [FP] Reciprocal square root
;; ---- [PRED] Inverse
;;
;; == Binary arithmetic
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; ---- [INT] Addition
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
;; ---- [INT] Saturating addition and subtraction
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts (rounding towards -Inf)
;; ---- [INT] Shifts (rounding towards 0)
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; ---- [FP] Complex addition
;; ---- [FP] Subtraction
;; ---- [FP] Absolute difference
;; ---- [FP] Multiplication
;; ---- [FP] Division
;; ---- [FP] Binary logical operations
;; ---- [FP] Sign copying
;; ---- [FP] Maximum and minimum
;; ---- [PRED] Binary logical operations
;; ---- [PRED] Binary logical operations (inverted second input)
;; ---- [PRED] Binary logical operations (inverted result)
;;
;; == Ternary arithmetic
;; ---- [INT] MLA and MAD
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
;; ---- [INT] Matrix multiply-accumulate
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; ---- [INT,FP] Compare and select
;; ---- [INT] Comparisons
;; ---- [INT] While tests
;; ---- [FP] Direct comparisons
;; ---- [FP] Absolute comparisons
;; ---- [PRED] Select
;; ---- [PRED] Test bits
;;
;; == Reductions
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Special-purpose unary permutes
;; ---- [INT,FP] Special-purpose binary permutes
;; ---- [PRED] Special-purpose unary permutes
;; ---- [PRED] Special-purpose binary permutes
;;
;; == Conversions
;; ---- [INT<-INT] Packs
;; ---- [INT<-INT] Unpacks
;; ---- [INT<-FP] Conversions
;; ---- [INT<-FP] Packs
;; ---- [INT<-FP] Unpacks
;; ---- [FP<-INT] Conversions
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
;; == Vector partitioning
;; ---- [PRED] Unary partitioning
;; ---- [PRED] Binary partitioning
;; ---- [PRED] Scalarization
;;
;; == Counting elements
;; ---- [INT] Count elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; ---- [INT] Count elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; ---- [INT] Decrement by the number of elements in a predicate (vector)

;; =========================================================================
;; == General notes
;; =========================================================================
;;
;; -------------------------------------------------------------------------
;; ---- Note on the handling of big-endian SVE
;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PTEST
;; -------------------------------------------------------------------------
;;
;; SVE provides a PTEST instruction for testing the active lanes of a
;; predicate and setting the flags based on the result.  The associated
;; condition code tests are:
;;
;; - any (= ne): at least one active bit is set
;; - none (= eq): all active bits are clear (*)
;; - first (= mi): the first active bit is set
;; - nfrst (= pl): the first active bit is clear (*)
;; - last (= cc): the last active bit is set
;; - nlast (= cs): the last active bit is clear (*)
;;
;; where the conditions marked (*) are also true when there are no active
;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
;; of a PTEST use the condition code mode CC_NZC.
;;
;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
;; This means that for other predicate modes, we need a governing predicate
;; in which all bits are defined.
;;
;; For example, most predicated .H operations ignore the odd bits of the
;; governing predicate, so that an active lane is represented by the
;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
;; any value.  To test a .H predicate, we instead need "10" and "00"
;; respectively, so that the condition only tests the even bits of the
;; predicate.
;;
;; Several instructions set the flags as a side-effect, in the same way
;; that a separate PTEST would.  It's important for code quality that we
;; use these flags results as often as possible, particularly in the case
;; of WHILE* and RDFFR.
;;
;; Also, some of the instructions that set the flags are unpredicated
;; and instead implicitly test all .B, .H, .S or .D elements, as though
;; they were predicated on a PTRUE of that size.  For example, a .S
;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
;; would.
;;
;; We therefore need to represent PTEST operations in a way that
;; makes it easy to combine them with both predicated and unpredicated
;; operations, while using a VNx16BI governing predicate for all
;; predicate modes.  We do this using:
;;
;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
;;
;; where:
;;
;; - GP is the real VNx16BI governing predicate
;;
;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
;;   GP to CAST_GP are guaranteed to be clear in GP.
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
;;   SVE_MAYBE_NOT_PTRUE otherwise.
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
;;
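;; For example, a PTEST of a .B predicate P2 governed by a known-PTRUE
;; .B predicate P1 could be written as follows (an illustrative sketch,
;; with hard-coded predicate registers standing in for match_operands):
;;
;;   (unspec:CC_NZC [(reg:VNx16BI P1)
;;                   (reg:VNx16BI P1)
;;                   (const_int SVE_KNOWN_PTRUE)
;;                   (reg:VNx16BI P2)]
;;                  UNSPEC_PTEST)
;;
;; Since OP already has mode VNx16BI, CAST_GP here is just GP again;
;; for a .H, .S or .D OP, the second element would instead be GP viewed
;; in OP's predicate mode.
;;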
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PRED_Z
;; -------------------------------------------------------------------------
;;
;; SVE integer comparisons are predicated and return zero for inactive
;; lanes.  Sometimes we use them with predicates that are all-true and
;; sometimes we use them with general predicates.
;;
;; The integer comparisons also set the flags and so build in the effect
;; of a PTEST.  We therefore want to be able to combine integer comparison
;; patterns with PTESTs of the result.  One difficulty with doing this is
;; that (as noted above) the PTEST is always a .B operation and so can place
;; stronger requirements on the governing predicate than the comparison does.
;;
;; For example, when applying a separate PTEST to the result of a full-vector
;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
;; .B PTRUE.  In contrast, the comparison might be predicated on either
;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
;; bits don't matter for .H operations.
;;
;; We therefore can't rely on a full-vector comparison using the same
;; predicate register as a following PTEST.  We instead need to remember
;; whether a comparison is known to be a full-vector comparison and use
;; this information in addition to a check for equal predicate registers.
;; At the same time, it's useful to have a common representation for all
;; integer comparisons, so that they can be handled by a single set of
;; patterns.
;;
;; We therefore take a similar approach to UNSPEC_PTEST above and use:
;;
;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
;;
;; where:
;;
;; - GP is the governing predicate, of mode <M:VPRED>
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
;;   otherwise
;;
;; - CODE is the comparison code
;;
;; - OP0 and OP1 are the values being compared, of mode M
;;
;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
;;
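;; For example, a VNx4SI comparison "Z0 < Z1" governed by an all-true
;; predicate P0 would look like this (again an illustrative sketch with
;; hard-coded registers; the comparison rtx itself carries the associated
;; predicate mode, since its result is a predicate):
;;
;;   (unspec:VNx4BI [(reg:VNx4BI P0)
;;                   (const_int SVE_KNOWN_PTRUE)
;;                   (lt:VNx4BI (reg:VNx4SI Z0)
;;                              (reg:VNx4SI Z1))]
;;                  UNSPEC_PRED_Z)
;;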
;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
;; Many SVE integer operations are predicated.  We can generate them
;; from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;     We can make extra lanes active without changing the behavior
;;     (although for code-quality reasons we should avoid doing so
;;     needlessly).
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive
;;     lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the
;;     function arguments.
;;
;; For (1) and (2) we need a way of attaching the predicate to a normal
;; unpredicated integer operation.  We do this using:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
;;
;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
;; it always is for (1), but might not be for (2).
;;
;; The unspec as a whole has the same value as (code:M ...) when PRED is
;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
;; but as noted above, we should only do so if there's a specific benefit.
;;
;; (The "_X" in the unspec is named after the ACLE functions in (2).)
;;
;; For (3) and (4) we can simply use the SVE port's normal representation
;; of a predicate-based select:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
;;
;; where INACTIVE specifies the values of inactive lanes.
;;
;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
;; than inserting the integer operation directly.  This is mostly useful
;; if we want the combine pass to merge an integer operation with an explicit
;; vcond_mask (in other words, with a following SEL instruction).  However,
;; it's generally better to merge such operations at the gimple level
;; using (3).
;;
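;; For example (an illustrative sketch with hard-coded registers), the
;; VNx4SI addition "Z0 + Z1" generated from the unpredicated optab in (1)
;; wraps a normal (plus ...) in the unspec, with an all-true PTRUE in P0:
;;
;;   (unspec:VNx4SI [(reg:VNx4BI P0)
;;                   (plus:VNx4SI (reg:VNx4SI Z0)
;;                                (reg:VNx4SI Z1))]
;;                  UNSPEC_PRED_X)
;;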
;; -------------------------------------------------------------------------
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; -------------------------------------------------------------------------
;;
;; Most SVE floating-point operations are predicated.  We can generate
;; them from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;
;;     The instruction must have the same exception behavior as the
;;     function call unless things like command-line flags specifically
;;     allow otherwise.  For example, with -ffast-math, it is OK to
;;     raise exceptions for inactive lanes, but normally it isn't.
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive
;;     lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the
;;     function arguments.
;;
;; So:
;;
;; - In (1), the predicate is known to be all true and the pattern can use
;;   unpredicated operations where available.
;;
;; - In (2), the predicate might or might not be all true.  The pattern can
;;   use unpredicated instructions if the predicate is all-true or if things
;;   like command-line flags allow exceptions for inactive lanes.
;;
;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
;;   might be inactive and inactive lanes of the result must have specific
;;   values.  There is no scope for using unpredicated instructions (and no
;;   reason to want to), so the question about command-line flags doesn't
;;   arise.
;;
;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
;; in combination with a separate predicate operand, e.g.
;;
;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
;;           ....)
;;
;; because (sqrt ...) can raise an exception for any lane, including
;; inactive ones.  We therefore need to use an unspec instead.
;;
;; Also, (2) requires some way of distinguishing the case in which the
;; predicate might have inactive lanes and cannot be changed from the
;; case in which the predicate has no inactive lanes or can be changed.
;; This information is also useful when matching combined FP patterns
;; in which the predicates might not be equal.
;;
;; We therefore model FP operations as an unspec of the form:
;;
;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
;;
;; where:
;;
;; - PRED is the governing predicate.
;;
;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
;;
;; - OP0 OP1 ... are the normal input operands to the operation.
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
;;
;; For (3) and (4), we combine these operations with an UNSPEC_SEL
;; that selects between the result of the FP operation and the "else"
;; value.  (This else value is a merge input for _m ACLE functions
;; and zero for _z ACLE functions.)  The outer pattern then has the form:
;;
;;   (unspec [pred fp_operation else_value] UNSPEC_SEL)
;;
;; This means that the patterns for (3) and (4) have two predicates:
;; one for the FP operation itself and one for the UNSPEC_SEL.
;; This pattern is equivalent to the result of combining an instance
;; of (1) or (2) with a separate vcond instruction, so these patterns
;; are useful as combine targets too.
;;
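;; For example (an illustrative sketch with hard-coded registers and a
;; schematic zero vector), a _z floating-point addition of two VNx4SF
;; vectors (case (4), with inactive lanes set to zero) would be an
;; UNSPEC_COND_FADD wrapped in an UNSPEC_SEL:
;;
;;   (unspec:VNx4SF
;;     [(reg:VNx4BI P0)
;;      (unspec:VNx4SF [(reg:VNx4BI P0)
;;                      (const_int SVE_STRICT_GP)
;;                      (reg:VNx4SF Z0)
;;                      (reg:VNx4SF Z1)]
;;                     UNSPEC_COND_FADD)
;;      (const_vector:VNx4SF [0.0 ...])]
;;     UNSPEC_SEL)
;;
;; The strictness value is SVE_STRICT_GP because the FADD must not raise
;; exceptions for lanes that P0 marks as inactive.
;;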
;; However, in the combine case, the instructions that we want to
;; combine might use different predicates.  Then:
;;
;; - Some of the active lanes of the FP operation might be discarded
;;   by the UNSPEC_SEL.  It's OK to drop the FP operation on those lanes,
;;   even for SVE_STRICT_GP, since the operations on those lanes are
;;   effectively dead code.
;;
;; - Some of the inactive lanes of the FP operation might be selected
;;   by the UNSPEC_SEL, giving unspecified values for those lanes.
;;   SVE_RELAXED_GP lets us extend the FP operation to cover these
;;   extra lanes, but SVE_STRICT_GP does not.
;;
;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
;; This typically leads to patterns like:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (unspec [(match_operand N)
;;                     (const_int SVE_RELAXED_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC>)
;;            ...])
;;
;; where operand N is allowed to be anything.  These instructions then
;; have rewrite rules to replace operand N with operand 1, which gives the
;; instructions a canonical form and means that the original operand N is
;; not kept live unnecessarily.
;;
;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
;; a subset of the FP operation predicate.  This case isn't interesting
;; for FP operations that have an all-true predicate, since such operations
;; use SVE_RELAXED_GP instead.  And it is not possible for instruction
;; conditions to track the subset relationship for arbitrary registers.
;; So in practice, the only useful case for SVE_STRICT_GP is the one
;; in which the predicates match:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (unspec [(match_dup 1)
;;                     (const_int SVE_STRICT_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC>)
;;            ...])
;;
;; This pattern would also be correct for SVE_RELAXED_GP, but it would
;; be redundant with the one above.  However, if the combine pattern
;; has multiple FP operations, using a match_operand allows combinations
;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
;; that the predicates are the same:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (...
;;               (unspec [(match_dup 1)
;;                        (match_operand:SI N "aarch64_sve_gp_strictness")
;;                        ...]
;;                       UNSPEC_COND_<MNEMONIC1>)
;;               (unspec [(match_dup 1)
;;                        (match_operand:SI M "aarch64_sve_gp_strictness")
;;                        ...]
;;                       UNSPEC_COND_<MNEMONIC2>) ...)
;;            ...])
;;
;; The fully-relaxed version of this pattern is:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (...
;;               (unspec [(match_operand N)
;;                        (const_int SVE_RELAXED_GP)
;;                        ...]
;;                       UNSPEC_COND_<MNEMONIC1>)
;;               (unspec [(match_operand M)
;;                        (const_int SVE_RELAXED_GP)
;;                        ...]
;;                       UNSPEC_COND_<MNEMONIC2>) ...)
;;            ...])
;;
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
;;
;; Logically we want to divide FFR-related instructions into regions
;; that contain exactly one of:
;;
;; - a single write to the FFR
;; - any number of reads from the FFR (but only one read is likely)
;; - any number of LDFF1 and LDNF1 instructions
;;
;; However, LDFF1 and LDNF1 instructions should otherwise behave like
;; normal loads as far as possible.  This means that they should be
;; schedulable within a region in the same way that LD1 would be,
;; and they should be deleted as dead if the result is unused.  The loads
;; should therefore not write to the FFR, since that would both serialize
;; the loads with respect to each other and keep the loads live for any
;; later RDFFR.
;;
;; We get around this by using a fake "FFR token" (FFRT) to help describe
;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
;; while using the FFRT keeps the instruction within its region.
;; Specifically:
;;
;; - Writes start a new FFRT region as well as setting the FFR:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;
;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
;;   loads stay within the same FFRT region:
;;
;;       L1: load data while using the FFRT
;;
;;   In addition, any FFRT region that includes a load also has at least one
;;   instance of:
;;
;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
;;
;;   to make it clear that the region both reads from and writes to the FFR.
;;
;; - Reads do the following:
;;
;;       R1: FFRT = FFR  [type == no_insn]
;;       R2: read from the FFRT
;;       R3: FFRT = update(FFRT)  [type == no_insn]
;;
;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
;;   cannot move backwards across R3.
;;
;; This way, writes are only kept alive by later loads or reads,
;; and write/read pairs fold normally.  For two consecutive reads,
;; the first R3 is made dead by the second R1, which in turn becomes
;; redundant with the first R1.  We then have:
;;
;;   first R1: FFRT = FFR
;;   first read from the FFRT
;;   second read from the FFRT
;;   second R3: FFRT = update(FFRT)
;;
;; i.e. the two FFRT regions collapse into a single one with two
;; independent reads.
;;
;; The model still prevents some valid optimizations though.  For example,
;; if all loads in an FFRT region are deleted as dead, nothing would remove
;; the L2 instructions.
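;;
;; As an illustration of the scheme above (schematic only, with
;; hard-coded registers), a single first-faulting load followed by a
;; fault check might produce the following region structure:
;;
;;   W1: setffr                     ; FFRT = <new>, FFR = all-true
;;   L1: ldff1w z0.s, p0/z, [x0]    ; load uses the FFRT
;;   L2: FFR = update(FFR, FFRT)    ; type == no_insn
;;   R1: FFRT = FFR                 ; type == no_insn
;;   R2: rdffr p1.b, p0/z           ; read from the FFRT
;;   R3: FFRT = update(FFRT)        ; type == no_insn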

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Moves of single vectors
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV  (including aliases)
;; - LD1B (contiguous form)
;; - LD1D (    "      "   )
;; - LD1H (    "      "   )
;; - LD1W (    "      "   )
;; - LDR
;; - ST1B (contiguous form)
;; - ST1D (    "      "   )
;; - ST1H (    "      "   )
;; - ST1W (    "      "   )
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
;; little-endian ordering is acceptable.  Only allow memory operations during
;; and after RA; before RA we want the predicated load and store patterns to
;; be used instead.
(define_insn "*aarch64_sve_mov<mode>_ldr_str"
  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable.  Memory
;; accesses require secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE
   && <MODE>mode != VNx16QImode
   && (BYTES_BIG_ENDIAN
       || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle memory reloads for modes that can't use LDR and STR.  We use
;; a byte PTRUE for all modes to try to encourage reuse.  This pattern
;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
(define_expand "aarch64_sve_reload_mem"
  [(parallel
     [(set (match_operand 0)
           (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)

;; A predicated move in which the predicate is known to be all-true.
;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
          UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   #
   ld1<Vesize>\t%0.<Vctype>, %1/z, %2
   st1<Vesize>\t%2.<Vctype>, %1, %0"
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)

;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 2 "aarch64_any_register_operand" "w")]
          UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)

;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
;; This is equivalent to a subreg on little-endian targets but not for
;; big-endian; see the comment at the head of the file for details.
(define_expand "@aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand")
        (unspec:SVE_ALL
          [(match_operand 1 "aarch64_any_register_operand")]
          UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
      {
        emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
        DONE;
      }
  }
)

;; A pattern for handling type punning on big-endian targets.  We use a
;; special predicate for operand 1 to reduce the number of patterns.
(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand 1 "aarch64_any_register_operand" "w")]
          UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves of multiple vectors
;; -------------------------------------------------------------------------
;; All patterns in this section are synthetic and split to real
;; instructions after reload.
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && BYTES_BIG_ENDIAN)
      {
        gcc_assert (can_create_pseudo_p ());
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
        (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)

;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
        (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
        {
          rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
                                             i * BYTES_PER_SVE_VECTOR);
          rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
                                            i * BYTES_PER_SVE_VECTOR);
          emit_insn (gen_rtx_SET (subdest, subsrc));
        }
    DONE;
  }
)

;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
          UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
        rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
                                           <MODE>mode,
                                           i * BYTES_PER_SVE_VECTOR);
        rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
                                          <MODE>mode,
                                          i * BYTES_PER_SVE_VECTOR);
        aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)

;; -------------------------------------------------------------------------
;; ---- Moves of predicates
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV
;; - LDR
;; - PFALSE
;; - PTRUE
;; - PTRUES
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
        (match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }
  }
)

(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
        (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   mov\t%0.b, %1.b
   str\t%1, %0
   ldr\t%0, %1
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Match PTRUES Pn.B when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
             [(unspec:VNx16BI
                [(match_operand:SI 4 "const_int_operand")
                 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
                UNSPEC_PTRUE)])]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)

;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (subreg:PRED_HSD
             (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
               [(unspec:VNx16BI
                  [(match_operand:SI 4 "const_int_operand")
                   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
                  UNSPEC_PTRUE)]) 0)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Match PTRUES Pn.B when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
             [(unspec:VNx16BI
                [(match_operand:SI 4 "const_int_operand")
                 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
                UNSPEC_PTRUE)])]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)

;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (subreg:PRED_HSD
             (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
               [(unspec:VNx16BI
                  [(match_operand:SI 4 "const_int_operand")
                   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
                  UNSPEC_PTRUE)]) 0)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves relating to the FFR
;; -------------------------------------------------------------------------
;; RDFFR
;; RDFFRS
;; SETFFR
;; WRFFR
;; -------------------------------------------------------------------------

;; [W1 in the block comment above about FFR handling]
;;
;; Write to the FFR and start a new FFRT scheduling region.
(define_insn "aarch64_wrffr"
  [(set (reg:VNx16BI FFR_REGNUM)
        (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
   (set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
  "TARGET_SVE"
  "@
   setffr
   wrffr\t%0.b"
)

;; [L2 in the block comment above about FFR handling]
;;
;; Introduce a read from and write to the FFR in the current FFRT region,
;; so that the FFR value is live on entry to the region and so that the FFR
;; value visibly changes within the region.  This is used (possibly multiple
;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
(define_insn "aarch64_update_ffr_for_load"
  [(set (reg:VNx16BI FFR_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
                         (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R1 in the block comment above about FFR handling]
;;
;; Notionally copy the FFR to the FFRT, so that the current FFR value
;; can be read from there by the RDFFR instructions below.  This acts
;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
;; creates a natural dependency with earlier writes.
(define_insn "aarch64_copy_ffr_to_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (reg:VNx16BI FFR_REGNUM))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R2 in the block comment above about FFR handling]
;;
;; Read the FFR via the FFRT.
(define_insn "aarch64_rdffr"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffr\t%0.b"
)

;; Likewise with zero predication.
(define_insn "aarch64_rdffr_z"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_operand:VNx16BI 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "rdffr\t%0.b, %1/z"
)

;; Read the FFR to test for a fault, without using the predicate result.
(define_insn "*aarch64_rdffr_z_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Read the FFR with zero predication and test the result.
(define_insn "*aarch64_rdffr_z_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_dup 1)))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; [R3 in the block comment above about FFR handling]
;;
;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
(define_insn "aarch64_update_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; =========================================================================
;; == Loads
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LD2B
;; - LD2D
;; - LD2H
;; - LD2W
;; - LD3B
;; - LD3D
;; - LD3H
;; - LD3W
;; - LD4B
;; - LD4D
;; - LD4H
;; - LD4W
;; -------------------------------------------------------------------------

;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "memory_operand")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated load and extend, with 8 elements per 128-bit block.
(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
           (ANY_EXTEND:SVE_HSDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
                (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
               UNSPEC_LD1_SVE))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- First-faulting contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1D
;; - LDFF1H
;; - LDFF1W
;; - LDNF1B
;; - LDNF1D
;; - LDNF1H
;; - LDNF1W
;; -------------------------------------------------------------------------

;; Contiguous non-extending first-faulting or non-faulting loads.
(define_insn "@aarch64_ld<fn>f1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
           (reg:VNx16BI FFRT_REGNUM)]
          SVE_LDFF1_LDNF1))]
  "TARGET_SVE"
  "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; - LDNF1B
;; - LDNF1H
;; - LDNF1SB
;; - LDNF1SH
;; - LDNF1SW
;; - LDNF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting or non-faulting load and extend.
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
        (unspec:SVE_HSDI
          [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
           (ANY_EXTEND:SVE_HSDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
                (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
                (reg:VNx16BI FFRT_REGNUM)]
               SVE_LDFF1_LDNF1))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------

;; Predicated contiguous non-temporal load.
(define_insn "@aarch64_ldnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "memory_operand" "m")]
          UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Normal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1D
;; - LD1W
;; -------------------------------------------------------------------------

;; Unpredicated gather loads.
(define_expand "gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_24 0 "register_operand")
        (unspec:SVE_24
          [(match_dup 5)
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
           (match_operand:<V_INT_CONTAINER> 2 "register_operand")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.s, %5/z, [%2.s]
   ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)

;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%2.d]
   ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (ANY_EXTEND:VNx2DI
                (match_operand:VNx2SI 2 "register_operand" "w, w"))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
  [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
        (unspec:SVE_2
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (mem:BLK (scratch))]
          UNSPEC_LD1_GATHER))]
  "TARGET_SVE"
  "@
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)

;; -------------------------------------------------------------------------
;; ---- Extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
;; true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4HSI
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_4HSI
             (unspec:SVE_4BHI
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending gather loads for 64-bit elements. The value of
;; operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2HSDI
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
                (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
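
;; Hedged illustration (invented for the example, not from the original
;; file): a widening indexed load is the kind of source that can reach
;; these extending gather patterns, folding the sign extension into the
;; gather itself; the asm in the comment is indicative only.
;;
;;   void
;;   gather_widen (int *restrict dst, const signed char *restrict src,
;;                 const int *restrict idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = src[idx[i]];   /* e.g. ld1sb z0.s, p0/z, [x1, z1.s, sxtw] */
;;   }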

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (ANY_EXTEND2:VNx2DI
                     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 7)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; -------------------------------------------------------------------------
;; ---- First-faulting gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1D
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting gather loads for 32-bit elements. Operand
;; 3 is true for unsigned extension and false for signed extension.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_FULL_S
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1w\t%0.s, %5/z, [%2.s]
   ldff1w\t%0.s, %5/z, [%2.s, #%1]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)

;; Predicated first-faulting gather loads for 64-bit elements. The value
;; of operand 3 doesn't matter in this case.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%2.d]
   ldff1d\t%0.d, %5/z, [%2.d, #%1]
   ldff1d\t%0.d, %5/z, [%1, %2.d]
   ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)
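
;; Hedged usage sketch (not from the original file): the ACLE reaches
;; these patterns through first-faulting gather intrinsics, broadly as
;; below; the FFR records which lanes actually loaded. Check the ACLE
;; specification for the exact intrinsic spellings.
;;
;;   #include <arm_sve.h>
;;
;;   svfloat64_t
;;   first_fault_gather (svbool_t pg, const double *base, svint64_t idx)
;;   {
;;     svsetffr ();                                   /* prime the FFR */
;;     svfloat64_t x = svldff1_gather_s64index_f64 (pg, base, idx);
;;     svbool_t ok = svrdffr ();                      /* lanes that loaded */
;;     return svsel_f64 (ok, x, svdup_n_f64 (0.0));   /* zero faulted lanes */
;;   }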

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_ldff1_gather<mode>_uxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated extending first-faulting gather loads for 32-bit elements.
;; Operand 3 is true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
  [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:VNx4_WIDE
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx4_WIDE
             (unspec:VNx4_NARROW
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending first-faulting gather loads for 64-bit elements.
;; The value of operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
        (unspec:VNx2_WIDE
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
                (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 6)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 7)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; =========================================================================
;; == Prefetches
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Contiguous prefetches
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
;; operation (as an svprfop), with operands 3 and 4 providing distilled
;; information.
(define_insn "@aarch64_sve_prefetch<mode>"
  [(prefetch (unspec:DI
               [(match_operand:<VPRED> 0 "register_operand" "Upl")
                (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
                (match_operand:DI 2 "const_int_operand")]
               UNSPEC_SVE_PREFETCH)
             (match_operand:DI 3 "const_int_operand")
             (match_operand:DI 4 "const_int_operand"))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
    return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
  }
)

;; -------------------------------------------------------------------------
;; ---- Gather prefetches
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Predicated gather prefetches for 32-bit bases and offsets. The operands
;; are:
;; 0: the governing predicate
;; 1: the scalar component of the address
;; 2: the vector component of the address
;; 3: 1 for zero extension, 0 for sign extension
;; 4: the scale multiplier
;; 5: a vector zero that identifies the mode of data being accessed
;; 6: the prefetch operator (an svprfop)
;; 7: the normal RTL prefetch rw flag
;; 8: the normal RTL prefetch locality value
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      {"prf<SVE_FULL_I:Vesize>", "%0, [%2.s]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]"},
      {"prfb", "%0, [%1, %2.s, sxtw]"},
      {"prfb", "%0, [%1, %2.s, uxtw]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"}
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)
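
;; Hedged usage sketch (not from the original file): the svprfop operand
;; described above matches the ACLE enumeration of the same name, so a
;; contiguous predicated prefetch can be requested along these lines.
;;
;;   #include <arm_sve.h>
;;
;;   void
;;   prefetch_block (svbool_t pg, const int32_t *base)
;;   {
;;     svprfw (pg, base, SV_PLDL1KEEP);   /* emits PRFW for active lanes */
;;   }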

;; Predicated gather prefetches for 64-bit elements. The value of operand 3
;; doesn't matter in this case.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
                (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      {"prf<SVE_FULL_I:Vesize>", "%0, [%2.d]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]"},
      {"prfb", "%0, [%1, %2.d]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"}
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (unspec:VNx2DI_ONLY
                  [(match_operand 9)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      {"prfb", "%0, [%1, %2.d, sxtw]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"}
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
  "&& !rtx_equal_p (operands[0], operands[9])"
  {
    operands[9] = copy_rtx (operands[0]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (and:VNx2DI_ONLY
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      {"prfb", "%0, [%1, %2.d, uxtw]"},
      {"prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"}
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; =========================================================================
;; == Stores
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous stores
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - ST1B
;; - ST1D
;; - ST1H
;; - ST1W
;; - ST2B
;; - ST2D
;; - ST2H
;; - ST2W
;; - ST3B
;; - ST3D
;; - ST3H
;; - ST3W
;; - ST4B
;; - ST4D
;; - ST4H
;; - ST4W
;; -------------------------------------------------------------------------

;; Predicated ST1.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vctype>, %2, %0"
)

;; Unpredicated ST[234]. This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though. In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "register_operand")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
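
;; Hedged illustration (invented for the example): after if-conversion, a
;; conditional store loop like the sketch below is the kind of source that
;; can map to the predicated ST1 pattern above.
;;
;;   void
;;   masked_store (int *restrict dst, const int *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       if (src[i] > 0)
;;         dst[i] = src[i];   /* ST1W under the comparison predicate */
;;   }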

;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Truncating contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncate and store, with 8 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
  [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
        (unspec:VNx8_NARROW
          [(match_operand:VNx8BI 2 "register_operand" "Upl")
           (truncate:VNx8_NARROW
             (match_operand:VNx8_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
)

;; Predicated truncate and store, with 4 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
        (unspec:VNx4_NARROW
          [(match_operand:VNx4BI 2 "register_operand" "Upl")
           (truncate:VNx4_NARROW
             (match_operand:VNx4_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
)

;; Predicated truncate and store, with 2 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
        (unspec:VNx2_NARROW
          [(match_operand:VNx2BI 2 "register_operand" "Upl")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

(define_insn "@aarch64_stnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STNT1_SVE))]
  "TARGET_SVE"
  "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)
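
;; Hedged usage sketch (not from the original file): the ACLE exposes the
;; non-temporal store above as svstnt1, e.g.:
;;
;;   #include <arm_sve.h>
;;
;;   void
;;   stream_store (svbool_t pg, double *base, svfloat64_t x)
;;   {
;;     svstnt1_f64 (pg, base, x);   /* STNT1D: non-temporal hint */
;;   }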

;; -------------------------------------------------------------------------
;; ---- Normal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1D
;; - ST1W
;; -------------------------------------------------------------------------

;; Unpredicated scatter stores.
(define_expand "scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
           (match_operand:<V_INT_CONTAINER> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
           (match_operand:SVE_24 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.s, %5, [%1.s]
   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated scatter stores for 64-bit elements. The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%1.d]
   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<Vesize>\t%4.d, %5, [%0, %1.d]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (ANY_EXTEND:VNx2DI
                (match_operand:VNx2SI 1 "register_operand" "w, w"))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)
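
;; Hedged illustration (invented for the example): an indexed-store loop
;; such as the C sketch below is the kind of source that can map to these
;; scatter patterns.
;;
;;   void
;;   scatter_f64 (double *restrict dst, const long *restrict idx,
;;                const double *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[idx[i]] = src[i];   /* indexed store -> ST1D scatter */
;;   }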

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 1 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; -------------------------------------------------------------------------
;; ---- Truncating scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
;; true for unsigned extension and false for signed extension.
(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (truncate:VNx4_NARROW
             (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)
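
;; Hedged illustration (invented for the example): narrowing the stored
;; value folds the truncation into the scatter itself.
;;
;;   void
;;   scatter_trunc (short *restrict dst, const long *restrict idx,
;;                  const long *restrict src, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       dst[idx[i]] = (short) src[i];   /* truncating ST1H scatter */
;;   }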

;; Predicated truncating scatter stores for 64-bit elements. The value of
;; operand 2 doesn't matter in this case.
(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !rtx_equal_p (operands[5], operands[6])"
  {
    operands[6] = copy_rtx (operands[5]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 1 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; =========================================================================
;; == Vector creation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Duplicate element
;; -------------------------------------------------------------------------
;; Includes:
;; - DUP
;; - MOV
;; - LD1RB
;; - LD1RD
;; - LD1RH
;; - LD1RW
;; - LD1ROB (F64MM)
;; - LD1ROD (F64MM)
;; - LD1ROH (F64MM)
;; - LD1ROW (F64MM)
;; - LD1RQB
;; - LD1RQD
;; - LD1RQH
;; - LD1RQW
;; -------------------------------------------------------------------------

(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:VNx16BI))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA. We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (VNx16BImode);
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
    rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
    emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)
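
;; Hedged illustration (invented for the example): a splat loop is the
;; canonical source for vec_duplicate.
;;
;;   void
;;   splat (int *restrict dst, int value, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = value;   /* vec_duplicate -> MOV z0.s, w1 */
;;   }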

;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (vec_duplicate:SVE_FULL
          (match_operand:<V128> 1 "register_operand" "w")))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
;; The SVE register layout puts memory lane N into (architectural)
;; register lane N, whereas the Advanced SIMD layout puts the memory
;; lsb into the register lsb. We therefore have to describe this in rtl
;; terms as a reverse of the V128 vector followed by a duplicate.
(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (vec_duplicate:SVE_FULL
          (vec_select:<V128>
            (match_operand:<V128> 1 "register_operand" "w")
            (match_operand 2 "descending_int_parallel"))))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN
   && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
                GET_MODE_NUNITS (<V128>mode) - 1)"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory under predicate control and duplicate to
;; fill a vector.
(define_insn "@aarch64_sve_ld1rq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)

(define_insn "@aarch64_sve_ld1ro<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
                             "UO<Vesize>")]
          UNSPEC_LD1RO))]
  "TARGET_SVE_F64MM"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)
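
;; Hedged usage sketch (not from the original file): the LD1RQ pattern
;; above is reachable directly from the ACLE, e.g.:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   broadcast_quad (svbool_t pg, const float *base)
;;   {
;;     /* Load 128 bits and repeat them across the vector (LD1RQW).  */
;;     return svld1rq_f32 (pg, base);
;;   }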

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Initialize from individual elements
;; -------------------------------------------------------------------------
;; Includes:
;; - INSR
;; -------------------------------------------------------------------------

(define_expand "vec_init<mode><Vel>"
  [(match_operand:SVE_FULL 0 "register_operand")
   (match_operand 1 "")]
  "TARGET_SVE"
  {
    aarch64_sve_expand_vector_init (operands[0], operands[1]);
    DONE;
  }
)

;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
           (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
          UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Linear series
;; -------------------------------------------------------------------------
;; Includes:
;; - INDEX
;; -------------------------------------------------------------------------

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vctype>, #%1, %<vccore>2
   index\t%0.<Vctype>, %<vccore>1, #%2
   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vctype>, %<vccore>1, #%2";
  }
)
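
;; Hedged illustration (invented for the example): a linear induction is
;; the kind of source that maps to vec_series and hence to INDEX.
;;
;;   void
;;   iota (int *restrict dst, int base, int step, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = base + i * step;   /* INDEX z0.s, w1, w2 */
;;   }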

;; -------------------------------------------------------------------------
;; ---- [PRED] Duplicate element
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO. If the low bit is 1, the
;; shifted value is 1 << 63 and WHILELO (0, 1 << 63) makes every element
;; active; if the low bit is 0, the bound is zero and no elements are
;; active. An alternative would be to duplicate the input and do a
;; compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)

;; =========================================================================
;; == Vector decomposition
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Extract index
;; -------------------------------------------------------------------------
;; Includes:
;; - DUP (Advanced SIMD)
;; - DUP (SVE)
;; - EXT (SVE)
;; - ST1 (Advanced SIMD)
;; - UMOV (Advanced SIMD)
;; -------------------------------------------------------------------------

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = aarch64_pfalse_reg (<VPRED>mode);
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
  }
)
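
;; Hedged usage sketch (not from the original file): the LASTB trick used
;; by the expander above is also visible through the ACLE.
;;
;;   #include <arm_sve.h>
;;
;;   double
;;   last_element (svfloat64_t x)
;;   {
;;     /* With no active lanes, LASTB wraps round and returns the final
;;        element of the vector.  */
;;     return svlastb_f64 (svpfalse_b (), x);
;;   }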

;; Extract element zero. This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
          (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
      case 1:
        return "#";
      case 2:
        return "st1\\t{%1.<Vetype>}[0], %0";
      default:
        gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
      case 1:
        return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
      case 2:
        return "st1\\t{%1.<Vetype>}[%2], %0";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP. This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP. This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "0, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return (which_alternative == 0
"ext\t%0.b, %0.b, %0.b, #%2" 2844 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); 2845 } 2846 [(set_attr "movprfx" "*,yes")] 2847) 2848 2849;; ------------------------------------------------------------------------- 2850;; ---- [INT,FP] Extract active element 2851;; ------------------------------------------------------------------------- 2852;; Includes: 2853;; - LASTA 2854;; - LASTB 2855;; ------------------------------------------------------------------------- 2856 2857;; Extract the last active element of operand 1 into operand 0. 2858;; If no elements are active, extract the last inactive element instead. 2859(define_insn "@extract_<last_op>_<mode>" 2860 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") 2861 (unspec:<VEL> 2862 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 2863 (match_operand:SVE_FULL 2 "register_operand" "w, w")] 2864 LAST))] 2865 "TARGET_SVE" 2866 "@ 2867 last<ab>\t%<vwcore>0, %1, %2.<Vetype> 2868 last<ab>\t%<Vetype>0, %1, %2.<Vetype>" 2869) 2870 2871;; ------------------------------------------------------------------------- 2872;; ---- [PRED] Extract index 2873;; ------------------------------------------------------------------------- 2874;; The patterns in this section are synthetic. 2875;; ------------------------------------------------------------------------- 2876 2877;; Handle extractions from a predicate by converting to an integer vector 2878;; and extracting from there. 2879(define_expand "vec_extract<vpred><Vel>" 2880 [(match_operand:<VEL> 0 "register_operand") 2881 (match_operand:<VPRED> 1 "register_operand") 2882 (match_operand:SI 2 "nonmemory_operand") 2883 ;; Dummy operand to which we can attach the iterator. 2884 (reg:SVE_FULL_I V0_REGNUM)] 2885 "TARGET_SVE" 2886 { 2887 rtx tmp = gen_reg_rtx (<MODE>mode); 2888 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], 2889 CONST1_RTX (<MODE>mode), 2890 CONST0_RTX (<MODE>mode))); 2891 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); 2892 DONE; 2893 } 2894) 2895 2896;; ========================================================================= 2897;; == Unary arithmetic 2898;; ========================================================================= 2899 2900;; ------------------------------------------------------------------------- 2901;; ---- [INT] General unary arithmetic corresponding to rtx codes 2902;; ------------------------------------------------------------------------- 2903;; Includes: 2904;; - ABS 2905;; - CLS (= clrsb) 2906;; - CLZ 2907;; - CNT (= popcount) 2908;; - NEG 2909;; - NOT 2910;; ------------------------------------------------------------------------- 2911 2912;; Unpredicated integer unary arithmetic. 2913(define_expand "<optab><mode>2" 2914 [(set (match_operand:SVE_FULL_I 0 "register_operand") 2915 (unspec:SVE_FULL_I 2916 [(match_dup 2) 2917 (SVE_INT_UNARY:SVE_FULL_I 2918 (match_operand:SVE_FULL_I 1 "register_operand"))] 2919 UNSPEC_PRED_X))] 2920 "TARGET_SVE" 2921 { 2922 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 2923 } 2924) 2925 2926;; Integer unary arithmetic predicated with a PTRUE. 

;; Integer unary arithmetic predicated with a PTRUE.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_UNARY:SVE_FULL_I
             (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary arithmetic with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (SVE_INT_UNARY:SVE_FULL_I
             (match_operand:SVE_FULL_I 2 "register_operand"))
           (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated integer unary arithmetic, merging with the first input.
(define_insn "*cond_<optab><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (SVE_INT_UNARY:SVE_FULL_I
             (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary arithmetic, merging with an independent value.
;;
;; The earlyclobber isn't needed for the first alternative, but omitting
;; it would only help the case in which operands 2 and 3 are the same,
;; which is handled above rather than here. Marking all the alternatives
;; as earlyclobber helps to make the instruction more regular to the
;; register allocator.
(define_insn "*cond_<optab><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (SVE_INT_UNARY:SVE_FULL_I
             (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
           (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)
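
;; Hedged usage sketch (not from the original file): the merging forms
;; above correspond to the ACLE's _m variants, e.g.:
;;
;;   #include <arm_sve.h>
;;
;;   svint32_t
;;   cond_neg (svint32_t fallback, svbool_t pg, svint32_t x)
;;   {
;;     /* Inactive lanes take their value from FALLBACK.  */
;;     return svneg_s32_m (fallback, pg, x);
;;   }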
3003(define_insn "@aarch64_pred_<optab><mode>" 3004 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3005 (unspec:SVE_FULL_I 3006 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3007 (unspec:SVE_FULL_I 3008 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")] 3009 SVE_INT_UNARY)] 3010 UNSPEC_PRED_X))] 3011 "TARGET_SVE && <elem_bits> >= <min_elem_bits>" 3012 "@ 3013 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3014 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3015 [(set_attr "movprfx" "*,yes")] 3016) 3017 3018;; Predicated integer unary operations with merging. 3019(define_insn "@cond_<optab><mode>" 3020 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w") 3021 (unspec:SVE_FULL_I 3022 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3023 (unspec:SVE_FULL_I 3024 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")] 3025 SVE_INT_UNARY) 3026 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3027 UNSPEC_SEL))] 3028 "TARGET_SVE && <elem_bits> >= <min_elem_bits>" 3029 "@ 3030 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3031 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3032 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3033 [(set_attr "movprfx" "*,yes,yes")] 3034) 3035 3036;; ------------------------------------------------------------------------- 3037;; ---- [INT] Sign and zero extension 3038;; ------------------------------------------------------------------------- 3039;; Includes: 3040;; - SXTB 3041;; - SXTH 3042;; - SXTW 3043;; - UXTB 3044;; - UXTH 3045;; - UXTW 3046;; ------------------------------------------------------------------------- 3047 3048;; Unpredicated sign and zero extension from a narrower mode. 3049(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2" 3050 [(set (match_operand:SVE_HSDI 0 "register_operand") 3051 (unspec:SVE_HSDI 3052 [(match_dup 2) 3053 (ANY_EXTEND:SVE_HSDI 3054 (match_operand:SVE_PARTIAL_I 1 "register_operand"))] 3055 UNSPEC_PRED_X))] 3056 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3057 { 3058 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode); 3059 } 3060) 3061 3062;; Predicated sign and zero extension from a narrower mode. 3063(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2" 3064 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w") 3065 (unspec:SVE_HSDI 3066 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 3067 (ANY_EXTEND:SVE_HSDI 3068 (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))] 3069 UNSPEC_PRED_X))] 3070 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3071 "@ 3072 <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> 3073 movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>" 3074 [(set_attr "movprfx" "*,yes")] 3075) 3076 3077;; Predicated truncate-and-sign-extend operations. 
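;;
;; For example (illustrative only; the function below is not from this
;; file): the intrinsic call
;;
;;   #include <arm_sve.h>
;;   svint32_t extend_low_byte (svbool_t pg, svint32_t x)
;;   {
;;     return svextb_x (pg, x); /* Sign-extend bits 0-7 of each element.  */
;;   }
;;
;; uses the SXTB form of the pattern below, typically assembling to
;; "sxtb z0.s, p0/m, z0.s".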
3078(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>" 3079 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") 3080 (unspec:SVE_FULL_HSDI 3081 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 3082 (sign_extend:SVE_FULL_HSDI 3083 (truncate:SVE_PARTIAL_I 3084 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))] 3085 UNSPEC_PRED_X))] 3086 "TARGET_SVE 3087 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3088 "@ 3089 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3090 movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 3091 [(set_attr "movprfx" "*,yes")] 3092) 3093 3094;; Predicated truncate-and-sign-extend operations with merging. 3095(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>" 3096 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") 3097 (unspec:SVE_FULL_HSDI 3098 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 3099 (sign_extend:SVE_FULL_HSDI 3100 (truncate:SVE_PARTIAL_I 3101 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) 3102 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3103 UNSPEC_SEL))] 3104 "TARGET_SVE 3105 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3106 "@ 3107 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3108 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3109 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 3110 [(set_attr "movprfx" "*,yes,yes")] 3111) 3112 3113;; Predicated truncate-and-zero-extend operations, merging with the 3114;; first input. 3115;; 3116;; The canonical form of this operation is an AND of a constant rather 3117;; than (zero_extend (truncate ...)). 3118(define_insn "*cond_uxt<mode>_2" 3119 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3120 (unspec:SVE_FULL_I 3121 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3122 (and:SVE_FULL_I 3123 (match_operand:SVE_FULL_I 2 "register_operand" "0, w") 3124 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) 3125 (match_dup 2)] 3126 UNSPEC_SEL))] 3127 "TARGET_SVE" 3128 "@ 3129 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype> 3130 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" 3131 [(set_attr "movprfx" "*,yes")] 3132) 3133 3134;; Predicated truncate-and-zero-extend operations, merging with an 3135;; independent value. 3136;; 3137;; The earlyclobber isn't needed for the first alternative, but omitting 3138;; it would only help the case in which operands 2 and 4 are the same, 3139;; which is handled above rather than here.  Marking all the alternatives 3140;; as earlyclobber helps to make the instruction more regular to the 3141;; register allocator.
3142(define_insn "*cond_uxt<mode>_any" 3143 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") 3144 (unspec:SVE_FULL_I 3145 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3146 (and:SVE_FULL_I 3147 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") 3148 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate")) 3149 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3150 UNSPEC_SEL))] 3151 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 3152 "@ 3153 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> 3154 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> 3155 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" 3156 [(set_attr "movprfx" "*,yes,yes")] 3157) 3158 3159;; ------------------------------------------------------------------------- 3160;; ---- [INT] Truncation 3161;; ------------------------------------------------------------------------- 3162;; The patterns in this section are synthetic. 3163;; ------------------------------------------------------------------------- 3164 3165;; Truncate to a partial SVE vector from either a full vector or a 3166;; wider partial vector. This is a no-op, because we can just ignore 3167;; the unused upper bits of the source. 3168(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2" 3169 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w") 3170 (truncate:SVE_PARTIAL_I 3171 (match_operand:SVE_HSDI 1 "register_operand" "w")))] 3172 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3173 "#" 3174 "&& reload_completed" 3175 [(set (match_dup 0) (match_dup 1))] 3176 { 3177 operands[1] = aarch64_replace_reg_mode (operands[1], 3178 <SVE_PARTIAL_I:MODE>mode); 3179 } 3180) 3181 3182;; ------------------------------------------------------------------------- 3183;; ---- [INT] Logical inverse 3184;; ------------------------------------------------------------------------- 3185;; Includes: 3186;; - CNOT 3187;; ------------------------------------------------------------------------- 3188 3189;; Predicated logical inverse. 3190(define_expand "@aarch64_pred_cnot<mode>" 3191 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3192 (unspec:SVE_FULL_I 3193 [(unspec:<VPRED> 3194 [(match_operand:<VPRED> 1 "register_operand") 3195 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 3196 (eq:<VPRED> 3197 (match_operand:SVE_FULL_I 3 "register_operand") 3198 (match_dup 4))] 3199 UNSPEC_PRED_Z) 3200 (match_dup 5) 3201 (match_dup 4)] 3202 UNSPEC_SEL))] 3203 "TARGET_SVE" 3204 { 3205 operands[4] = CONST0_RTX (<MODE>mode); 3206 operands[5] = CONST1_RTX (<MODE>mode); 3207 } 3208) 3209 3210(define_insn "*cnot<mode>" 3211 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3212 (unspec:SVE_FULL_I 3213 [(unspec:<VPRED> 3214 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3215 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 3216 (eq:<VPRED> 3217 (match_operand:SVE_FULL_I 2 "register_operand" "0, w") 3218 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] 3219 UNSPEC_PRED_Z) 3220 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") 3221 (match_dup 3)] 3222 UNSPEC_SEL))] 3223 "TARGET_SVE" 3224 "@ 3225 cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3226 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3227 [(set_attr "movprfx" "*,yes")] 3228) 3229 3230;; Predicated logical inverse with merging. 
3231(define_expand "@cond_cnot<mode>" 3232 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3233 (unspec:SVE_FULL_I 3234 [(match_operand:<VPRED> 1 "register_operand") 3235 (unspec:SVE_FULL_I 3236 [(unspec:<VPRED> 3237 [(match_dup 4) 3238 (const_int SVE_KNOWN_PTRUE) 3239 (eq:<VPRED> 3240 (match_operand:SVE_FULL_I 2 "register_operand") 3241 (match_dup 5))] 3242 UNSPEC_PRED_Z) 3243 (match_dup 6) 3244 (match_dup 5)] 3245 UNSPEC_SEL) 3246 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] 3247 UNSPEC_SEL))] 3248 "TARGET_SVE" 3249 { 3250 operands[4] = CONSTM1_RTX (<VPRED>mode); 3251 operands[5] = CONST0_RTX (<MODE>mode); 3252 operands[6] = CONST1_RTX (<MODE>mode); 3253 } 3254) 3255 3256;; Predicated logical inverse, merging with the first input. 3257(define_insn_and_rewrite "*cond_cnot<mode>_2" 3258 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3259 (unspec:SVE_FULL_I 3260 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3261 ;; Logical inverse of operand 2 (as above). 3262 (unspec:SVE_FULL_I 3263 [(unspec:<VPRED> 3264 [(match_operand 5) 3265 (const_int SVE_KNOWN_PTRUE) 3266 (eq:<VPRED> 3267 (match_operand:SVE_FULL_I 2 "register_operand" "0, w") 3268 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] 3269 UNSPEC_PRED_Z) 3270 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") 3271 (match_dup 3)] 3272 UNSPEC_SEL) 3273 (match_dup 2)] 3274 UNSPEC_SEL))] 3275 "TARGET_SVE" 3276 "@ 3277 cnot\t%0.<Vetype>, %1/m, %0.<Vetype> 3278 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3279 "&& !CONSTANT_P (operands[5])" 3280 { 3281 operands[5] = CONSTM1_RTX (<VPRED>mode); 3282 } 3283 [(set_attr "movprfx" "*,yes")] 3284) 3285 3286;; Predicated logical inverse, merging with an independent value. 3287;; 3288;; The earlyclobber isn't needed for the first alternative, but omitting 3289;; it would only help the case in which operands 2 and 6 are the same, 3290;; which is handled above rather than here. Marking all the alternatives 3291;; as earlyclobber helps to make the instruction more regular to the 3292;; register allocator. 3293(define_insn_and_rewrite "*cond_cnot<mode>_any" 3294 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w") 3295 (unspec:SVE_FULL_I 3296 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3297 ;; Logical inverse of operand 2 (as above). 
3298 (unspec:SVE_FULL_I 3299 [(unspec:<VPRED> 3300 [(match_operand 5) 3301 (const_int SVE_KNOWN_PTRUE) 3302 (eq:<VPRED> 3303 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w") 3304 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))] 3305 UNSPEC_PRED_Z) 3306 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one") 3307 (match_dup 3)] 3308 UNSPEC_SEL) 3309 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3310 UNSPEC_SEL))] 3311 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" 3312 "@ 3313 cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3314 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3315 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3316 "&& !CONSTANT_P (operands[5])" 3317 { 3318 operands[5] = CONSTM1_RTX (<VPRED>mode); 3319 } 3320 [(set_attr "movprfx" "*,yes,yes")] 3321) 3322 3323;; ------------------------------------------------------------------------- 3324;; ---- [FP<-INT] General unary arithmetic that maps to unspecs 3325;; ------------------------------------------------------------------------- 3326;; Includes: 3327;; - FEXPA 3328;; ------------------------------------------------------------------------- 3329 3330;; Unpredicated unary operations that take an integer and return a float. 3331(define_insn "@aarch64_sve_<optab><mode>" 3332 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 3333 (unspec:SVE_FULL_F 3334 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")] 3335 SVE_FP_UNARY_INT))] 3336 "TARGET_SVE" 3337 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" 3338) 3339 3340;; ------------------------------------------------------------------------- 3341;; ---- [FP] General unary arithmetic corresponding to unspecs 3342;; ------------------------------------------------------------------------- 3343;; Includes: 3344;; - FABS 3345;; - FNEG 3346;; - FRECPE 3347;; - FRECPX 3348;; - FRINTA 3349;; - FRINTI 3350;; - FRINTM 3351;; - FRINTN 3352;; - FRINTP 3353;; - FRINTX 3354;; - FRINTZ 3355;; - FRSQRTE 3356;; - FSQRT 3357;; ------------------------------------------------------------------------- 3358 3359;; Unpredicated floating-point unary operations. 3360(define_insn "@aarch64_sve_<optab><mode>" 3361 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 3362 (unspec:SVE_FULL_F 3363 [(match_operand:SVE_FULL_F 1 "register_operand" "w")] 3364 SVE_FP_UNARY))] 3365 "TARGET_SVE" 3366 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" 3367) 3368 3369;; Unpredicated floating-point unary operations. 3370(define_expand "<optab><mode>2" 3371 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3372 (unspec:SVE_FULL_F 3373 [(match_dup 2) 3374 (const_int SVE_RELAXED_GP) 3375 (match_operand:SVE_FULL_F 1 "register_operand")] 3376 SVE_COND_FP_UNARY_OPTAB))] 3377 "TARGET_SVE" 3378 { 3379 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 3380 } 3381) 3382 3383;; Predicated floating-point unary operations. 3384(define_insn "@aarch64_pred_<optab><mode>" 3385 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3386 (unspec:SVE_FULL_F 3387 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3388 (match_operand:SI 3 "aarch64_sve_gp_strictness") 3389 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3390 SVE_COND_FP_UNARY))] 3391 "TARGET_SVE" 3392 "@ 3393 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3394 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3395 [(set_attr "movprfx" "*,yes")] 3396) 3397 3398;; Predicated floating-point unary arithmetic with merging. 
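;;
;; For concreteness (a hedged sketch, not from the upstream sources):
;; a merging intrinsic such as
;;
;;   #include <arm_sve.h>
;;   svfloat32_t round_merge (svfloat32_t fallback, svbool_t pg,
;;			      svfloat32_t x)
;;   {
;;     return svrinta_m (fallback, pg, x); /* Inactive lanes keep FALLBACK.  */
;;   }
;;
;; goes through the expander below; if FALLBACK is allocated to the
;; destination register this gives "frinta z0.s, p0/m, z1.s", and
;; otherwise one of the MOVPRFX alternatives is used.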
3399(define_expand "@cond_<optab><mode>" 3400 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3401 (unspec:SVE_FULL_F 3402 [(match_operand:<VPRED> 1 "register_operand") 3403 (unspec:SVE_FULL_F 3404 [(match_dup 1) 3405 (const_int SVE_STRICT_GP) 3406 (match_operand:SVE_FULL_F 2 "register_operand")] 3407 SVE_COND_FP_UNARY) 3408 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] 3409 UNSPEC_SEL))] 3410 "TARGET_SVE" 3411) 3412 3413;; Predicated floating-point unary arithmetic, merging with the first input. 3414(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 3415 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3416 (unspec:SVE_FULL_F 3417 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3418 (unspec:SVE_FULL_F 3419 [(match_operand 3) 3420 (const_int SVE_RELAXED_GP) 3421 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3422 SVE_COND_FP_UNARY) 3423 (match_dup 2)] 3424 UNSPEC_SEL))] 3425 "TARGET_SVE" 3426 "@ 3427 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> 3428 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3429 "&& !rtx_equal_p (operands[1], operands[3])" 3430 { 3431 operands[3] = copy_rtx (operands[1]); 3432 } 3433 [(set_attr "movprfx" "*,yes")] 3434) 3435 3436(define_insn "*cond_<optab><mode>_2_strict" 3437 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3438 (unspec:SVE_FULL_F 3439 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3440 (unspec:SVE_FULL_F 3441 [(match_dup 1) 3442 (const_int SVE_STRICT_GP) 3443 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3444 SVE_COND_FP_UNARY) 3445 (match_dup 2)] 3446 UNSPEC_SEL))] 3447 "TARGET_SVE" 3448 "@ 3449 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> 3450 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3451 [(set_attr "movprfx" "*,yes")] 3452) 3453 3454;; Predicated floating-point unary arithmetic, merging with an independent 3455;; value. 3456;; 3457;; The earlyclobber isn't needed for the first alternative, but omitting 3458;; it would only help the case in which operands 2 and 3 are the same, 3459;; which is handled above rather than here. Marking all the alternatives 3460;; as earlyclobber helps to make the instruction more regular to the 3461;; register allocator. 
3462(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 3463 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") 3464 (unspec:SVE_FULL_F 3465 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3466 (unspec:SVE_FULL_F 3467 [(match_operand 4) 3468 (const_int SVE_RELAXED_GP) 3469 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 3470 SVE_COND_FP_UNARY) 3471 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3472 UNSPEC_SEL))] 3473 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" 3474 "@ 3475 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3476 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3477 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3478 "&& !rtx_equal_p (operands[1], operands[4])" 3479 { 3480 operands[4] = copy_rtx (operands[1]); 3481 } 3482 [(set_attr "movprfx" "*,yes,yes")] 3483) 3484 3485(define_insn "*cond_<optab><mode>_any_strict" 3486 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") 3487 (unspec:SVE_FULL_F 3488 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3489 (unspec:SVE_FULL_F 3490 [(match_dup 1) 3491 (const_int SVE_STRICT_GP) 3492 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 3493 SVE_COND_FP_UNARY) 3494 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3495 UNSPEC_SEL))] 3496 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" 3497 "@ 3498 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3499 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3500 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3501 [(set_attr "movprfx" "*,yes,yes")] 3502) 3503 3504;; ------------------------------------------------------------------------- 3505;; ---- [FP] Square root 3506;; ------------------------------------------------------------------------- 3507 3508(define_expand "sqrt<mode>2" 3509 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3510 (unspec:SVE_FULL_F 3511 [(match_dup 2) 3512 (const_int SVE_RELAXED_GP) 3513 (match_operand:SVE_FULL_F 1 "register_operand")] 3514 UNSPEC_COND_FSQRT))] 3515 "TARGET_SVE" 3516{ 3517 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) 3518 DONE; 3519 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 3520}) 3521 3522;; ------------------------------------------------------------------------- 3523;; ---- [FP] Reciprocal square root 3524;; ------------------------------------------------------------------------- 3525 3526(define_expand "rsqrt<mode>2" 3527 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3528 (unspec:SVE_FULL_SDF 3529 [(match_operand:SVE_FULL_SDF 1 "register_operand")] 3530 UNSPEC_RSQRT))] 3531 "TARGET_SVE" 3532{ 3533 aarch64_emit_approx_sqrt (operands[0], operands[1], true); 3534 DONE; 3535}) 3536 3537(define_expand "@aarch64_rsqrte<mode>" 3538 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3539 (unspec:SVE_FULL_SDF 3540 [(match_operand:SVE_FULL_SDF 1 "register_operand")] 3541 UNSPEC_RSQRTE))] 3542 "TARGET_SVE" 3543) 3544 3545(define_expand "@aarch64_rsqrts<mode>" 3546 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3547 (unspec:SVE_FULL_SDF 3548 [(match_operand:SVE_FULL_SDF 1 "register_operand") 3549 (match_operand:SVE_FULL_SDF 2 "register_operand")] 3550 UNSPEC_RSQRTS))] 3551 "TARGET_SVE" 3552) 3553 3554;; ------------------------------------------------------------------------- 3555;; ---- [PRED] Inverse 3556;; 
------------------------------------------------------------------------- 3557;; Includes: 3558;; - NOT 3559;; ------------------------------------------------------------------------- 3560 3561;; Unpredicated predicate inverse. 3562(define_expand "one_cmpl<mode>2" 3563 [(set (match_operand:PRED_ALL 0 "register_operand") 3564 (and:PRED_ALL 3565 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) 3566 (match_dup 2)))] 3567 "TARGET_SVE" 3568 { 3569 operands[2] = aarch64_ptrue_reg (<MODE>mode); 3570 } 3571) 3572 3573;; Predicated predicate inverse. 3574(define_insn "*one_cmpl<mode>3" 3575 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 3576 (and:PRED_ALL 3577 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) 3578 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] 3579 "TARGET_SVE" 3580 "not\t%0.b, %1/z, %2.b" 3581) 3582 3583;; ========================================================================= 3584;; == Binary arithmetic 3585;; ========================================================================= 3586 3587;; ------------------------------------------------------------------------- 3588;; ---- [INT] General binary arithmetic corresponding to rtx codes 3589;; ------------------------------------------------------------------------- 3590;; Includes: 3591;; - ADD (merging form only) 3592;; - AND (merging form only) 3593;; - ASR (merging form only) 3594;; - EOR (merging form only) 3595;; - LSL (merging form only) 3596;; - LSR (merging form only) 3597;; - MUL 3598;; - ORR (merging form only) 3599;; - SMAX 3600;; - SMIN 3601;; - SQADD (SVE2 merging form only) 3602;; - SQSUB (SVE2 merging form only) 3603;; - SUB (merging form only) 3604;; - UMAX 3605;; - UMIN 3606;; - UQADD (SVE2 merging form only) 3607;; - UQSUB (SVE2 merging form only) 3608;; ------------------------------------------------------------------------- 3609 3610;; Unpredicated integer binary operations that have an immediate form. 3611(define_expand "<optab><mode>3" 3612 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3613 (unspec:SVE_FULL_I 3614 [(match_dup 3) 3615 (SVE_INT_BINARY_IMM:SVE_FULL_I 3616 (match_operand:SVE_FULL_I 1 "register_operand") 3617 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))] 3618 UNSPEC_PRED_X))] 3619 "TARGET_SVE" 3620 { 3621 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 3622 } 3623) 3624 3625;; Integer binary operations that have an immediate form, predicated 3626;; with a PTRUE. We don't actually need the predicate for the first 3627;; and third alternatives, but using Upa or X isn't likely to gain much 3628;; and would make the instruction seem less uniform to the register 3629;; allocator. 3630(define_insn_and_split "@aarch64_pred_<optab><mode>" 3631 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w") 3632 (unspec:SVE_FULL_I 3633 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 3634 (SVE_INT_BINARY_IMM:SVE_FULL_I 3635 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w") 3636 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))] 3637 UNSPEC_PRED_X))] 3638 "TARGET_SVE" 3639 "@ 3640 # 3641 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3642 # 3643 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 3644 ; Split the unpredicated form after reload, so that we don't have 3645 ; the unnecessary PTRUE. 
3646 "&& reload_completed 3647 && !register_operand (operands[3], <MODE>mode)" 3648 [(set (match_dup 0) 3649 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))] 3650 "" 3651 [(set_attr "movprfx" "*,*,yes,yes")] 3652) 3653 3654;; Unpredicated binary operations with a constant (post-RA only). 3655;; These are generated by splitting a predicated instruction whose 3656;; predicate is unused. 3657(define_insn "*post_ra_<optab><mode>3" 3658 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3659 (SVE_INT_BINARY_IMM:SVE_FULL_I 3660 (match_operand:SVE_FULL_I 1 "register_operand" "0, w") 3661 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))] 3662 "TARGET_SVE && reload_completed" 3663 "@ 3664 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2 3665 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2" 3666 [(set_attr "movprfx" "*,yes")] 3667) 3668 3669;; Predicated integer operations with merging. 3670(define_expand "@cond_<optab><mode>" 3671 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3672 (unspec:SVE_FULL_I 3673 [(match_operand:<VPRED> 1 "register_operand") 3674 (SVE_INT_BINARY:SVE_FULL_I 3675 (match_operand:SVE_FULL_I 2 "register_operand") 3676 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>")) 3677 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 3678 UNSPEC_SEL))] 3679 "TARGET_SVE" 3680) 3681 3682;; Predicated integer operations, merging with the first input. 3683(define_insn "*cond_<optab><mode>_2" 3684 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3685 (unspec:SVE_FULL_I 3686 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3687 (SVE_INT_BINARY:SVE_FULL_I 3688 (match_operand:SVE_FULL_I 2 "register_operand" "0, w") 3689 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) 3690 (match_dup 2)] 3691 UNSPEC_SEL))] 3692 "TARGET_SVE" 3693 "@ 3694 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3695 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 3696 [(set_attr "movprfx" "*,yes")] 3697) 3698 3699;; Predicated integer operations, merging with the second input. 3700(define_insn "*cond_<optab><mode>_3" 3701 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3702 (unspec:SVE_FULL_I 3703 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3704 (SVE_INT_BINARY:SVE_FULL_I 3705 (match_operand:SVE_FULL_I 2 "register_operand" "w, w") 3706 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")) 3707 (match_dup 3)] 3708 UNSPEC_SEL))] 3709 "TARGET_SVE" 3710 "@ 3711 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 3712 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 3713 [(set_attr "movprfx" "*,yes")] 3714) 3715 3716;; Predicated integer operations, merging with an independent value. 
3717(define_insn_and_rewrite "*cond_<optab><mode>_any" 3718 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") 3719 (unspec:SVE_FULL_I 3720 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 3721 (SVE_INT_BINARY:SVE_FULL_I 3722 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") 3723 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")) 3724 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 3725 UNSPEC_SEL))] 3726 "TARGET_SVE 3727 && !rtx_equal_p (operands[2], operands[4]) 3728 && !rtx_equal_p (operands[3], operands[4])" 3729 "@ 3730 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3731 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 3732 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3733 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3734 #" 3735 "&& reload_completed 3736 && register_operand (operands[4], <MODE>mode) 3737 && !rtx_equal_p (operands[0], operands[4])" 3738 { 3739 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 3740 operands[4], operands[1])); 3741 operands[4] = operands[2] = operands[0]; 3742 } 3743 [(set_attr "movprfx" "yes")] 3744) 3745 3746;; ------------------------------------------------------------------------- 3747;; ---- [INT] Addition 3748;; ------------------------------------------------------------------------- 3749;; Includes: 3750;; - ADD 3751;; - DECB 3752;; - DECD 3753;; - DECH 3754;; - DECW 3755;; - INCB 3756;; - INCD 3757;; - INCH 3758;; - INCW 3759;; - SUB 3760;; ------------------------------------------------------------------------- 3761 3762(define_insn "add<mode>3" 3763 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w") 3764 (plus:SVE_I 3765 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w") 3766 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] 3767 "TARGET_SVE" 3768 "@ 3769 add\t%0.<Vetype>, %0.<Vetype>, #%D2 3770 sub\t%0.<Vetype>, %0.<Vetype>, #%N2 3771 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]); 3772 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2 3773 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2 3774 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 3775 [(set_attr "movprfx" "*,*,*,yes,yes,*")] 3776) 3777 3778;; Merging forms are handled through SVE_INT_BINARY. 3779 3780;; ------------------------------------------------------------------------- 3781;; ---- [INT] Subtraction 3782;; ------------------------------------------------------------------------- 3783;; Includes: 3784;; - SUB 3785;; - SUBR 3786;; ------------------------------------------------------------------------- 3787 3788(define_insn "sub<mode>3" 3789 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w") 3790 (minus:SVE_FULL_I 3791 (match_operand:SVE_FULL_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") 3792 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w")))] 3793 "TARGET_SVE" 3794 "@ 3795 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> 3796 subr\t%0.<Vetype>, %0.<Vetype>, #%D1 3797 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1" 3798 [(set_attr "movprfx" "*,*,yes")] 3799) 3800 3801;; Merging forms are handled through SVE_INT_BINARY. 
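;;
;; Illustrative examples for this section and the one above (sketches
;; only; exact output depends on register allocation):
;;
;;   #include <arm_sve.h>
;;   svint32_t add1 (svint32_t x)
;;   {
;;     return svadd_n_s32_x (svptrue_b32 (), x, 1);   /* add z0.s, z0.s, #1 */
;;   }
;;   svint32_t rsub64 (svint32_t x)
;;   {
;;     return svsubr_n_s32_x (svptrue_b32 (), x, 64); /* subr z0.s, z0.s, #64 */
;;   }
;;
;; Adding a VL-dependent constant such as svcntw () can instead use the
;; INC/DEC alternatives, e.g. "incw z0.s".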
3802 3803;; ------------------------------------------------------------------------- 3804;; ---- [INT] Take address 3805;; ------------------------------------------------------------------------- 3806;; Includes: 3807;; - ADR 3808;; ------------------------------------------------------------------------- 3809 3810;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD, 3811;; but the svadrb intrinsics should preserve the user's choice. 3812(define_insn "@aarch64_adr<mode>" 3813 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") 3814 (unspec:SVE_FULL_SDI 3815 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") 3816 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] 3817 UNSPEC_ADR))] 3818 "TARGET_SVE" 3819 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]" 3820) 3821 3822;; Same, but with the offset being sign-extended from the low 32 bits. 3823(define_insn_and_rewrite "*aarch64_adr_sxtw" 3824 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3825 (unspec:VNx2DI 3826 [(match_operand:VNx2DI 1 "register_operand" "w") 3827 (unspec:VNx2DI 3828 [(match_operand 3) 3829 (sign_extend:VNx2DI 3830 (truncate:VNx2SI 3831 (match_operand:VNx2DI 2 "register_operand" "w")))] 3832 UNSPEC_PRED_X)] 3833 UNSPEC_ADR))] 3834 "TARGET_SVE" 3835 "adr\t%0.d, [%1.d, %2.d, sxtw]" 3836 "&& !CONSTANT_P (operands[3])" 3837 { 3838 operands[3] = CONSTM1_RTX (VNx2BImode); 3839 } 3840) 3841 3842;; Same, but with the offset being zero-extended from the low 32 bits. 3843(define_insn "*aarch64_adr_uxtw_unspec" 3844 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3845 (unspec:VNx2DI 3846 [(match_operand:VNx2DI 1 "register_operand" "w") 3847 (and:VNx2DI 3848 (match_operand:VNx2DI 2 "register_operand" "w") 3849 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] 3850 UNSPEC_ADR))] 3851 "TARGET_SVE" 3852 "adr\t%0.d, [%1.d, %2.d, uxtw]" 3853) 3854 3855;; Same, matching as a PLUS rather than unspec. 3856(define_insn "*aarch64_adr_uxtw_and" 3857 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3858 (plus:VNx2DI 3859 (and:VNx2DI 3860 (match_operand:VNx2DI 2 "register_operand" "w") 3861 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) 3862 (match_operand:VNx2DI 1 "register_operand" "w")))] 3863 "TARGET_SVE" 3864 "adr\t%0.d, [%1.d, %2.d, uxtw]" 3865) 3866 3867;; ADR with a nonzero shift. 3868(define_expand "@aarch64_adr<mode>_shift" 3869 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 3870 (plus:SVE_FULL_SDI 3871 (unspec:SVE_FULL_SDI 3872 [(match_dup 4) 3873 (ashift:SVE_FULL_SDI 3874 (match_operand:SVE_FULL_SDI 2 "register_operand") 3875 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] 3876 UNSPEC_PRED_X) 3877 (match_operand:SVE_FULL_SDI 1 "register_operand")))] 3878 "TARGET_SVE" 3879 { 3880 operands[4] = CONSTM1_RTX (<VPRED>mode); 3881 } 3882) 3883 3884(define_insn_and_rewrite "*aarch64_adr<mode>_shift" 3885 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") 3886 (plus:SVE_FULL_SDI 3887 (unspec:SVE_FULL_SDI 3888 [(match_operand 4) 3889 (ashift:SVE_FULL_SDI 3890 (match_operand:SVE_FULL_SDI 2 "register_operand" "w") 3891 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] 3892 UNSPEC_PRED_X) 3893 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))] 3894 "TARGET_SVE" 3895 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]" 3896 "&& !CONSTANT_P (operands[4])" 3897 { 3898 operands[4] = CONSTM1_RTX (<VPRED>mode); 3899 } 3900) 3901 3902;; Same, but with the index being sign-extended from the low 32 bits. 
3903(define_insn_and_rewrite "*aarch64_adr_shift_sxtw" 3904 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3905 (plus:VNx2DI 3906 (unspec:VNx2DI 3907 [(match_operand 4) 3908 (ashift:VNx2DI 3909 (unspec:VNx2DI 3910 [(match_operand 5) 3911 (sign_extend:VNx2DI 3912 (truncate:VNx2SI 3913 (match_operand:VNx2DI 2 "register_operand" "w")))] 3914 UNSPEC_PRED_X) 3915 (match_operand:VNx2DI 3 "const_1_to_3_operand"))] 3916 UNSPEC_PRED_X) 3917 (match_operand:VNx2DI 1 "register_operand" "w")))] 3918 "TARGET_SVE" 3919 "adr\t%0.d, [%1.d, %2.d, sxtw %3]" 3920 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" 3921 { 3922 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode); 3923 } 3924) 3925 3926;; Same, but with the index being zero-extended from the low 32 bits. 3927(define_insn_and_rewrite "*aarch64_adr_shift_uxtw" 3928 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3929 (plus:VNx2DI 3930 (unspec:VNx2DI 3931 [(match_operand 5) 3932 (ashift:VNx2DI 3933 (and:VNx2DI 3934 (match_operand:VNx2DI 2 "register_operand" "w") 3935 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) 3936 (match_operand:VNx2DI 3 "const_1_to_3_operand"))] 3937 UNSPEC_PRED_X) 3938 (match_operand:VNx2DI 1 "register_operand" "w")))] 3939 "TARGET_SVE" 3940 "adr\t%0.d, [%1.d, %2.d, uxtw %3]" 3941 "&& !CONSTANT_P (operands[5])" 3942 { 3943 operands[5] = CONSTM1_RTX (VNx2BImode); 3944 } 3945) 3946 3947;; ------------------------------------------------------------------------- 3948;; ---- [INT] Absolute difference 3949;; ------------------------------------------------------------------------- 3950;; Includes: 3951;; - SABD 3952;; - UABD 3953;; ------------------------------------------------------------------------- 3954 3955;; Unpredicated integer absolute difference. 3956(define_expand "<su>abd<mode>_3" 3957 [(use (match_operand:SVE_FULL_I 0 "register_operand")) 3958 (USMAX:SVE_FULL_I 3959 (match_operand:SVE_FULL_I 1 "register_operand") 3960 (match_operand:SVE_FULL_I 2 "register_operand"))] 3961 "TARGET_SVE" 3962 { 3963 rtx pred = aarch64_ptrue_reg (<VPRED>mode); 3964 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1], 3965 operands[2])); 3966 DONE; 3967 } 3968) 3969 3970;; Predicated integer absolute difference. 
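;;
;; For example (an illustrative sketch): the classic absolute-difference
;; loop
;;
;;   void absdiff (unsigned char *restrict d,
;;		   const unsigned char *restrict a,
;;		   const unsigned char *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;   }
;;
;; is recognized as max (a, b) - min (a, b), matching the pattern below
;; and emitting "uabd z0.b, p0/m, z0.b, z1.b" under a ptrue predicate.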
3971(define_insn "@aarch64_pred_<su>abd<mode>" 3972 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3973 (unspec:SVE_FULL_I 3974 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3975 (minus:SVE_FULL_I 3976 (USMAX:SVE_FULL_I 3977 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w") 3978 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) 3979 (<max_opp>:SVE_FULL_I 3980 (match_dup 2) 3981 (match_dup 3)))] 3982 UNSPEC_PRED_X))] 3983 "TARGET_SVE" 3984 "@ 3985 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3986 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 3987 [(set_attr "movprfx" "*,yes")] 3988) 3989 3990(define_expand "@aarch64_cond_<su>abd<mode>" 3991 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3992 (unspec:SVE_FULL_I 3993 [(match_operand:<VPRED> 1 "register_operand") 3994 (minus:SVE_FULL_I 3995 (unspec:SVE_FULL_I 3996 [(match_dup 1) 3997 (USMAX:SVE_FULL_I 3998 (match_operand:SVE_FULL_I 2 "register_operand") 3999 (match_operand:SVE_FULL_I 3 "register_operand"))] 4000 UNSPEC_PRED_X) 4001 (unspec:SVE_FULL_I 4002 [(match_dup 1) 4003 (<max_opp>:SVE_FULL_I 4004 (match_dup 2) 4005 (match_dup 3))] 4006 UNSPEC_PRED_X)) 4007 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4008 UNSPEC_SEL))] 4009 "TARGET_SVE" 4010{ 4011 if (rtx_equal_p (operands[3], operands[4])) 4012 std::swap (operands[2], operands[3]); 4013}) 4014 4015;; Predicated integer absolute difference, merging with the first input. 4016(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2" 4017 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 4018 (unspec:SVE_FULL_I 4019 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4020 (minus:SVE_FULL_I 4021 (unspec:SVE_FULL_I 4022 [(match_operand 4) 4023 (USMAX:SVE_FULL_I 4024 (match_operand:SVE_FULL_I 2 "register_operand" "0, w") 4025 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))] 4026 UNSPEC_PRED_X) 4027 (unspec:SVE_FULL_I 4028 [(match_operand 5) 4029 (<max_opp>:SVE_FULL_I 4030 (match_dup 2) 4031 (match_dup 3))] 4032 UNSPEC_PRED_X)) 4033 (match_dup 2)] 4034 UNSPEC_SEL))] 4035 "TARGET_SVE" 4036 "@ 4037 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4038 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4039 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" 4040 { 4041 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode); 4042 } 4043 [(set_attr "movprfx" "*,yes")] 4044) 4045 4046;; Predicated integer absolute difference, merging with an independent value. 
4047(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any" 4048 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") 4049 (unspec:SVE_FULL_I 4050 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 4051 (minus:SVE_FULL_I 4052 (unspec:SVE_FULL_I 4053 [(match_operand 5) 4054 (USMAX:SVE_FULL_I 4055 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w") 4056 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))] 4057 UNSPEC_PRED_X) 4058 (unspec:SVE_FULL_I 4059 [(match_operand 6) 4060 (<max_opp>:SVE_FULL_I 4061 (match_dup 2) 4062 (match_dup 3))] 4063 UNSPEC_PRED_X)) 4064 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 4065 UNSPEC_SEL))] 4066 "TARGET_SVE 4067 && !rtx_equal_p (operands[2], operands[4]) 4068 && !rtx_equal_p (operands[3], operands[4])" 4069 "@ 4070 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4071 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4072 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4073 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4074 #" 4075 "&& 1" 4076 { 4077 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) 4078 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode); 4079 else if (reload_completed 4080 && register_operand (operands[4], <MODE>mode) 4081 && !rtx_equal_p (operands[0], operands[4])) 4082 { 4083 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4084 operands[4], operands[1])); 4085 operands[4] = operands[2] = operands[0]; 4086 } 4087 else 4088 FAIL; 4089 } 4090 [(set_attr "movprfx" "yes")] 4091) 4092 4093;; ------------------------------------------------------------------------- 4094;; ---- [INT] Saturating addition and subtraction 4095;; ------------------------------------------------------------------------- ;; Includes: 4096;; - SQADD 4097;; - SQSUB 4098;; - UQADD 4099;; - UQSUB 4100;; ------------------------------------------------------------------------- 4101 4102;; Unpredicated saturating signed addition and subtraction. 4103(define_insn "@aarch64_sve_<optab><mode>" 4104 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") 4105 (SBINQOPS:SVE_FULL_I 4106 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") 4107 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] 4108 "TARGET_SVE" 4109 "@ 4110 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4111 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 4112 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4113 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 4114 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 4115 [(set_attr "movprfx" "*,*,yes,yes,*")] 4116) 4117 4118;; Unpredicated saturating unsigned addition and subtraction.
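;;
;; A minimal illustration (not from the upstream sources):
;;
;;   #include <arm_sve.h>
;;   svuint8_t sat_add1 (svuint8_t x)
;;   {
;;     return svqadd_n_u8 (x, 1); /* Saturates at 255 instead of wrapping.  */
;;   }
;;
;; matches the immediate alternative of the pattern below and might
;; assemble to "uqadd z0.b, z0.b, #1"; constants outside the immediate
;; range fall back to the register form.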
4119(define_insn "@aarch64_sve_<optab><mode>" 4120 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") 4121 (UBINQOPS:SVE_FULL_I 4122 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") 4123 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] 4124 "TARGET_SVE" 4125 "@ 4126 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4127 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4128 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 4129 [(set_attr "movprfx" "*,yes,*")] 4130) 4131 4132;; ------------------------------------------------------------------------- 4133;; ---- [INT] Highpart multiplication 4134;; ------------------------------------------------------------------------- 4135;; Includes: 4136;; - SMULH 4137;; - UMULH 4138;; ------------------------------------------------------------------------- 4139 4140;; Unpredicated highpart multiplication. 4141(define_expand "<su>mul<mode>3_highpart" 4142 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4143 (unspec:SVE_FULL_I 4144 [(match_dup 3) 4145 (unspec:SVE_FULL_I 4146 [(match_operand:SVE_FULL_I 1 "register_operand") 4147 (match_operand:SVE_FULL_I 2 "register_operand")] 4148 MUL_HIGHPART)] 4149 UNSPEC_PRED_X))] 4150 "TARGET_SVE" 4151 { 4152 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4153 } 4154) 4155 4156;; Predicated highpart multiplication. 4157(define_insn "@aarch64_pred_<optab><mode>" 4158 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 4159 (unspec:SVE_FULL_I 4160 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4161 (unspec:SVE_FULL_I 4162 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") 4163 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 4164 MUL_HIGHPART)] 4165 UNSPEC_PRED_X))] 4166 "TARGET_SVE" 4167 "@ 4168 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4169 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4170 [(set_attr "movprfx" "*,yes")] 4171) 4172 4173;; Predicated highpart multiplications with merging. 4174(define_expand "@cond_<optab><mode>" 4175 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4176 (unspec:SVE_FULL_I 4177 [(match_operand:<VPRED> 1 "register_operand") 4178 (unspec:SVE_FULL_I 4179 [(match_operand:SVE_FULL_I 2 "register_operand") 4180 (match_operand:SVE_FULL_I 3 "register_operand")] 4181 MUL_HIGHPART) 4182 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4183 UNSPEC_SEL))] 4184 "TARGET_SVE" 4185{ 4186 /* Only target code is aware of these operations, so we don't need 4187 to handle the fully-general case. */ 4188 gcc_assert (rtx_equal_p (operands[2], operands[4]) 4189 || CONSTANT_P (operands[4])); 4190}) 4191 4192;; Predicated highpart multiplications, merging with the first input. 4193(define_insn "*cond_<optab><mode>_2" 4194 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 4195 (unspec:SVE_FULL_I 4196 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4197 (unspec:SVE_FULL_I 4198 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") 4199 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 4200 MUL_HIGHPART) 4201 (match_dup 2)] 4202 UNSPEC_SEL))] 4203 "TARGET_SVE" 4204 "@ 4205 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4206 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4207 [(set_attr "movprfx" "*,yes")]) 4208 4209;; Predicated highpart multiplications, merging with zero. 
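;;
;; For concreteness (illustrative only): a zeroing intrinsic such as
;;
;;   #include <arm_sve.h>
;;   svint32_t mulh_z (svbool_t pg, svint32_t a, svint32_t b)
;;   {
;;     return svmulh_z (pg, a, b); /* Inactive lanes become zero.  */
;;   }
;;
;; is matched by the pattern below and emits a zeroing MOVPRFX, e.g.
;; "movprfx z0.s, p0/z, z0.s" followed by
;; "smulh z0.s, p0/m, z0.s, z1.s".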
4210(define_insn "*cond_<optab><mode>_z" 4211 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") 4212 (unspec:SVE_FULL_I 4213 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4214 (unspec:SVE_FULL_I 4215 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") 4216 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 4217 MUL_HIGHPART) 4218 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] 4219 UNSPEC_SEL))] 4220 "TARGET_SVE" 4221 "@ 4222 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4223 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4224 [(set_attr "movprfx" "yes")]) 4225 4226;; ------------------------------------------------------------------------- 4227;; ---- [INT] Division 4228;; ------------------------------------------------------------------------- 4229;; Includes: 4230;; - SDIV 4231;; - SDIVR 4232;; - UDIV 4233;; - UDIVR 4234;; ------------------------------------------------------------------------- 4235 4236;; Unpredicated integer division. 4237(define_expand "<optab><mode>3" 4238 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 4239 (unspec:SVE_FULL_SDI 4240 [(match_dup 3) 4241 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4242 (match_operand:SVE_FULL_SDI 1 "register_operand") 4243 (match_operand:SVE_FULL_SDI 2 "register_operand"))] 4244 UNSPEC_PRED_X))] 4245 "TARGET_SVE" 4246 { 4247 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4248 } 4249) 4250 4251;; Integer division predicated with a PTRUE. 4252(define_insn "@aarch64_pred_<optab><mode>" 4253 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") 4254 (unspec:SVE_FULL_SDI 4255 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 4256 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4257 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") 4258 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] 4259 UNSPEC_PRED_X))] 4260 "TARGET_SVE" 4261 "@ 4262 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4263 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4264 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4265 [(set_attr "movprfx" "*,*,yes")] 4266) 4267 4268;; Predicated integer division with merging. 4269(define_expand "@cond_<optab><mode>" 4270 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 4271 (unspec:SVE_FULL_SDI 4272 [(match_operand:<VPRED> 1 "register_operand") 4273 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4274 (match_operand:SVE_FULL_SDI 2 "register_operand") 4275 (match_operand:SVE_FULL_SDI 3 "register_operand")) 4276 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] 4277 UNSPEC_SEL))] 4278 "TARGET_SVE" 4279) 4280 4281;; Predicated integer division, merging with the first input. 4282(define_insn "*cond_<optab><mode>_2" 4283 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 4284 (unspec:SVE_FULL_SDI 4285 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4286 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4287 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") 4288 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) 4289 (match_dup 2)] 4290 UNSPEC_SEL))] 4291 "TARGET_SVE" 4292 "@ 4293 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4294 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4295 [(set_attr "movprfx" "*,yes")] 4296) 4297 4298;; Predicated integer division, merging with the second input. 
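;;
;; SDIV and UDIV are only available for .s and .d elements, hence the
;; SVE_FULL_SDI iterator throughout this section.  As an illustrative
;; sketch, a loop such as
;;
;;   void divide (int *restrict d, const int *restrict a,
;;		  const int *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] / b[i];
;;   }
;;
;; can vectorize through the <optab><mode>3 expander above to
;; "sdiv z0.s, p0/m, z0.s, z1.s".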
4299(define_insn "*cond_<optab><mode>_3" 4300 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 4301 (unspec:SVE_FULL_SDI 4302 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4303 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4304 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") 4305 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) 4306 (match_dup 3)] 4307 UNSPEC_SEL))] 4308 "TARGET_SVE" 4309 "@ 4310 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4311 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 4312 [(set_attr "movprfx" "*,yes")] 4313) 4314 4315;; Predicated integer division, merging with an independent value. 4316(define_insn_and_rewrite "*cond_<optab><mode>_any" 4317 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") 4318 (unspec:SVE_FULL_SDI 4319 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 4320 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4321 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") 4322 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) 4323 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 4324 UNSPEC_SEL))] 4325 "TARGET_SVE 4326 && !rtx_equal_p (operands[2], operands[4]) 4327 && !rtx_equal_p (operands[3], operands[4])" 4328 "@ 4329 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4330 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4331 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4332 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4333 #" 4334 "&& reload_completed 4335 && register_operand (operands[4], <MODE>mode) 4336 && !rtx_equal_p (operands[0], operands[4])" 4337 { 4338 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4339 operands[4], operands[1])); 4340 operands[4] = operands[2] = operands[0]; 4341 } 4342 [(set_attr "movprfx" "yes")] 4343) 4344 4345;; ------------------------------------------------------------------------- 4346;; ---- [INT] Binary logical operations 4347;; ------------------------------------------------------------------------- 4348;; Includes: 4349;; - AND 4350;; - EOR 4351;; - ORR 4352;; ------------------------------------------------------------------------- 4353 4354;; Unpredicated integer binary logical operations. 4355(define_insn "<optab><mode>3" 4356 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?w, w") 4357 (LOGICAL:SVE_FULL_I 4358 (match_operand:SVE_FULL_I 1 "register_operand" "%0, w, w") 4359 (match_operand:SVE_FULL_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] 4360 "TARGET_SVE" 4361 "@ 4362 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 4363 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 4364 <logical>\t%0.d, %1.d, %2.d" 4365 [(set_attr "movprfx" "*,yes,*")] 4366) 4367 4368;; Merging forms are handled through SVE_INT_BINARY. 4369 4370;; ------------------------------------------------------------------------- 4371;; ---- [INT] Binary logical operations (inverted second input) 4372;; ------------------------------------------------------------------------- 4373;; Includes: 4374;; - BIC 4375;; ------------------------------------------------------------------------- 4376 4377;; Unpredicated BIC. 
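;;
;; For example (illustrative only): an AND-NOT loop such as
;;
;;   void andn (unsigned long *restrict d, const unsigned long *restrict a,
;;	        const unsigned long *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = a[i] & ~b[i];
;;   }
;;
;; can use the patterns below and emit "bic z0.d, z0.d, z1.d"; note
;; that the inverted input is the second source operand.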
4378(define_expand "@aarch64_bic<mode>" 4379 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4380 (and:SVE_FULL_I 4381 (unspec:SVE_FULL_I 4382 [(match_dup 3) 4383 (not:SVE_FULL_I (match_operand:SVE_FULL_I 2 "register_operand"))] 4384 UNSPEC_PRED_X) 4385 (match_operand:SVE_FULL_I 1 "register_operand")))] 4386 "TARGET_SVE" 4387 { 4388 operands[3] = CONSTM1_RTX (<VPRED>mode); 4389 } 4390) 4391 4392;; Predicated BIC. 4393(define_insn_and_rewrite "*bic<mode>3" 4394 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w") 4395 (and:SVE_FULL_I 4396 (unspec:SVE_FULL_I 4397 [(match_operand 3) 4398 (not:SVE_FULL_I 4399 (match_operand:SVE_FULL_I 2 "register_operand" "w"))] 4400 UNSPEC_PRED_X) 4401 (match_operand:SVE_FULL_I 1 "register_operand" "w")))] 4402 "TARGET_SVE" 4403 "bic\t%0.d, %1.d, %2.d" 4404 "&& !CONSTANT_P (operands[3])" 4405 { 4406 operands[3] = CONSTM1_RTX (<VPRED>mode); 4407 } 4408) 4409 4410;; Predicated BIC with merging. 4411(define_expand "@cond_bic<mode>" 4412 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4413 (unspec:SVE_FULL_I 4414 [(match_operand:<VPRED> 1 "register_operand") 4415 (and:SVE_FULL_I 4416 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) 4417 (match_operand:SVE_FULL_I 2 "register_operand")) 4418 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4419 UNSPEC_SEL))] 4420 "TARGET_SVE" 4421) 4422 4423;; Predicated integer BIC, merging with the first input. 4424(define_insn "*cond_bic<mode>_2" 4425 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 4426 (unspec:SVE_FULL_I 4427 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4428 (and:SVE_FULL_I 4429 (not:SVE_FULL_I 4430 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")) 4431 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")) 4432 (match_dup 2)] 4433 UNSPEC_SEL))] 4434 "TARGET_SVE" 4435 "@ 4436 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4437 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4438 [(set_attr "movprfx" "*,yes")] 4439) 4440 4441;; Predicated integer BIC, merging with an independent value. 
4442(define_insn_and_rewrite "*cond_bic<mode>_any" 4443 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w") 4444 (unspec:SVE_FULL_I 4445 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 4446 (and:SVE_FULL_I 4447 (not:SVE_FULL_I 4448 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w")) 4449 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w")) 4450 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 4451 UNSPEC_SEL))] 4452 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4453 "@ 4454 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4455 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4456 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4457 #" 4458 "&& reload_completed 4459 && register_operand (operands[4], <MODE>mode) 4460 && !rtx_equal_p (operands[0], operands[4])" 4461 { 4462 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4463 operands[4], operands[1])); 4464 operands[4] = operands[2] = operands[0]; 4465 } 4466 [(set_attr "movprfx" "yes")] 4467) 4468 4469;; ------------------------------------------------------------------------- 4470;; ---- [INT] Shifts (rounding towards -Inf) 4471;; ------------------------------------------------------------------------- 4472;; Includes: 4473;; - ASR 4474;; - ASRR 4475;; - LSL 4476;; - LSLR 4477;; - LSR 4478;; - LSRR 4479;; ------------------------------------------------------------------------- 4480 4481;; Unpredicated shift by a scalar, which expands into one of the vector 4482;; shifts below. 4483(define_expand "<ASHIFT:optab><mode>3" 4484 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4485 (ASHIFT:SVE_FULL_I 4486 (match_operand:SVE_FULL_I 1 "register_operand") 4487 (match_operand:<VEL> 2 "general_operand")))] 4488 "TARGET_SVE" 4489 { 4490 rtx amount; 4491 if (CONST_INT_P (operands[2])) 4492 { 4493 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); 4494 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) 4495 amount = force_reg (<MODE>mode, amount); 4496 } 4497 else 4498 { 4499 amount = gen_reg_rtx (<MODE>mode); 4500 emit_insn (gen_vec_duplicate<mode> (amount, 4501 convert_to_mode (<VEL>mode, 4502 operands[2], 0))); 4503 } 4504 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); 4505 DONE; 4506 } 4507) 4508 4509;; Unpredicated shift by a vector. 4510(define_expand "v<optab><mode>3" 4511 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4512 (unspec:SVE_FULL_I 4513 [(match_dup 3) 4514 (ASHIFT:SVE_FULL_I 4515 (match_operand:SVE_FULL_I 1 "register_operand") 4516 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))] 4517 UNSPEC_PRED_X))] 4518 "TARGET_SVE" 4519 { 4520 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4521 } 4522) 4523 4524;; Shift by a vector, predicated with a PTRUE. We don't actually need 4525;; the predicate for the first alternative, but using Upa or X isn't 4526;; likely to gain much and would make the instruction seem less uniform 4527;; to the register allocator. 
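;;
;; As an illustrative sketch (not from the upstream sources): for
;;
;;   void shl3 (unsigned int *restrict d, const unsigned int *restrict s,
;;	        int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       d[i] = s[i] << 3;
;;   }
;;
;; the immediate alternative below is split after register allocation
;; into the unpredicated post-RA pattern, so the final output is simply
;; "lsl z0.s, z1.s, #3" with no governing predicate.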

;; Shift by a vector, predicated with a PTRUE.  We don't actually need
;; the predicate for the first alternative, but using Upa or X isn't
;; likely to gain much and would make the instruction seem less uniform
;; to the register allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   #
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& reload_completed
   && !register_operand (operands[3], <MODE>mode)"
  [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_v<optab><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(ASHIFT:SVE_FULL_I
	  (match_operand:SVE_FULL_I 1 "register_operand" "w")
	  (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
  "TARGET_SVE && reload_completed"
  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)

;; Predicated integer shift, merging with the first input.
(define_insn "*cond_<optab><mode>_2_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer shift, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (ASHIFT:SVE_FULL_I
	     (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
	     (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
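
;; Sketch of the rewrite used by *cond_<optab><mode>_any_const above (my
;; illustration, not from the original sources): when operand 4 ends up
;; as a register distinct from the result, the "#" alternative is
;; resolved after reload by first emitting a SEL that merges operands 2
;; and 4 into operand 0, and then re-recognizing the insn with operands
;; 2 and 4 replaced by operand 0, leaving a SEL followed by a predicated
;; shift by immediate.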

;; Unpredicated shifts of narrow elements by 64-bit amounts.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
	   (match_operand:VNx2DI 2 "register_operand" "w")]
	  SVE_SHIFT_WIDE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
)

;; Merging predicated shifts of narrow elements by 64-bit amounts.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand")
	      (match_operand:VNx2DI 3 "register_operand")]
	     SVE_SHIFT_WIDE)
	   (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated shifts of narrow elements by 64-bit amounts, merging with
;; the first input.
(define_insn "*cond_<sve_int_op><mode>_m"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
	      (match_operand:VNx2DI 3 "register_operand" "w, w")]
	     SVE_SHIFT_WIDE)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
(define_insn "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_BHSI
	     [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
	      (match_operand:VNx2DI 3 "register_operand" "w, w")]
	     SVE_SHIFT_WIDE)
	   (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Shifts (rounding towards 0)
;; -------------------------------------------------------------------------
;; Includes:
;; - ASRD
;; - SQSHLU (SVE2)
;; - SRSHR (SVE2)
;; - URSHR (SVE2)
;; -------------------------------------------------------------------------

;; Unpredicated ASRD.
(define_expand "sdiv_pow2<mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_dup 3)
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 1 "register_operand")
	      (match_operand 2 "aarch64_simd_rshift_imm")]
	     UNSPEC_ASRD)
	   (match_dup 1)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
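
;; A hedged example of what sdiv_pow2<mode>3 is for (not from the
;; original sources): signed division by a power of two,
;;
;;   void f (int32_t *x) { for (int i = 0; i < 100; ++i) x[i] /= 4; }
;;
;; can use ASRD, which shifts right arithmetically but rounds towards
;; zero like C division, e.g. "asrd z0.s, p0/m, z0.s, #2".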

;; Predicated right shift with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated right shift, merging with the first input.
(define_insn "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated right shift, merging with zero.
(define_insn "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (unspec:SVE_FULL_I
	     [(match_operand:SVE_FULL_I 2 "register_operand" "w")
	      (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
	     SVE_INT_SHIFT_IMM)
	   (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FSCALE
;; - FTSMUL
;; - FTSSEL
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations that take an integer as
;; their second operand.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  SVE_FP_BINARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Predicated floating-point binary operations that take an integer
;; as their second operand.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	  SVE_COND_FP_BINARY_INT))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
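
;; Illustrative note (mine, not from the original sources): FSCALE is
;; the main user of this shape.  It multiplies each FP lane by two
;; raised to the power of the corresponding integer lane, so roughly
;;
;;   svfloat32_t f (svbool_t pg, svfloat32_t x, svint32_t n)
;;   {
;;     return svscale_m (pg, x, n);    /* x * 2^n per active lane */
;;   }
;;
;; maps to "fscale z0.s, p0/m, z0.s, z1.s".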

;; Predicated floating-point binary operations with merging, taking an
;; integer as their second operand.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated floating-point binary operations that take an integer as their
;; second operand, with inactive lanes coming from the first operand.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
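
;; Rough sketch of the "&& !rtx_equal_p (operands[1], operands[4])"
;; rewrite used by the _relaxed pattern above (my illustration, not from
;; the original sources).  Under SVE_RELAXED_GP the inner predicate is a
;; wildcard, because the operation is allowed to process inactive lanes,
;; so the insn canonicalizes roughly
;;
;;   (unspec [pg_sel (unspec [pg_other RELAXED x y] FOP) x] UNSPEC_SEL)
;; to
;;   (unspec [pg_sel (unspec [pg_sel   RELAXED x y] FOP) x] UNSPEC_SEL)
;;
;; by copying the selection predicate into operand 4.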

;; Predicated floating-point binary operations that take an integer as
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
	      (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FP_BINARY_INT)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes post-RA forms of:
;; - FADD
;; - FMUL
;; - FSUB
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
(define_insn "*post_ra_<sve_fp_op><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(SVE_UNPRED_FP_BINARY:SVE_FULL_F
	  (match_operand:SVE_FULL_F 1 "register_operand" "w")
	  (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
  "TARGET_SVE && reload_completed"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes merging forms of:
;; - FADD (constant forms handled in the "Addition" section)
;; - FDIV
;; - FDIVR
;; - FMAX
;; - FMAXNM (including #0.0 and #1.0)
;; - FMIN
;; - FMINNM (including #0.0 and #1.0)
;; - FMUL (including #0.5 and #2.0)
;; - FMULX
;; - FRECPS
;; - FRSQRTS
;; - FSUB (constant forms handled in the "Addition" section)
;; - FSUBR (constant forms handled in the "Subtraction" section)
;; -------------------------------------------------------------------------

;; Unpredicated floating-point binary operations.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	  SVE_FP_BINARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
	   (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
	  SVE_COND_FP_BINARY_OPTAB))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point binary operations that have no immediate forms.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
	  SVE_COND_FP_BINARY_REG))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
	     SVE_COND_FP_BINARY)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)
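
;; Illustrative example (mine, not from the original sources): for a
;; straight-line vector "a + b", the <optab><mode>3 expander above wraps
;; the operation in a PTRUE-governed unspec so that the predicated
;; patterns can match it; if the predicate is never needed, the post-RA
;; splits feeding the previous section recover the unpredicated form
;;
;;   fadd z0.s, z1.s, z2.s
;;
;; while a genuinely conditional use keeps the predicated
;; "fadd z0.s, p0/m, z0.s, z2.s".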

;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
	     SVE_COND_FP_BINARY_I1)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
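
;; Hedged example for the 1-bit constant forms above (not from the
;; original sources): SVE encodes only a restricted pair of FP
;; immediates per operation, so
;;
;;   svfloat32_t f (svbool_t pg, svfloat32_t x)
;;   {
;;     return svmul_n_f32_m (pg, x, 2.0f);
;;   }
;;
;; can emit "fmul z0.s, p0/m, z0.s, #2.0" with no constant load; FMUL
;; accepts #0.5 and #2.0 here, while FMAXNM and FMINNM accept #0.0
;; and #1.0.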

;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_3_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     SVE_COND_FP_BINARY)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
	     SVE_COND_FP_BINARY)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
"register_operand" "0, w, w, w, w") 5180 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] 5181 SVE_COND_FP_BINARY) 5182 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 5183 UNSPEC_SEL))] 5184 "TARGET_SVE 5185 && !rtx_equal_p (operands[2], operands[4]) 5186 && !rtx_equal_p (operands[3], operands[4])" 5187 "@ 5188 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5189 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5190 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5191 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5192 #" 5193 "&& reload_completed 5194 && register_operand (operands[4], <MODE>mode) 5195 && !rtx_equal_p (operands[0], operands[4])" 5196 { 5197 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5198 operands[4], operands[1])); 5199 operands[4] = operands[2] = operands[0]; 5200 } 5201 [(set_attr "movprfx" "yes")] 5202) 5203 5204;; Same for operations that take a 1-bit constant. 5205(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed" 5206 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5207 (unspec:SVE_FULL_F 5208 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5209 (unspec:SVE_FULL_F 5210 [(match_operand 5) 5211 (const_int SVE_RELAXED_GP) 5212 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") 5213 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5214 SVE_COND_FP_BINARY_I1) 5215 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5216 UNSPEC_SEL))] 5217 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5218 "@ 5219 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5220 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5221 #" 5222 "&& 1" 5223 { 5224 if (reload_completed 5225 && register_operand (operands[4], <MODE>mode) 5226 && !rtx_equal_p (operands[0], operands[4])) 5227 { 5228 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5229 operands[4], operands[1])); 5230 operands[4] = operands[2] = operands[0]; 5231 } 5232 else if (!rtx_equal_p (operands[1], operands[5])) 5233 operands[5] = copy_rtx (operands[1]); 5234 else 5235 FAIL; 5236 } 5237 [(set_attr "movprfx" "yes")] 5238) 5239 5240(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" 5241 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5242 (unspec:SVE_FULL_F 5243 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5244 (unspec:SVE_FULL_F 5245 [(match_dup 1) 5246 (const_int SVE_STRICT_GP) 5247 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") 5248 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5249 SVE_COND_FP_BINARY_I1) 5250 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5251 UNSPEC_SEL))] 5252 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5253 "@ 5254 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5255 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5256 #" 5257 "&& reload_completed 5258 && register_operand (operands[4], <MODE>mode) 5259 && !rtx_equal_p (operands[0], operands[4])" 5260 { 5261 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5262 operands[4], operands[1])); 5263 operands[4] = 

;; -------------------------------------------------------------------------
;; ---- [FP] Addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FADD
;; - FSUB
;; -------------------------------------------------------------------------

;; Predicated floating-point addition.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
	  SVE_COND_FP_ADD))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)
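
;; Illustrative sketch (mine, not from the original sources): the
;; vsA/vsN alternatives above exist so that adding a negative encodable
;; constant becomes an FSUB of the positive one (%N3 prints the negated
;; immediate).  Roughly:
;;
;;   x + 1.0   ->  fadd z0.s, p0/m, z0.s, #1.0
;;   x - 1.0   ->  fsub z0.s, p0/m, z0.s, #1.0
;;   x + -0.5  ->  fsub z0.s, p0/m, z0.s, #0.5
;;
;; with only #0.5 and #1.0 being directly encodable in these forms.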

;; Predicated floating-point addition of a constant, merging with the
;; first input.
(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

(define_insn "*cond_add<mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated floating-point addition of a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
	     UNSPEC_COND_FADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
   #
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Register merging forms are handled through SVE_COND_FP_BINARY.

;; -------------------------------------------------------------------------
;; ---- [FP] Complex addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FCADD
;; -------------------------------------------------------------------------
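
;; A brief illustration of FCADD (mine, not from the original sources):
;; vectors are treated as sequences of complex numbers held in even/odd
;; (real/imaginary) element pairs, and the second operand is rotated by
;; 90 or 270 degrees before an ordinary complex addition, so
;;
;;   fcadd z0.s, p0/m, z0.s, z1.s, #90
;;
;; computes, per pair, re = re0 - im1 and im = im0 + re1, the building
;; block for complex multiply-style kernels.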

;; Predicated FCADD.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	  SVE_COND_FCADD))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCADD with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FCADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     SVE_COND_FCADD)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCADD, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
						 operands[4], operands[1]));
	operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
	     SVE_COND_FCADD)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
					     operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - FSUB
;; - FSUBR
;; -------------------------------------------------------------------------
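
;; Hedged example for this section (mine, not from the original
;; sources): FSUBR is the reversed form, subtracting the register from
;; the immediate, so
;;
;;   1.0 - x  ->  fsubr z0.s, p0/m, z0.s, #1.0
;;
;; which is why the constant-minus-vector patterns below merge with the
;; *second* input rather than the first.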

;; Predicated floating-point subtraction.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
	   (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
	  SVE_COND_FP_SUB))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #
   fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[2], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,*,yes,yes")]
)

;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     UNSPEC_COND_FSUB)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_sub<mode>_3_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
	      (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
	     UNSPEC_COND_FSUB)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
	     UNSPEC_COND_FSUB)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
						 operands[4], operands[1]));
	operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
	     UNSPEC_COND_FSUB)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
					     operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Register merging forms are handled through SVE_COND_FP_BINARY.

;; -------------------------------------------------------------------------
;; ---- [FP] Absolute difference
;; -------------------------------------------------------------------------
;; Includes:
;; - FABD
;; -------------------------------------------------------------------------

;; Predicated floating-point absolute difference.
(define_expand "@aarch64_pred_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_dup 4)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")]
	     UNSPEC_COND_FSUB)]
	  UNSPEC_COND_FABS))]
  "TARGET_SVE"
)
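
;; Rough illustration (mine, not from the original sources): the nested
;; FSUB/FABS unspecs above are how FABD is modelled, so a kernel like
;;
;;   void f (float *d, float *a, float *b)
;;   { for (int i = 0; i < 100; ++i) d[i] = fabsf (a[i] - b[i]); }
;;
;; can vectorize to the single instruction
;; "fabd z0.s, p0/m, z0.s, z1.s" instead of an FSUB/FABS pair.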

;; Predicated floating-point absolute difference.
(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     UNSPEC_COND_FSUB)]
	  UNSPEC_COND_FABS))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*aarch64_pred_abd<mode>_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
	     UNSPEC_COND_FSUB)]
	  UNSPEC_COND_FABS))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_expand "@aarch64_cond_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (unspec:SVE_FULL_F
		[(match_dup 1)
		 (const_int SVE_STRICT_GP)
		 (match_operand:SVE_FULL_F 2 "register_operand")
		 (match_operand:SVE_FULL_F 3 "register_operand")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
{
  if (rtx_equal_p (operands[3], operands[4]))
    std::swap (operands[2], operands[3]);
})

;; Predicated floating-point absolute difference, merging with the first
;; input.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (unspec:SVE_FULL_F
		[(match_operand 5)
		 (const_int SVE_RELAXED_GP)
		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*aarch64_cond_abd<mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (unspec:SVE_FULL_F
		[(match_dup 1)
		 (match_operand:SI 5 "aarch64_sve_gp_strictness")
		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point absolute difference, merging with the second
;; input.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (unspec:SVE_FULL_F
		[(match_operand 5)
		 (const_int SVE_RELAXED_GP)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*aarch64_cond_abd<mode>_3_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (unspec:SVE_FULL_F
		[(match_dup 1)
		 (match_operand:SI 5 "aarch64_sve_gp_strictness")
		 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_dup 3)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point absolute difference, merging with an
;; independent value.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (unspec:SVE_FULL_F
		[(match_operand 6)
		 (const_int SVE_RELAXED_GP)
		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[4], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[4]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
						 operands[4], operands[1]));
	operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5])
	     || !rtx_equal_p (operands[1], operands[6]))
      {
	operands[5] = copy_rtx (operands[1]);
	operands[6] = copy_rtx (operands[1]);
      }
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (unspec:SVE_FULL_F
		[(match_dup 1)
		 (match_operand:SI 6 "aarch64_sve_gp_strictness")
		 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
		UNSPEC_COND_FSUB)]
	     UNSPEC_COND_FABS)
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
					     operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; Includes:
;;   - FMUL
;; -------------------------------------------------------------------------

;; Predicated floating-point multiplication.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
	  SVE_COND_FP_MUL))]
  "TARGET_SVE"
  "@
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   #
   fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  ; Split the unpredicated form after reload, so that we don't have
  ; the unnecessary PTRUE.
  "&& reload_completed
   && register_operand (operands[3], <MODE>mode)
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
  ""
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)
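
;; As an illustrative sketch (the register names here are arbitrary),
;; the split above turns a relaxed register-register multiply such as
;;	FMUL	z0.s, p0/m, z0.s, z1.s
;; governed by an all-true predicate into a plain (mult ...), which can
;; then be emitted as the unpredicated form
;;	FMUL	z0.s, z0.s, z1.s
;; without the PTRUE that would otherwise be needed to set up %1.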

;; Merging forms are handled through SVE_COND_FP_BINARY and
;; SVE_COND_FP_BINARY_I1.

;; Unpredicated multiplication by selected lanes.
(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(mult:SVE_FULL_F
	  (unspec:SVE_FULL_F
	    [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
	     (match_operand:SI 3 "const_int_operand")]
	    UNSPEC_SVE_LANE_SELECT)
	  (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
  "TARGET_SVE"
  "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Division
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

(define_expand "div<mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "nonmemory_operand")
	   (match_operand:SVE_FULL_F 2 "register_operand")]
	  UNSPEC_COND_FDIV))]
  "TARGET_SVE"
  {
    if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
      DONE;

    operands[1] = force_reg (<MODE>mode, operands[1]);
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

(define_expand "@aarch64_frecpe<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand")]
	  UNSPEC_FRECPE))]
  "TARGET_SVE"
)

(define_expand "@aarch64_frecps<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand")
	   (match_operand:SVE_FULL_F 2 "register_operand")]
	  UNSPEC_FRECPS))]
  "TARGET_SVE"
)
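
;; FRECPE returns a low-precision estimate of a reciprocal and FRECPS
;; performs one Newton-Raphson step (it computes 2.0 - a * b).  An
;; approximate division a / b can therefore be built along these lines
;; (an illustrative sketch only, with arbitrary register names;
;; aarch64_emit_approx_div picks the actual sequence and the number of
;; iterations based on the mode and the tuning flags):
;;
;;	FRECPE	z2.s, z1.s		// z2 ~= 1 / b
;;	FRECPS	z3.s, z1.s, z2.s	// z3 = 2.0 - b * z2
;;	FMUL	z2.s, z2.s, z3.s	// refine the estimate
;;	...				// more steps as required
;;	FMUL	z0.s, z4.s, z2.s	// z0 ~= a / b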

;; -------------------------------------------------------------------------
;; ---- [FP] Binary logical operations
;; -------------------------------------------------------------------------
;; Includes:
;;   - AND
;;   - EOR
;;   - ORR
;; -------------------------------------------------------------------------

;; Binary logical operations on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "w")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	  LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Sign copying
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

(define_expand "copysign<mode>3"
  [(match_operand:SVE_FULL_F 0 "register_operand")
   (match_operand:SVE_FULL_F 1 "register_operand")
   (match_operand:SVE_FULL_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    emit_insn (gen_and<v_int_equiv>3
	       (sign, arg2,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   HOST_WIDE_INT_M1U
						   << bits)));
    emit_insn (gen_and<v_int_equiv>3
	       (mant, arg1,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   ~(HOST_WIDE_INT_M1U
						     << bits))));
    emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)

(define_expand "xorsign<mode>3"
  [(match_operand:SVE_FULL_F 0 "register_operand")
   (match_operand:SVE_FULL_F 1 "register_operand")
   (match_operand:SVE_FULL_F 2 "register_operand")]
  "TARGET_SVE"
  {
    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
    rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);

    emit_insn (gen_and<v_int_equiv>3
	       (sign, arg2,
		aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
						   HOST_WIDE_INT_M1U
						   << bits)));
    emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
    emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
    DONE;
  }
)
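
;; For example, for .d elements bits is 63 above, so copysign ANDs the
;; integer view of operand 2 with 0x8000000000000000 (the sign bit) and
;; the integer view of operand 1 with 0x7fffffffffffffff (exponent and
;; mantissa), then ORRs the two results together.  xorsign needs only
;; the first mask: XORing operand 1 with the extracted sign flips the
;; sign of operand 1 exactly when operand 2 is negative.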

;; -------------------------------------------------------------------------
;; ---- [FP] Maximum and minimum
;; -------------------------------------------------------------------------
;; Includes:
;;   - FMAX
;;   - FMAXNM
;;   - FMIN
;;   - FMINNM
;; -------------------------------------------------------------------------

;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
;; smin/smax rtx codes are handled in the generic section above.
(define_expand "<maxmin_uns><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")
	   (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
	  SVE_COND_FP_MAXMIN_PUBLIC))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point maximum/minimum.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
	   (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
	  SVE_COND_FP_MAXMIN))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Merging forms are handled through SVE_COND_FP_BINARY and
;; SVE_COND_FP_BINARY_I1.

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary logical operations
;; -------------------------------------------------------------------------
;; Includes:
;;   - AND
;;   - ANDS
;;   - EOR
;;   - EORS
;;   - ORR
;;   - ORRS
;; -------------------------------------------------------------------------

;; Predicate AND.  We can reuse one of the inputs as the GP.
;; Doubling the second operand is the preferred implementation
;; of the MOV alias, so we use that instead of %1/z, %1, %2.
(define_insn "and<mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
  "TARGET_SVE"
  "and\t%0.b, %1/z, %2.b, %2.b"
)
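
;; For example, with arbitrary register names, the pattern above emits
;;	AND	p0.b, p1/z, p2.b, p2.b
;; which is the form that disassembles as the alias
;;	MOV	p0.b, p1/z, p2.b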

;; Unpredicated predicate EOR and ORR.
(define_expand "<optab><mode>3"
  [(set (match_operand:PRED_ALL 0 "register_operand")
	(and:PRED_ALL
	  (LOGICAL_OR:PRED_ALL
	    (match_operand:PRED_ALL 1 "register_operand")
	    (match_operand:PRED_ALL 2 "register_operand"))
	  (match_dup 3)))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<MODE>mode);
  }
)

;; Predicated predicate AND, EOR and ORR.
(define_insn "@aarch64_pred_<optab><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (LOGICAL:PRED_ALL
	    (match_operand:PRED_ALL 2 "register_operand" "Upa")
	    (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Perform a logical operation on operands 2 and 3, using operand 1 as
;; the GP.  Store the result in operand 0 and set the flags in the same
;; way as for PTEST.
(define_insn "*<optab><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (LOGICAL:PRED_ALL
	       (match_operand:PRED_ALL 2 "register_operand" "Upa")
	       (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
		      (match_dup 4)))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same with just the flags result.
(define_insn "*<optab><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (LOGICAL:PRED_ALL
	       (match_operand:PRED_ALL 2 "register_operand" "Upa")
	       (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<logical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary logical operations (inverted second input)
;; -------------------------------------------------------------------------
;; Includes:
;;   - BIC
;;   - ORN
;; -------------------------------------------------------------------------

;; Predicated predicate BIC and ORN.
(define_insn "aarch64_pred_<nlogical><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (NLOGICAL:PRED_ALL
	    (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	    (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
)

;; Same, but set the flags as a side-effect.
(define_insn "*<nlogical><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (NLOGICAL:PRED_ALL
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (NLOGICAL:PRED_ALL
			(not:PRED_ALL (match_dup 3))
			(match_dup 2))
		      (match_dup 4)))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same with just the flags result.
(define_insn "*<nlogical><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (NLOGICAL:PRED_ALL
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
	       (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary logical operations (inverted result)
;; -------------------------------------------------------------------------
;; Includes:
;;   - NAND
;;   - NOR
;; -------------------------------------------------------------------------

;; Predicated predicate NAND and NOR.
(define_insn "aarch64_pred_<logical_nn><mode>_z"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL
	  (NLOGICAL:PRED_ALL
	    (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	    (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
	  (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
)

;; Same, but set the flags as a side-effect.
(define_insn "*<logical_nn><mode>3_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (NLOGICAL:PRED_ALL
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(and:PRED_ALL (NLOGICAL:PRED_ALL
			(not:PRED_ALL (match_dup 2))
			(not:PRED_ALL (match_dup 3)))
		      (match_dup 4)))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)

;; Same with just the flags result.
(define_insn "*<logical_nn><mode>3_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand" "Upa")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (and:PRED_ALL
	     (NLOGICAL:PRED_ALL
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
	       (not:PRED_ALL
		 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
	     (match_dup 4))]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
)

;; =========================================================================
;; == Ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] MLA and MAD
;; -------------------------------------------------------------------------
;; Includes:
;;   - MAD
;;   - MLA
;; -------------------------------------------------------------------------

;; Unpredicated integer addition of product.
(define_expand "fma<mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand")
	       (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 3 "register_operand")))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_int_fma (operands, PLUS))
      DONE;
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer addition of product.
(define_insn "@aarch64_pred_fma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
	(plus:SVE_FULL_I
	  (unspec:SVE_FULL_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
	    UNSPEC_PRED_X)
	  (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)
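
;; An informative summary of the three alternatives above: MAD
;; overwrites the multiplicand (Zdn = Za + Zdn * Zm), so it is used when
;; operand 2 is tied to the destination; MLA overwrites the addend
;; (Zda = Zda + Zn * Zm), so it is used when operand 4 is tied; when
;; neither input can be tied, operand 4 is copied into the destination
;; with MOVPRFX before the MLA.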

;; Predicated integer addition of product with merging.
(define_expand "cond_fma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (plus:SVE_FULL_I
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "general_operand"))
	     (match_operand:SVE_FULL_I 4 "register_operand"))
	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
      DONE;
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)

;; Predicated integer addition of product, merging with the first input.
(define_insn "*cond_fma<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (plus:SVE_FULL_I
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	     (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer addition of product, merging with the third input.
(define_insn "*cond_fma<mode>_4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (plus:SVE_FULL_I
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
	     (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer addition of product, merging with an independent value.
(define_insn_and_rewrite "*cond_fma<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (plus:SVE_FULL_I
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
	     (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] MLS and MSB
;; -------------------------------------------------------------------------
;; Includes:
;;   - MLS
;;   - MSB
;; -------------------------------------------------------------------------

;; Unpredicated integer subtraction of product.
(define_expand "fnma<mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(minus:SVE_FULL_I
	  (match_operand:SVE_FULL_I 3 "register_operand")
	  (unspec:SVE_FULL_I
	    [(match_dup 4)
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 1 "register_operand")
	       (match_operand:SVE_FULL_I 2 "general_operand"))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_int_fma (operands, MINUS))
      DONE;
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer subtraction of product.
(define_insn "@aarch64_pred_fnma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
	(minus:SVE_FULL_I
	  (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
	  (unspec:SVE_FULL_I
	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
	    UNSPEC_PRED_X)))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated integer subtraction of product with merging.
(define_expand "cond_fnma<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand")
	   (minus:SVE_FULL_I
	     (match_operand:SVE_FULL_I 4 "register_operand")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand")
	       (match_operand:SVE_FULL_I 3 "general_operand")))
	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
      DONE;
    /* Swap the multiplication operands if the fallback value is the
       second of the two.  */
    if (rtx_equal_p (operands[3], operands[5]))
      std::swap (operands[2], operands[3]);
  }
)

;; Predicated integer subtraction of product, merging with the first input.
(define_insn "*cond_fnma<mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_FULL_I
	     (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer subtraction of product, merging with the third input.
(define_insn "*cond_fnma<mode>_4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (minus:SVE_FULL_I
	     (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer subtraction of product, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_fnma<mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_I
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (minus:SVE_FULL_I
	     (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
	     (mult:SVE_FULL_I
	       (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
	       (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
	   (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Dot product
;; -------------------------------------------------------------------------
;; Includes:
;;   - SDOT
;;   - SUDOT (I8MM)
;;   - UDOT
;;   - USDOT (I8MM)
;; -------------------------------------------------------------------------

;; Four-element integer dot-product with accumulation.
(define_insn "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
	    DOTPROD)
	  (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
  [(set_attr "movprfx" "*,yes")]
)
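
;; For example (informative, with arbitrary register names), for .s
;; elements
;;	SDOT	z0.s, z1.b, z2.b
;; adds to each 32-bit lane i of z0 the dot product
;;	z1.b[4*i+0]*z2.b[4*i+0] + ... + z1.b[4*i+3]*z2.b[4*i+3]
;; with the byte elements sign-extended before multiplication.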

;; Four-element integer dot-product by selected lanes with accumulation.
(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
	(plus:SVE_FULL_SDI
	  (unspec:SVE_FULL_SDI
	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
	     (unspec:<VSI2QI>
	       [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
		(match_operand:SI 3 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    DOTPROD)
	  (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
   movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(plus:VNx4SI_ONLY
	  (unspec:VNx4SI_ONLY
	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
	     (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
	    DOTPROD_US_ONLY)
	  (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>dot\\t%0.s, %1.b, %2.b
   movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(plus:VNx4SI_ONLY
	  (unspec:VNx4SI_ONLY
	    [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
	     (unspec:<VSI2QI>
	       [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
		(match_operand:SI 3 "const_int_operand")]
	       UNSPEC_SVE_LANE_SELECT)]
	    DOTPROD_I8MM)
	  (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>dot\\t%0.s, %1.b, %2.b[%3]
   movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
;; operands 1 and 2.  The sequence also has to perform a widening reduction of
;; the difference into a vector and accumulate that into operand 3 before
;; copying that into the result operand 0.
;; Perform that with a sequence of:
;;	MOV		ones.b, #1
;;	[SU]ABD		diff.b, p0/m, op1.b, op2.b
;;	MOVPRFX		op0, op3	// If necessary
;;	UDOT		op0.s, diff.b, ones.b
(define_expand "<sur>sad<vsi2qi>"
  [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
   (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
		     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
   (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
  "TARGET_SVE"
  {
    rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
    rtx diff = gen_reg_rtx (<VSI2QI>mode);
    emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
    emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
;;   - SMMLA (I8MM)
;;   - UMMLA (I8MM)
;;   - USMMLA (I8MM)
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_add_<optab><vsi2qi>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
	(plus:VNx4SI_ONLY
	  (unspec:VNx4SI_ONLY
	    [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
	     (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
	    MATMUL)
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE_I8MM"
  "@
   <sur>mmla\\t%0.s, %2.b, %3.b
   movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
  [(set_attr "movprfx" "*,yes")]
)
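
;; As an informative summary of the instruction above: the MMLA forms
;; treat each 128-bit segment of the accumulator as a 2x2 matrix of
;; 32-bit elements and add to it the product of a 2x8 and an 8x2 matrix
;; of byte elements taken from the corresponding segments of operands 2
;; and 3 (see the architecture reference for the exact element layout).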

;; -------------------------------------------------------------------------
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;;   - FMAD
;;   - FMLA
;;   - FMLS
;;   - FMSB
;;   - FNMAD
;;   - FNMLA
;;   - FNMLS
;;   - FNMSB
;; -------------------------------------------------------------------------

;; Unpredicated floating-point ternary operations.
(define_expand "<optab><mode>4"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 4)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")
	   (match_operand:SVE_FULL_F 2 "register_operand")
	   (match_operand:SVE_FULL_F 3 "register_operand")]
	  SVE_COND_FP_TERNARY))]
  "TARGET_SVE"
  {
    operands[4] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point ternary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
	   (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
	  SVE_COND_FP_TERNARY))]
  "TARGET_SVE"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated floating-point ternary operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")
	      (match_operand:SVE_FULL_F 4 "register_operand")]
	     SVE_COND_FP_TERNARY)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
{
  /* Swap the multiplication operands if the fallback value is the
     second of the two.  */
  if (rtx_equal_p (operands[3], operands[5]))
    std::swap (operands[2], operands[3]);
})

;; Predicated floating-point ternary operations, merging with the
;; first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 2)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point ternary operations, merging with the
;; third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_4_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FP_TERNARY)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated floating-point ternary operations, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
	     SVE_COND_FP_TERNARY)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[5])
   && !rtx_equal_p (operands[3], operands[5])
   && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[5], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[5]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
						 operands[5], operands[1]));
	operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
"yes")] 7056) 7057 7058(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 7059 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") 7060 (unspec:SVE_FULL_F 7061 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 7062 (unspec:SVE_FULL_F 7063 [(match_dup 1) 7064 (const_int SVE_STRICT_GP) 7065 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") 7066 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") 7067 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] 7068 SVE_COND_FP_TERNARY) 7069 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] 7070 UNSPEC_SEL))] 7071 "TARGET_SVE 7072 && !rtx_equal_p (operands[2], operands[5]) 7073 && !rtx_equal_p (operands[3], operands[5]) 7074 && !rtx_equal_p (operands[4], operands[5])" 7075 "@ 7076 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7077 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7078 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7079 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> 7080 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7081 #" 7082 "&& reload_completed 7083 && register_operand (operands[5], <MODE>mode) 7084 && !rtx_equal_p (operands[0], operands[5])" 7085 { 7086 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 7087 operands[5], operands[1])); 7088 operands[5] = operands[4] = operands[0]; 7089 } 7090 [(set_attr "movprfx" "yes")] 7091) 7092 7093;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using 7094;; (fma ...) since target-independent code won't understand the indexing. 7095(define_insn "@aarch64_<optab>_lane_<mode>" 7096 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7097 (unspec:SVE_FULL_F 7098 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") 7099 (unspec:SVE_FULL_F 7100 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") 7101 (match_operand:SI 3 "const_int_operand")] 7102 UNSPEC_SVE_LANE_SELECT) 7103 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7104 SVE_FP_TERNARY_LANE))] 7105 "TARGET_SVE" 7106 "@ 7107 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3] 7108 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" 7109 [(set_attr "movprfx" "*,yes")] 7110) 7111 7112;; ------------------------------------------------------------------------- 7113;; ---- [FP] Complex multiply-add 7114;; ------------------------------------------------------------------------- 7115;; Includes merging patterns for: 7116;; - FCMLA 7117;; ------------------------------------------------------------------------- 7118 7119;; Predicated FCMLA. 

;; Predicated FCMLA.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	   (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	  SVE_COND_FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCMLA with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")
	      (match_operand:SVE_FULL_F 3 "register_operand")
	      (match_operand:SVE_FULL_F 4 "register_operand")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCMLA, merging with the third input.
(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FCMLA)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "*cond_<optab><mode>_4_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
	     SVE_COND_FCMLA)
	   (match_dup 4)]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCMLA, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& 1"
  {
    if (reload_completed
	&& register_operand (operands[5], <MODE>mode)
	&& !rtx_equal_p (operands[0], operands[5]))
      {
	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
						 operands[5], operands[1]));
	operands[5] = operands[4] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[6]))
      operands[6] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
	      (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
	     SVE_COND_FCMLA)
	   (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
   #"
  "&& reload_completed
   && register_operand (operands[5], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[5])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
					     operands[5], operands[1]));
    operands[5] = operands[4] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Unpredicated FCMLA with indexing.
(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_HSF
	  [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
	   (unspec:SVE_FULL_HSF
	     [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
	      (match_operand:SI 3 "const_int_operand")]
	     UNSPEC_SVE_LANE_SELECT)
	   (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
	  FCMLA))]
  "TARGET_SVE"
  "@
   fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
   movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Trigonometric multiply-add
;; -------------------------------------------------------------------------
;; Includes:
;;   - FTMAD
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_tmad<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_F
	  [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
	   (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	   (match_operand:DI 3 "const_int_operand")]
	  UNSPEC_FTMAD))]
  "TARGET_SVE"
  "@
   ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; -------------------------------------------------------------------------
;; Includes:
;;   - BFDOT (BF16)
;;   - BFMLALB (BF16)
;;   - BFMLALT (BF16)
;;   - BFMMLA (BF16)
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF
	  [(match_operand:VNx4SF 1 "register_operand" "0, w")
	   (match_operand:VNx8BF 2 "register_operand" "w, w")
	   (match_operand:VNx8BF 3 "register_operand" "w, w")]
	  SVE_BFLOAT_TERNARY_LONG))]
  "TARGET_SVE_BF16"
  "@
   <sve_fp_op>\t%0.s, %2.h, %3.h
   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
  [(set_attr "movprfx" "*,yes")]
)

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
  [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
	(unspec:VNx4SF
	  [(match_operand:VNx4SF 1 "register_operand" "0, w")
	   (match_operand:VNx8BF 2 "register_operand" "w, w")
	   (match_operand:VNx8BF 3 "register_operand" "y, y")
	   (match_operand:SI 4 "const_int_operand")]
	  SVE_BFLOAT_TERNARY_LONG_LANE))]
  "TARGET_SVE_BF16"
  "@
   <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
   movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [FP] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
;;   - FMMLA (F32MM,F64MM)
;; -------------------------------------------------------------------------

;; The mode iterator enforces the target requirements.
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_MATMULF
	  [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
	   (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
	   (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
	  FMMLA))]
  "TARGET_SVE"
  "@
   <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Comparisons and selects
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Select based on predicates
;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;;   - FMOV
;;   - MOV
;;   - SEL
;; -------------------------------------------------------------------------

;; vcond_mask operand order: true, false, mask
;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
;; SEL operand order:        mask, true, false
(define_expand "@vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand")
	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (register_operand (operands[1], <MODE>mode))
      operands[2] = force_reg (<MODE>mode, operands[2]);
  }
)

;; Selects between:
;; - two registers
;; - a duplicated immediate and a register
;; - a duplicated immediate and zero
(define_insn "*vcond_mask_<mode><vpred>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
	   (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE
   && (!register_operand (operands[1], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "@
   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
   mov\t%0.<Vetype>, %3/m, #%I1
   mov\t%0.<Vetype>, %3/z, #%I1
   fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
   movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
)
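
;; For example (informative, arbitrary register names), the first
;; alternative above emits
;;	SEL	z0.s, p3, z1.s, z2.s
;; which copies each element from z1 where the corresponding bit of p3
;; is set and from z2 where it is clear.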

;; Optimize selects between a duplicated scalar variable and another vector,
;; the latter of which can be a zero constant or a variable.  Treat duplicates
;; of GPRs as being more expensive than duplicates of FPRs, since they
;; involve a cross-file move.
(define_insn "@aarch64_sel_dup<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
	   (vec_duplicate:SVE_FULL
	     (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
	   (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %3/m, %<vwcore>1
   mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
  [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Compare and select
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcond<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(if_then_else:SVE_FULL
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
	  (match_operand:SVE_FULL 1 "nonmemory_operand")
	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Integer vcondu.  Don't enforce an immediate range here, since it
;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
(define_expand "vcondu<mode><v_int_equiv>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(if_then_else:SVE_FULL
	  (match_operator 3 "comparison_operator"
	    [(match_operand:<V_INT_EQUIV> 4 "register_operand")
	     (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
	  (match_operand:SVE_FULL 1 "nonmemory_operand")
	  (match_operand:SVE_FULL 2 "nonmemory_operand")))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
    DONE;
  }
)

;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
7487(define_expand "vcond<mode><v_fp_equiv>" 7488 [(set (match_operand:SVE_FULL_HSD 0 "register_operand") 7489 (if_then_else:SVE_FULL_HSD 7490 (match_operator 3 "comparison_operator" 7491 [(match_operand:<V_FP_EQUIV> 4 "register_operand") 7492 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) 7493 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") 7494 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] 7495 "TARGET_SVE" 7496 { 7497 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); 7498 DONE; 7499 } 7500) 7501 7502;; ------------------------------------------------------------------------- 7503;; ---- [INT] Comparisons 7504;; ------------------------------------------------------------------------- 7505;; Includes: 7506;; - CMPEQ 7507;; - CMPGE 7508;; - CMPGT 7509;; - CMPHI 7510;; - CMPHS 7511;; - CMPLE 7512;; - CMPLO 7513;; - CMPLS 7514;; - CMPLT 7515;; - CMPNE 7516;; ------------------------------------------------------------------------- 7517 7518;; Signed integer comparisons. Don't enforce an immediate range here, since 7519;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int 7520;; instead. 7521(define_expand "vec_cmp<mode><vpred>" 7522 [(parallel 7523 [(set (match_operand:<VPRED> 0 "register_operand") 7524 (match_operator:<VPRED> 1 "comparison_operator" 7525 [(match_operand:SVE_FULL_I 2 "register_operand") 7526 (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) 7527 (clobber (reg:CC_NZC CC_REGNUM))])] 7528 "TARGET_SVE" 7529 { 7530 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), 7531 operands[2], operands[3]); 7532 DONE; 7533 } 7534) 7535 7536;; Unsigned integer comparisons. Don't enforce an immediate range here, since 7537;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int 7538;; instead. 7539(define_expand "vec_cmpu<mode><vpred>" 7540 [(parallel 7541 [(set (match_operand:<VPRED> 0 "register_operand") 7542 (match_operator:<VPRED> 1 "comparison_operator" 7543 [(match_operand:SVE_FULL_I 2 "register_operand") 7544 (match_operand:SVE_FULL_I 3 "nonmemory_operand")])) 7545 (clobber (reg:CC_NZC CC_REGNUM))])] 7546 "TARGET_SVE" 7547 { 7548 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), 7549 operands[2], operands[3]); 7550 DONE; 7551 } 7552) 7553 7554;; Predicated integer comparisons. 7555(define_insn "@aarch64_pred_cmp<cmp_op><mode>" 7556 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7557 (unspec:<VPRED> 7558 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7559 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7560 (SVE_INT_CMP:<VPRED> 7561 (match_operand:SVE_FULL_I 3 "register_operand" "w, w") 7562 (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7563 UNSPEC_PRED_Z)) 7564 (clobber (reg:CC_NZC CC_REGNUM))] 7565 "TARGET_SVE" 7566 "@ 7567 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4 7568 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 7569) 7570 7571;; Predicated integer comparisons in which both the flag and predicate 7572;; results are interesting. 
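;;
;; An illustrative ACLE-level sketch of this situation (assumes
;; #include <arm_sve.h>; not part of this file).  Whether the PTEST is
;; in fact removed depends on the governing predicates matching, as
;; checked by aarch64_sve_same_pred_for_ptest_p:
;;
;;   int
;;   any_zero (svbool_t pg, svint32_t x)
;;   {
;;     svbool_t eq = svcmpeq_n_s32 (pg, x, 0);  /* CMPEQ sets the flags.  */
;;     return svptest_any (pg, eq);             /* Can reuse those flags.  */
;;   }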
7573(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" 7574 [(set (reg:CC_NZC CC_REGNUM) 7575 (unspec:CC_NZC 7576 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") 7577 (match_operand 4) 7578 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7579 (unspec:<VPRED> 7580 [(match_operand 6) 7581 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7582 (SVE_INT_CMP:<VPRED> 7583 (match_operand:SVE_FULL_I 2 "register_operand" "w, w") 7584 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7585 UNSPEC_PRED_Z)] 7586 UNSPEC_PTEST)) 7587 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7588 (unspec:<VPRED> 7589 [(match_dup 6) 7590 (match_dup 7) 7591 (SVE_INT_CMP:<VPRED> 7592 (match_dup 2) 7593 (match_dup 3))] 7594 UNSPEC_PRED_Z))] 7595 "TARGET_SVE 7596 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7597 "@ 7598 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 7599 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 7600 "&& !rtx_equal_p (operands[4], operands[6])" 7601 { 7602 operands[6] = copy_rtx (operands[4]); 7603 operands[7] = operands[5]; 7604 } 7605) 7606 7607;; Predicated integer comparisons in which only the flags result is 7608;; interesting. 7609(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" 7610 [(set (reg:CC_NZC CC_REGNUM) 7611 (unspec:CC_NZC 7612 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") 7613 (match_operand 4) 7614 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7615 (unspec:<VPRED> 7616 [(match_operand 6) 7617 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7618 (SVE_INT_CMP:<VPRED> 7619 (match_operand:SVE_FULL_I 2 "register_operand" "w, w") 7620 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7621 UNSPEC_PRED_Z)] 7622 UNSPEC_PTEST)) 7623 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] 7624 "TARGET_SVE 7625 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7626 "@ 7627 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 7628 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 7629 "&& !rtx_equal_p (operands[4], operands[6])" 7630 { 7631 operands[6] = copy_rtx (operands[4]); 7632 operands[7] = operands[5]; 7633 } 7634) 7635 7636;; Predicated integer comparisons, formed by combining a PTRUE-predicated 7637;; comparison with an AND. Split the instruction into its preferred form 7638;; at the earliest opportunity, in order to get rid of the redundant 7639;; operand 4. 7640(define_insn_and_split "*cmp<cmp_op><mode>_and" 7641 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7642 (and:<VPRED> 7643 (unspec:<VPRED> 7644 [(match_operand 4) 7645 (const_int SVE_KNOWN_PTRUE) 7646 (SVE_INT_CMP:<VPRED> 7647 (match_operand:SVE_FULL_I 2 "register_operand" "w, w") 7648 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7649 UNSPEC_PRED_Z) 7650 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) 7651 (clobber (reg:CC_NZC CC_REGNUM))] 7652 "TARGET_SVE" 7653 "#" 7654 "&& 1" 7655 [(parallel 7656 [(set (match_dup 0) 7657 (unspec:<VPRED> 7658 [(match_dup 1) 7659 (const_int SVE_MAYBE_NOT_PTRUE) 7660 (SVE_INT_CMP:<VPRED> 7661 (match_dup 2) 7662 (match_dup 3))] 7663 UNSPEC_PRED_Z)) 7664 (clobber (reg:CC_NZC CC_REGNUM))])] 7665) 7666 7667;; Predicated integer wide comparisons. 
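;;
;; A minimal ACLE-level sketch of a wide comparison (illustrative only;
;; assumes #include <arm_sve.h>):
;;
;;   svbool_t
;;   lt_wide (svbool_t pg, svint8_t x, svint64_t bound)
;;   {
;;     return svcmplt_wide_s8 (pg, x, bound);  /* CMPLT on .b against .d */
;;   }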
7668(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide" 7669 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7670 (unspec:<VPRED> 7671 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7672 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7673 (unspec:<VPRED> 7674 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") 7675 (match_operand:VNx2DI 4 "register_operand" "w")] 7676 SVE_COND_INT_CMP_WIDE)] 7677 UNSPEC_PRED_Z)) 7678 (clobber (reg:CC_NZC CC_REGNUM))] 7679 "TARGET_SVE" 7680 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d" 7681) 7682 7683;; Predicated integer wide comparisons in which both the flag and 7684;; predicate results are interesting. 7685(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" 7686 [(set (reg:CC_NZC CC_REGNUM) 7687 (unspec:CC_NZC 7688 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7689 (match_operand 4) 7690 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7691 (unspec:<VPRED> 7692 [(match_operand:VNx16BI 6 "register_operand" "Upl") 7693 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7694 (unspec:<VPRED> 7695 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") 7696 (match_operand:VNx2DI 3 "register_operand" "w")] 7697 SVE_COND_INT_CMP_WIDE)] 7698 UNSPEC_PRED_Z)] 7699 UNSPEC_PTEST)) 7700 (set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7701 (unspec:<VPRED> 7702 [(match_dup 6) 7703 (match_dup 7) 7704 (unspec:<VPRED> 7705 [(match_dup 2) 7706 (match_dup 3)] 7707 SVE_COND_INT_CMP_WIDE)] 7708 UNSPEC_PRED_Z))] 7709 "TARGET_SVE 7710 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7711 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" 7712) 7713 7714;; Predicated integer wide comparisons in which only the flags result 7715;; is interesting. 7716(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" 7717 [(set (reg:CC_NZC CC_REGNUM) 7718 (unspec:CC_NZC 7719 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7720 (match_operand 4) 7721 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7722 (unspec:<VPRED> 7723 [(match_operand:VNx16BI 6 "register_operand" "Upl") 7724 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7725 (unspec:<VPRED> 7726 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") 7727 (match_operand:VNx2DI 3 "register_operand" "w")] 7728 SVE_COND_INT_CMP_WIDE)] 7729 UNSPEC_PRED_Z)] 7730 UNSPEC_PTEST)) 7731 (clobber (match_scratch:<VPRED> 0 "=Upa"))] 7732 "TARGET_SVE 7733 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7734 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" 7735) 7736 7737;; ------------------------------------------------------------------------- 7738;; ---- [INT] While tests 7739;; ------------------------------------------------------------------------- 7740;; Includes: 7741;; - WHILEGE (SVE2) 7742;; - WHILEGT (SVE2) 7743;; - WHILEHI (SVE2) 7744;; - WHILEHS (SVE2) 7745;; - WHILELE 7746;; - WHILELO 7747;; - WHILELS 7748;; - WHILELT 7749;; - WHILERW (SVE2) 7750;; - WHILEWR (SVE2) 7751;; ------------------------------------------------------------------------- 7752 7753;; Set element I of the result if (cmp (plus operand1 J) operand2) is 7754;; true for all J in [0, I]. 
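;;
;; For example (an illustrative sketch assuming #include <arm_sve.h>),
;; WHILELT provides the classic loop-tail mask:
;;
;;   void
;;   set_ones (float *x, int64_t n)
;;   {
;;     for (int64_t i = 0; i < n; i += svcntw ())
;;       {
;;         svbool_t pg = svwhilelt_b32 (i, n);  /* WHILELT p0.s, x0, x1 */
;;         svst1 (pg, x + i, svdup_f32 (1.0f));
;;       }
;;   }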
7755(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>" 7756 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 7757 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 7758 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 7759 SVE_WHILE)) 7760 (clobber (reg:CC_NZC CC_REGNUM))] 7761 "TARGET_SVE" 7762 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 7763) 7764 7765;; The WHILE instructions set the flags in the same way as a PTEST with 7766;; a PTRUE GP. Handle the case in which both results are useful. The GP 7767;; operands to the PTEST aren't needed, so we allow them to be anything. 7768(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc" 7769 [(set (reg:CC_NZC CC_REGNUM) 7770 (unspec:CC_NZC 7771 [(match_operand 3) 7772 (match_operand 4) 7773 (const_int SVE_KNOWN_PTRUE) 7774 (unspec:PRED_ALL 7775 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 7776 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 7777 SVE_WHILE)] 7778 UNSPEC_PTEST)) 7779 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 7780 (unspec:PRED_ALL [(match_dup 1) 7781 (match_dup 2)] 7782 SVE_WHILE))] 7783 "TARGET_SVE" 7784 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 7785 ;; Force the compiler to drop the unused predicate operand, so that we 7786 ;; don't have an unnecessary PTRUE. 7787 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" 7788 { 7789 operands[3] = CONSTM1_RTX (VNx16BImode); 7790 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 7791 } 7792) 7793 7794;; Same, but handle the case in which only the flags result is useful. 7795(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest" 7796 [(set (reg:CC_NZC CC_REGNUM) 7797 (unspec:CC_NZC 7798 [(match_operand 3) 7799 (match_operand 4) 7800 (const_int SVE_KNOWN_PTRUE) 7801 (unspec:PRED_ALL 7802 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 7803 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 7804 SVE_WHILE)] 7805 UNSPEC_PTEST)) 7806 (clobber (match_scratch:PRED_ALL 0 "=Upa"))] 7807 "TARGET_SVE" 7808 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 7809 ;; Force the compiler to drop the unused predicate operand, so that we 7810 ;; don't have an unnecessary PTRUE. 7811 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" 7812 { 7813 operands[3] = CONSTM1_RTX (VNx16BImode); 7814 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 7815 } 7816) 7817 7818;; ------------------------------------------------------------------------- 7819;; ---- [FP] Direct comparisons 7820;; ------------------------------------------------------------------------- 7821;; Includes: 7822;; - FCMEQ 7823;; - FCMGE 7824;; - FCMGT 7825;; - FCMLE 7826;; - FCMLT 7827;; - FCMNE 7828;; - FCMUO 7829;; ------------------------------------------------------------------------- 7830 7831;; Floating-point comparisons. All comparisons except FCMUO allow a zero 7832;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO 7833;; with zero. 7834(define_expand "vec_cmp<mode><vpred>" 7835 [(set (match_operand:<VPRED> 0 "register_operand") 7836 (match_operator:<VPRED> 1 "comparison_operator" 7837 [(match_operand:SVE_FULL_F 2 "register_operand") 7838 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] 7839 "TARGET_SVE" 7840 { 7841 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), 7842 operands[2], operands[3], false); 7843 DONE; 7844 } 7845) 7846 7847;; Predicated floating-point comparisons. 
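;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>)
;; that maps to the #0.0 form of FCMLT:
;;
;;   svbool_t
;;   negative_lanes (svbool_t pg, svfloat32_t x)
;;   {
;;     return svcmplt_n_f32 (pg, x, 0.0f);  /* FCMLT p0.s, pg/z, x.s, #0.0 */
;;   }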
7848(define_insn "@aarch64_pred_fcm<cmp_op><mode>" 7849 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7850 (unspec:<VPRED> 7851 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7852 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7853 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7854 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] 7855 SVE_COND_FP_CMP_I0))] 7856 "TARGET_SVE" 7857 "@ 7858 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0 7859 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 7860) 7861 7862;; Same for unordered comparisons. 7863(define_insn "@aarch64_pred_fcmuo<mode>" 7864 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7865 (unspec:<VPRED> 7866 [(match_operand:<VPRED> 1 "register_operand" "Upl") 7867 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7868 (match_operand:SVE_FULL_F 3 "register_operand" "w") 7869 (match_operand:SVE_FULL_F 4 "register_operand" "w")] 7870 UNSPEC_COND_FCMUO))] 7871 "TARGET_SVE" 7872 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 7873) 7874 7875;; Floating-point comparisons predicated on a PTRUE, with the results ANDed 7876;; with another predicate P. This does not have the same trapping behavior 7877;; as predicating the comparison itself on P, but it's a legitimate fold, 7878;; since we can drop any potentially-trapping operations whose results 7879;; are not needed. 7880;; 7881;; Split the instruction into its preferred form (below) at the earliest 7882;; opportunity, in order to get rid of the redundant operand 1. 7883(define_insn_and_split "*fcm<cmp_op><mode>_and_combine" 7884 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7885 (and:<VPRED> 7886 (unspec:<VPRED> 7887 [(match_operand:<VPRED> 1) 7888 (const_int SVE_KNOWN_PTRUE) 7889 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7890 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] 7891 SVE_COND_FP_CMP_I0) 7892 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] 7893 "TARGET_SVE" 7894 "#" 7895 "&& 1" 7896 [(set (match_dup 0) 7897 (unspec:<VPRED> 7898 [(match_dup 4) 7899 (const_int SVE_MAYBE_NOT_PTRUE) 7900 (match_dup 2) 7901 (match_dup 3)] 7902 SVE_COND_FP_CMP_I0))] 7903) 7904 7905;; Same for unordered comparisons. 7906(define_insn_and_split "*fcmuo<mode>_and_combine" 7907 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7908 (and:<VPRED> 7909 (unspec:<VPRED> 7910 [(match_operand:<VPRED> 1) 7911 (const_int SVE_KNOWN_PTRUE) 7912 (match_operand:SVE_FULL_F 2 "register_operand" "w") 7913 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 7914 UNSPEC_COND_FCMUO) 7915 (match_operand:<VPRED> 4 "register_operand" "Upl")))] 7916 "TARGET_SVE" 7917 "#" 7918 "&& 1" 7919 [(set (match_dup 0) 7920 (unspec:<VPRED> 7921 [(match_dup 4) 7922 (const_int SVE_MAYBE_NOT_PTRUE) 7923 (match_dup 2) 7924 (match_dup 3)] 7925 UNSPEC_COND_FCMUO))] 7926) 7927 7928;; ------------------------------------------------------------------------- 7929;; ---- [FP] Absolute comparisons 7930;; ------------------------------------------------------------------------- 7931;; Includes: 7932;; - FACGE 7933;; - FACGT 7934;; - FACLE 7935;; - FACLT 7936;; ------------------------------------------------------------------------- 7937 7938;; Predicated floating-point absolute comparisons. 
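;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>):
;;
;;   svbool_t
;;   abs_gt (svbool_t pg, svfloat32_t a, svfloat32_t b)
;;   {
;;     return svacgt (pg, a, b);  /* FACGT: |a| > |b|, lane by lane */
;;   }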
7939(define_expand "@aarch64_pred_fac<cmp_op><mode>" 7940 [(set (match_operand:<VPRED> 0 "register_operand") 7941 (unspec:<VPRED> 7942 [(match_operand:<VPRED> 1 "register_operand") 7943 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7944 (unspec:SVE_FULL_F 7945 [(match_dup 1) 7946 (match_dup 2) 7947 (match_operand:SVE_FULL_F 3 "register_operand")] 7948 UNSPEC_COND_FABS) 7949 (unspec:SVE_FULL_F 7950 [(match_dup 1) 7951 (match_dup 2) 7952 (match_operand:SVE_FULL_F 4 "register_operand")] 7953 UNSPEC_COND_FABS)] 7954 SVE_COND_FP_ABS_CMP))] 7955 "TARGET_SVE" 7956) 7957 7958(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" 7959 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7960 (unspec:<VPRED> 7961 [(match_operand:<VPRED> 1 "register_operand" "Upl") 7962 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 7963 (unspec:SVE_FULL_F 7964 [(match_operand 5) 7965 (const_int SVE_RELAXED_GP) 7966 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 7967 UNSPEC_COND_FABS) 7968 (unspec:SVE_FULL_F 7969 [(match_operand 6) 7970 (const_int SVE_RELAXED_GP) 7971 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 7972 UNSPEC_COND_FABS)] 7973 SVE_COND_FP_ABS_CMP))] 7974 "TARGET_SVE" 7975 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 7976 "&& (!rtx_equal_p (operands[1], operands[5]) 7977 || !rtx_equal_p (operands[1], operands[6]))" 7978 { 7979 operands[5] = copy_rtx (operands[1]); 7980 operands[6] = copy_rtx (operands[1]); 7981 } 7982) 7983 7984(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict" 7985 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7986 (unspec:<VPRED> 7987 [(match_operand:<VPRED> 1 "register_operand" "Upl") 7988 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 7989 (unspec:SVE_FULL_F 7990 [(match_dup 1) 7991 (match_operand:SI 5 "aarch64_sve_gp_strictness") 7992 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 7993 UNSPEC_COND_FABS) 7994 (unspec:SVE_FULL_F 7995 [(match_dup 1) 7996 (match_operand:SI 6 "aarch64_sve_gp_strictness") 7997 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 7998 UNSPEC_COND_FABS)] 7999 SVE_COND_FP_ABS_CMP))] 8000 "TARGET_SVE" 8001 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 8002) 8003 8004;; ------------------------------------------------------------------------- 8005;; ---- [PRED] Select 8006;; ------------------------------------------------------------------------- 8007;; Includes: 8008;; - SEL 8009;; ------------------------------------------------------------------------- 8010 8011(define_insn "@vcond_mask_<mode><mode>" 8012 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8013 (ior:PRED_ALL 8014 (and:PRED_ALL 8015 (match_operand:PRED_ALL 3 "register_operand" "Upa") 8016 (match_operand:PRED_ALL 1 "register_operand" "Upa")) 8017 (and:PRED_ALL 8018 (not (match_dup 3)) 8019 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] 8020 "TARGET_SVE" 8021 "sel\t%0.b, %3, %1.b, %2.b" 8022) 8023 8024;; ------------------------------------------------------------------------- 8025;; ---- [PRED] Test bits 8026;; ------------------------------------------------------------------------- 8027;; Includes: 8028;; - PTEST 8029;; ------------------------------------------------------------------------- 8030 8031;; Branch based on predicate equality or inequality. 
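;;
;; A source-level rendering of what the expander emits when comparing two
;; predicates (illustrative; assumes #include <arm_sve.h>):
;;
;;   int
;;   preds_differ (svbool_t a, svbool_t b)
;;   {
;;     svbool_t all = svptrue_b8 ();
;;     /* EOR gives the lanes on which A and B disagree; PTEST that.  */
;;     return svptest_any (all, sveor_b_z (all, a, b));
;;   }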
8032(define_expand "cbranch<mode>4" 8033 [(set (pc) 8034 (if_then_else 8035 (match_operator 0 "aarch64_equality_operator" 8036 [(match_operand:PRED_ALL 1 "register_operand") 8037 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) 8038 (label_ref (match_operand 3 "")) 8039 (pc)))] 8040 "" 8041 { 8042 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); 8043 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); 8044 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); 8045 rtx pred; 8046 if (operands[2] == CONST0_RTX (<MODE>mode)) 8047 pred = operands[1]; 8048 else 8049 { 8050 pred = gen_reg_rtx (<MODE>mode); 8051 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], 8052 operands[2])); 8053 } 8054 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); 8055 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); 8056 operands[2] = const0_rtx; 8057 } 8058) 8059 8060;; See "Description of UNSPEC_PTEST" above for details. 8061(define_insn "aarch64_ptest<mode>" 8062 [(set (reg:CC_NZC CC_REGNUM) 8063 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") 8064 (match_operand 1) 8065 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 8066 (match_operand:PRED_ALL 3 "register_operand" "Upa")] 8067 UNSPEC_PTEST))] 8068 "TARGET_SVE" 8069 "ptest\t%0, %3.b" 8070) 8071 8072;; ========================================================================= 8073;; == Reductions 8074;; ========================================================================= 8075 8076;; ------------------------------------------------------------------------- 8077;; ---- [INT,FP] Conditional reductions 8078;; ------------------------------------------------------------------------- 8079;; Includes: 8080;; - CLASTA 8081;; - CLASTB 8082;; ------------------------------------------------------------------------- 8083 8084;; Set operand 0 to the last active element in operand 3, or to tied 8085;; operand 1 if no elements are active. 8086(define_insn "@fold_extract_<last_op>_<mode>" 8087 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") 8088 (unspec:<VEL> 8089 [(match_operand:<VEL> 1 "register_operand" "0, 0") 8090 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") 8091 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8092 CLAST))] 8093 "TARGET_SVE" 8094 "@ 8095 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> 8096 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" 8097) 8098 8099(define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>" 8100 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") 8101 (unspec:SVE_FULL 8102 [(match_operand:SVE_FULL 1 "register_operand" "0, w") 8103 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") 8104 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8105 CLAST))] 8106 "TARGET_SVE" 8107 "@ 8108 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype> 8109 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>" 8110) 8111 8112;; ------------------------------------------------------------------------- 8113;; ---- [INT] Tree reductions 8114;; ------------------------------------------------------------------------- 8115;; Includes: 8116;; - ANDV 8117;; - EORV 8118;; - ORV 8119;; - SADDV 8120;; - SMAXV 8121;; - SMINV 8122;; - UADDV 8123;; - UMAXV 8124;; - UMINV 8125;; ------------------------------------------------------------------------- 8126 8127;; Unpredicated integer add reduction. 
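;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>).
;; Note that SADDV/UADDV widen as they sum, so the scalar result is
;; 64 bits even for byte inputs:
;;
;;   int64_t
;;   sum (svbool_t pg, svint8_t x)
;;   {
;;     return svaddv (pg, x);  /* SADDV d0, pg, x.b */
;;   }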
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:SVE_FULL_I 1 "register_operand")]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
    emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
    if (tmp != operands[0])
      emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
    DONE;
  }
)

;; Predicated integer add reduction.  The result is always 64 bits.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
		    (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		   SVE_INT_ADDV))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<su>addv\t%d0, %1, %2.<Vetype>"
)

;; Unpredicated integer reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_I 1 "register_operand")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDV
;; - FMAXNMV
;; - FMAXV
;; - FMINNMV
;; - FMINV
;; -------------------------------------------------------------------------

;; Unpredicated floating-point tree reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_F 1 "register_operand")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated floating-point tree reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_F 2 "register_operand" "w")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Left-to-right reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDA
;; -------------------------------------------------------------------------

;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 3)
		       (match_operand:<VEL> 1 "register_operand")
		       (match_operand:SVE_FULL_F 2 "register_operand")]
		      UNSPEC_FADDA))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated in-order FP reductions.
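;;
;; For example (an illustrative sketch), an in-order reduction such as:
;;
;;   double
;;   strict_sum (double *x, int n)
;;   {
;;     double s = 0.0;
;;     for (int i = 0; i < n; ++i)
;;       s += x[i];  /* Without -ffast-math, the order must be kept.  */
;;     return s;
;;   }
;;
;; cannot use a tree reduction, but FADDA's left-to-right semantics make
;; it vectorizable through fold_left_plus_<mode> above.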
8228(define_insn "mask_fold_left_plus_<mode>" 8229 [(set (match_operand:<VEL> 0 "register_operand" "=w") 8230 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl") 8231 (match_operand:<VEL> 1 "register_operand" "0") 8232 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 8233 UNSPEC_FADDA))] 8234 "TARGET_SVE" 8235 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>" 8236) 8237 8238;; ========================================================================= 8239;; == Permutes 8240;; ========================================================================= 8241 8242;; ------------------------------------------------------------------------- 8243;; ---- [INT,FP] General permutes 8244;; ------------------------------------------------------------------------- 8245;; Includes: 8246;; - TBL 8247;; ------------------------------------------------------------------------- 8248 8249(define_expand "vec_perm<mode>" 8250 [(match_operand:SVE_FULL 0 "register_operand") 8251 (match_operand:SVE_FULL 1 "register_operand") 8252 (match_operand:SVE_FULL 2 "register_operand") 8253 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] 8254 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" 8255 { 8256 aarch64_expand_sve_vec_perm (operands[0], operands[1], 8257 operands[2], operands[3]); 8258 DONE; 8259 } 8260) 8261 8262(define_insn "@aarch64_sve_tbl<mode>" 8263 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8264 (unspec:SVE_FULL 8265 [(match_operand:SVE_FULL 1 "register_operand" "w") 8266 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] 8267 UNSPEC_TBL))] 8268 "TARGET_SVE" 8269 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8270) 8271 8272;; ------------------------------------------------------------------------- 8273;; ---- [INT,FP] Special-purpose unary permutes 8274;; ------------------------------------------------------------------------- 8275;; Includes: 8276;; - COMPACT 8277;; - DUP 8278;; - REV 8279;; ------------------------------------------------------------------------- 8280 8281;; Compact active elements and pad with zeros. 8282(define_insn "@aarch64_sve_compact<mode>" 8283 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") 8284 (unspec:SVE_FULL_SD 8285 [(match_operand:<VPRED> 1 "register_operand" "Upl") 8286 (match_operand:SVE_FULL_SD 2 "register_operand" "w")] 8287 UNSPEC_SVE_COMPACT))] 8288 "TARGET_SVE" 8289 "compact\t%0.<Vetype>, %1, %2.<Vetype>" 8290) 8291 8292;; Duplicate one element of a vector. 8293(define_insn "@aarch64_sve_dup_lane<mode>" 8294 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8295 (vec_duplicate:SVE_FULL 8296 (vec_select:<VEL> 8297 (match_operand:SVE_FULL 1 "register_operand" "w") 8298 (parallel [(match_operand:SI 2 "const_int_operand")]))))] 8299 "TARGET_SVE 8300 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)" 8301 "dup\t%0.<Vetype>, %1.<Vetype>[%2]" 8302) 8303 8304;; Use DUP.Q to duplicate a 128-bit segment of a register. 8305;; 8306;; The vec_select:<V128> sets memory lane number N of the V128 to lane 8307;; number op2 + N of op1. (We don't need to distinguish between memory 8308;; and architectural register lane numbering for op1 or op0, since the 8309;; two numbering schemes are the same for SVE.) 8310;; 8311;; The vec_duplicate:SVE_FULL then copies memory lane number N of the 8312;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP 8313;; of op0. We therefore get the correct result for both endiannesses. 
8314;; 8315;; The wrinkle is that for big-endian V128 registers, memory lane numbering 8316;; is in the opposite order to architectural register lane numbering. 8317;; Thus if we were to do this operation via a V128 temporary register, 8318;; the vec_select and vec_duplicate would both involve a reverse operation 8319;; for big-endian targets. In this fused pattern the two reverses cancel 8320;; each other out. 8321(define_insn "@aarch64_sve_dupq_lane<mode>" 8322 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8323 (vec_duplicate:SVE_FULL 8324 (vec_select:<V128> 8325 (match_operand:SVE_FULL 1 "register_operand" "w") 8326 (match_operand 2 "ascending_int_parallel"))))] 8327 "TARGET_SVE 8328 && (INTVAL (XVECEXP (operands[2], 0, 0)) 8329 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0 8330 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) 8331 * GET_MODE_SIZE (<VEL>mode), 0, 63)" 8332 { 8333 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) 8334 * GET_MODE_SIZE (<VEL>mode)); 8335 operands[2] = gen_int_mode (byte / 16, DImode); 8336 return "dup\t%0.q, %1.q[%2]"; 8337 } 8338) 8339 8340;; Reverse the order of elements within a full vector. 8341(define_insn "@aarch64_sve_rev<mode>" 8342 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8343 (unspec:SVE_FULL 8344 [(match_operand:SVE_FULL 1 "register_operand" "w")] 8345 UNSPEC_REV))] 8346 "TARGET_SVE" 8347 "rev\t%0.<Vetype>, %1.<Vetype>") 8348 8349;; ------------------------------------------------------------------------- 8350;; ---- [INT,FP] Special-purpose binary permutes 8351;; ------------------------------------------------------------------------- 8352;; Includes: 8353;; - SPLICE 8354;; - TRN1 8355;; - TRN2 8356;; - UZP1 8357;; - UZP2 8358;; - ZIP1 8359;; - ZIP2 8360;; ------------------------------------------------------------------------- 8361 8362;; Like EXT, but start at the first active element. 8363(define_insn "@aarch64_sve_splice<mode>" 8364 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") 8365 (unspec:SVE_FULL 8366 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 8367 (match_operand:SVE_FULL 2 "register_operand" "0, w") 8368 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8369 UNSPEC_SVE_SPLICE))] 8370 "TARGET_SVE" 8371 "@ 8372 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype> 8373 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>" 8374 [(set_attr "movprfx" "*, yes")] 8375) 8376 8377;; Permutes that take half the elements from one vector and half the 8378;; elements from the other. 8379(define_insn "@aarch64_sve_<perm_insn><mode>" 8380 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8381 (unspec:SVE_FULL 8382 [(match_operand:SVE_FULL 1 "register_operand" "w") 8383 (match_operand:SVE_FULL 2 "register_operand" "w")] 8384 PERMUTE))] 8385 "TARGET_SVE" 8386 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8387) 8388 8389;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns 8390;; doesn't depend on the mode. 8391(define_insn "@aarch64_sve_<optab><mode>" 8392 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8393 (unspec:SVE_FULL 8394 [(match_operand:SVE_FULL 1 "register_operand" "w") 8395 (match_operand:SVE_FULL 2 "register_operand" "w")] 8396 PERMUTEQ))] 8397 "TARGET_SVE_F64MM" 8398 "<perm_insn>\t%0.q, %1.q, %2.q" 8399) 8400 8401;; Concatenate two vectors and extract a subvector. Note that the 8402;; immediate (third) operand is the lane index not the byte index. 
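;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>),
;; extracting a vector that starts one element into A:
;;
;;   svuint8_t
;;   shift_in (svuint8_t a, svuint8_t b)
;;   {
;;     return svext (a, b, 1);  /* EXT z0.b, z0.b, z1.b, #1 */
;;   }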
8403(define_insn "@aarch64_sve_ext<mode>" 8404 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") 8405 (unspec:SVE_FULL 8406 [(match_operand:SVE_FULL 1 "register_operand" "0, w") 8407 (match_operand:SVE_FULL 2 "register_operand" "w, w") 8408 (match_operand:SI 3 "const_int_operand")] 8409 UNSPEC_EXT))] 8410 "TARGET_SVE 8411 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)" 8412 { 8413 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode)); 8414 return (which_alternative == 0 8415 ? "ext\\t%0.b, %0.b, %2.b, #%3" 8416 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); 8417 } 8418 [(set_attr "movprfx" "*,yes")] 8419) 8420 8421;; ------------------------------------------------------------------------- 8422;; ---- [PRED] Special-purpose unary permutes 8423;; ------------------------------------------------------------------------- 8424;; Includes: 8425;; - REV 8426;; ------------------------------------------------------------------------- 8427 8428(define_insn "@aarch64_sve_rev<mode>" 8429 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8430 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] 8431 UNSPEC_REV))] 8432 "TARGET_SVE" 8433 "rev\t%0.<Vetype>, %1.<Vetype>") 8434 8435;; ------------------------------------------------------------------------- 8436;; ---- [PRED] Special-purpose binary permutes 8437;; ------------------------------------------------------------------------- 8438;; Includes: 8439;; - TRN1 8440;; - TRN2 8441;; - UZP1 8442;; - UZP2 8443;; - ZIP1 8444;; - ZIP2 8445;; ------------------------------------------------------------------------- 8446 8447;; Permutes that take half the elements from one vector and half the 8448;; elements from the other. 8449(define_insn "@aarch64_sve_<perm_insn><mode>" 8450 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8451 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") 8452 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 8453 PERMUTE))] 8454 "TARGET_SVE" 8455 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8456) 8457 8458;; Special purpose permute used by the predicate generation instructions. 8459;; Unlike the normal permute patterns, these instructions operate on VNx16BI 8460;; regardless of the element size, so that all input and output bits are 8461;; well-defined. Operand 3 then indicates the size of the permute. 8462(define_insn "@aarch64_sve_trn1_conv<mode>" 8463 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 8464 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") 8465 (match_operand:VNx16BI 2 "register_operand" "Upa") 8466 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] 8467 UNSPEC_TRN1_CONV))] 8468 "TARGET_SVE" 8469 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" 8470) 8471 8472;; ========================================================================= 8473;; == Conversions 8474;; ========================================================================= 8475 8476;; ------------------------------------------------------------------------- 8477;; ---- [INT<-INT] Packs 8478;; ------------------------------------------------------------------------- 8479;; Includes: 8480;; - UZP1 8481;; ------------------------------------------------------------------------- 8482 8483;; Integer pack. Use UZP1 on the narrower type, which discards 8484;; the high part of each wide element. 
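;;
;; For example (an illustrative sketch; assumes #include <stdint.h>),
;; a truncating copy such as:
;;
;;   void
;;   narrow (int32_t *restrict dst, const int64_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (int32_t) src[i];  /* two .d inputs -> one UZP1 .s */
;;   }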
8485(define_insn "vec_pack_trunc_<Vwide>" 8486 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") 8487 (unspec:SVE_FULL_BHSI 8488 [(match_operand:<VWIDE> 1 "register_operand" "w") 8489 (match_operand:<VWIDE> 2 "register_operand" "w")] 8490 UNSPEC_PACK))] 8491 "TARGET_SVE" 8492 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8493) 8494 8495;; ------------------------------------------------------------------------- 8496;; ---- [INT<-INT] Unpacks 8497;; ------------------------------------------------------------------------- 8498;; Includes: 8499;; - SUNPKHI 8500;; - SUNPKLO 8501;; - UUNPKHI 8502;; - UUNPKLO 8503;; ------------------------------------------------------------------------- 8504 8505;; Unpack the low or high half of a vector, where "high" refers to 8506;; the low-numbered lanes for big-endian and the high-numbered lanes 8507;; for little-endian. 8508(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>" 8509 [(match_operand:<VWIDE> 0 "register_operand") 8510 (unspec:<VWIDE> 8511 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] 8512 "TARGET_SVE" 8513 { 8514 emit_insn ((<hi_lanes_optab> 8515 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode> 8516 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>) 8517 (operands[0], operands[1])); 8518 DONE; 8519 } 8520) 8521 8522(define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>" 8523 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 8524 (unspec:<VWIDE> 8525 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] 8526 UNPACK))] 8527 "TARGET_SVE" 8528 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" 8529) 8530 8531;; ------------------------------------------------------------------------- 8532;; ---- [INT<-FP] Conversions 8533;; ------------------------------------------------------------------------- 8534;; Includes: 8535;; - FCVTZS 8536;; - FCVTZU 8537;; ------------------------------------------------------------------------- 8538 8539;; Unpredicated conversion of floats to integers of the same size (HF to HI, 8540;; SF to SI or DF to DI). 8541(define_expand "<optab><mode><v_int_equiv>2" 8542 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") 8543 (unspec:<V_INT_EQUIV> 8544 [(match_dup 2) 8545 (const_int SVE_RELAXED_GP) 8546 (match_operand:SVE_FULL_F 1 "register_operand")] 8547 SVE_COND_FCVTI))] 8548 "TARGET_SVE" 8549 { 8550 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 8551 } 8552) 8553 8554;; Predicated float-to-integer conversion, either to the same width or wider. 8555(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" 8556 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") 8557 (unspec:SVE_FULL_HSDI 8558 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 8559 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8560 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 8561 SVE_COND_FCVTI))] 8562 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8563 "@ 8564 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8565 movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 8566 [(set_attr "movprfx" "*,yes")] 8567) 8568 8569;; Predicated narrowing float-to-integer conversion. 
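;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>):
;;
;;   svint32_t
;;   to_int (svbool_t pg, svfloat64_t x)
;;   {
;;     return svcvt_s32_f64_x (pg, x);  /* FCVTZS z0.s, pg/m, z1.d */
;;   }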
8570(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 8571 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") 8572 (unspec:VNx4SI_ONLY 8573 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") 8574 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8575 (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")] 8576 SVE_COND_FCVTI))] 8577 "TARGET_SVE" 8578 "@ 8579 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 8580 movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" 8581 [(set_attr "movprfx" "*,yes")] 8582) 8583 8584;; Predicated float-to-integer conversion with merging, either to the same 8585;; width or wider. 8586(define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" 8587 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") 8588 (unspec:SVE_FULL_HSDI 8589 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") 8590 (unspec:SVE_FULL_HSDI 8591 [(match_dup 1) 8592 (const_int SVE_STRICT_GP) 8593 (match_operand:SVE_FULL_F 2 "register_operand")] 8594 SVE_COND_FCVTI) 8595 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] 8596 UNSPEC_SEL))] 8597 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8598) 8599 8600;; The first alternative doesn't need the earlyclobber, but the only case 8601;; it would help is the uninteresting one in which operands 2 and 3 are 8602;; the same register (despite having different modes). Making all the 8603;; alternatives earlyclobber makes things more consistent for the 8604;; register allocator. 8605(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed" 8606 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") 8607 (unspec:SVE_FULL_HSDI 8608 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8609 (unspec:SVE_FULL_HSDI 8610 [(match_operand 4) 8611 (const_int SVE_RELAXED_GP) 8612 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 8613 SVE_COND_FCVTI) 8614 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8615 UNSPEC_SEL))] 8616 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8617 "@ 8618 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8619 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8620 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 8621 "&& !rtx_equal_p (operands[1], operands[4])" 8622 { 8623 operands[4] = copy_rtx (operands[1]); 8624 } 8625 [(set_attr "movprfx" "*,yes,yes")] 8626) 8627 8628(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict" 8629 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") 8630 (unspec:SVE_FULL_HSDI 8631 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8632 (unspec:SVE_FULL_HSDI 8633 [(match_dup 1) 8634 (const_int SVE_STRICT_GP) 8635 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 8636 SVE_COND_FCVTI) 8637 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8638 UNSPEC_SEL))] 8639 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8640 "@ 8641 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8642 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8643 movprfx\t%0, 
%3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 8644 [(set_attr "movprfx" "*,yes,yes")] 8645) 8646 8647;; Predicated narrowing float-to-integer conversion with merging. 8648(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 8649 [(set (match_operand:VNx4SI_ONLY 0 "register_operand") 8650 (unspec:VNx4SI_ONLY 8651 [(match_operand:VNx2BI 1 "register_operand") 8652 (unspec:VNx4SI_ONLY 8653 [(match_dup 1) 8654 (const_int SVE_STRICT_GP) 8655 (match_operand:VNx2DF_ONLY 2 "register_operand")] 8656 SVE_COND_FCVTI) 8657 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] 8658 UNSPEC_SEL))] 8659 "TARGET_SVE" 8660) 8661 8662(define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 8663 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") 8664 (unspec:VNx4SI_ONLY 8665 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") 8666 (unspec:VNx4SI_ONLY 8667 [(match_dup 1) 8668 (match_operand:SI 4 "aarch64_sve_gp_strictness") 8669 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] 8670 SVE_COND_FCVTI) 8671 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8672 UNSPEC_SEL))] 8673 "TARGET_SVE" 8674 "@ 8675 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 8676 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 8677 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" 8678 [(set_attr "movprfx" "*,yes,yes")] 8679) 8680 8681;; ------------------------------------------------------------------------- 8682;; ---- [INT<-FP] Packs 8683;; ------------------------------------------------------------------------- 8684;; The patterns in this section are synthetic. 8685;; ------------------------------------------------------------------------- 8686 8687;; Convert two vectors of DF to SI and pack the results into a single vector. 8688(define_expand "vec_pack_<su>fix_trunc_vnx2df" 8689 [(set (match_dup 4) 8690 (unspec:VNx4SI 8691 [(match_dup 3) 8692 (const_int SVE_RELAXED_GP) 8693 (match_operand:VNx2DF 1 "register_operand")] 8694 SVE_COND_FCVTI)) 8695 (set (match_dup 5) 8696 (unspec:VNx4SI 8697 [(match_dup 3) 8698 (const_int SVE_RELAXED_GP) 8699 (match_operand:VNx2DF 2 "register_operand")] 8700 SVE_COND_FCVTI)) 8701 (set (match_operand:VNx4SI 0 "register_operand") 8702 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] 8703 "TARGET_SVE" 8704 { 8705 operands[3] = aarch64_ptrue_reg (VNx2BImode); 8706 operands[4] = gen_reg_rtx (VNx4SImode); 8707 operands[5] = gen_reg_rtx (VNx4SImode); 8708 } 8709) 8710 8711;; ------------------------------------------------------------------------- 8712;; ---- [INT<-FP] Unpacks 8713;; ------------------------------------------------------------------------- 8714;; No patterns here yet! 8715;; ------------------------------------------------------------------------- 8716 8717;; ------------------------------------------------------------------------- 8718;; ---- [FP<-INT] Conversions 8719;; ------------------------------------------------------------------------- 8720;; Includes: 8721;; - SCVTF 8722;; - UCVTF 8723;; ------------------------------------------------------------------------- 8724 8725;; Unpredicated conversion of integers to floats of the same size 8726;; (HI to HF, SI to SF or DI to DF). 
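;;
;; For example (an illustrative sketch; assumes #include <stdint.h>),
;; the following loop is vectorized to a predicated SCVTF via this
;; expander:
;;
;;   void
;;   f (float *restrict r, const int32_t *restrict a, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = (float) a[i];  /* SCVTF z0.s, p0/m, z1.s */
;;   }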
8727(define_expand "<optab><v_int_equiv><mode>2" 8728 [(set (match_operand:SVE_FULL_F 0 "register_operand") 8729 (unspec:SVE_FULL_F 8730 [(match_dup 2) 8731 (const_int SVE_RELAXED_GP) 8732 (match_operand:<V_INT_EQUIV> 1 "register_operand")] 8733 SVE_COND_ICVTF))] 8734 "TARGET_SVE" 8735 { 8736 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 8737 } 8738) 8739 8740;; Predicated integer-to-float conversion, either to the same width or 8741;; narrower. 8742(define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" 8743 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 8744 (unspec:SVE_FULL_F 8745 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 8746 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8747 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")] 8748 SVE_COND_ICVTF))] 8749 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8750 "@ 8751 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 8752 movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 8753 [(set_attr "movprfx" "*,yes")] 8754) 8755 8756;; Predicated widening integer-to-float conversion. 8757(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 8758 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w") 8759 (unspec:VNx2DF_ONLY 8760 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") 8761 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8762 (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] 8763 SVE_COND_ICVTF))] 8764 "TARGET_SVE" 8765 "@ 8766 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 8767 movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" 8768 [(set_attr "movprfx" "*,yes")] 8769) 8770 8771;; Predicated integer-to-float conversion with merging, either to the same 8772;; width or narrower. 8773(define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" 8774 [(set (match_operand:SVE_FULL_F 0 "register_operand") 8775 (unspec:SVE_FULL_F 8776 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") 8777 (unspec:SVE_FULL_F 8778 [(match_dup 1) 8779 (const_int SVE_STRICT_GP) 8780 (match_operand:SVE_FULL_HSDI 2 "register_operand")] 8781 SVE_COND_ICVTF) 8782 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] 8783 UNSPEC_SEL))] 8784 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8785) 8786 8787;; The first alternative doesn't need the earlyclobber, but the only case 8788;; it would help is the uninteresting one in which operands 2 and 3 are 8789;; the same register (despite having different modes). Making all the 8790;; alternatives earlyclobber makes things more consistent for the 8791;; register allocator. 
8792(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed" 8793 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") 8794 (unspec:SVE_FULL_F 8795 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8796 (unspec:SVE_FULL_F 8797 [(match_operand 4) 8798 (const_int SVE_RELAXED_GP) 8799 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] 8800 SVE_COND_ICVTF) 8801 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8802 UNSPEC_SEL))] 8803 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8804 "@ 8805 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 8806 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 8807 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 8808 "&& !rtx_equal_p (operands[1], operands[4])" 8809 { 8810 operands[4] = copy_rtx (operands[1]); 8811 } 8812 [(set_attr "movprfx" "*,yes,yes")] 8813) 8814 8815(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict" 8816 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") 8817 (unspec:SVE_FULL_F 8818 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8819 (unspec:SVE_FULL_F 8820 [(match_dup 1) 8821 (const_int SVE_STRICT_GP) 8822 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] 8823 SVE_COND_ICVTF) 8824 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8825 UNSPEC_SEL))] 8826 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8827 "@ 8828 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 8829 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 8830 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 8831 [(set_attr "movprfx" "*,yes,yes")] 8832) 8833 8834;; Predicated widening integer-to-float conversion with merging. 
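;;
;; A minimal ACLE-level sketch of such a merging conversion (illustrative;
;; assumes #include <arm_sve.h>):
;;
;;   svfloat64_t
;;   widen_merge (svfloat64_t inactive, svbool_t pg, svint32_t x)
;;   {
;;     /* Inactive lanes keep INACTIVE's value.  */
;;     return svcvt_f64_m (inactive, pg, x);  /* SCVTF z0.d, pg/m, z1.s */
;;   }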
8835(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 8836 [(set (match_operand:VNx2DF_ONLY 0 "register_operand") 8837 (unspec:VNx2DF_ONLY 8838 [(match_operand:VNx2BI 1 "register_operand") 8839 (unspec:VNx2DF_ONLY 8840 [(match_dup 1) 8841 (const_int SVE_STRICT_GP) 8842 (match_operand:VNx4SI_ONLY 2 "register_operand")] 8843 SVE_COND_ICVTF) 8844 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] 8845 UNSPEC_SEL))] 8846 "TARGET_SVE" 8847) 8848 8849(define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 8850 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") 8851 (unspec:VNx2DF_ONLY 8852 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") 8853 (unspec:VNx2DF_ONLY 8854 [(match_dup 1) 8855 (match_operand:SI 4 "aarch64_sve_gp_strictness") 8856 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] 8857 SVE_COND_ICVTF) 8858 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8859 UNSPEC_SEL))] 8860 "TARGET_SVE" 8861 "@ 8862 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 8863 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 8864 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" 8865 [(set_attr "movprfx" "*,yes,yes")] 8866) 8867 8868;; ------------------------------------------------------------------------- 8869;; ---- [FP<-INT] Packs 8870;; ------------------------------------------------------------------------- 8871;; No patterns here yet! 8872;; ------------------------------------------------------------------------- 8873 8874;; ------------------------------------------------------------------------- 8875;; ---- [FP<-INT] Unpacks 8876;; ------------------------------------------------------------------------- 8877;; The patterns in this section are synthetic. 8878;; ------------------------------------------------------------------------- 8879 8880;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI 8881;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the 8882;; unpacked VNx4SI to VNx2DF. 8883(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" 8884 [(match_operand:VNx2DF 0 "register_operand") 8885 (FLOATUORS:VNx2DF 8886 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] 8887 UNPACK_UNSIGNED))] 8888 "TARGET_SVE" 8889 { 8890 /* Use ZIP to do the unpack, since we don't care about the upper halves 8891 and since it has the nice property of not needing any subregs. 8892 If using UUNPK* turns out to be preferable, we could model it as 8893 a ZIP whose first operand is zero. */ 8894 rtx temp = gen_reg_rtx (VNx4SImode); 8895 emit_insn ((<hi_lanes_optab> 8896 ? gen_aarch64_sve_zip2vnx4si 8897 : gen_aarch64_sve_zip1vnx4si) 8898 (temp, operands[1], operands[1])); 8899 rtx ptrue = aarch64_ptrue_reg (VNx2BImode); 8900 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); 8901 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df 8902 (operands[0], ptrue, temp, strictness)); 8903 DONE; 8904 } 8905) 8906 8907;; ------------------------------------------------------------------------- 8908;; ---- [FP<-FP] Packs 8909;; ------------------------------------------------------------------------- 8910;; Includes: 8911;; - FCVT 8912;; ------------------------------------------------------------------------- 8913 8914;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack 8915;; the results into a single vector. 
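;;
;; For example (an illustrative sketch), a narrowing FP copy such as:
;;
;;   void
;;   narrow (float *restrict r, const double *restrict a, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = (float) a[i];  /* two FCVTs feeding one UZP1 */
;;   }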
8916(define_expand "vec_pack_trunc_<Vwide>" 8917 [(set (match_dup 4) 8918 (unspec:SVE_FULL_HSF 8919 [(match_dup 3) 8920 (const_int SVE_RELAXED_GP) 8921 (match_operand:<VWIDE> 1 "register_operand")] 8922 UNSPEC_COND_FCVT)) 8923 (set (match_dup 5) 8924 (unspec:SVE_FULL_HSF 8925 [(match_dup 3) 8926 (const_int SVE_RELAXED_GP) 8927 (match_operand:<VWIDE> 2 "register_operand")] 8928 UNSPEC_COND_FCVT)) 8929 (set (match_operand:SVE_FULL_HSF 0 "register_operand") 8930 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] 8931 "TARGET_SVE" 8932 { 8933 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode); 8934 operands[4] = gen_reg_rtx (<MODE>mode); 8935 operands[5] = gen_reg_rtx (<MODE>mode); 8936 } 8937) 8938 8939;; Predicated float-to-float truncation. 8940(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 8941 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") 8942 (unspec:SVE_FULL_HSF 8943 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl") 8944 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8945 (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")] 8946 SVE_COND_FCVT))] 8947 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 8948 "@ 8949 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 8950 movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" 8951 [(set_attr "movprfx" "*,yes")] 8952) 8953 8954;; Predicated float-to-float truncation with merging. 8955(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 8956 [(set (match_operand:SVE_FULL_HSF 0 "register_operand") 8957 (unspec:SVE_FULL_HSF 8958 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand") 8959 (unspec:SVE_FULL_HSF 8960 [(match_dup 1) 8961 (const_int SVE_STRICT_GP) 8962 (match_operand:SVE_FULL_SDF 2 "register_operand")] 8963 SVE_COND_FCVT) 8964 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] 8965 UNSPEC_SEL))] 8966 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 8967) 8968 8969(define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 8970 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") 8971 (unspec:SVE_FULL_HSF 8972 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8973 (unspec:SVE_FULL_HSF 8974 [(match_dup 1) 8975 (match_operand:SI 4 "aarch64_sve_gp_strictness") 8976 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] 8977 SVE_COND_FCVT) 8978 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8979 UNSPEC_SEL))] 8980 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 8981 "@ 8982 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 8983 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 8984 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" 8985 [(set_attr "movprfx" "*,yes,yes")] 8986) 8987 8988;; ------------------------------------------------------------------------- 8989;; ---- [FP<-FP] Packs (bfloat16) 8990;; ------------------------------------------------------------------------- 8991;; Includes: 8992;; - BFCVT (BF16) 8993;; - BFCVTNT (BF16) 8994;; ------------------------------------------------------------------------- 8995 8996;; Predicated BFCVT. 
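;;
;; A minimal ACLE-level sketch (illustrative; assumes #include <arm_sve.h>
;; and compilation with the BF16 extension enabled):
;;
;;   svbfloat16_t
;;   to_bf16 (svbool_t pg, svfloat32_t x)
;;   {
;;     return svcvt_bf16_f32_x (pg, x);  /* BFCVT z0.h, pg/m, z1.s */
;;   }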
(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated BFCVT with merging.
(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SF_ONLY 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
)

(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
  "@
   bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
   movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; This instruction does not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
	   (match_operand:VNx4SF 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE_BF16"
  "bfcvtnt\t%0.h, %2/m, %3.s"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked source.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_FULL_HSF
     [(match_operand:SVE_FULL_HSF 1 "register_operand")]
     UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2<mode>
		: gen_aarch64_sve_zip1<mode>)
	       (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)

;; Predicated float-to-float extension.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  "@
   fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
   movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
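;;
;; For example (illustrative): packing two VNx8BI predicates into one
;; VNx16BI predicate uses the byte form of UZP1:
;;
;;	uzp1	p0.b, p1.b, p2.b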
9154(define_insn "vec_pack_trunc_<Vwide>" 9155 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") 9156 (unspec:PRED_BHS 9157 [(match_operand:<VWIDE> 1 "register_operand" "Upa") 9158 (match_operand:<VWIDE> 2 "register_operand" "Upa")] 9159 UNSPEC_PACK))] 9160 "TARGET_SVE" 9161 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 9162) 9163 9164;; ------------------------------------------------------------------------- 9165;; ---- [PRED<-PRED] Unpacks 9166;; ------------------------------------------------------------------------- 9167;; Includes: 9168;; - PUNPKHI 9169;; - PUNPKLO 9170;; ------------------------------------------------------------------------- 9171 9172;; Unpack the low or high half of a predicate, where "high" refers to 9173;; the low-numbered lanes for big-endian and the high-numbered lanes 9174;; for little-endian. 9175(define_expand "vec_unpack<su>_<perm_hilo>_<mode>" 9176 [(match_operand:<VWIDE> 0 "register_operand") 9177 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] 9178 UNPACK)] 9179 "TARGET_SVE" 9180 { 9181 emit_insn ((<hi_lanes_optab> 9182 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> 9183 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) 9184 (operands[0], operands[1])); 9185 DONE; 9186 } 9187) 9188 9189(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>" 9190 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") 9191 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] 9192 UNPACK_UNSIGNED))] 9193 "TARGET_SVE" 9194 "punpk<perm_hilo>\t%0.h, %1.b" 9195) 9196 9197;; ========================================================================= 9198;; == Vector partitioning 9199;; ========================================================================= 9200 9201;; ------------------------------------------------------------------------- 9202;; ---- [PRED] Unary partitioning 9203;; ------------------------------------------------------------------------- 9204;; Includes: 9205;; - BRKA 9206;; - BRKAS 9207;; - BRKB 9208;; - BRKBS 9209;; ------------------------------------------------------------------------- 9210 9211;; Note that unlike most other instructions that have both merging and 9212;; zeroing forms, these instructions don't operate elementwise and so 9213;; don't fit the IFN_COND model. 9214(define_insn "@aarch64_brk<brk_op>" 9215 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") 9216 (unspec:VNx16BI 9217 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") 9218 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") 9219 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] 9220 SVE_BRK_UNARY))] 9221 "TARGET_SVE" 9222 "@ 9223 brk<brk_op>\t%0.b, %1/z, %2.b 9224 brk<brk_op>\t%0.b, %1/m, %2.b" 9225) 9226 9227;; Same, but also producing a flags result. 9228(define_insn "*aarch64_brk<brk_op>_cc" 9229 [(set (reg:CC_NZC CC_REGNUM) 9230 (unspec:CC_NZC 9231 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9232 (match_dup 1) 9233 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9234 (unspec:VNx16BI 9235 [(match_dup 1) 9236 (match_operand:VNx16BI 2 "register_operand" "Upa") 9237 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")] 9238 SVE_BRK_UNARY)] 9239 UNSPEC_PTEST)) 9240 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9241 (unspec:VNx16BI 9242 [(match_dup 1) 9243 (match_dup 2) 9244 (match_dup 3)] 9245 SVE_BRK_UNARY))] 9246 "TARGET_SVE" 9247 "brk<brk_op>s\t%0.b, %1/z, %2.b" 9248) 9249 9250;; Same, but with only the flags result being interesting. 
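;;
;; An illustrative flags-only use (not taken from a particular testcase)
;; pairs the flag-setting break with an SVE branch alias:
;;
;;	brkas	p0.b, p1/z, p2.b
;;	b.none	.Lexit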
9251(define_insn "*aarch64_brk<brk_op>_ptest" 9252 [(set (reg:CC_NZC CC_REGNUM) 9253 (unspec:CC_NZC 9254 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9255 (match_dup 1) 9256 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9257 (unspec:VNx16BI 9258 [(match_dup 1) 9259 (match_operand:VNx16BI 2 "register_operand" "Upa") 9260 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")] 9261 SVE_BRK_UNARY)] 9262 UNSPEC_PTEST)) 9263 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9264 "TARGET_SVE" 9265 "brk<brk_op>s\t%0.b, %1/z, %2.b" 9266) 9267 9268;; ------------------------------------------------------------------------- 9269;; ---- [PRED] Binary partitioning 9270;; ------------------------------------------------------------------------- 9271;; Includes: 9272;; - BRKN 9273;; - BRKNS 9274;; - BRKPA 9275;; - BRKPAS 9276;; - BRKPB 9277;; - BRKPBS 9278;; ------------------------------------------------------------------------- 9279 9280;; Binary BRKs (BRKN, BRKPA, BRKPB). 9281(define_insn "@aarch64_brk<brk_op>" 9282 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9283 (unspec:VNx16BI 9284 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9285 (match_operand:VNx16BI 2 "register_operand" "Upa") 9286 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")] 9287 SVE_BRK_BINARY))] 9288 "TARGET_SVE" 9289 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b" 9290) 9291 9292;; BRKN, producing both a predicate and a flags result. Unlike other 9293;; flag-setting instructions, these flags are always set wrt a ptrue. 9294(define_insn_and_rewrite "*aarch64_brkn_cc" 9295 [(set (reg:CC_NZC CC_REGNUM) 9296 (unspec:CC_NZC 9297 [(match_operand:VNx16BI 4) 9298 (match_operand:VNx16BI 5) 9299 (const_int SVE_KNOWN_PTRUE) 9300 (unspec:VNx16BI 9301 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9302 (match_operand:VNx16BI 2 "register_operand" "Upa") 9303 (match_operand:VNx16BI 3 "register_operand" "0")] 9304 UNSPEC_BRKN)] 9305 UNSPEC_PTEST)) 9306 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9307 (unspec:VNx16BI 9308 [(match_dup 1) 9309 (match_dup 2) 9310 (match_dup 3)] 9311 UNSPEC_BRKN))] 9312 "TARGET_SVE" 9313 "brkns\t%0.b, %1/z, %2.b, %0.b" 9314 "&& (operands[4] != CONST0_RTX (VNx16BImode) 9315 || operands[5] != CONST0_RTX (VNx16BImode))" 9316 { 9317 operands[4] = CONST0_RTX (VNx16BImode); 9318 operands[5] = CONST0_RTX (VNx16BImode); 9319 } 9320) 9321 9322;; Same, but with only the flags result being interesting. 9323(define_insn_and_rewrite "*aarch64_brkn_ptest" 9324 [(set (reg:CC_NZC CC_REGNUM) 9325 (unspec:CC_NZC 9326 [(match_operand:VNx16BI 4) 9327 (match_operand:VNx16BI 5) 9328 (const_int SVE_KNOWN_PTRUE) 9329 (unspec:VNx16BI 9330 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9331 (match_operand:VNx16BI 2 "register_operand" "Upa") 9332 (match_operand:VNx16BI 3 "register_operand" "0")] 9333 UNSPEC_BRKN)] 9334 UNSPEC_PTEST)) 9335 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9336 "TARGET_SVE" 9337 "brkns\t%0.b, %1/z, %2.b, %0.b" 9338 "&& (operands[4] != CONST0_RTX (VNx16BImode) 9339 || operands[5] != CONST0_RTX (VNx16BImode))" 9340 { 9341 operands[4] = CONST0_RTX (VNx16BImode); 9342 operands[5] = CONST0_RTX (VNx16BImode); 9343 } 9344) 9345 9346;; BRKPA and BRKPB, producing both a predicate and a flags result. 
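;;
;; For example, the insn below can print as (illustrative):
;;
;;	brkpas	p0.b, p1/z, p2.b, p3.b
;;
;; where, roughly, the break through %3 only proceeds if the last active
;; element of %2 is true.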
9347(define_insn "*aarch64_brk<brk_op>_cc" 9348 [(set (reg:CC_NZC CC_REGNUM) 9349 (unspec:CC_NZC 9350 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9351 (match_dup 1) 9352 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9353 (unspec:VNx16BI 9354 [(match_dup 1) 9355 (match_operand:VNx16BI 2 "register_operand" "Upa") 9356 (match_operand:VNx16BI 3 "register_operand" "Upa")] 9357 SVE_BRKP)] 9358 UNSPEC_PTEST)) 9359 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9360 (unspec:VNx16BI 9361 [(match_dup 1) 9362 (match_dup 2) 9363 (match_dup 3)] 9364 SVE_BRKP))] 9365 "TARGET_SVE" 9366 "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b" 9367) 9368 9369;; Same, but with only the flags result being interesting. 9370(define_insn "*aarch64_brk<brk_op>_ptest" 9371 [(set (reg:CC_NZC CC_REGNUM) 9372 (unspec:CC_NZC 9373 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9374 (match_dup 1) 9375 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9376 (unspec:VNx16BI 9377 [(match_dup 1) 9378 (match_operand:VNx16BI 2 "register_operand" "Upa") 9379 (match_operand:VNx16BI 3 "register_operand" "Upa")] 9380 SVE_BRKP)] 9381 UNSPEC_PTEST)) 9382 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9383 "TARGET_SVE" 9384 "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b" 9385) 9386 9387;; ------------------------------------------------------------------------- 9388;; ---- [PRED] Scalarization 9389;; ------------------------------------------------------------------------- 9390;; Includes: 9391;; - PFIRST 9392;; - PNEXT 9393;; ------------------------------------------------------------------------- 9394 9395(define_insn "@aarch64_sve_<sve_pred_op><mode>" 9396 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 9397 (unspec:PRED_ALL 9398 [(match_operand:PRED_ALL 1 "register_operand" "Upa") 9399 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 9400 (match_operand:PRED_ALL 3 "register_operand" "0")] 9401 SVE_PITER)) 9402 (clobber (reg:CC_NZC CC_REGNUM))] 9403 "TARGET_SVE && <max_elem_bits> >= <elem_bits>" 9404 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9405) 9406 9407;; Same, but also producing a flags result. 9408(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc" 9409 [(set (reg:CC_NZC CC_REGNUM) 9410 (unspec:CC_NZC 9411 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9412 (match_operand 2) 9413 (match_operand:SI 3 "aarch64_sve_ptrue_flag") 9414 (unspec:PRED_ALL 9415 [(match_operand 4) 9416 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 9417 (match_operand:PRED_ALL 6 "register_operand" "0")] 9418 SVE_PITER)] 9419 UNSPEC_PTEST)) 9420 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 9421 (unspec:PRED_ALL 9422 [(match_dup 4) 9423 (match_dup 5) 9424 (match_dup 6)] 9425 SVE_PITER))] 9426 "TARGET_SVE 9427 && <max_elem_bits> >= <elem_bits> 9428 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" 9429 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9430 "&& !rtx_equal_p (operands[2], operands[4])" 9431 { 9432 operands[4] = operands[2]; 9433 operands[5] = operands[3]; 9434 } 9435) 9436 9437;; Same, but with only the flags result being interesting. 
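;;
;; At the source level, PFIRST/PNEXT typically drive loops like the
;; following C sketch (ACLE names assumed, not defined by this file):
;;
;;	svbool_t p = svpfalse ();
;;	while (svptest_any (pg, p = svpnext_b32 (pg, p)))
;;	  /* ...process the single element selected by p...  */;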
9438(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest" 9439 [(set (reg:CC_NZC CC_REGNUM) 9440 (unspec:CC_NZC 9441 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9442 (match_operand 2) 9443 (match_operand:SI 3 "aarch64_sve_ptrue_flag") 9444 (unspec:PRED_ALL 9445 [(match_operand 4) 9446 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 9447 (match_operand:PRED_ALL 6 "register_operand" "0")] 9448 SVE_PITER)] 9449 UNSPEC_PTEST)) 9450 (clobber (match_scratch:PRED_ALL 0 "=Upa"))] 9451 "TARGET_SVE 9452 && <max_elem_bits> >= <elem_bits> 9453 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" 9454 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9455 "&& !rtx_equal_p (operands[2], operands[4])" 9456 { 9457 operands[4] = operands[2]; 9458 operands[5] = operands[3]; 9459 } 9460) 9461 9462;; ========================================================================= 9463;; == Counting elements 9464;; ========================================================================= 9465 9466;; ------------------------------------------------------------------------- 9467;; ---- [INT] Count elements in a pattern (scalar) 9468;; ------------------------------------------------------------------------- 9469;; Includes: 9470;; - CNTB 9471;; - CNTD 9472;; - CNTH 9473;; - CNTW 9474;; ------------------------------------------------------------------------- 9475 9476;; Count the number of elements in an svpattern. Operand 1 is the pattern, 9477;; operand 2 is the number of elements that fit in a 128-bit block, and 9478;; operand 3 is a multiplier in the range [1, 16]. 9479;; 9480;; Note that this pattern isn't used for SV_ALL (but would work for that too). 9481(define_insn "aarch64_sve_cnt_pat" 9482 [(set (match_operand:DI 0 "register_operand" "=r") 9483 (zero_extend:DI 9484 (unspec:SI [(match_operand:DI 1 "const_int_operand") 9485 (match_operand:DI 2 "const_int_operand") 9486 (match_operand:DI 3 "const_int_operand")] 9487 UNSPEC_SVE_CNT_PAT)))] 9488 "TARGET_SVE" 9489 { 9490 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1); 9491 } 9492) 9493 9494;; ------------------------------------------------------------------------- 9495;; ---- [INT] Increment by the number of elements in a pattern (scalar) 9496;; ------------------------------------------------------------------------- 9497;; Includes: 9498;; - INC 9499;; - SQINC 9500;; - UQINC 9501;; ------------------------------------------------------------------------- 9502 9503;; Increment a DImode register by the number of elements in an svpattern. 9504;; See aarch64_sve_cnt_pat for the counting behavior. 9505(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9506 [(set (match_operand:DI 0 "register_operand" "=r") 9507 (ANY_PLUS:DI (zero_extend:DI 9508 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9509 (match_operand:DI 3 "const_int_operand") 9510 (match_operand:DI 4 "const_int_operand")] 9511 UNSPEC_SVE_CNT_PAT)) 9512 (match_operand:DI_ONLY 1 "register_operand" "0")))] 9513 "TARGET_SVE" 9514 { 9515 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0", 9516 operands + 2); 9517 } 9518) 9519 9520;; Increment an SImode register by the number of elements in an svpattern 9521;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting 9522;; behavior. 
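;;
;; For example (illustrative operand choices), incrementing by twice the
;; number of 32-bit elements in pattern VL8 prints as:
;;
;;	incw	x0, vl8, mul #2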
9523(define_insn "*aarch64_sve_incsi_pat" 9524 [(set (match_operand:SI 0 "register_operand" "=r") 9525 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand") 9526 (match_operand:DI 3 "const_int_operand") 9527 (match_operand:DI 4 "const_int_operand")] 9528 UNSPEC_SVE_CNT_PAT) 9529 (match_operand:SI 1 "register_operand" "0")))] 9530 "TARGET_SVE" 9531 { 9532 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2); 9533 } 9534) 9535 9536;; Increment an SImode register by the number of elements in an svpattern 9537;; using saturating arithmetic, extending the result to 64 bits. 9538;; 9539;; See aarch64_sve_cnt_pat for the counting behavior. 9540(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9541 [(set (match_operand:DI 0 "register_operand" "=r") 9542 (<paired_extend>:DI 9543 (SAT_PLUS:SI 9544 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9545 (match_operand:DI 3 "const_int_operand") 9546 (match_operand:DI 4 "const_int_operand")] 9547 UNSPEC_SVE_CNT_PAT) 9548 (match_operand:SI_ONLY 1 "register_operand" "0"))))] 9549 "TARGET_SVE" 9550 { 9551 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0"); 9552 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers, 9553 operands + 2); 9554 } 9555) 9556 9557;; ------------------------------------------------------------------------- 9558;; ---- [INT] Increment by the number of elements in a pattern (vector) 9559;; ------------------------------------------------------------------------- 9560;; Includes: 9561;; - INC 9562;; - SQINC 9563;; - UQINC 9564;; ------------------------------------------------------------------------- 9565 9566;; Increment a vector of DIs by the number of elements in an svpattern. 9567;; See aarch64_sve_cnt_pat for the counting behavior. 9568(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9569 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 9570 (ANY_PLUS:VNx2DI 9571 (vec_duplicate:VNx2DI 9572 (zero_extend:DI 9573 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9574 (match_operand:DI 3 "const_int_operand") 9575 (match_operand:DI 4 "const_int_operand")] 9576 UNSPEC_SVE_CNT_PAT))) 9577 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] 9578 "TARGET_SVE" 9579 { 9580 if (which_alternative == 1) 9581 output_asm_insn ("movprfx\t%0, %1", operands); 9582 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9583 operands + 2); 9584 } 9585 [(set_attr "movprfx" "*,yes")] 9586) 9587 9588;; Increment a vector of SIs by the number of elements in an svpattern. 9589;; See aarch64_sve_cnt_pat for the counting behavior. 9590(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9591 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 9592 (ANY_PLUS:VNx4SI 9593 (vec_duplicate:VNx4SI 9594 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9595 (match_operand:DI 3 "const_int_operand") 9596 (match_operand:DI 4 "const_int_operand")] 9597 UNSPEC_SVE_CNT_PAT)) 9598 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] 9599 "TARGET_SVE" 9600 { 9601 if (which_alternative == 1) 9602 output_asm_insn ("movprfx\t%0, %1", operands); 9603 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9604 operands + 2); 9605 } 9606 [(set_attr "movprfx" "*,yes")] 9607) 9608 9609;; Increment a vector of HIs by the number of elements in an svpattern. 9610;; See aarch64_sve_cnt_pat for the counting behavior. 
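;;
;; The vector forms print analogously, e.g. (illustrative):
;;
;;	inch	z0.h, all, mul #3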
9611(define_expand "@aarch64_sve_<inc_dec><mode>_pat" 9612 [(set (match_operand:VNx8HI 0 "register_operand") 9613 (ANY_PLUS:VNx8HI 9614 (vec_duplicate:VNx8HI 9615 (truncate:HI 9616 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9617 (match_operand:DI 3 "const_int_operand") 9618 (match_operand:DI 4 "const_int_operand")] 9619 UNSPEC_SVE_CNT_PAT))) 9620 (match_operand:VNx8HI_ONLY 1 "register_operand")))] 9621 "TARGET_SVE" 9622) 9623 9624(define_insn "*aarch64_sve_<inc_dec><mode>_pat" 9625 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 9626 (ANY_PLUS:VNx8HI 9627 (vec_duplicate:VNx8HI 9628 (match_operator:HI 5 "subreg_lowpart_operator" 9629 [(unspec:SI [(match_operand:DI 2 "const_int_operand") 9630 (match_operand:DI 3 "const_int_operand") 9631 (match_operand:DI 4 "const_int_operand")] 9632 UNSPEC_SVE_CNT_PAT)])) 9633 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] 9634 "TARGET_SVE" 9635 { 9636 if (which_alternative == 1) 9637 output_asm_insn ("movprfx\t%0, %1", operands); 9638 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9639 operands + 2); 9640 } 9641 [(set_attr "movprfx" "*,yes")] 9642) 9643 9644;; ------------------------------------------------------------------------- 9645;; ---- [INT] Decrement by the number of elements in a pattern (scalar) 9646;; ------------------------------------------------------------------------- 9647;; Includes: 9648;; - DEC 9649;; - SQDEC 9650;; - UQDEC 9651;; ------------------------------------------------------------------------- 9652 9653;; Decrement a DImode register by the number of elements in an svpattern. 9654;; See aarch64_sve_cnt_pat for the counting behavior. 9655(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9656 [(set (match_operand:DI 0 "register_operand" "=r") 9657 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0") 9658 (zero_extend:DI 9659 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9660 (match_operand:DI 3 "const_int_operand") 9661 (match_operand:DI 4 "const_int_operand")] 9662 UNSPEC_SVE_CNT_PAT))))] 9663 "TARGET_SVE" 9664 { 9665 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0", 9666 operands + 2); 9667 } 9668) 9669 9670;; Decrement an SImode register by the number of elements in an svpattern 9671;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting 9672;; behavior. 9673(define_insn "*aarch64_sve_decsi_pat" 9674 [(set (match_operand:SI 0 "register_operand" "=r") 9675 (minus:SI (match_operand:SI 1 "register_operand" "0") 9676 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9677 (match_operand:DI 3 "const_int_operand") 9678 (match_operand:DI 4 "const_int_operand")] 9679 UNSPEC_SVE_CNT_PAT)))] 9680 "TARGET_SVE" 9681 { 9682 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2); 9683 } 9684) 9685 9686;; Decrement an SImode register by the number of elements in an svpattern 9687;; using saturating arithmetic, extending the result to 64 bits. 9688;; 9689;; See aarch64_sve_cnt_pat for the counting behavior. 9690(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9691 [(set (match_operand:DI 0 "register_operand" "=r") 9692 (<paired_extend>:DI 9693 (SAT_MINUS:SI 9694 (match_operand:SI_ONLY 1 "register_operand" "0") 9695 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9696 (match_operand:DI 3 "const_int_operand") 9697 (match_operand:DI 4 "const_int_operand")] 9698 UNSPEC_SVE_CNT_PAT))))] 9699 "TARGET_SVE" 9700 { 9701 const char *registers = (<CODE> == SS_MINUS ? 
"%x0, %w0" : "%w0"); 9702 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers, 9703 operands + 2); 9704 } 9705) 9706 9707;; ------------------------------------------------------------------------- 9708;; ---- [INT] Decrement by the number of elements in a pattern (vector) 9709;; ------------------------------------------------------------------------- 9710;; Includes: 9711;; - DEC 9712;; - SQDEC 9713;; - UQDEC 9714;; ------------------------------------------------------------------------- 9715 9716;; Decrement a vector of DIs by the number of elements in an svpattern. 9717;; See aarch64_sve_cnt_pat for the counting behavior. 9718(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9719 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 9720 (ANY_MINUS:VNx2DI 9721 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") 9722 (vec_duplicate:VNx2DI 9723 (zero_extend:DI 9724 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9725 (match_operand:DI 3 "const_int_operand") 9726 (match_operand:DI 4 "const_int_operand")] 9727 UNSPEC_SVE_CNT_PAT)))))] 9728 "TARGET_SVE" 9729 { 9730 if (which_alternative == 1) 9731 output_asm_insn ("movprfx\t%0, %1", operands); 9732 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9733 operands + 2); 9734 } 9735 [(set_attr "movprfx" "*,yes")] 9736) 9737 9738;; Decrement a vector of SIs by the number of elements in an svpattern. 9739;; See aarch64_sve_cnt_pat for the counting behavior. 9740(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9741 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 9742 (ANY_MINUS:VNx4SI 9743 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") 9744 (vec_duplicate:VNx4SI 9745 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9746 (match_operand:DI 3 "const_int_operand") 9747 (match_operand:DI 4 "const_int_operand")] 9748 UNSPEC_SVE_CNT_PAT))))] 9749 "TARGET_SVE" 9750 { 9751 if (which_alternative == 1) 9752 output_asm_insn ("movprfx\t%0, %1", operands); 9753 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9754 operands + 2); 9755 } 9756 [(set_attr "movprfx" "*,yes")] 9757) 9758 9759;; Decrement a vector of HIs by the number of elements in an svpattern. 9760;; See aarch64_sve_cnt_pat for the counting behavior. 
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate.  Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
		      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
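;;
;; For example (illustrative): if p0 has five active .h lanes, then
;;
;;	incp	x0, p0.h
;;
;; adds 5 to x0; SQINCP and UQINCP are the saturating forms.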
9824(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 9825 [(set (match_operand:DI 0 "register_operand") 9826 (ANY_PLUS:DI 9827 (zero_extend:DI 9828 (unspec:SI [(match_dup 3) 9829 (const_int SVE_KNOWN_PTRUE) 9830 (match_operand:PRED_ALL 2 "register_operand")] 9831 UNSPEC_CNTP)) 9832 (match_operand:DI_ONLY 1 "register_operand")))] 9833 "TARGET_SVE" 9834 { 9835 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 9836 } 9837) 9838 9839(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 9840 [(set (match_operand:DI 0 "register_operand" "=r") 9841 (ANY_PLUS:DI 9842 (zero_extend:DI 9843 (unspec:SI [(match_operand 3) 9844 (const_int SVE_KNOWN_PTRUE) 9845 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 9846 UNSPEC_CNTP)) 9847 (match_operand:DI_ONLY 1 "register_operand" "0")))] 9848 "TARGET_SVE" 9849 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>" 9850 "&& !CONSTANT_P (operands[3])" 9851 { 9852 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 9853 } 9854) 9855 9856;; Increment an SImode register by the number of set bits in a predicate 9857;; using modular arithmetic. See aarch64_sve_cntp for a description of 9858;; the operands. 9859(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp" 9860 [(set (match_operand:SI 0 "register_operand" "=r") 9861 (plus:SI 9862 (unspec:SI [(match_operand 3) 9863 (const_int SVE_KNOWN_PTRUE) 9864 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 9865 UNSPEC_CNTP) 9866 (match_operand:SI 1 "register_operand" "0")))] 9867 "TARGET_SVE" 9868 "incp\t%x0, %2.<Vetype>" 9869 "&& !CONSTANT_P (operands[3])" 9870 { 9871 operands[3] = CONSTM1_RTX (<MODE>mode); 9872 } 9873) 9874 9875;; Increment an SImode register by the number of set bits in a predicate 9876;; using saturating arithmetic, extending the result to 64 bits. 9877;; 9878;; See aarch64_sve_cntp for a description of the operands. 9879(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 9880 [(set (match_operand:DI 0 "register_operand") 9881 (<paired_extend>:DI 9882 (SAT_PLUS:SI 9883 (unspec:SI [(match_dup 3) 9884 (const_int SVE_KNOWN_PTRUE) 9885 (match_operand:PRED_ALL 2 "register_operand")] 9886 UNSPEC_CNTP) 9887 (match_operand:SI_ONLY 1 "register_operand"))))] 9888 "TARGET_SVE" 9889 { 9890 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 9891 } 9892) 9893 9894(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 9895 [(set (match_operand:DI 0 "register_operand" "=r") 9896 (<paired_extend>:DI 9897 (SAT_PLUS:SI 9898 (unspec:SI [(match_operand 3) 9899 (const_int SVE_KNOWN_PTRUE) 9900 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 9901 UNSPEC_CNTP) 9902 (match_operand:SI_ONLY 1 "register_operand" "0"))))] 9903 "TARGET_SVE" 9904 { 9905 if (<CODE> == SS_PLUS) 9906 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0"; 9907 else 9908 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>"; 9909 } 9910 "&& !CONSTANT_P (operands[3])" 9911 { 9912 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 9913 } 9914) 9915 9916;; ------------------------------------------------------------------------- 9917;; ---- [INT] Increment by the number of elements in a predicate (vector) 9918;; ------------------------------------------------------------------------- 9919;; Includes: 9920;; - INCP 9921;; - SQINCP 9922;; - UQINCP 9923;; ------------------------------------------------------------------------- 9924 9925;; Increment a vector of DIs by the number of set bits in a predicate. 9926;; See aarch64_sve_cntp for a description of the operands. 
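;;
;; The vector form takes an unsuffixed predicate operand, e.g.
;; (illustrative):
;;
;;	incp	z0.d, p0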
9927(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 9928 [(set (match_operand:VNx2DI 0 "register_operand") 9929 (ANY_PLUS:VNx2DI 9930 (vec_duplicate:VNx2DI 9931 (zero_extend:DI 9932 (unspec:SI 9933 [(match_dup 3) 9934 (const_int SVE_KNOWN_PTRUE) 9935 (match_operand:<VPRED> 2 "register_operand")] 9936 UNSPEC_CNTP))) 9937 (match_operand:VNx2DI_ONLY 1 "register_operand")))] 9938 "TARGET_SVE" 9939 { 9940 operands[3] = CONSTM1_RTX (<VPRED>mode); 9941 } 9942) 9943 9944(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 9945 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 9946 (ANY_PLUS:VNx2DI 9947 (vec_duplicate:VNx2DI 9948 (zero_extend:DI 9949 (unspec:SI 9950 [(match_operand 3) 9951 (const_int SVE_KNOWN_PTRUE) 9952 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 9953 UNSPEC_CNTP))) 9954 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] 9955 "TARGET_SVE" 9956 "@ 9957 <inc_dec>p\t%0.d, %2 9958 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2" 9959 "&& !CONSTANT_P (operands[3])" 9960 { 9961 operands[3] = CONSTM1_RTX (<VPRED>mode); 9962 } 9963 [(set_attr "movprfx" "*,yes")] 9964) 9965 9966;; Increment a vector of SIs by the number of set bits in a predicate. 9967;; See aarch64_sve_cntp for a description of the operands. 9968(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 9969 [(set (match_operand:VNx4SI 0 "register_operand") 9970 (ANY_PLUS:VNx4SI 9971 (vec_duplicate:VNx4SI 9972 (unspec:SI 9973 [(match_dup 3) 9974 (const_int SVE_KNOWN_PTRUE) 9975 (match_operand:<VPRED> 2 "register_operand")] 9976 UNSPEC_CNTP)) 9977 (match_operand:VNx4SI_ONLY 1 "register_operand")))] 9978 "TARGET_SVE" 9979 { 9980 operands[3] = CONSTM1_RTX (<VPRED>mode); 9981 } 9982) 9983 9984(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 9985 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 9986 (ANY_PLUS:VNx4SI 9987 (vec_duplicate:VNx4SI 9988 (unspec:SI 9989 [(match_operand 3) 9990 (const_int SVE_KNOWN_PTRUE) 9991 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 9992 UNSPEC_CNTP)) 9993 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] 9994 "TARGET_SVE" 9995 "@ 9996 <inc_dec>p\t%0.s, %2 9997 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2" 9998 "&& !CONSTANT_P (operands[3])" 9999 { 10000 operands[3] = CONSTM1_RTX (<VPRED>mode); 10001 } 10002 [(set_attr "movprfx" "*,yes")] 10003) 10004 10005;; Increment a vector of HIs by the number of set bits in a predicate. 10006;; See aarch64_sve_cntp for a description of the operands. 
10007(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10008 [(set (match_operand:VNx8HI 0 "register_operand") 10009 (ANY_PLUS:VNx8HI 10010 (vec_duplicate:VNx8HI 10011 (truncate:HI 10012 (unspec:SI 10013 [(match_dup 3) 10014 (const_int SVE_KNOWN_PTRUE) 10015 (match_operand:<VPRED> 2 "register_operand")] 10016 UNSPEC_CNTP))) 10017 (match_operand:VNx8HI_ONLY 1 "register_operand")))] 10018 "TARGET_SVE" 10019 { 10020 operands[3] = CONSTM1_RTX (<VPRED>mode); 10021 } 10022) 10023 10024(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10025 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10026 (ANY_PLUS:VNx8HI 10027 (vec_duplicate:VNx8HI 10028 (match_operator:HI 3 "subreg_lowpart_operator" 10029 [(unspec:SI 10030 [(match_operand 4) 10031 (const_int SVE_KNOWN_PTRUE) 10032 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10033 UNSPEC_CNTP)])) 10034 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] 10035 "TARGET_SVE" 10036 "@ 10037 <inc_dec>p\t%0.h, %2 10038 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2" 10039 "&& !CONSTANT_P (operands[4])" 10040 { 10041 operands[4] = CONSTM1_RTX (<VPRED>mode); 10042 } 10043 [(set_attr "movprfx" "*,yes")] 10044) 10045 10046;; ------------------------------------------------------------------------- 10047;; ---- [INT] Decrement by the number of elements in a predicate (scalar) 10048;; ------------------------------------------------------------------------- 10049;; Includes: 10050;; - DECP 10051;; - SQDECP 10052;; - UQDECP 10053;; ------------------------------------------------------------------------- 10054 10055;; Decrement a DImode register by the number of set bits in a predicate. 10056;; See aarch64_sve_cntp for a description of the operands. 10057(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10058 [(set (match_operand:DI 0 "register_operand") 10059 (ANY_MINUS:DI 10060 (match_operand:DI_ONLY 1 "register_operand") 10061 (zero_extend:DI 10062 (unspec:SI [(match_dup 3) 10063 (const_int SVE_KNOWN_PTRUE) 10064 (match_operand:PRED_ALL 2 "register_operand")] 10065 UNSPEC_CNTP))))] 10066 "TARGET_SVE" 10067 { 10068 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10069 } 10070) 10071 10072(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10073 [(set (match_operand:DI 0 "register_operand" "=r") 10074 (ANY_MINUS:DI 10075 (match_operand:DI_ONLY 1 "register_operand" "0") 10076 (zero_extend:DI 10077 (unspec:SI [(match_operand 3) 10078 (const_int SVE_KNOWN_PTRUE) 10079 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10080 UNSPEC_CNTP))))] 10081 "TARGET_SVE" 10082 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>" 10083 "&& !CONSTANT_P (operands[3])" 10084 { 10085 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10086 } 10087) 10088 10089;; Decrement an SImode register by the number of set bits in a predicate 10090;; using modular arithmetic. See aarch64_sve_cntp for a description of the 10091;; operands. 
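;;
;; E.g. (illustrative):
;;
;;	decp	x0, p0.b
;;
;; The saturating SImode forms further below print either
;; "sqdecp\tx0, p0.b, w0" or "uqdecp\tw0, p0.b".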
10092(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp" 10093 [(set (match_operand:SI 0 "register_operand" "=r") 10094 (minus:SI 10095 (match_operand:SI 1 "register_operand" "0") 10096 (unspec:SI [(match_operand 3) 10097 (const_int SVE_KNOWN_PTRUE) 10098 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10099 UNSPEC_CNTP)))] 10100 "TARGET_SVE" 10101 "decp\t%x0, %2.<Vetype>" 10102 "&& !CONSTANT_P (operands[3])" 10103 { 10104 operands[3] = CONSTM1_RTX (<MODE>mode); 10105 } 10106) 10107 10108;; Decrement an SImode register by the number of set bits in a predicate 10109;; using saturating arithmetic, extending the result to 64 bits. 10110;; 10111;; See aarch64_sve_cntp for a description of the operands. 10112(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10113 [(set (match_operand:DI 0 "register_operand") 10114 (<paired_extend>:DI 10115 (SAT_MINUS:SI 10116 (match_operand:SI_ONLY 1 "register_operand") 10117 (unspec:SI [(match_dup 3) 10118 (const_int SVE_KNOWN_PTRUE) 10119 (match_operand:PRED_ALL 2 "register_operand")] 10120 UNSPEC_CNTP))))] 10121 "TARGET_SVE" 10122 { 10123 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10124 } 10125) 10126 10127(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10128 [(set (match_operand:DI 0 "register_operand" "=r") 10129 (<paired_extend>:DI 10130 (SAT_MINUS:SI 10131 (match_operand:SI_ONLY 1 "register_operand" "0") 10132 (unspec:SI [(match_operand 3) 10133 (const_int SVE_KNOWN_PTRUE) 10134 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10135 UNSPEC_CNTP))))] 10136 "TARGET_SVE" 10137 { 10138 if (<CODE> == SS_MINUS) 10139 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0"; 10140 else 10141 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>"; 10142 } 10143 "&& !CONSTANT_P (operands[3])" 10144 { 10145 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10146 } 10147) 10148 10149;; ------------------------------------------------------------------------- 10150;; ---- [INT] Decrement by the number of elements in a predicate (vector) 10151;; ------------------------------------------------------------------------- 10152;; Includes: 10153;; - DECP 10154;; - SQDECP 10155;; - UQDECP 10156;; ------------------------------------------------------------------------- 10157 10158;; Decrement a vector of DIs by the number of set bits in a predicate. 10159;; See aarch64_sve_cntp for a description of the operands. 
10160(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10161 [(set (match_operand:VNx2DI 0 "register_operand") 10162 (ANY_MINUS:VNx2DI 10163 (match_operand:VNx2DI_ONLY 1 "register_operand") 10164 (vec_duplicate:VNx2DI 10165 (zero_extend:DI 10166 (unspec:SI 10167 [(match_dup 3) 10168 (const_int SVE_KNOWN_PTRUE) 10169 (match_operand:<VPRED> 2 "register_operand")] 10170 UNSPEC_CNTP)))))] 10171 "TARGET_SVE" 10172 { 10173 operands[3] = CONSTM1_RTX (<VPRED>mode); 10174 } 10175) 10176 10177(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10178 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 10179 (ANY_MINUS:VNx2DI 10180 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") 10181 (vec_duplicate:VNx2DI 10182 (zero_extend:DI 10183 (unspec:SI 10184 [(match_operand 3) 10185 (const_int SVE_KNOWN_PTRUE) 10186 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10187 UNSPEC_CNTP)))))] 10188 "TARGET_SVE" 10189 "@ 10190 <inc_dec>p\t%0.d, %2 10191 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2" 10192 "&& !CONSTANT_P (operands[3])" 10193 { 10194 operands[3] = CONSTM1_RTX (<VPRED>mode); 10195 } 10196 [(set_attr "movprfx" "*,yes")] 10197) 10198 10199;; Decrement a vector of SIs by the number of set bits in a predicate. 10200;; See aarch64_sve_cntp for a description of the operands. 10201(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10202 [(set (match_operand:VNx4SI 0 "register_operand") 10203 (ANY_MINUS:VNx4SI 10204 (match_operand:VNx4SI_ONLY 1 "register_operand") 10205 (vec_duplicate:VNx4SI 10206 (unspec:SI 10207 [(match_dup 3) 10208 (const_int SVE_KNOWN_PTRUE) 10209 (match_operand:<VPRED> 2 "register_operand")] 10210 UNSPEC_CNTP))))] 10211 "TARGET_SVE" 10212 { 10213 operands[3] = CONSTM1_RTX (<VPRED>mode); 10214 } 10215) 10216 10217(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10218 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 10219 (ANY_MINUS:VNx4SI 10220 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") 10221 (vec_duplicate:VNx4SI 10222 (unspec:SI 10223 [(match_operand 3) 10224 (const_int SVE_KNOWN_PTRUE) 10225 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10226 UNSPEC_CNTP))))] 10227 "TARGET_SVE" 10228 "@ 10229 <inc_dec>p\t%0.s, %2 10230 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2" 10231 "&& !CONSTANT_P (operands[3])" 10232 { 10233 operands[3] = CONSTM1_RTX (<VPRED>mode); 10234 } 10235 [(set_attr "movprfx" "*,yes")] 10236) 10237 10238;; Decrement a vector of HIs by the number of set bits in a predicate. 10239;; See aarch64_sve_cntp for a description of the operands. 
10240(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10241 [(set (match_operand:VNx8HI 0 "register_operand") 10242 (ANY_MINUS:VNx8HI 10243 (match_operand:VNx8HI_ONLY 1 "register_operand") 10244 (vec_duplicate:VNx8HI 10245 (truncate:HI 10246 (unspec:SI 10247 [(match_dup 3) 10248 (const_int SVE_KNOWN_PTRUE) 10249 (match_operand:<VPRED> 2 "register_operand")] 10250 UNSPEC_CNTP)))))] 10251 "TARGET_SVE" 10252 { 10253 operands[3] = CONSTM1_RTX (<VPRED>mode); 10254 } 10255) 10256 10257(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10258 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10259 (ANY_MINUS:VNx8HI 10260 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") 10261 (vec_duplicate:VNx8HI 10262 (match_operator:HI 3 "subreg_lowpart_operator" 10263 [(unspec:SI 10264 [(match_operand 4) 10265 (const_int SVE_KNOWN_PTRUE) 10266 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10267 UNSPEC_CNTP)]))))] 10268 "TARGET_SVE" 10269 "@ 10270 <inc_dec>p\t%0.h, %2 10271 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2" 10272 "&& !CONSTANT_P (operands[4])" 10273 { 10274 operands[4] = CONSTM1_RTX (<VPRED>mode); 10275 } 10276 [(set_attr "movprfx" "*,yes")] 10277) 10278
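;;
;; For the second alternative of the insn above, the two-instruction
;; sequence is (illustrative):
;;
;;	movprfx	z0, z1
;;	decp	z0.h, p0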