1;; Machine description for AArch64 SVE. 2;; Copyright (C) 2009-2022 Free Software Foundation, Inc. 3;; Contributed by ARM Ltd. 4;; 5;; This file is part of GCC. 6;; 7;; GCC is free software; you can redistribute it and/or modify it 8;; under the terms of the GNU General Public License as published by 9;; the Free Software Foundation; either version 3, or (at your option) 10;; any later version. 11;; 12;; GCC is distributed in the hope that it will be useful, but 13;; WITHOUT ANY WARRANTY; without even the implied warranty of 14;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15;; General Public License for more details. 16;; 17;; You should have received a copy of the GNU General Public License 18;; along with GCC; see the file COPYING3. If not see 19;; <http://www.gnu.org/licenses/>. 20 21;; The file is organised into the following sections (search for the full 22;; line): 23;; 24;; == General notes 25;; ---- Note on the handling of big-endian SVE 26;; ---- Description of UNSPEC_PTEST 27;; ---- Description of UNSPEC_PRED_Z 28;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X 29;; ---- Note on predicated FP arithmetic patterns and GP "strictness" 30;; ---- Note on FFR handling 31;; 32;; == Moves 33;; ---- Moves of single vectors 34;; ---- Moves of multiple vectors 35;; ---- Moves of predicates 36;; ---- Moves relating to the FFR 37;; 38;; == Loads 39;; ---- Normal contiguous loads 40;; ---- Extending contiguous loads 41;; ---- First-faulting contiguous loads 42;; ---- First-faulting extending contiguous loads 43;; ---- Non-temporal contiguous loads 44;; ---- Normal gather loads 45;; ---- Extending gather loads 46;; ---- First-faulting gather loads 47;; ---- First-faulting extending gather loads 48;; 49;; == Prefetches 50;; ---- Contiguous prefetches 51;; ---- Gather prefetches 52;; 53;; == Stores 54;; ---- Normal contiguous stores 55;; ---- Truncating contiguous stores 56;; ---- Non-temporal contiguous stores 57;; ---- Normal scatter stores 58;; ---- Truncating scatter stores 59;; 60;; == Vector creation 61;; ---- [INT,FP] Duplicate element 62;; ---- [INT,FP] Initialize from individual elements 63;; ---- [INT] Linear series 64;; ---- [PRED] Duplicate element 65;; 66;; == Vector decomposition 67;; ---- [INT,FP] Extract index 68;; ---- [INT,FP] Extract active element 69;; ---- [PRED] Extract index 70;; 71;; == Unary arithmetic 72;; ---- [INT] General unary arithmetic corresponding to rtx codes 73;; ---- [INT] General unary arithmetic corresponding to unspecs 74;; ---- [INT] Sign and zero extension 75;; ---- [INT] Truncation 76;; ---- [INT] Logical inverse 77;; ---- [FP<-INT] General unary arithmetic that maps to unspecs 78;; ---- [FP] General unary arithmetic corresponding to unspecs 79;; ---- [FP] Square root 80;; ---- [FP] Reciprocal square root 81;; ---- [PRED] Inverse 82 83;; == Binary arithmetic 84;; ---- [INT] General binary arithmetic corresponding to rtx codes 85;; ---- [INT] Addition 86;; ---- [INT] Subtraction 87;; ---- [INT] Take address 88;; ---- [INT] Absolute difference 89;; ---- [INT] Saturating addition and subtraction 90;; ---- [INT] Highpart multiplication 91;; ---- [INT] Division 92;; ---- [INT] Binary logical operations 93;; ---- [INT] Binary logical operations (inverted second input) 94;; ---- [INT] Shifts (rounding towards -Inf) 95;; ---- [INT] Shifts (rounding towards 0) 96;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs 97;; ---- [FP] General binary arithmetic corresponding to rtx codes 98;; ---- [FP] General binary arithmetic corresponding to unspecs 99;; ---- [FP] Addition 100;; ---- [FP] Complex addition 101;; ---- [FP] Subtraction 102;; ---- [FP] Absolute difference 103;; ---- [FP] Multiplication 104;; ---- [FP] Division 105;; ---- [FP] Binary logical operations 106;; ---- [FP] Sign copying 107;; ---- [FP] Maximum and minimum 108;; ---- [PRED] Binary logical operations 109;; ---- [PRED] Binary logical operations (inverted second input) 110;; ---- [PRED] Binary logical operations (inverted result) 111;; 112;; == Ternary arithmetic 113;; ---- [INT] MLA and MAD 114;; ---- [INT] MLS and MSB 115;; ---- [INT] Dot product 116;; ---- [INT] Sum of absolute differences 117;; ---- [INT] Matrix multiply-accumulate 118;; ---- [FP] General ternary arithmetic corresponding to unspecs 119;; ---- [FP] Complex multiply-add 120;; ---- [FP] Trigonometric multiply-add 121;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) 122;; ---- [FP] Matrix multiply-accumulate 123;; 124;; == Comparisons and selects 125;; ---- [INT,FP] Select based on predicates 126;; ---- [INT,FP] Compare and select 127;; ---- [INT] Comparisons 128;; ---- [INT] While tests 129;; ---- [FP] Direct comparisons 130;; ---- [FP] Absolute comparisons 131;; ---- [PRED] Select 132;; ---- [PRED] Test bits 133;; 134;; == Reductions 135;; ---- [INT,FP] Conditional reductions 136;; ---- [INT] Tree reductions 137;; ---- [FP] Tree reductions 138;; ---- [FP] Left-to-right reductions 139;; 140;; == Permutes 141;; ---- [INT,FP] General permutes 142;; ---- [INT,FP] Special-purpose unary permutes 143;; ---- [INT,FP] Special-purpose binary permutes 144;; ---- [PRED] Special-purpose unary permutes 145;; ---- [PRED] Special-purpose binary permutes 146;; 147;; == Conversions 148;; ---- [INT<-INT] Packs 149;; ---- [INT<-INT] Unpacks 150;; ---- [INT<-FP] Conversions 151;; ---- [INT<-FP] Packs 152;; ---- [INT<-FP] Unpacks 153;; ---- [FP<-INT] Conversions 154;; ---- [FP<-INT] Packs 155;; ---- [FP<-INT] Unpacks 156;; ---- [FP<-FP] Packs 157;; ---- [FP<-FP] Packs (bfloat16) 158;; ---- [FP<-FP] Unpacks 159;; ---- [PRED<-PRED] Packs 160;; ---- [PRED<-PRED] Unpacks 161;; 162;; == Vector partitioning 163;; ---- [PRED] Unary partitioning 164;; ---- [PRED] Binary partitioning 165;; ---- [PRED] Scalarization 166;; 167;; == Counting elements 168;; ---- [INT] Count elements in a pattern (scalar) 169;; ---- [INT] Increment by the number of elements in a pattern (scalar) 170;; ---- [INT] Increment by the number of elements in a pattern (vector) 171;; ---- [INT] Decrement by the number of elements in a pattern (scalar) 172;; ---- [INT] Decrement by the number of elements in a pattern (vector) 173;; ---- [INT] Count elements in a predicate (scalar) 174;; ---- [INT] Increment by the number of elements in a predicate (scalar) 175;; ---- [INT] Increment by the number of elements in a predicate (vector) 176;; ---- [INT] Decrement by the number of elements in a predicate (scalar) 177;; ---- [INT] Decrement by the number of elements in a predicate (vector) 178 179;; ========================================================================= 180;; == General notes 181;; ========================================================================= 182;; 183;; ------------------------------------------------------------------------- 184;; ---- Note on the handling of big-endian SVE 185;; ------------------------------------------------------------------------- 186;; 187;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the 188;; same way as movdi or movti would: the first byte of memory goes 189;; into the most significant byte of the register and the last byte 190;; of memory goes into the least significant byte of the register. 191;; This is the most natural ordering for Advanced SIMD and matches 192;; the ABI layout for 64-bit and 128-bit vector types. 193;; 194;; As a result, the order of bytes within the register is what GCC 195;; expects for a big-endian target, and subreg offsets therefore work 196;; as expected, with the first element in memory having subreg offset 0 197;; and the last element in memory having the subreg offset associated 198;; with a big-endian lowpart. However, this ordering also means that 199;; GCC's lane numbering does not match the architecture's numbering: 200;; GCC always treats the element at the lowest address in memory 201;; (subreg offset 0) as element 0, while the architecture treats 202;; the least significant end of the register as element 0. 203;; 204;; The situation for SVE is different. We want the layout of the 205;; SVE register to be same for mov<mode> as it is for maskload<mode>: 206;; logically, a mov<mode> load must be indistinguishable from a 207;; maskload<mode> whose mask is all true. We therefore need the 208;; register layout to match LD1 rather than LDR. The ABI layout of 209;; SVE types also matches LD1 byte ordering rather than LDR byte ordering. 210;; 211;; As a result, the architecture lane numbering matches GCC's lane 212;; numbering, with element 0 always being the first in memory. 213;; However: 214;; 215;; - Applying a subreg offset to a register does not give the element 216;; that GCC expects: the first element in memory has the subreg offset 217;; associated with a big-endian lowpart while the last element in memory 218;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS. 219;; 220;; - We cannot use LDR and STR for spill slots that might be accessed 221;; via subregs, since although the elements have the order GCC expects, 222;; the order of the bytes within the elements is different. We instead 223;; access spill slots via LD1 and ST1, using secondary reloads to 224;; reserve a predicate register. 225;; 226;; ------------------------------------------------------------------------- 227;; ---- Description of UNSPEC_PTEST 228;; ------------------------------------------------------------------------- 229;; 230;; SVE provides a PTEST instruction for testing the active lanes of a 231;; predicate and setting the flags based on the result. The associated 232;; condition code tests are: 233;; 234;; - any (= ne): at least one active bit is set 235;; - none (= eq): all active bits are clear (*) 236;; - first (= mi): the first active bit is set 237;; - nfrst (= pl): the first active bit is clear (*) 238;; - last (= cc): the last active bit is set 239;; - nlast (= cs): the last active bit is clear (*) 240;; 241;; where the conditions marked (*) are also true when there are no active 242;; lanes (i.e. when the governing predicate is a PFALSE). The flags results 243;; of a PTEST use the condition code mode CC_NZC. 244;; 245;; PTEST is always a .B operation (i.e. it always operates on VNx16BI). 246;; This means that for other predicate modes, we need a governing predicate 247;; in which all bits are defined. 248;; 249;; For example, most predicated .H operations ignore the odd bits of the 250;; governing predicate, so that an active lane is represented by the 251;; bits "1x" and an inactive lane by the bits "0x", where "x" can be 252;; any value. To test a .H predicate, we instead need "10" and "00" 253;; respectively, so that the condition only tests the even bits of the 254;; predicate. 255;; 256;; Several instructions set the flags as a side-effect, in the same way 257;; that a separate PTEST would. It's important for code quality that we 258;; use these flags results as often as possible, particularly in the case 259;; of WHILE* and RDFFR. 260;; 261;; Also, some of the instructions that set the flags are unpredicated 262;; and instead implicitly test all .B, .H, .S or .D elements, as though 263;; they were predicated on a PTRUE of that size. For example, a .S 264;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE 265;; would. 266;; 267;; We therefore need to represent PTEST operations in a way that 268;; makes it easy to combine them with both predicated and unpredicated 269;; operations, while using a VNx16BI governing predicate for all 270;; predicate modes. We do this using: 271;; 272;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST) 273;; 274;; where: 275;; 276;; - GP is the real VNx16BI governing predicate 277;; 278;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting 279;; GP to CAST_GP are guaranteed to be clear in GP. 280;; 281;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value 282;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and 283;; SVE_MAYBE_NOT_PTRUE otherwise. 284;; 285;; - OP is the predicate we want to test, of the same mode as CAST_GP. 286;; 287;; ------------------------------------------------------------------------- 288;; ---- Description of UNSPEC_PRED_Z 289;; ------------------------------------------------------------------------- 290;; 291;; SVE integer comparisons are predicated and return zero for inactive 292;; lanes. Sometimes we use them with predicates that are all-true and 293;; sometimes we use them with general predicates. 294;; 295;; The integer comparisons also set the flags and so build-in the effect 296;; of a PTEST. We therefore want to be able to combine integer comparison 297;; patterns with PTESTs of the result. One difficulty with doing this is 298;; that (as noted above) the PTEST is always a .B operation and so can place 299;; stronger requirements on the governing predicate than the comparison does. 300;; 301;; For example, when applying a separate PTEST to the result of a full-vector 302;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a 303;; .B PTRUE. In constrast, the comparison might be predicated on either 304;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate 305;; bits don't matter for .H operations. 306;; 307;; We therefore can't rely on a full-vector comparison using the same 308;; predicate register as a following PTEST. We instead need to remember 309;; whether a comparison is known to be a full-vector comparison and use 310;; this information in addition to a check for equal predicate registers. 311;; At the same time, it's useful to have a common representation for all 312;; integer comparisons, so that they can be handled by a single set of 313;; patterns. 314;; 315;; We therefore take a similar approach to UNSPEC_PTEST above and use: 316;; 317;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z) 318;; 319;; where: 320;; 321;; - GP is the governing predicate, of mode <M:VPRED> 322;; 323;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value 324;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE 325;; otherwise 326;; 327;; - CODE is the comparison code 328;; 329;; - OP0 and OP1 are the values being compared, of mode M 330;; 331;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero. 332;; 333;; ------------------------------------------------------------------------- 334;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X 335;; ------------------------------------------------------------------------- 336;; 337;; Many SVE integer operations are predicated. We can generate them 338;; from four sources: 339;; 340;; (1) Using normal unpredicated optabs. In this case we need to create 341;; an all-true predicate register to act as the governing predicate 342;; for the SVE instruction. There are no inactive lanes, and thus 343;; the values of inactive lanes don't matter. 344;; 345;; (2) Using _x ACLE functions. In this case the function provides a 346;; specific predicate and some lanes might be inactive. However, 347;; as for (1), the values of the inactive lanes don't matter. 348;; We can make extra lanes active without changing the behavior 349;; (although for code-quality reasons we should avoid doing so 350;; needlessly). 351;; 352;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. 353;; These optabs have a predicate operand that specifies which lanes are 354;; active and another operand that provides the values of inactive lanes. 355;; 356;; (4) Using _m and _z ACLE functions. These functions map to the same 357;; patterns as (3), with the _z functions setting inactive lanes to zero 358;; and the _m functions setting the inactive lanes to one of the function 359;; arguments. 360;; 361;; For (1) and (2) we need a way of attaching the predicate to a normal 362;; unpredicated integer operation. We do this using: 363;; 364;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X) 365;; 366;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED 367;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE; 368;; it always is for (1), but might not be for (2). 369;; 370;; The unspec as a whole has the same value as (code:M ...) when PRED is 371;; all-true. It is always semantically valid to replace PRED with a PTRUE, 372;; but as noted above, we should only do so if there's a specific benefit. 373;; 374;; (The "_X" in the unspec is named after the ACLE functions in (2).) 375;; 376;; For (3) and (4) we can simply use the SVE port's normal representation 377;; of a predicate-based select: 378;; 379;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL) 380;; 381;; where INACTIVE specifies the values of inactive lanes. 382;; 383;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather 384;; than inserting the integer operation directly. This is mostly useful 385;; if we want the combine pass to merge an integer operation with an explicit 386;; vcond_mask (in other words, with a following SEL instruction). However, 387;; it's generally better to merge such operations at the gimple level 388;; using (3). 389;; 390;; ------------------------------------------------------------------------- 391;; ---- Note on predicated FP arithmetic patterns and GP "strictness" 392;; ------------------------------------------------------------------------- 393;; 394;; Most SVE floating-point operations are predicated. We can generate 395;; them from four sources: 396;; 397;; (1) Using normal unpredicated optabs. In this case we need to create 398;; an all-true predicate register to act as the governing predicate 399;; for the SVE instruction. There are no inactive lanes, and thus 400;; the values of inactive lanes don't matter. 401;; 402;; (2) Using _x ACLE functions. In this case the function provides a 403;; specific predicate and some lanes might be inactive. However, 404;; as for (1), the values of the inactive lanes don't matter. 405;; 406;; The instruction must have the same exception behavior as the 407;; function call unless things like command-line flags specifically 408;; allow otherwise. For example, with -ffast-math, it is OK to 409;; raise exceptions for inactive lanes, but normally it isn't. 410;; 411;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions. 412;; These optabs have a predicate operand that specifies which lanes are 413;; active and another operand that provides the values of inactive lanes. 414;; 415;; (4) Using _m and _z ACLE functions. These functions map to the same 416;; patterns as (3), with the _z functions setting inactive lanes to zero 417;; and the _m functions setting the inactive lanes to one of the function 418;; arguments. 419;; 420;; So: 421;; 422;; - In (1), the predicate is known to be all true and the pattern can use 423;; unpredicated operations where available. 424;; 425;; - In (2), the predicate might or might not be all true. The pattern can 426;; use unpredicated instructions if the predicate is all-true or if things 427;; like command-line flags allow exceptions for inactive lanes. 428;; 429;; - (3) and (4) represent a native SVE predicated operation. Some lanes 430;; might be inactive and inactive lanes of the result must have specific 431;; values. There is no scope for using unpredicated instructions (and no 432;; reason to want to), so the question about command-line flags doesn't 433;; arise. 434;; 435;; It would be inaccurate to model (2) as an rtx code like (sqrt ...) 436;; in combination with a separate predicate operand, e.g. 437;; 438;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl") 439;; (sqrt:SVE_FULL_F 2 "register_operand" "w")] 440;; ....) 441;; 442;; because (sqrt ...) can raise an exception for any lane, including 443;; inactive ones. We therefore need to use an unspec instead. 444;; 445;; Also, (2) requires some way of distinguishing the case in which the 446;; predicate might have inactive lanes and cannot be changed from the 447;; case in which the predicate has no inactive lanes or can be changed. 448;; This information is also useful when matching combined FP patterns 449;; in which the predicates might not be equal. 450;; 451;; We therefore model FP operations as an unspec of the form: 452;; 453;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>) 454;; 455;; where: 456;; 457;; - PRED is the governing predicate. 458;; 459;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the 460;; value SVE_STRICT_GP if PRED might have inactive lanes and if those 461;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise. 462;; 463;; - OP0 OP1 ... are the normal input operands to the operation. 464;; 465;; - MNEMONIC is the mnemonic of the associated SVE instruction. 466;; 467;; For (3) and (4), we combine these operations with an UNSPEC_SEL 468;; that selects between the result of the FP operation and the "else" 469;; value. (This else value is a merge input for _m ACLE functions 470;; and zero for _z ACLE functions.) The outer pattern then has the form: 471;; 472;; (unspec [pred fp_operation else_value] UNSPEC_SEL) 473;; 474;; This means that the patterns for (3) and (4) have two predicates: 475;; one for the FP operation itself and one for the UNSPEC_SEL. 476;; This pattern is equivalent to the result of combining an instance 477;; of (1) or (2) with a separate vcond instruction, so these patterns 478;; are useful as combine targets too. 479;; 480;; However, in the combine case, the instructions that we want to 481;; combine might use different predicates. Then: 482;; 483;; - Some of the active lanes of the FP operation might be discarded 484;; by the UNSPEC_SEL. It's OK to drop the FP operation on those lanes, 485;; even for SVE_STRICT_GP, since the operations on those lanes are 486;; effectively dead code. 487;; 488;; - Some of the inactive lanes of the FP operation might be selected 489;; by the UNSPEC_SEL, giving unspecified values for those lanes. 490;; SVE_RELAXED_GP lets us extend the FP operation to cover these 491;; extra lanes, but SVE_STRICT_GP does not. 492;; 493;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation 494;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate. 495;; This typically leads to patterns like: 496;; 497;; (unspec [(match_operand 1 "register_operand" "Upl") 498;; (unspec [(match_operand N) 499;; (const_int SVE_RELAXED_GP) 500;; ...] 501;; UNSPEC_COND_<MNEMONIC>) 502;; ...]) 503;; 504;; where operand N is allowed to be anything. These instructions then 505;; have rewrite rules to replace operand N with operand 1, which gives the 506;; instructions a canonical form and means that the original operand N is 507;; not kept live unnecessarily. 508;; 509;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be 510;; a subset of the FP operation predicate. This case isn't interesting 511;; for FP operations that have an all-true predicate, since such operations 512;; use SVE_RELAXED_GP instead. And it is not possible for instruction 513;; conditions to track the subset relationship for arbitrary registers. 514;; So in practice, the only useful case for SVE_STRICT_GP is the one 515;; in which the predicates match: 516;; 517;; (unspec [(match_operand 1 "register_operand" "Upl") 518;; (unspec [(match_dup 1) 519;; (const_int SVE_STRICT_GP) 520;; ...] 521;; UNSPEC_COND_<MNEMONIC>) 522;; ...]) 523;; 524;; This pattern would also be correct for SVE_RELAXED_GP, but it would 525;; be redundant with the one above. However, if the combine pattern 526;; has multiple FP operations, using a match_operand allows combinations 527;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided 528;; that the predicates are the same: 529;; 530;; (unspec [(match_operand 1 "register_operand" "Upl") 531;; (... 532;; (unspec [(match_dup 1) 533;; (match_operand:SI N "aarch64_sve_gp_strictness") 534;; ...] 535;; UNSPEC_COND_<MNEMONIC1>) 536;; (unspec [(match_dup 1) 537;; (match_operand:SI M "aarch64_sve_gp_strictness") 538;; ...] 539;; UNSPEC_COND_<MNEMONIC2>) ...) 540;; ...]) 541;; 542;; The fully-relaxed version of this pattern is: 543;; 544;; (unspec [(match_operand 1 "register_operand" "Upl") 545;; (... 546;; (unspec [(match_operand:SI N) 547;; (const_int SVE_RELAXED_GP) 548;; ...] 549;; UNSPEC_COND_<MNEMONIC1>) 550;; (unspec [(match_operand:SI M) 551;; (const_int SVE_RELAXED_GP) 552;; ...] 553;; UNSPEC_COND_<MNEMONIC2>) ...) 554;; ...]) 555;; 556;; ------------------------------------------------------------------------- 557;; ---- Note on FFR handling 558;; ------------------------------------------------------------------------- 559;; 560;; Logically we want to divide FFR-related instructions into regions 561;; that contain exactly one of: 562;; 563;; - a single write to the FFR 564;; - any number of reads from the FFR (but only one read is likely) 565;; - any number of LDFF1 and LDNF1 instructions 566;; 567;; However, LDFF1 and LDNF1 instructions should otherwise behave like 568;; normal loads as far as possible. This means that they should be 569;; schedulable within a region in the same way that LD1 would be, 570;; and they should be deleted as dead if the result is unused. The loads 571;; should therefore not write to the FFR, since that would both serialize 572;; the loads with respect to each other and keep the loads live for any 573;; later RDFFR. 574;; 575;; We get around this by using a fake "FFR token" (FFRT) to help describe 576;; the dependencies. Writing to the FFRT starts a new "FFRT region", 577;; while using the FFRT keeps the instruction within its region. 578;; Specifically: 579;; 580;; - Writes start a new FFRT region as well as setting the FFR: 581;; 582;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>) 583;; 584;; - Loads use an LD1-like instruction that also uses the FFRT, so that the 585;; loads stay within the same FFRT region: 586;; 587;; L1: load data while using the FFRT 588;; 589;; In addition, any FFRT region that includes a load also has at least one 590;; instance of: 591;; 592;; L2: FFR = update(FFR, FFRT) [type == no_insn] 593;; 594;; to make it clear that the region both reads from and writes to the FFR. 595;; 596;; - Reads do the following: 597;; 598;; R1: FFRT = FFR [type == no_insn] 599;; R2: read from the FFRT 600;; R3: FFRT = update(FFRT) [type == no_insn] 601;; 602;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and 603;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s 604;; cannot move backwards across R3. 605;; 606;; This way, writes are only kept alive by later loads or reads, 607;; and write/read pairs fold normally. For two consecutive reads, 608;; the first R3 is made dead by the second R1, which in turn becomes 609;; redundant with the first R1. We then have: 610;; 611;; first R1: FFRT = FFR 612;; first read from the FFRT 613;; second read from the FFRT 614;; second R3: FFRT = update(FFRT) 615;; 616;; i.e. the two FFRT regions collapse into a single one with two 617;; independent reads. 618;; 619;; The model still prevents some valid optimizations though. For example, 620;; if all loads in an FFRT region are deleted as dead, nothing would remove 621;; the L2 instructions. 622 623;; ========================================================================= 624;; == Moves 625;; ========================================================================= 626 627;; ------------------------------------------------------------------------- 628;; ---- Moves of single vectors 629;; ------------------------------------------------------------------------- 630;; Includes: 631;; - MOV (including aliases) 632;; - LD1B (contiguous form) 633;; - LD1D ( " " ) 634;; - LD1H ( " " ) 635;; - LD1W ( " " ) 636;; - LDR 637;; - ST1B (contiguous form) 638;; - ST1D ( " " ) 639;; - ST1H ( " " ) 640;; - ST1W ( " " ) 641;; - STR 642;; ------------------------------------------------------------------------- 643 644(define_expand "mov<mode>" 645 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") 646 (match_operand:SVE_ALL 1 "general_operand"))] 647 "TARGET_SVE" 648 { 649 /* Use the predicated load and store patterns where possible. 650 This is required for big-endian targets (see the comment at the 651 head of the file) and increases the addressing choices for 652 little-endian. */ 653 if ((MEM_P (operands[0]) || MEM_P (operands[1])) 654 && can_create_pseudo_p ()) 655 { 656 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); 657 DONE; 658 } 659 660 if (CONSTANT_P (operands[1])) 661 { 662 aarch64_expand_mov_immediate (operands[0], operands[1]); 663 DONE; 664 } 665 666 /* Optimize subregs on big-endian targets: we can use REV[BHW] 667 instead of going through memory. */ 668 if (BYTES_BIG_ENDIAN 669 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1])) 670 DONE; 671 } 672) 673 674(define_expand "movmisalign<mode>" 675 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand") 676 (match_operand:SVE_ALL 1 "general_operand"))] 677 "TARGET_SVE" 678 { 679 /* Equivalent to a normal move for our purpooses. */ 680 emit_move_insn (operands[0], operands[1]); 681 DONE; 682 } 683) 684 685;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which 686;; little-endian ordering is acceptable. Only allow memory operations during 687;; and after RA; before RA we want the predicated load and store patterns to 688;; be used instead. 689(define_insn "*aarch64_sve_mov<mode>_ldr_str" 690 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") 691 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] 692 "TARGET_SVE 693 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN) 694 && ((lra_in_progress || reload_completed) 695 || (register_operand (operands[0], <MODE>mode) 696 && nonmemory_operand (operands[1], <MODE>mode)))" 697 "@ 698 ldr\t%0, %1 699 str\t%1, %0 700 mov\t%0.d, %1.d 701 * return aarch64_output_sve_mov_immediate (operands[1]);" 702) 703 704;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors 705;; or vectors for which little-endian ordering isn't acceptable. Memory 706;; accesses require secondary reloads. 707(define_insn "*aarch64_sve_mov<mode>_no_ldr_str" 708 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w") 709 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))] 710 "TARGET_SVE 711 && <MODE>mode != VNx16QImode 712 && (BYTES_BIG_ENDIAN 713 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))" 714 "@ 715 mov\t%0.d, %1.d 716 * return aarch64_output_sve_mov_immediate (operands[1]);" 717) 718 719;; Handle memory reloads for modes that can't use LDR and STR. We use 720;; byte PTRUE for all modes to try to encourage reuse. This pattern 721;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD. 722(define_expand "aarch64_sve_reload_mem" 723 [(parallel 724 [(set (match_operand 0) 725 (match_operand 1)) 726 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])] 727 "TARGET_SVE" 728 { 729 /* Create a PTRUE. */ 730 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); 731 732 /* Refer to the PTRUE in the appropriate mode for this move. */ 733 machine_mode mode = GET_MODE (operands[0]); 734 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]); 735 736 /* Emit a predicated load or store. */ 737 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]); 738 DONE; 739 } 740) 741 742;; A predicated move in which the predicate is known to be all-true. 743;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move, 744;; so changes to this pattern will need changes there as well. 745(define_insn_and_split "@aarch64_pred_mov<mode>" 746 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m") 747 (unspec:SVE_ALL 748 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 749 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")] 750 UNSPEC_PRED_X))] 751 "TARGET_SVE 752 && (register_operand (operands[0], <MODE>mode) 753 || register_operand (operands[2], <MODE>mode))" 754 "@ 755 # 756 ld1<Vesize>\t%0.<Vctype>, %1/z, %2 757 st1<Vesize>\t%2.<Vctype>, %1, %0" 758 "&& register_operand (operands[0], <MODE>mode) 759 && register_operand (operands[2], <MODE>mode)" 760 [(set (match_dup 0) (match_dup 2))] 761) 762 763;; A pattern for optimizing SUBREGs that have a reinterpreting effect 764;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move 765;; for details. We use a special predicate for operand 2 to reduce 766;; the number of patterns. 767(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be" 768 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w") 769 (unspec:SVE_ALL 770 [(match_operand:VNx16BI 1 "register_operand" "Upl") 771 (match_operand 2 "aarch64_any_register_operand" "w")] 772 UNSPEC_REV_SUBREG))] 773 "TARGET_SVE && BYTES_BIG_ENDIAN" 774 "#" 775 "&& reload_completed" 776 [(const_int 0)] 777 { 778 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]); 779 DONE; 780 } 781) 782 783;; Reinterpret operand 1 in operand 0's mode, without changing its contents. 784;; This is equivalent to a subreg on little-endian targets but not for 785;; big-endian; see the comment at the head of the file for details. 786(define_expand "@aarch64_sve_reinterpret<mode>" 787 [(set (match_operand:SVE_ALL 0 "register_operand") 788 (unspec:SVE_ALL 789 [(match_operand 1 "aarch64_any_register_operand")] 790 UNSPEC_REINTERPRET))] 791 "TARGET_SVE" 792 { 793 machine_mode src_mode = GET_MODE (operands[1]); 794 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS)) 795 { 796 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1])); 797 DONE; 798 } 799 } 800) 801 802;; A pattern for handling type punning on big-endian targets. We use a 803;; special predicate for operand 1 to reduce the number of patterns. 804(define_insn_and_split "*aarch64_sve_reinterpret<mode>" 805 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 806 (unspec:SVE_ALL 807 [(match_operand 1 "aarch64_any_register_operand" "w")] 808 UNSPEC_REINTERPRET))] 809 "TARGET_SVE" 810 "#" 811 "&& reload_completed" 812 [(set (match_dup 0) (match_dup 1))] 813 { 814 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode); 815 } 816) 817 818;; ------------------------------------------------------------------------- 819;; ---- Moves of multiple vectors 820;; ------------------------------------------------------------------------- 821;; All patterns in this section are synthetic and split to real 822;; instructions after reload. 823;; ------------------------------------------------------------------------- 824 825(define_expand "mov<mode>" 826 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand") 827 (match_operand:SVE_STRUCT 1 "general_operand"))] 828 "TARGET_SVE" 829 { 830 /* Big-endian loads and stores need to be done via LD1 and ST1; 831 see the comment at the head of the file for details. */ 832 if ((MEM_P (operands[0]) || MEM_P (operands[1])) 833 && BYTES_BIG_ENDIAN) 834 { 835 gcc_assert (can_create_pseudo_p ()); 836 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode); 837 DONE; 838 } 839 840 if (CONSTANT_P (operands[1])) 841 { 842 aarch64_expand_mov_immediate (operands[0], operands[1]); 843 DONE; 844 } 845 } 846) 847 848;; Unpredicated structure moves (little-endian). 849(define_insn "*aarch64_sve_mov<mode>_le" 850 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w") 851 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))] 852 "TARGET_SVE && !BYTES_BIG_ENDIAN" 853 "#" 854 [(set_attr "length" "<insn_length>")] 855) 856 857;; Unpredicated structure moves (big-endian). Memory accesses require 858;; secondary reloads. 859(define_insn "*aarch64_sve_mov<mode>_be" 860 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w") 861 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))] 862 "TARGET_SVE && BYTES_BIG_ENDIAN" 863 "#" 864 [(set_attr "length" "<insn_length>")] 865) 866 867;; Split unpredicated structure moves into pieces. This is the same 868;; for both big-endian and little-endian code, although it only needs 869;; to handle memory operands for little-endian code. 870(define_split 871 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand") 872 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))] 873 "TARGET_SVE && reload_completed" 874 [(const_int 0)] 875 { 876 rtx dest = operands[0]; 877 rtx src = operands[1]; 878 if (REG_P (dest) && REG_P (src)) 879 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>); 880 else 881 for (unsigned int i = 0; i < <vector_count>; ++i) 882 { 883 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode, 884 i * BYTES_PER_SVE_VECTOR); 885 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode, 886 i * BYTES_PER_SVE_VECTOR); 887 emit_insn (gen_rtx_SET (subdest, subsrc)); 888 } 889 DONE; 890 } 891) 892 893;; Predicated structure moves. This works for both endiannesses but in 894;; practice is only useful for big-endian. 895(define_insn_and_split "@aarch64_pred_mov<mode>" 896 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx") 897 (unspec:SVE_STRUCT 898 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 899 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")] 900 UNSPEC_PRED_X))] 901 "TARGET_SVE 902 && (register_operand (operands[0], <MODE>mode) 903 || register_operand (operands[2], <MODE>mode))" 904 "#" 905 "&& reload_completed" 906 [(const_int 0)] 907 { 908 for (unsigned int i = 0; i < <vector_count>; ++i) 909 { 910 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0], 911 <MODE>mode, 912 i * BYTES_PER_SVE_VECTOR); 913 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2], 914 <MODE>mode, 915 i * BYTES_PER_SVE_VECTOR); 916 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc); 917 } 918 DONE; 919 } 920 [(set_attr "length" "<insn_length>")] 921) 922 923;; ------------------------------------------------------------------------- 924;; ---- Moves of predicates 925;; ------------------------------------------------------------------------- 926;; Includes: 927;; - MOV 928;; - LDR 929;; - PFALSE 930;; - PTRUE 931;; - PTRUES 932;; - STR 933;; ------------------------------------------------------------------------- 934 935(define_expand "mov<mode>" 936 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand") 937 (match_operand:PRED_ALL 1 "general_operand"))] 938 "TARGET_SVE" 939 { 940 if (GET_CODE (operands[0]) == MEM) 941 operands[1] = force_reg (<MODE>mode, operands[1]); 942 943 if (CONSTANT_P (operands[1])) 944 { 945 aarch64_expand_mov_immediate (operands[0], operands[1]); 946 DONE; 947 } 948 } 949) 950 951(define_insn "*aarch64_sve_mov<mode>" 952 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa") 953 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))] 954 "TARGET_SVE 955 && (register_operand (operands[0], <MODE>mode) 956 || register_operand (operands[1], <MODE>mode))" 957 "@ 958 mov\t%0.b, %1.b 959 str\t%1, %0 960 ldr\t%0, %1 961 * return aarch64_output_sve_mov_immediate (operands[1]);" 962) 963 964;; Match PTRUES Pn.B when both the predicate and flags are useful. 965(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc" 966 [(set (reg:CC_NZC CC_REGNUM) 967 (unspec:CC_NZC 968 [(match_operand 2) 969 (match_operand 3) 970 (const_int SVE_KNOWN_PTRUE) 971 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" 972 [(unspec:VNx16BI 973 [(match_operand:SI 4 "const_int_operand") 974 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] 975 UNSPEC_PTRUE)])] 976 UNSPEC_PTEST)) 977 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 978 (match_dup 1))] 979 "TARGET_SVE" 980 { 981 return aarch64_output_sve_ptrues (operands[1]); 982 } 983 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" 984 { 985 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); 986 } 987) 988 989;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful. 990(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc" 991 [(set (reg:CC_NZC CC_REGNUM) 992 (unspec:CC_NZC 993 [(match_operand 2) 994 (match_operand 3) 995 (const_int SVE_KNOWN_PTRUE) 996 (subreg:PRED_HSD 997 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" 998 [(unspec:VNx16BI 999 [(match_operand:SI 4 "const_int_operand") 1000 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] 1001 UNSPEC_PTRUE)]) 0)] 1002 UNSPEC_PTEST)) 1003 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 1004 (match_dup 1))] 1005 "TARGET_SVE" 1006 { 1007 return aarch64_output_sve_ptrues (operands[1]); 1008 } 1009 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" 1010 { 1011 operands[2] = CONSTM1_RTX (VNx16BImode); 1012 operands[3] = CONSTM1_RTX (<MODE>mode); 1013 } 1014) 1015 1016;; Match PTRUES Pn.B when only the flags result is useful (which is 1017;; a way of testing VL). 1018(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest" 1019 [(set (reg:CC_NZC CC_REGNUM) 1020 (unspec:CC_NZC 1021 [(match_operand 2) 1022 (match_operand 3) 1023 (const_int SVE_KNOWN_PTRUE) 1024 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" 1025 [(unspec:VNx16BI 1026 [(match_operand:SI 4 "const_int_operand") 1027 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")] 1028 UNSPEC_PTRUE)])] 1029 UNSPEC_PTEST)) 1030 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 1031 "TARGET_SVE" 1032 { 1033 return aarch64_output_sve_ptrues (operands[1]); 1034 } 1035 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" 1036 { 1037 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode); 1038 } 1039) 1040 1041;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is 1042;; a way of testing VL). 1043(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest" 1044 [(set (reg:CC_NZC CC_REGNUM) 1045 (unspec:CC_NZC 1046 [(match_operand 2) 1047 (match_operand 3) 1048 (const_int SVE_KNOWN_PTRUE) 1049 (subreg:PRED_HSD 1050 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate" 1051 [(unspec:VNx16BI 1052 [(match_operand:SI 4 "const_int_operand") 1053 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")] 1054 UNSPEC_PTRUE)]) 0)] 1055 UNSPEC_PTEST)) 1056 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 1057 "TARGET_SVE" 1058 { 1059 return aarch64_output_sve_ptrues (operands[1]); 1060 } 1061 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))" 1062 { 1063 operands[2] = CONSTM1_RTX (VNx16BImode); 1064 operands[3] = CONSTM1_RTX (<MODE>mode); 1065 } 1066) 1067 1068;; ------------------------------------------------------------------------- 1069;; ---- Moves relating to the FFR 1070;; ------------------------------------------------------------------------- 1071;; RDFFR 1072;; RDFFRS 1073;; SETFFR 1074;; WRFFR 1075;; ------------------------------------------------------------------------- 1076 1077;; [W1 in the block comment above about FFR handling] 1078;; 1079;; Write to the FFR and start a new FFRT scheduling region. 1080(define_insn "aarch64_wrffr" 1081 [(set (reg:VNx16BI FFR_REGNUM) 1082 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa")) 1083 (set (reg:VNx16BI FFRT_REGNUM) 1084 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))] 1085 "TARGET_SVE" 1086 "@ 1087 setffr 1088 wrffr\t%0.b" 1089) 1090 1091;; [L2 in the block comment above about FFR handling] 1092;; 1093;; Introduce a read from and write to the FFR in the current FFRT region, 1094;; so that the FFR value is live on entry to the region and so that the FFR 1095;; value visibly changes within the region. This is used (possibly multiple 1096;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions. 1097(define_insn "aarch64_update_ffr_for_load" 1098 [(set (reg:VNx16BI FFR_REGNUM) 1099 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM) 1100 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))] 1101 "TARGET_SVE" 1102 "" 1103 [(set_attr "type" "no_insn")] 1104) 1105 1106;; [R1 in the block comment above about FFR handling] 1107;; 1108;; Notionally copy the FFR to the FFRT, so that the current FFR value 1109;; can be read from there by the RDFFR instructions below. This acts 1110;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and 1111;; creates a natural dependency with earlier writes. 1112(define_insn "aarch64_copy_ffr_to_ffrt" 1113 [(set (reg:VNx16BI FFRT_REGNUM) 1114 (reg:VNx16BI FFR_REGNUM))] 1115 "TARGET_SVE" 1116 "" 1117 [(set_attr "type" "no_insn")] 1118) 1119 1120;; [R2 in the block comment above about FFR handling] 1121;; 1122;; Read the FFR via the FFRT. 1123(define_insn "aarch64_rdffr" 1124 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 1125 (reg:VNx16BI FFRT_REGNUM))] 1126 "TARGET_SVE" 1127 "rdffr\t%0.b" 1128) 1129 1130;; Likewise with zero predication. 1131(define_insn "aarch64_rdffr_z" 1132 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 1133 (and:VNx16BI 1134 (reg:VNx16BI FFRT_REGNUM) 1135 (match_operand:VNx16BI 1 "register_operand" "Upa")))] 1136 "TARGET_SVE" 1137 "rdffr\t%0.b, %1/z" 1138) 1139 1140;; Read the FFR to test for a fault, without using the predicate result. 1141(define_insn "*aarch64_rdffr_z_ptest" 1142 [(set (reg:CC_NZC CC_REGNUM) 1143 (unspec:CC_NZC 1144 [(match_operand:VNx16BI 1 "register_operand" "Upa") 1145 (match_dup 1) 1146 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 1147 (and:VNx16BI 1148 (reg:VNx16BI FFRT_REGNUM) 1149 (match_dup 1))] 1150 UNSPEC_PTEST)) 1151 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 1152 "TARGET_SVE" 1153 "rdffrs\t%0.b, %1/z" 1154) 1155 1156;; Same for unpredicated RDFFR when tested with a known PTRUE. 1157(define_insn "*aarch64_rdffr_ptest" 1158 [(set (reg:CC_NZC CC_REGNUM) 1159 (unspec:CC_NZC 1160 [(match_operand:VNx16BI 1 "register_operand" "Upa") 1161 (match_dup 1) 1162 (const_int SVE_KNOWN_PTRUE) 1163 (reg:VNx16BI FFRT_REGNUM)] 1164 UNSPEC_PTEST)) 1165 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 1166 "TARGET_SVE" 1167 "rdffrs\t%0.b, %1/z" 1168) 1169 1170;; Read the FFR with zero predication and test the result. 1171(define_insn "*aarch64_rdffr_z_cc" 1172 [(set (reg:CC_NZC CC_REGNUM) 1173 (unspec:CC_NZC 1174 [(match_operand:VNx16BI 1 "register_operand" "Upa") 1175 (match_dup 1) 1176 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 1177 (and:VNx16BI 1178 (reg:VNx16BI FFRT_REGNUM) 1179 (match_dup 1))] 1180 UNSPEC_PTEST)) 1181 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 1182 (and:VNx16BI 1183 (reg:VNx16BI FFRT_REGNUM) 1184 (match_dup 1)))] 1185 "TARGET_SVE" 1186 "rdffrs\t%0.b, %1/z" 1187) 1188 1189;; Same for unpredicated RDFFR when tested with a known PTRUE. 1190(define_insn "*aarch64_rdffr_cc" 1191 [(set (reg:CC_NZC CC_REGNUM) 1192 (unspec:CC_NZC 1193 [(match_operand:VNx16BI 1 "register_operand" "Upa") 1194 (match_dup 1) 1195 (const_int SVE_KNOWN_PTRUE) 1196 (reg:VNx16BI FFRT_REGNUM)] 1197 UNSPEC_PTEST)) 1198 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 1199 (reg:VNx16BI FFRT_REGNUM))] 1200 "TARGET_SVE" 1201 "rdffrs\t%0.b, %1/z" 1202) 1203 1204;; [R3 in the block comment above about FFR handling] 1205;; 1206;; Arbitrarily update the FFRT after a read from the FFR. This acts as 1207;; a scheduling barrier for later LDFF1 and LDNF1 instructions. 1208(define_insn "aarch64_update_ffrt" 1209 [(set (reg:VNx16BI FFRT_REGNUM) 1210 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))] 1211 "TARGET_SVE" 1212 "" 1213 [(set_attr "type" "no_insn")] 1214) 1215 1216;; ========================================================================= 1217;; == Loads 1218;; ========================================================================= 1219 1220;; ------------------------------------------------------------------------- 1221;; ---- Normal contiguous loads 1222;; ------------------------------------------------------------------------- 1223;; Includes contiguous forms of: 1224;; - LD1B 1225;; - LD1D 1226;; - LD1H 1227;; - LD1W 1228;; - LD2B 1229;; - LD2D 1230;; - LD2H 1231;; - LD2W 1232;; - LD3B 1233;; - LD3D 1234;; - LD3H 1235;; - LD3W 1236;; - LD4B 1237;; - LD4D 1238;; - LD4H 1239;; - LD4W 1240;; ------------------------------------------------------------------------- 1241 1242;; Predicated LD1. 1243(define_insn "maskload<mode><vpred>" 1244 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 1245 (unspec:SVE_ALL 1246 [(match_operand:<VPRED> 2 "register_operand" "Upl") 1247 (match_operand:SVE_ALL 1 "memory_operand" "m")] 1248 UNSPEC_LD1_SVE))] 1249 "TARGET_SVE" 1250 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1" 1251) 1252 1253;; Unpredicated LD[234]. 1254(define_expand "vec_load_lanes<mode><vsingle>" 1255 [(set (match_operand:SVE_STRUCT 0 "register_operand") 1256 (unspec:SVE_STRUCT 1257 [(match_dup 2) 1258 (match_operand:SVE_STRUCT 1 "memory_operand")] 1259 UNSPEC_LDN))] 1260 "TARGET_SVE" 1261 { 1262 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 1263 } 1264) 1265 1266;; Predicated LD[234]. 1267(define_insn "vec_mask_load_lanes<mode><vsingle>" 1268 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w") 1269 (unspec:SVE_STRUCT 1270 [(match_operand:<VPRED> 2 "register_operand" "Upl") 1271 (match_operand:SVE_STRUCT 1 "memory_operand" "m")] 1272 UNSPEC_LDN))] 1273 "TARGET_SVE" 1274 "ld<vector_count><Vesize>\t%0, %2/z, %1" 1275) 1276 1277;; ------------------------------------------------------------------------- 1278;; ---- Extending contiguous loads 1279;; ------------------------------------------------------------------------- 1280;; Includes contiguous forms of: 1281;; LD1B 1282;; LD1H 1283;; LD1SB 1284;; LD1SH 1285;; LD1SW 1286;; LD1W 1287;; ------------------------------------------------------------------------- 1288 1289;; Predicated load and extend, with 8 elements per 128-bit block. 1290(define_insn_and_rewrite "@aarch64_load<SVE_PRED_LOAD:pred_load>_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>" 1291 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") 1292 (unspec:SVE_HSDI 1293 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm") 1294 (ANY_EXTEND:SVE_HSDI 1295 (unspec:SVE_PARTIAL_I 1296 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl") 1297 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")] 1298 SVE_PRED_LOAD))] 1299 UNSPEC_PRED_X))] 1300 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 1301 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1" 1302 "&& !CONSTANT_P (operands[3])" 1303 { 1304 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode); 1305 } 1306) 1307 1308;; ------------------------------------------------------------------------- 1309;; ---- First-faulting contiguous loads 1310;; ------------------------------------------------------------------------- 1311;; Includes contiguous forms of: 1312;; - LDFF1B 1313;; - LDFF1D 1314;; - LDFF1H 1315;; - LDFF1W 1316;; - LDNF1B 1317;; - LDNF1D 1318;; - LDNF1H 1319;; - LDNF1W 1320;; ------------------------------------------------------------------------- 1321 1322;; Contiguous non-extending first-faulting or non-faulting loads. 1323(define_insn "@aarch64_ld<fn>f1<mode>" 1324 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 1325 (unspec:SVE_FULL 1326 [(match_operand:<VPRED> 2 "register_operand" "Upl") 1327 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") 1328 (reg:VNx16BI FFRT_REGNUM)] 1329 SVE_LDFF1_LDNF1))] 1330 "TARGET_SVE" 1331 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1" 1332) 1333 1334;; ------------------------------------------------------------------------- 1335;; ---- First-faulting extending contiguous loads 1336;; ------------------------------------------------------------------------- 1337;; Includes contiguous forms of: 1338;; - LDFF1B 1339;; - LDFF1H 1340;; - LDFF1SB 1341;; - LDFF1SH 1342;; - LDFF1SW 1343;; - LDFF1W 1344;; - LDNF1B 1345;; - LDNF1H 1346;; - LDNF1SB 1347;; - LDNF1SH 1348;; - LDNF1SW 1349;; - LDNF1W 1350;; ------------------------------------------------------------------------- 1351 1352;; Predicated first-faulting or non-faulting load and extend. 1353(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>" 1354 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w") 1355 (unspec:SVE_HSDI 1356 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm") 1357 (ANY_EXTEND:SVE_HSDI 1358 (unspec:SVE_PARTIAL_I 1359 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl") 1360 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>") 1361 (reg:VNx16BI FFRT_REGNUM)] 1362 SVE_LDFF1_LDNF1))] 1363 UNSPEC_PRED_X))] 1364 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 1365 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1" 1366 "&& !CONSTANT_P (operands[3])" 1367 { 1368 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode); 1369 } 1370) 1371 1372;; ------------------------------------------------------------------------- 1373;; ---- Non-temporal contiguous loads 1374;; ------------------------------------------------------------------------- 1375;; Includes: 1376;; - LDNT1B 1377;; - LDNT1D 1378;; - LDNT1H 1379;; - LDNT1W 1380;; ------------------------------------------------------------------------- 1381 1382;; Predicated contiguous non-temporal load. 1383(define_insn "@aarch64_ldnt1<mode>" 1384 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 1385 (unspec:SVE_FULL 1386 [(match_operand:<VPRED> 2 "register_operand" "Upl") 1387 (match_operand:SVE_FULL 1 "memory_operand" "m")] 1388 UNSPEC_LDNT1_SVE))] 1389 "TARGET_SVE" 1390 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1" 1391) 1392 1393;; ------------------------------------------------------------------------- 1394;; ---- Normal gather loads 1395;; ------------------------------------------------------------------------- 1396;; Includes gather forms of: 1397;; - LD1D 1398;; - LD1W 1399;; ------------------------------------------------------------------------- 1400 1401;; Unpredicated gather loads. 1402(define_expand "gather_load<mode><v_int_container>" 1403 [(set (match_operand:SVE_24 0 "register_operand") 1404 (unspec:SVE_24 1405 [(match_dup 5) 1406 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>") 1407 (match_operand:<V_INT_CONTAINER> 2 "register_operand") 1408 (match_operand:DI 3 "const_int_operand") 1409 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>") 1410 (mem:BLK (scratch))] 1411 UNSPEC_LD1_GATHER))] 1412 "TARGET_SVE" 1413 { 1414 operands[5] = aarch64_ptrue_reg (<VPRED>mode); 1415 } 1416) 1417 1418;; Predicated gather loads for 32-bit elements. Operand 3 is true for 1419;; unsigned extension and false for signed extension. 1420(define_insn "mask_gather_load<mode><v_int_container>" 1421 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w") 1422 (unspec:SVE_4 1423 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 1424 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk") 1425 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") 1426 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") 1427 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 1428 (mem:BLK (scratch))] 1429 UNSPEC_LD1_GATHER))] 1430 "TARGET_SVE" 1431 "@ 1432 ld1<Vesize>\t%0.s, %5/z, [%2.s] 1433 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1] 1434 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] 1435 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] 1436 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] 1437 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" 1438) 1439 1440;; Predicated gather loads for 64-bit elements. The value of operand 3 1441;; doesn't matter in this case. 1442(define_insn "mask_gather_load<mode><v_int_container>" 1443 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w") 1444 (unspec:SVE_2 1445 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 1446 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk") 1447 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") 1448 (match_operand:DI 3 "const_int_operand") 1449 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") 1450 (mem:BLK (scratch))] 1451 UNSPEC_LD1_GATHER))] 1452 "TARGET_SVE" 1453 "@ 1454 ld1<Vesize>\t%0.d, %5/z, [%2.d] 1455 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1] 1456 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d] 1457 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]" 1458) 1459 1460;; Likewise, but with the offset being extended from 32 bits. 1461(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked" 1462 [(set (match_operand:SVE_2 0 "register_operand" "=w, w") 1463 (unspec:SVE_2 1464 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1465 (match_operand:DI 1 "register_operand" "rk, rk") 1466 (unspec:VNx2DI 1467 [(match_operand 6) 1468 (ANY_EXTEND:VNx2DI 1469 (match_operand:VNx2SI 2 "register_operand" "w, w"))] 1470 UNSPEC_PRED_X) 1471 (match_operand:DI 3 "const_int_operand") 1472 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 1473 (mem:BLK (scratch))] 1474 UNSPEC_LD1_GATHER))] 1475 "TARGET_SVE" 1476 "@ 1477 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw] 1478 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]" 1479 "&& !CONSTANT_P (operands[6])" 1480 { 1481 operands[6] = CONSTM1_RTX (VNx2BImode); 1482 } 1483) 1484 1485;; Likewise, but with the offset being truncated to 32 bits and then 1486;; sign-extended. 1487(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw" 1488 [(set (match_operand:SVE_2 0 "register_operand" "=w, w") 1489 (unspec:SVE_2 1490 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1491 (match_operand:DI 1 "register_operand" "rk, rk") 1492 (unspec:VNx2DI 1493 [(match_operand 6) 1494 (sign_extend:VNx2DI 1495 (truncate:VNx2SI 1496 (match_operand:VNx2DI 2 "register_operand" "w, w")))] 1497 UNSPEC_PRED_X) 1498 (match_operand:DI 3 "const_int_operand") 1499 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 1500 (mem:BLK (scratch))] 1501 UNSPEC_LD1_GATHER))] 1502 "TARGET_SVE" 1503 "@ 1504 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] 1505 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" 1506 "&& !CONSTANT_P (operands[6])" 1507 { 1508 operands[6] = CONSTM1_RTX (VNx2BImode); 1509 } 1510) 1511 1512;; Likewise, but with the offset being truncated to 32 bits and then 1513;; zero-extended. 1514(define_insn "*mask_gather_load<mode><v_int_container>_uxtw" 1515 [(set (match_operand:SVE_2 0 "register_operand" "=w, w") 1516 (unspec:SVE_2 1517 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1518 (match_operand:DI 1 "register_operand" "rk, rk") 1519 (and:VNx2DI 1520 (match_operand:VNx2DI 2 "register_operand" "w, w") 1521 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 1522 (match_operand:DI 3 "const_int_operand") 1523 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 1524 (mem:BLK (scratch))] 1525 UNSPEC_LD1_GATHER))] 1526 "TARGET_SVE" 1527 "@ 1528 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] 1529 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" 1530) 1531 1532;; ------------------------------------------------------------------------- 1533;; ---- Extending gather loads 1534;; ------------------------------------------------------------------------- 1535;; Includes gather forms of: 1536;; - LD1B 1537;; - LD1H 1538;; - LD1SB 1539;; - LD1SH 1540;; - LD1SW 1541;; - LD1W 1542;; ------------------------------------------------------------------------- 1543 1544;; Predicated extending gather loads for 32-bit elements. Operand 3 is 1545;; true for unsigned extension and false for signed extension. 1546(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>" 1547 [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w") 1548 (unspec:SVE_4HSI 1549 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm") 1550 (ANY_EXTEND:SVE_4HSI 1551 (unspec:SVE_4BHI 1552 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 1553 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk") 1554 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") 1555 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") 1556 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 1557 (mem:BLK (scratch))] 1558 UNSPEC_LD1_GATHER))] 1559 UNSPEC_PRED_X))] 1560 "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0" 1561 "@ 1562 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s] 1563 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1] 1564 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] 1565 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] 1566 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] 1567 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" 1568 "&& !CONSTANT_P (operands[6])" 1569 { 1570 operands[6] = CONSTM1_RTX (VNx4BImode); 1571 } 1572) 1573 1574;; Predicated extending gather loads for 64-bit elements. The value of 1575;; operand 3 doesn't matter in this case. 1576(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>" 1577 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w") 1578 (unspec:SVE_2HSDI 1579 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm") 1580 (ANY_EXTEND:SVE_2HSDI 1581 (unspec:SVE_2BHSI 1582 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 1583 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk") 1584 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") 1585 (match_operand:DI 3 "const_int_operand") 1586 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i") 1587 (mem:BLK (scratch))] 1588 UNSPEC_LD1_GATHER))] 1589 UNSPEC_PRED_X))] 1590 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" 1591 "@ 1592 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d] 1593 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1] 1594 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d] 1595 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]" 1596 "&& !CONSTANT_P (operands[6])" 1597 { 1598 operands[6] = CONSTM1_RTX (VNx2BImode); 1599 } 1600) 1601 1602;; Likewise, but with the offset being extended from 32 bits. 1603(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked" 1604 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") 1605 (unspec:SVE_2HSDI 1606 [(match_operand 6) 1607 (ANY_EXTEND:SVE_2HSDI 1608 (unspec:SVE_2BHSI 1609 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1610 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") 1611 (unspec:VNx2DI 1612 [(match_operand 7) 1613 (ANY_EXTEND2:VNx2DI 1614 (match_operand:VNx2SI 2 "register_operand" "w, w"))] 1615 UNSPEC_PRED_X) 1616 (match_operand:DI 3 "const_int_operand") 1617 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i") 1618 (mem:BLK (scratch))] 1619 UNSPEC_LD1_GATHER))] 1620 UNSPEC_PRED_X))] 1621 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" 1622 "@ 1623 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw] 1624 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]" 1625 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" 1626 { 1627 operands[6] = CONSTM1_RTX (VNx2BImode); 1628 operands[7] = CONSTM1_RTX (VNx2BImode); 1629 } 1630) 1631 1632;; Likewise, but with the offset being truncated to 32 bits and then 1633;; sign-extended. 1634(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw" 1635 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") 1636 (unspec:SVE_2HSDI 1637 [(match_operand 6) 1638 (ANY_EXTEND:SVE_2HSDI 1639 (unspec:SVE_2BHSI 1640 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1641 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") 1642 (unspec:VNx2DI 1643 [(match_operand 7) 1644 (sign_extend:VNx2DI 1645 (truncate:VNx2SI 1646 (match_operand:VNx2DI 2 "register_operand" "w, w")))] 1647 UNSPEC_PRED_X) 1648 (match_operand:DI 3 "const_int_operand") 1649 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i") 1650 (mem:BLK (scratch))] 1651 UNSPEC_LD1_GATHER))] 1652 UNSPEC_PRED_X))] 1653 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" 1654 "@ 1655 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] 1656 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" 1657 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" 1658 { 1659 operands[6] = CONSTM1_RTX (VNx2BImode); 1660 operands[7] = CONSTM1_RTX (VNx2BImode); 1661 } 1662) 1663 1664;; Likewise, but with the offset being truncated to 32 bits and then 1665;; zero-extended. 1666(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw" 1667 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w") 1668 (unspec:SVE_2HSDI 1669 [(match_operand 7) 1670 (ANY_EXTEND:SVE_2HSDI 1671 (unspec:SVE_2BHSI 1672 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1673 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") 1674 (and:VNx2DI 1675 (match_operand:VNx2DI 2 "register_operand" "w, w") 1676 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 1677 (match_operand:DI 3 "const_int_operand") 1678 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i") 1679 (mem:BLK (scratch))] 1680 UNSPEC_LD1_GATHER))] 1681 UNSPEC_PRED_X))] 1682 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0" 1683 "@ 1684 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] 1685 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" 1686 "&& !CONSTANT_P (operands[7])" 1687 { 1688 operands[7] = CONSTM1_RTX (VNx2BImode); 1689 } 1690) 1691 1692;; ------------------------------------------------------------------------- 1693;; ---- First-faulting gather loads 1694;; ------------------------------------------------------------------------- 1695;; Includes gather forms of: 1696;; - LDFF1D 1697;; - LDFF1W 1698;; ------------------------------------------------------------------------- 1699 1700;; Predicated first-faulting gather loads for 32-bit elements. Operand 1701;; 3 is true for unsigned extension and false for signed extension. 1702(define_insn "@aarch64_ldff1_gather<mode>" 1703 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w") 1704 (unspec:SVE_FULL_S 1705 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 1706 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk") 1707 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") 1708 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") 1709 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i") 1710 (mem:BLK (scratch)) 1711 (reg:VNx16BI FFRT_REGNUM)] 1712 UNSPEC_LDFF1_GATHER))] 1713 "TARGET_SVE" 1714 "@ 1715 ldff1w\t%0.s, %5/z, [%2.s] 1716 ldff1w\t%0.s, %5/z, [%2.s, #%1] 1717 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw] 1718 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw] 1719 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4] 1720 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" 1721) 1722 1723;; Predicated first-faulting gather loads for 64-bit elements. The value 1724;; of operand 3 doesn't matter in this case. 1725(define_insn "@aarch64_ldff1_gather<mode>" 1726 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w") 1727 (unspec:SVE_FULL_D 1728 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 1729 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk") 1730 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") 1731 (match_operand:DI 3 "const_int_operand") 1732 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i") 1733 (mem:BLK (scratch)) 1734 (reg:VNx16BI FFRT_REGNUM)] 1735 UNSPEC_LDFF1_GATHER))] 1736 "TARGET_SVE" 1737 "@ 1738 ldff1d\t%0.d, %5/z, [%2.d] 1739 ldff1d\t%0.d, %5/z, [%2.d, #%1] 1740 ldff1d\t%0.d, %5/z, [%1, %2.d] 1741 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]" 1742) 1743 1744;; Likewise, but with the offset being sign-extended from 32 bits. 1745(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw" 1746 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") 1747 (unspec:SVE_FULL_D 1748 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1749 (match_operand:DI 1 "register_operand" "rk, rk") 1750 (unspec:VNx2DI 1751 [(match_operand 6) 1752 (sign_extend:VNx2DI 1753 (truncate:VNx2SI 1754 (match_operand:VNx2DI 2 "register_operand" "w, w")))] 1755 UNSPEC_PRED_X) 1756 (match_operand:DI 3 "const_int_operand") 1757 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") 1758 (mem:BLK (scratch)) 1759 (reg:VNx16BI FFRT_REGNUM)] 1760 UNSPEC_LDFF1_GATHER))] 1761 "TARGET_SVE" 1762 "@ 1763 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw] 1764 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" 1765 "&& !CONSTANT_P (operands[6])" 1766 { 1767 operands[6] = CONSTM1_RTX (VNx2BImode); 1768 } 1769) 1770 1771;; Likewise, but with the offset being zero-extended from 32 bits. 1772(define_insn "*aarch64_ldff1_gather<mode>_uxtw" 1773 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w") 1774 (unspec:SVE_FULL_D 1775 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1776 (match_operand:DI 1 "register_operand" "rk, rk") 1777 (and:VNx2DI 1778 (match_operand:VNx2DI 2 "register_operand" "w, w") 1779 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 1780 (match_operand:DI 3 "const_int_operand") 1781 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i") 1782 (mem:BLK (scratch)) 1783 (reg:VNx16BI FFRT_REGNUM)] 1784 UNSPEC_LDFF1_GATHER))] 1785 "TARGET_SVE" 1786 "@ 1787 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw] 1788 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" 1789) 1790 1791;; ------------------------------------------------------------------------- 1792;; ---- First-faulting extending gather loads 1793;; ------------------------------------------------------------------------- 1794;; Includes gather forms of: 1795;; - LDFF1B 1796;; - LDFF1H 1797;; - LDFF1SB 1798;; - LDFF1SH 1799;; - LDFF1SW 1800;; - LDFF1W 1801;; ------------------------------------------------------------------------- 1802 1803;; Predicated extending first-faulting gather loads for 32-bit elements. 1804;; Operand 3 is true for unsigned extension and false for signed extension. 1805(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>" 1806 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w") 1807 (unspec:VNx4_WIDE 1808 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm") 1809 (ANY_EXTEND:VNx4_WIDE 1810 (unspec:VNx4_NARROW 1811 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 1812 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk") 1813 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w") 1814 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") 1815 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 1816 (mem:BLK (scratch)) 1817 (reg:VNx16BI FFRT_REGNUM)] 1818 UNSPEC_LDFF1_GATHER))] 1819 UNSPEC_PRED_X))] 1820 "TARGET_SVE" 1821 "@ 1822 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s] 1823 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1] 1824 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw] 1825 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw] 1826 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4] 1827 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]" 1828 "&& !CONSTANT_P (operands[6])" 1829 { 1830 operands[6] = CONSTM1_RTX (VNx4BImode); 1831 } 1832) 1833 1834;; Predicated extending first-faulting gather loads for 64-bit elements. 1835;; The value of operand 3 doesn't matter in this case. 1836(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>" 1837 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w") 1838 (unspec:VNx2_WIDE 1839 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm") 1840 (ANY_EXTEND:VNx2_WIDE 1841 (unspec:VNx2_NARROW 1842 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 1843 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk") 1844 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w") 1845 (match_operand:DI 3 "const_int_operand") 1846 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i") 1847 (mem:BLK (scratch)) 1848 (reg:VNx16BI FFRT_REGNUM)] 1849 UNSPEC_LDFF1_GATHER))] 1850 UNSPEC_PRED_X))] 1851 "TARGET_SVE" 1852 "@ 1853 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d] 1854 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1] 1855 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d] 1856 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]" 1857 "&& !CONSTANT_P (operands[6])" 1858 { 1859 operands[6] = CONSTM1_RTX (VNx2BImode); 1860 } 1861) 1862 1863;; Likewise, but with the offset being sign-extended from 32 bits. 1864(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw" 1865 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") 1866 (unspec:VNx2_WIDE 1867 [(match_operand 6) 1868 (ANY_EXTEND:VNx2_WIDE 1869 (unspec:VNx2_NARROW 1870 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1871 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") 1872 (unspec:VNx2DI 1873 [(match_operand 7) 1874 (sign_extend:VNx2DI 1875 (truncate:VNx2SI 1876 (match_operand:VNx2DI 2 "register_operand" "w, w")))] 1877 UNSPEC_PRED_X) 1878 (match_operand:DI 3 "const_int_operand") 1879 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i") 1880 (mem:BLK (scratch)) 1881 (reg:VNx16BI FFRT_REGNUM)] 1882 UNSPEC_LDFF1_GATHER))] 1883 UNSPEC_PRED_X))] 1884 "TARGET_SVE" 1885 "@ 1886 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw] 1887 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]" 1888 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))" 1889 { 1890 operands[6] = CONSTM1_RTX (VNx2BImode); 1891 operands[7] = CONSTM1_RTX (VNx2BImode); 1892 } 1893) 1894 1895;; Likewise, but with the offset being zero-extended from 32 bits. 1896(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw" 1897 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w") 1898 (unspec:VNx2_WIDE 1899 [(match_operand 7) 1900 (ANY_EXTEND:VNx2_WIDE 1901 (unspec:VNx2_NARROW 1902 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 1903 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk") 1904 (and:VNx2DI 1905 (match_operand:VNx2DI 2 "register_operand" "w, w") 1906 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 1907 (match_operand:DI 3 "const_int_operand") 1908 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i") 1909 (mem:BLK (scratch)) 1910 (reg:VNx16BI FFRT_REGNUM)] 1911 UNSPEC_LDFF1_GATHER))] 1912 UNSPEC_PRED_X))] 1913 "TARGET_SVE" 1914 "@ 1915 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw] 1916 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]" 1917 "&& !CONSTANT_P (operands[7])" 1918 { 1919 operands[7] = CONSTM1_RTX (VNx2BImode); 1920 } 1921) 1922 1923;; ========================================================================= 1924;; == Prefetches 1925;; ========================================================================= 1926 1927;; ------------------------------------------------------------------------- 1928;; ---- Contiguous prefetches 1929;; ------------------------------------------------------------------------- 1930;; Includes contiguous forms of: 1931;; - PRFB 1932;; - PRFD 1933;; - PRFH 1934;; - PRFW 1935;; ------------------------------------------------------------------------- 1936 1937;; Contiguous predicated prefetches. Operand 2 gives the real prefetch 1938;; operation (as an svprfop), with operands 3 and 4 providing distilled 1939;; information. 1940(define_insn "@aarch64_sve_prefetch<mode>" 1941 [(prefetch (unspec:DI 1942 [(match_operand:<VPRED> 0 "register_operand" "Upl") 1943 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>") 1944 (match_operand:DI 2 "const_int_operand")] 1945 UNSPEC_SVE_PREFETCH) 1946 (match_operand:DI 3 "const_int_operand") 1947 (match_operand:DI 4 "const_int_operand"))] 1948 "TARGET_SVE" 1949 { 1950 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]); 1951 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1"); 1952 } 1953) 1954 1955;; ------------------------------------------------------------------------- 1956;; ---- Gather prefetches 1957;; ------------------------------------------------------------------------- 1958;; Includes gather forms of: 1959;; - PRFB 1960;; - PRFD 1961;; - PRFH 1962;; - PRFW 1963;; ------------------------------------------------------------------------- 1964 1965;; Predicated gather prefetches for 32-bit bases and offsets. The operands 1966;; are: 1967;; 0: the governing predicate 1968;; 1: the scalar component of the address 1969;; 2: the vector component of the address 1970;; 3: 1 for zero extension, 0 for sign extension 1971;; 4: the scale multiplier 1972;; 5: a vector zero that identifies the mode of data being accessed 1973;; 6: the prefetch operator (an svprfop) 1974;; 7: the normal RTL prefetch rw flag 1975;; 8: the normal RTL prefetch locality value 1976(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>" 1977 [(prefetch (unspec:DI 1978 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 1979 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk") 1980 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w") 1981 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1") 1982 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 1983 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") 1984 (match_operand:DI 6 "const_int_operand")] 1985 UNSPEC_SVE_PREFETCH_GATHER) 1986 (match_operand:DI 7 "const_int_operand") 1987 (match_operand:DI 8 "const_int_operand"))] 1988 "TARGET_SVE" 1989 { 1990 static const char *const insns[][2] = { 1991 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]", 1992 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]", 1993 "prfb", "%0, [%1, %2.s, sxtw]", 1994 "prfb", "%0, [%1, %2.s, uxtw]", 1995 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]", 1996 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]" 1997 }; 1998 const char *const *parts = insns[which_alternative]; 1999 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); 2000 } 2001) 2002 2003;; Predicated gather prefetches for 64-bit elements. The value of operand 3 2004;; doesn't matter in this case. 2005(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>" 2006 [(prefetch (unspec:DI 2007 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl") 2008 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk") 2009 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w") 2010 (match_operand:DI 3 "const_int_operand") 2011 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i") 2012 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") 2013 (match_operand:DI 6 "const_int_operand")] 2014 UNSPEC_SVE_PREFETCH_GATHER) 2015 (match_operand:DI 7 "const_int_operand") 2016 (match_operand:DI 8 "const_int_operand"))] 2017 "TARGET_SVE" 2018 { 2019 static const char *const insns[][2] = { 2020 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]", 2021 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]", 2022 "prfb", "%0, [%1, %2.d]", 2023 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]" 2024 }; 2025 const char *const *parts = insns[which_alternative]; 2026 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); 2027 } 2028) 2029 2030;; Likewise, but with the offset being sign-extended from 32 bits. 2031(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw" 2032 [(prefetch (unspec:DI 2033 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") 2034 (match_operand:DI 1 "register_operand" "rk, rk") 2035 (unspec:VNx2DI_ONLY 2036 [(match_operand 9) 2037 (sign_extend:VNx2DI 2038 (truncate:VNx2SI 2039 (match_operand:VNx2DI 2 "register_operand" "w, w")))] 2040 UNSPEC_PRED_X) 2041 (match_operand:DI 3 "const_int_operand") 2042 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i") 2043 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") 2044 (match_operand:DI 6 "const_int_operand")] 2045 UNSPEC_SVE_PREFETCH_GATHER) 2046 (match_operand:DI 7 "const_int_operand") 2047 (match_operand:DI 8 "const_int_operand"))] 2048 "TARGET_SVE" 2049 { 2050 static const char *const insns[][2] = { 2051 "prfb", "%0, [%1, %2.d, sxtw]", 2052 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]" 2053 }; 2054 const char *const *parts = insns[which_alternative]; 2055 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); 2056 } 2057 "&& !rtx_equal_p (operands[0], operands[9])" 2058 { 2059 operands[9] = copy_rtx (operands[0]); 2060 } 2061) 2062 2063;; Likewise, but with the offset being zero-extended from 32 bits. 2064(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw" 2065 [(prefetch (unspec:DI 2066 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl") 2067 (match_operand:DI 1 "register_operand" "rk, rk") 2068 (and:VNx2DI_ONLY 2069 (match_operand:VNx2DI 2 "register_operand" "w, w") 2070 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate")) 2071 (match_operand:DI 3 "const_int_operand") 2072 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i") 2073 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero") 2074 (match_operand:DI 6 "const_int_operand")] 2075 UNSPEC_SVE_PREFETCH_GATHER) 2076 (match_operand:DI 7 "const_int_operand") 2077 (match_operand:DI 8 "const_int_operand"))] 2078 "TARGET_SVE" 2079 { 2080 static const char *const insns[][2] = { 2081 "prfb", "%0, [%1, %2.d, uxtw]", 2082 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]" 2083 }; 2084 const char *const *parts = insns[which_alternative]; 2085 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]); 2086 } 2087) 2088 2089;; ========================================================================= 2090;; == Stores 2091;; ========================================================================= 2092 2093;; ------------------------------------------------------------------------- 2094;; ---- Normal contiguous stores 2095;; ------------------------------------------------------------------------- 2096;; Includes contiguous forms of: 2097;; - ST1B 2098;; - ST1D 2099;; - ST1H 2100;; - ST1W 2101;; - ST2B 2102;; - ST2D 2103;; - ST2H 2104;; - ST2W 2105;; - ST3B 2106;; - ST3D 2107;; - ST3H 2108;; - ST3W 2109;; - ST4B 2110;; - ST4D 2111;; - ST4H 2112;; - ST4W 2113;; ------------------------------------------------------------------------- 2114 2115;; Predicated ST1. 2116(define_insn "maskstore<mode><vpred>" 2117 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m") 2118 (unspec:SVE_ALL 2119 [(match_operand:<VPRED> 2 "register_operand" "Upl") 2120 (match_operand:SVE_ALL 1 "register_operand" "w") 2121 (match_dup 0)] 2122 UNSPEC_ST1_SVE))] 2123 "TARGET_SVE" 2124 "st1<Vesize>\t%1.<Vctype>, %2, %0" 2125) 2126 2127;; Unpredicated ST[234]. This is always a full update, so the dependence 2128;; on the old value of the memory location (via (match_dup 0)) is redundant. 2129;; There doesn't seem to be any obvious benefit to treating the all-true 2130;; case differently though. In particular, it's very unlikely that we'll 2131;; only find out during RTL that a store_lanes is dead. 2132(define_expand "vec_store_lanes<mode><vsingle>" 2133 [(set (match_operand:SVE_STRUCT 0 "memory_operand") 2134 (unspec:SVE_STRUCT 2135 [(match_dup 2) 2136 (match_operand:SVE_STRUCT 1 "register_operand") 2137 (match_dup 0)] 2138 UNSPEC_STN))] 2139 "TARGET_SVE" 2140 { 2141 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 2142 } 2143) 2144 2145;; Predicated ST[234]. 2146(define_insn "vec_mask_store_lanes<mode><vsingle>" 2147 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m") 2148 (unspec:SVE_STRUCT 2149 [(match_operand:<VPRED> 2 "register_operand" "Upl") 2150 (match_operand:SVE_STRUCT 1 "register_operand" "w") 2151 (match_dup 0)] 2152 UNSPEC_STN))] 2153 "TARGET_SVE" 2154 "st<vector_count><Vesize>\t%1, %2, %0" 2155) 2156 2157;; ------------------------------------------------------------------------- 2158;; ---- Truncating contiguous stores 2159;; ------------------------------------------------------------------------- 2160;; Includes: 2161;; - ST1B 2162;; - ST1H 2163;; - ST1W 2164;; ------------------------------------------------------------------------- 2165 2166;; Predicated truncate and store, with 8 elements per 128-bit block. 2167(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>" 2168 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m") 2169 (unspec:VNx8_NARROW 2170 [(match_operand:VNx8BI 2 "register_operand" "Upl") 2171 (truncate:VNx8_NARROW 2172 (match_operand:VNx8_WIDE 1 "register_operand" "w")) 2173 (match_dup 0)] 2174 UNSPEC_ST1_SVE))] 2175 "TARGET_SVE" 2176 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0" 2177) 2178 2179;; Predicated truncate and store, with 4 elements per 128-bit block. 2180(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>" 2181 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m") 2182 (unspec:VNx4_NARROW 2183 [(match_operand:VNx4BI 2 "register_operand" "Upl") 2184 (truncate:VNx4_NARROW 2185 (match_operand:VNx4_WIDE 1 "register_operand" "w")) 2186 (match_dup 0)] 2187 UNSPEC_ST1_SVE))] 2188 "TARGET_SVE" 2189 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0" 2190) 2191 2192;; Predicated truncate and store, with 2 elements per 128-bit block. 2193(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>" 2194 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m") 2195 (unspec:VNx2_NARROW 2196 [(match_operand:VNx2BI 2 "register_operand" "Upl") 2197 (truncate:VNx2_NARROW 2198 (match_operand:VNx2_WIDE 1 "register_operand" "w")) 2199 (match_dup 0)] 2200 UNSPEC_ST1_SVE))] 2201 "TARGET_SVE" 2202 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0" 2203) 2204 2205;; ------------------------------------------------------------------------- 2206;; ---- Non-temporal contiguous stores 2207;; ------------------------------------------------------------------------- 2208;; Includes: 2209;; - STNT1B 2210;; - STNT1D 2211;; - STNT1H 2212;; - STNT1W 2213;; ------------------------------------------------------------------------- 2214 2215(define_insn "@aarch64_stnt1<mode>" 2216 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m") 2217 (unspec:SVE_FULL 2218 [(match_operand:<VPRED> 2 "register_operand" "Upl") 2219 (match_operand:SVE_FULL 1 "register_operand" "w") 2220 (match_dup 0)] 2221 UNSPEC_STNT1_SVE))] 2222 "TARGET_SVE" 2223 "stnt1<Vesize>\t%1.<Vetype>, %2, %0" 2224) 2225 2226;; ------------------------------------------------------------------------- 2227;; ---- Normal scatter stores 2228;; ------------------------------------------------------------------------- 2229;; Includes scatter forms of: 2230;; - ST1D 2231;; - ST1W 2232;; ------------------------------------------------------------------------- 2233 2234;; Unpredicated scatter stores. 2235(define_expand "scatter_store<mode><v_int_container>" 2236 [(set (mem:BLK (scratch)) 2237 (unspec:BLK 2238 [(match_dup 5) 2239 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>") 2240 (match_operand:<V_INT_CONTAINER> 1 "register_operand") 2241 (match_operand:DI 2 "const_int_operand") 2242 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>") 2243 (match_operand:SVE_24 4 "register_operand")] 2244 UNSPEC_ST1_SCATTER))] 2245 "TARGET_SVE" 2246 { 2247 operands[5] = aarch64_ptrue_reg (<VPRED>mode); 2248 } 2249) 2250 2251;; Predicated scatter stores for 32-bit elements. Operand 2 is true for 2252;; unsigned extension and false for signed extension. 2253(define_insn "mask_scatter_store<mode><v_int_container>" 2254 [(set (mem:BLK (scratch)) 2255 (unspec:BLK 2256 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 2257 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk") 2258 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") 2259 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") 2260 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 2261 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")] 2262 UNSPEC_ST1_SCATTER))] 2263 "TARGET_SVE" 2264 "@ 2265 st1<Vesize>\t%4.s, %5, [%1.s] 2266 st1<Vesize>\t%4.s, %5, [%1.s, #%0] 2267 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw] 2268 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw] 2269 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3] 2270 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]" 2271) 2272 2273;; Predicated scatter stores for 64-bit elements. The value of operand 2 2274;; doesn't matter in this case. 2275(define_insn "mask_scatter_store<mode><v_int_container>" 2276 [(set (mem:BLK (scratch)) 2277 (unspec:BLK 2278 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 2279 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk") 2280 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") 2281 (match_operand:DI 2 "const_int_operand") 2282 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") 2283 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")] 2284 UNSPEC_ST1_SCATTER))] 2285 "TARGET_SVE" 2286 "@ 2287 st1<Vesize>\t%4.d, %5, [%1.d] 2288 st1<Vesize>\t%4.d, %5, [%1.d, #%0] 2289 st1<Vesize>\t%4.d, %5, [%0, %1.d] 2290 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]" 2291) 2292 2293;; Likewise, but with the offset being extended from 32 bits. 2294(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked" 2295 [(set (mem:BLK (scratch)) 2296 (unspec:BLK 2297 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 2298 (match_operand:DI 0 "register_operand" "rk, rk") 2299 (unspec:VNx2DI 2300 [(match_operand 6) 2301 (ANY_EXTEND:VNx2DI 2302 (match_operand:VNx2SI 1 "register_operand" "w, w"))] 2303 UNSPEC_PRED_X) 2304 (match_operand:DI 2 "const_int_operand") 2305 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 2306 (match_operand:SVE_2 4 "register_operand" "w, w")] 2307 UNSPEC_ST1_SCATTER))] 2308 "TARGET_SVE" 2309 "@ 2310 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw] 2311 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]" 2312 "&& !CONSTANT_P (operands[6])" 2313 { 2314 operands[6] = CONSTM1_RTX (<VPRED>mode); 2315 } 2316) 2317 2318;; Likewise, but with the offset being truncated to 32 bits and then 2319;; sign-extended. 2320(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw" 2321 [(set (mem:BLK (scratch)) 2322 (unspec:BLK 2323 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 2324 (match_operand:DI 0 "register_operand" "rk, rk") 2325 (unspec:VNx2DI 2326 [(match_operand 6) 2327 (sign_extend:VNx2DI 2328 (truncate:VNx2SI 2329 (match_operand:VNx2DI 1 "register_operand" "w, w")))] 2330 UNSPEC_PRED_X) 2331 (match_operand:DI 2 "const_int_operand") 2332 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 2333 (match_operand:SVE_2 4 "register_operand" "w, w")] 2334 UNSPEC_ST1_SCATTER))] 2335 "TARGET_SVE" 2336 "@ 2337 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw] 2338 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]" 2339 "&& !CONSTANT_P (operands[6])" 2340 { 2341 operands[6] = CONSTM1_RTX (<VPRED>mode); 2342 } 2343) 2344 2345;; Likewise, but with the offset being truncated to 32 bits and then 2346;; zero-extended. 2347(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw" 2348 [(set (mem:BLK (scratch)) 2349 (unspec:BLK 2350 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 2351 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") 2352 (and:VNx2DI 2353 (match_operand:VNx2DI 1 "register_operand" "w, w") 2354 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 2355 (match_operand:DI 2 "const_int_operand") 2356 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i") 2357 (match_operand:SVE_2 4 "register_operand" "w, w")] 2358 UNSPEC_ST1_SCATTER))] 2359 "TARGET_SVE" 2360 "@ 2361 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw] 2362 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]" 2363) 2364 2365;; ------------------------------------------------------------------------- 2366;; ---- Truncating scatter stores 2367;; ------------------------------------------------------------------------- 2368;; Includes scatter forms of: 2369;; - ST1B 2370;; - ST1H 2371;; - ST1W 2372;; ------------------------------------------------------------------------- 2373 2374;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is 2375;; true for unsigned extension and false for signed extension. 2376(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>" 2377 [(set (mem:BLK (scratch)) 2378 (unspec:BLK 2379 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 2380 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk") 2381 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") 2382 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") 2383 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") 2384 (truncate:VNx4_NARROW 2385 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))] 2386 UNSPEC_ST1_SCATTER))] 2387 "TARGET_SVE" 2388 "@ 2389 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s] 2390 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0] 2391 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw] 2392 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw] 2393 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3] 2394 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]" 2395) 2396 2397;; Predicated truncating scatter stores for 64-bit elements. The value of 2398;; operand 2 doesn't matter in this case. 2399(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>" 2400 [(set (mem:BLK (scratch)) 2401 (unspec:BLK 2402 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") 2403 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk") 2404 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") 2405 (match_operand:DI 2 "const_int_operand") 2406 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i") 2407 (truncate:VNx2_NARROW 2408 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))] 2409 UNSPEC_ST1_SCATTER))] 2410 "TARGET_SVE" 2411 "@ 2412 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d] 2413 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0] 2414 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d] 2415 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]" 2416) 2417 2418;; Likewise, but with the offset being sign-extended from 32 bits. 2419(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw" 2420 [(set (mem:BLK (scratch)) 2421 (unspec:BLK 2422 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 2423 (match_operand:DI 0 "register_operand" "rk, rk") 2424 (unspec:VNx2DI 2425 [(match_operand 6) 2426 (sign_extend:VNx2DI 2427 (truncate:VNx2SI 2428 (match_operand:VNx2DI 1 "register_operand" "w, w")))] 2429 UNSPEC_PRED_X) 2430 (match_operand:DI 2 "const_int_operand") 2431 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i") 2432 (truncate:VNx2_NARROW 2433 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] 2434 UNSPEC_ST1_SCATTER))] 2435 "TARGET_SVE" 2436 "@ 2437 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw] 2438 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]" 2439 "&& !rtx_equal_p (operands[5], operands[6])" 2440 { 2441 operands[6] = copy_rtx (operands[5]); 2442 } 2443) 2444 2445;; Likewise, but with the offset being zero-extended from 32 bits. 2446(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw" 2447 [(set (mem:BLK (scratch)) 2448 (unspec:BLK 2449 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl") 2450 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk") 2451 (and:VNx2DI 2452 (match_operand:VNx2DI 1 "register_operand" "w, w") 2453 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate")) 2454 (match_operand:DI 2 "const_int_operand") 2455 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i") 2456 (truncate:VNx2_NARROW 2457 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))] 2458 UNSPEC_ST1_SCATTER))] 2459 "TARGET_SVE" 2460 "@ 2461 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw] 2462 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]" 2463) 2464 2465;; ========================================================================= 2466;; == Vector creation 2467;; ========================================================================= 2468 2469;; ------------------------------------------------------------------------- 2470;; ---- [INT,FP] Duplicate element 2471;; ------------------------------------------------------------------------- 2472;; Includes: 2473;; - DUP 2474;; - MOV 2475;; - LD1RB 2476;; - LD1RD 2477;; - LD1RH 2478;; - LD1RW 2479;; - LD1ROB (F64MM) 2480;; - LD1ROD (F64MM) 2481;; - LD1ROH (F64MM) 2482;; - LD1ROW (F64MM) 2483;; - LD1RQB 2484;; - LD1RQD 2485;; - LD1RQH 2486;; - LD1RQW 2487;; ------------------------------------------------------------------------- 2488 2489(define_expand "vec_duplicate<mode>" 2490 [(parallel 2491 [(set (match_operand:SVE_ALL 0 "register_operand") 2492 (vec_duplicate:SVE_ALL 2493 (match_operand:<VEL> 1 "aarch64_sve_dup_operand"))) 2494 (clobber (scratch:VNx16BI))])] 2495 "TARGET_SVE" 2496 { 2497 if (MEM_P (operands[1])) 2498 { 2499 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode); 2500 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1], 2501 CONST0_RTX (<MODE>mode))); 2502 DONE; 2503 } 2504 } 2505) 2506 2507;; Accept memory operands for the benefit of combine, and also in case 2508;; the scalar input gets spilled to memory during RA. We want to split 2509;; the load at the first opportunity in order to allow the PTRUE to be 2510;; optimized with surrounding code. 2511(define_insn_and_split "*vec_duplicate<mode>_reg" 2512 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w") 2513 (vec_duplicate:SVE_ALL 2514 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty"))) 2515 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))] 2516 "TARGET_SVE" 2517 "@ 2518 mov\t%0.<Vetype>, %<vwcore>1 2519 mov\t%0.<Vetype>, %<Vetype>1 2520 #" 2521 "&& MEM_P (operands[1])" 2522 [(const_int 0)] 2523 { 2524 if (GET_CODE (operands[2]) == SCRATCH) 2525 operands[2] = gen_reg_rtx (VNx16BImode); 2526 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode)); 2527 rtx gp = gen_lowpart (<VPRED>mode, operands[2]); 2528 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1], 2529 CONST0_RTX (<MODE>mode))); 2530 DONE; 2531 } 2532 [(set_attr "length" "4,4,8")] 2533) 2534 2535;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version). 2536(define_insn "@aarch64_vec_duplicate_vq<mode>_le" 2537 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 2538 (vec_duplicate:SVE_FULL 2539 (match_operand:<V128> 1 "register_operand" "w")))] 2540 "TARGET_SVE && !BYTES_BIG_ENDIAN" 2541 { 2542 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); 2543 return "dup\t%0.q, %1.q[0]"; 2544 } 2545) 2546 2547;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version). 2548;; The SVE register layout puts memory lane N into (architectural) 2549;; register lane N, whereas the Advanced SIMD layout puts the memory 2550;; lsb into the register lsb. We therefore have to describe this in rtl 2551;; terms as a reverse of the V128 vector followed by a duplicate. 2552(define_insn "@aarch64_vec_duplicate_vq<mode>_be" 2553 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 2554 (vec_duplicate:SVE_FULL 2555 (vec_select:<V128> 2556 (match_operand:<V128> 1 "register_operand" "w") 2557 (match_operand 2 "descending_int_parallel"))))] 2558 "TARGET_SVE 2559 && BYTES_BIG_ENDIAN 2560 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)), 2561 GET_MODE_NUNITS (<V128>mode) - 1)" 2562 { 2563 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1])); 2564 return "dup\t%0.q, %1.q[0]"; 2565 } 2566) 2567 2568;; This is used for vec_duplicate<mode>s from memory, but can also 2569;; be used by combine to optimize selects of a vec_duplicate<mode> 2570;; with zero. 2571(define_insn "sve_ld1r<mode>" 2572 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 2573 (unspec:SVE_ALL 2574 [(match_operand:<VPRED> 1 "register_operand" "Upl") 2575 (vec_duplicate:SVE_ALL 2576 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty")) 2577 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")] 2578 UNSPEC_SEL))] 2579 "TARGET_SVE" 2580 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2" 2581) 2582 2583;; Load 128 bits from memory under predicate control and duplicate to 2584;; fill a vector. 2585(define_insn "@aarch64_sve_ld1rq<mode>" 2586 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 2587 (unspec:SVE_FULL 2588 [(match_operand:<VPRED> 2 "register_operand" "Upl") 2589 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")] 2590 UNSPEC_LD1RQ))] 2591 "TARGET_SVE" 2592 { 2593 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0)); 2594 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1"; 2595 } 2596) 2597 2598(define_insn "@aarch64_sve_ld1ro<mode>" 2599 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 2600 (unspec:SVE_FULL 2601 [(match_operand:<VPRED> 2 "register_operand" "Upl") 2602 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>" 2603 "UO<Vesize>")] 2604 UNSPEC_LD1RO))] 2605 "TARGET_SVE_F64MM" 2606 { 2607 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0)); 2608 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1"; 2609 } 2610) 2611 2612;; ------------------------------------------------------------------------- 2613;; ---- [INT,FP] Initialize from individual elements 2614;; ------------------------------------------------------------------------- 2615;; Includes: 2616;; - INSR 2617;; ------------------------------------------------------------------------- 2618 2619(define_expand "vec_init<mode><Vel>" 2620 [(match_operand:SVE_FULL 0 "register_operand") 2621 (match_operand 1 "")] 2622 "TARGET_SVE" 2623 { 2624 aarch64_sve_expand_vector_init (operands[0], operands[1]); 2625 DONE; 2626 } 2627) 2628 2629;; Shift an SVE vector left and insert a scalar into element 0. 2630(define_insn "vec_shl_insert_<mode>" 2631 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w") 2632 (unspec:SVE_FULL 2633 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w") 2634 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")] 2635 UNSPEC_INSR))] 2636 "TARGET_SVE" 2637 "@ 2638 insr\t%0.<Vetype>, %<vwcore>2 2639 insr\t%0.<Vetype>, %<Vetype>2 2640 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2 2641 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2" 2642 [(set_attr "movprfx" "*,*,yes,yes")] 2643) 2644 2645;; ------------------------------------------------------------------------- 2646;; ---- [INT] Linear series 2647;; ------------------------------------------------------------------------- 2648;; Includes: 2649;; - INDEX 2650;; ------------------------------------------------------------------------- 2651 2652(define_insn "vec_series<mode>" 2653 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w") 2654 (vec_series:SVE_I 2655 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r") 2656 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))] 2657 "TARGET_SVE" 2658 "@ 2659 index\t%0.<Vctype>, #%1, %<vccore>2 2660 index\t%0.<Vctype>, %<vccore>1, #%2 2661 index\t%0.<Vctype>, %<vccore>1, %<vccore>2" 2662) 2663 2664;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range 2665;; of an INDEX instruction. 2666(define_insn "*vec_series<mode>_plus" 2667 [(set (match_operand:SVE_I 0 "register_operand" "=w") 2668 (plus:SVE_I 2669 (vec_duplicate:SVE_I 2670 (match_operand:<VEL> 1 "register_operand" "r")) 2671 (match_operand:SVE_I 2 "immediate_operand")))] 2672 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])" 2673 { 2674 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]); 2675 return "index\t%0.<Vctype>, %<vccore>1, #%2"; 2676 } 2677) 2678 2679;; ------------------------------------------------------------------------- 2680;; ---- [PRED] Duplicate element 2681;; ------------------------------------------------------------------------- 2682;; The patterns in this section are synthetic. 2683;; ------------------------------------------------------------------------- 2684 2685;; Implement a predicate broadcast by shifting the low bit of the scalar 2686;; input into the top bit and using a WHILELO. An alternative would be to 2687;; duplicate the input and do a compare with zero. 2688(define_expand "vec_duplicate<mode>" 2689 [(set (match_operand:PRED_ALL 0 "register_operand") 2690 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))] 2691 "TARGET_SVE" 2692 { 2693 rtx tmp = gen_reg_rtx (DImode); 2694 rtx op1 = gen_lowpart (DImode, operands[1]); 2695 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode))); 2696 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp)); 2697 DONE; 2698 } 2699) 2700 2701;; ========================================================================= 2702;; == Vector decomposition 2703;; ========================================================================= 2704 2705;; ------------------------------------------------------------------------- 2706;; ---- [INT,FP] Extract index 2707;; ------------------------------------------------------------------------- 2708;; Includes: 2709;; - DUP (Advanced SIMD) 2710;; - DUP (SVE) 2711;; - EXT (SVE) 2712;; - ST1 (Advanced SIMD) 2713;; - UMOV (Advanced SIMD) 2714;; ------------------------------------------------------------------------- 2715 2716(define_expand "vec_extract<mode><Vel>" 2717 [(set (match_operand:<VEL> 0 "register_operand") 2718 (vec_select:<VEL> 2719 (match_operand:SVE_FULL 1 "register_operand") 2720 (parallel [(match_operand:SI 2 "nonmemory_operand")])))] 2721 "TARGET_SVE" 2722 { 2723 poly_int64 val; 2724 if (poly_int_rtx_p (operands[2], &val) 2725 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1)) 2726 { 2727 /* The last element can be extracted with a LASTB and a false 2728 predicate. */ 2729 rtx sel = aarch64_pfalse_reg (<VPRED>mode); 2730 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); 2731 DONE; 2732 } 2733 if (!CONST_INT_P (operands[2])) 2734 { 2735 /* Create an index with operand[2] as the base and -1 as the step. 2736 It will then be zero for the element we care about. */ 2737 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]); 2738 index = force_reg (<VEL_INT>mode, index); 2739 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode); 2740 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx)); 2741 2742 /* Get a predicate that is true for only that element. */ 2743 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode); 2744 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero); 2745 rtx sel = gen_reg_rtx (<VPRED>mode); 2746 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero)); 2747 2748 /* Select the element using LASTB. */ 2749 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1])); 2750 DONE; 2751 } 2752 } 2753) 2754 2755;; Extract element zero. This is a special case because we want to force 2756;; the registers to be the same for the second alternative, and then 2757;; split the instruction into nothing after RA. 2758(define_insn_and_split "*vec_extract<mode><Vel>_0" 2759 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") 2760 (vec_select:<VEL> 2761 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w") 2762 (parallel [(const_int 0)])))] 2763 "TARGET_SVE" 2764 { 2765 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); 2766 switch (which_alternative) 2767 { 2768 case 0: 2769 return "umov\\t%<vwcore>0, %1.<Vetype>[0]"; 2770 case 1: 2771 return "#"; 2772 case 2: 2773 return "st1\\t{%1.<Vetype>}[0], %0"; 2774 default: 2775 gcc_unreachable (); 2776 } 2777 } 2778 "&& reload_completed 2779 && REG_P (operands[0]) 2780 && REGNO (operands[0]) == REGNO (operands[1])" 2781 [(const_int 0)] 2782 { 2783 emit_note (NOTE_INSN_DELETED); 2784 DONE; 2785 } 2786 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")] 2787) 2788 2789;; Extract an element from the Advanced SIMD portion of the register. 2790;; We don't just reuse the aarch64-simd.md pattern because we don't 2791;; want any change in lane number on big-endian targets. 2792(define_insn "*vec_extract<mode><Vel>_v128" 2793 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") 2794 (vec_select:<VEL> 2795 (match_operand:SVE_FULL 1 "register_operand" "w, w, w") 2796 (parallel [(match_operand:SI 2 "const_int_operand")])))] 2797 "TARGET_SVE 2798 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)" 2799 { 2800 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1])); 2801 switch (which_alternative) 2802 { 2803 case 0: 2804 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; 2805 case 1: 2806 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]"; 2807 case 2: 2808 return "st1\\t{%1.<Vetype>}[%2], %0"; 2809 default: 2810 gcc_unreachable (); 2811 } 2812 } 2813 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")] 2814) 2815 2816;; Extract an element in the range of DUP. This pattern allows the 2817;; source and destination to be different. 2818(define_insn "*vec_extract<mode><Vel>_dup" 2819 [(set (match_operand:<VEL> 0 "register_operand" "=w") 2820 (vec_select:<VEL> 2821 (match_operand:SVE_FULL 1 "register_operand" "w") 2822 (parallel [(match_operand:SI 2 "const_int_operand")])))] 2823 "TARGET_SVE 2824 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)" 2825 { 2826 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); 2827 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]"; 2828 } 2829) 2830 2831;; Extract an element outside the range of DUP. This pattern requires the 2832;; source and destination to be the same. 2833(define_insn "*vec_extract<mode><Vel>_ext" 2834 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w") 2835 (vec_select:<VEL> 2836 (match_operand:SVE_FULL 1 "register_operand" "0, w") 2837 (parallel [(match_operand:SI 2 "const_int_operand")])))] 2838 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64" 2839 { 2840 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0])); 2841 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode)); 2842 return (which_alternative == 0 2843 ? "ext\t%0.b, %0.b, %0.b, #%2" 2844 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2"); 2845 } 2846 [(set_attr "movprfx" "*,yes")] 2847) 2848 2849;; ------------------------------------------------------------------------- 2850;; ---- [INT,FP] Extract active element 2851;; ------------------------------------------------------------------------- 2852;; Includes: 2853;; - LASTA 2854;; - LASTB 2855;; ------------------------------------------------------------------------- 2856 2857;; Extract the last active element of operand 1 into operand 0. 2858;; If no elements are active, extract the last inactive element instead. 2859(define_insn "@extract_<last_op>_<mode>" 2860 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") 2861 (unspec:<VEL> 2862 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 2863 (match_operand:SVE_FULL 2 "register_operand" "w, w")] 2864 LAST))] 2865 "TARGET_SVE" 2866 "@ 2867 last<ab>\t%<vwcore>0, %1, %2.<Vetype> 2868 last<ab>\t%<Vetype>0, %1, %2.<Vetype>" 2869) 2870 2871;; ------------------------------------------------------------------------- 2872;; ---- [PRED] Extract index 2873;; ------------------------------------------------------------------------- 2874;; The patterns in this section are synthetic. 2875;; ------------------------------------------------------------------------- 2876 2877;; Handle extractions from a predicate by converting to an integer vector 2878;; and extracting from there. 2879(define_expand "vec_extract<vpred><Vel>" 2880 [(match_operand:<VEL> 0 "register_operand") 2881 (match_operand:<VPRED> 1 "register_operand") 2882 (match_operand:SI 2 "nonmemory_operand") 2883 ;; Dummy operand to which we can attach the iterator. 2884 (reg:SVE_FULL_I V0_REGNUM)] 2885 "TARGET_SVE" 2886 { 2887 rtx tmp = gen_reg_rtx (<MODE>mode); 2888 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1], 2889 CONST1_RTX (<MODE>mode), 2890 CONST0_RTX (<MODE>mode))); 2891 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2])); 2892 DONE; 2893 } 2894) 2895 2896;; ========================================================================= 2897;; == Unary arithmetic 2898;; ========================================================================= 2899 2900;; ------------------------------------------------------------------------- 2901;; ---- [INT] General unary arithmetic corresponding to rtx codes 2902;; ------------------------------------------------------------------------- 2903;; Includes: 2904;; - ABS 2905;; - CLS (= clrsb) 2906;; - CLZ 2907;; - CNT (= popcount) 2908;; - NEG 2909;; - NOT 2910;; ------------------------------------------------------------------------- 2911 2912;; Unpredicated integer unary arithmetic. 2913(define_expand "<optab><mode>2" 2914 [(set (match_operand:SVE_I 0 "register_operand") 2915 (unspec:SVE_I 2916 [(match_dup 2) 2917 (SVE_INT_UNARY:SVE_I 2918 (match_operand:SVE_I 1 "register_operand"))] 2919 UNSPEC_PRED_X))] 2920 "TARGET_SVE" 2921 { 2922 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 2923 } 2924) 2925 2926;; Integer unary arithmetic predicated with a PTRUE. 2927(define_insn "@aarch64_pred_<optab><mode>" 2928 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 2929 (unspec:SVE_I 2930 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 2931 (SVE_INT_UNARY:SVE_I 2932 (match_operand:SVE_I 2 "register_operand" "0, w"))] 2933 UNSPEC_PRED_X))] 2934 "TARGET_SVE" 2935 "@ 2936 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 2937 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 2938 [(set_attr "movprfx" "*,yes")] 2939) 2940 2941;; Predicated integer unary arithmetic with merging. 2942(define_expand "@cond_<optab><mode>" 2943 [(set (match_operand:SVE_I 0 "register_operand") 2944 (unspec:SVE_I 2945 [(match_operand:<VPRED> 1 "register_operand") 2946 (SVE_INT_UNARY:SVE_I 2947 (match_operand:SVE_I 2 "register_operand")) 2948 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")] 2949 UNSPEC_SEL))] 2950 "TARGET_SVE" 2951) 2952 2953;; Predicated integer unary arithmetic, merging with the first input. 2954(define_insn "*cond_<optab><mode>_2" 2955 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 2956 (unspec:SVE_I 2957 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 2958 (SVE_INT_UNARY:SVE_I 2959 (match_operand:SVE_I 2 "register_operand" "0, w")) 2960 (match_dup 2)] 2961 UNSPEC_SEL))] 2962 "TARGET_SVE" 2963 "@ 2964 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype> 2965 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 2966 [(set_attr "movprfx" "*,yes")] 2967) 2968 2969;; Predicated integer unary arithmetic, merging with an independent value. 2970;; 2971;; The earlyclobber isn't needed for the first alternative, but omitting 2972;; it would only help the case in which operands 2 and 3 are the same, 2973;; which is handled above rather than here. Marking all the alternatives 2974;; as earlyclobber helps to make the instruction more regular to the 2975;; register allocator. 2976(define_insn "*cond_<optab><mode>_any" 2977 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") 2978 (unspec:SVE_I 2979 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 2980 (SVE_INT_UNARY:SVE_I 2981 (match_operand:SVE_I 2 "register_operand" "w, w, w")) 2982 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 2983 UNSPEC_SEL))] 2984 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" 2985 "@ 2986 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 2987 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 2988 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 2989 [(set_attr "movprfx" "*,yes,yes")] 2990) 2991 2992;; ------------------------------------------------------------------------- 2993;; ---- [INT] General unary arithmetic corresponding to unspecs 2994;; ------------------------------------------------------------------------- 2995;; Includes 2996;; - RBIT 2997;; - REVB 2998;; - REVH 2999;; - REVW 3000;; ------------------------------------------------------------------------- 3001 3002;; Predicated integer unary operations. 3003(define_insn "@aarch64_pred_<optab><mode>" 3004 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 3005 (unspec:SVE_FULL_I 3006 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3007 (unspec:SVE_FULL_I 3008 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")] 3009 SVE_INT_UNARY)] 3010 UNSPEC_PRED_X))] 3011 "TARGET_SVE && <elem_bits> >= <min_elem_bits>" 3012 "@ 3013 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3014 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3015 [(set_attr "movprfx" "*,yes")] 3016) 3017 3018;; Another way of expressing the REVB, REVH and REVW patterns, with this 3019;; form being easier for permutes. The predicate mode determines the number 3020;; of lanes and the data mode decides the granularity of the reversal within 3021;; each lane. 3022(define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>" 3023 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") 3024 (unspec:SVE_ALL 3025 [(match_operand:PRED_HSD 1 "register_operand" "Upl, Upl") 3026 (unspec:SVE_ALL 3027 [(match_operand:SVE_ALL 2 "register_operand" "0, w")] 3028 UNSPEC_REVBHW)] 3029 UNSPEC_PRED_X))] 3030 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>" 3031 "@ 3032 rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype> 3033 movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>" 3034 [(set_attr "movprfx" "*,yes")] 3035) 3036 3037;; Predicated integer unary operations with merging. 3038(define_insn "@cond_<optab><mode>" 3039 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w") 3040 (unspec:SVE_FULL_I 3041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3042 (unspec:SVE_FULL_I 3043 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")] 3044 SVE_INT_UNARY) 3045 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3046 UNSPEC_SEL))] 3047 "TARGET_SVE && <elem_bits> >= <min_elem_bits>" 3048 "@ 3049 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3050 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3051 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3052 [(set_attr "movprfx" "*,yes,yes")] 3053) 3054 3055;; ------------------------------------------------------------------------- 3056;; ---- [INT] Sign and zero extension 3057;; ------------------------------------------------------------------------- 3058;; Includes: 3059;; - SXTB 3060;; - SXTH 3061;; - SXTW 3062;; - UXTB 3063;; - UXTH 3064;; - UXTW 3065;; ------------------------------------------------------------------------- 3066 3067;; Unpredicated sign and zero extension from a narrower mode. 3068(define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2" 3069 [(set (match_operand:SVE_HSDI 0 "register_operand") 3070 (unspec:SVE_HSDI 3071 [(match_dup 2) 3072 (ANY_EXTEND:SVE_HSDI 3073 (match_operand:SVE_PARTIAL_I 1 "register_operand"))] 3074 UNSPEC_PRED_X))] 3075 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3076 { 3077 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode); 3078 } 3079) 3080 3081;; Predicated sign and zero extension from a narrower mode. 3082(define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2" 3083 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w, ?&w") 3084 (unspec:SVE_HSDI 3085 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 3086 (ANY_EXTEND:SVE_HSDI 3087 (match_operand:SVE_PARTIAL_I 2 "register_operand" "0, w"))] 3088 UNSPEC_PRED_X))] 3089 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3090 "@ 3091 <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype> 3092 movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>" 3093 [(set_attr "movprfx" "*,yes")] 3094) 3095 3096;; Predicated truncate-and-sign-extend operations. 3097(define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>" 3098 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") 3099 (unspec:SVE_FULL_HSDI 3100 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 3101 (sign_extend:SVE_FULL_HSDI 3102 (truncate:SVE_PARTIAL_I 3103 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")))] 3104 UNSPEC_PRED_X))] 3105 "TARGET_SVE 3106 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3107 "@ 3108 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3109 movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 3110 [(set_attr "movprfx" "*,yes")] 3111) 3112 3113;; Predicated truncate-and-sign-extend operations with merging. 3114(define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>" 3115 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w") 3116 (unspec:SVE_FULL_HSDI 3117 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 3118 (sign_extend:SVE_FULL_HSDI 3119 (truncate:SVE_PARTIAL_I 3120 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w"))) 3121 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3122 UNSPEC_SEL))] 3123 "TARGET_SVE 3124 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3125 "@ 3126 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3127 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 3128 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 3129 [(set_attr "movprfx" "*,yes,yes")] 3130) 3131 3132;; Predicated truncate-and-zero-extend operations, merging with the 3133;; first input. 3134;; 3135;; The canonical form of this operation is an AND of a constant rather 3136;; than (zero_extend (truncate ...)). 3137(define_insn "*cond_uxt<mode>_2" 3138 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3139 (unspec:SVE_I 3140 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3141 (and:SVE_I 3142 (match_operand:SVE_I 2 "register_operand" "0, w") 3143 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) 3144 (match_dup 2)] 3145 UNSPEC_SEL))] 3146 "TARGET_SVE" 3147 "@ 3148 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype> 3149 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" 3150 [(set_attr "movprfx" "*,yes")] 3151) 3152 3153;; Predicated truncate-and-zero-extend operations, merging with an 3154;; independent value. 3155;; 3156;; The earlyclobber isn't needed for the first alternative, but omitting 3157;; it would only help the case in which operands 2 and 4 are the same, 3158;; which is handled above rather than here. Marking all the alternatives 3159;; as early-clobber helps to make the instruction more regular to the 3160;; register allocator. 3161(define_insn "*cond_uxt<mode>_any" 3162 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") 3163 (unspec:SVE_I 3164 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3165 (and:SVE_I 3166 (match_operand:SVE_I 2 "register_operand" "w, w, w") 3167 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate")) 3168 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3169 UNSPEC_SEL))] 3170 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 3171 "@ 3172 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> 3173 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype> 3174 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>" 3175 [(set_attr "movprfx" "*,yes,yes")] 3176) 3177 3178;; ------------------------------------------------------------------------- 3179;; ---- [INT] Truncation 3180;; ------------------------------------------------------------------------- 3181;; The patterns in this section are synthetic. 3182;; ------------------------------------------------------------------------- 3183 3184;; Truncate to a partial SVE vector from either a full vector or a 3185;; wider partial vector. This is a no-op, because we can just ignore 3186;; the unused upper bits of the source. 3187(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2" 3188 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w") 3189 (truncate:SVE_PARTIAL_I 3190 (match_operand:SVE_HSDI 1 "register_operand" "w")))] 3191 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0" 3192 "#" 3193 "&& reload_completed" 3194 [(set (match_dup 0) (match_dup 1))] 3195 { 3196 operands[1] = aarch64_replace_reg_mode (operands[1], 3197 <SVE_PARTIAL_I:MODE>mode); 3198 } 3199) 3200 3201;; ------------------------------------------------------------------------- 3202;; ---- [INT] Logical inverse 3203;; ------------------------------------------------------------------------- 3204;; Includes: 3205;; - CNOT 3206;; ------------------------------------------------------------------------- 3207 3208;; Predicated logical inverse. 3209(define_expand "@aarch64_pred_cnot<mode>" 3210 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3211 (unspec:SVE_FULL_I 3212 [(unspec:<VPRED> 3213 [(match_operand:<VPRED> 1 "register_operand") 3214 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 3215 (eq:<VPRED> 3216 (match_operand:SVE_FULL_I 3 "register_operand") 3217 (match_dup 4))] 3218 UNSPEC_PRED_Z) 3219 (match_dup 5) 3220 (match_dup 4)] 3221 UNSPEC_SEL))] 3222 "TARGET_SVE" 3223 { 3224 operands[4] = CONST0_RTX (<MODE>mode); 3225 operands[5] = CONST1_RTX (<MODE>mode); 3226 } 3227) 3228 3229(define_insn "*cnot<mode>" 3230 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3231 (unspec:SVE_I 3232 [(unspec:<VPRED> 3233 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3234 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 3235 (eq:<VPRED> 3236 (match_operand:SVE_I 2 "register_operand" "0, w") 3237 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] 3238 UNSPEC_PRED_Z) 3239 (match_operand:SVE_I 4 "aarch64_simd_imm_one") 3240 (match_dup 3)] 3241 UNSPEC_SEL))] 3242 "TARGET_SVE" 3243 "@ 3244 cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3245 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3246 [(set_attr "movprfx" "*,yes")] 3247) 3248 3249;; Predicated logical inverse with merging. 3250(define_expand "@cond_cnot<mode>" 3251 [(set (match_operand:SVE_FULL_I 0 "register_operand") 3252 (unspec:SVE_FULL_I 3253 [(match_operand:<VPRED> 1 "register_operand") 3254 (unspec:SVE_FULL_I 3255 [(unspec:<VPRED> 3256 [(match_dup 4) 3257 (const_int SVE_KNOWN_PTRUE) 3258 (eq:<VPRED> 3259 (match_operand:SVE_FULL_I 2 "register_operand") 3260 (match_dup 5))] 3261 UNSPEC_PRED_Z) 3262 (match_dup 6) 3263 (match_dup 5)] 3264 UNSPEC_SEL) 3265 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")] 3266 UNSPEC_SEL))] 3267 "TARGET_SVE" 3268 { 3269 operands[4] = CONSTM1_RTX (<VPRED>mode); 3270 operands[5] = CONST0_RTX (<MODE>mode); 3271 operands[6] = CONST1_RTX (<MODE>mode); 3272 } 3273) 3274 3275;; Predicated logical inverse, merging with the first input. 3276(define_insn_and_rewrite "*cond_cnot<mode>_2" 3277 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3278 (unspec:SVE_I 3279 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3280 ;; Logical inverse of operand 2 (as above). 3281 (unspec:SVE_I 3282 [(unspec:<VPRED> 3283 [(match_operand 5) 3284 (const_int SVE_KNOWN_PTRUE) 3285 (eq:<VPRED> 3286 (match_operand:SVE_I 2 "register_operand" "0, w") 3287 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] 3288 UNSPEC_PRED_Z) 3289 (match_operand:SVE_I 4 "aarch64_simd_imm_one") 3290 (match_dup 3)] 3291 UNSPEC_SEL) 3292 (match_dup 2)] 3293 UNSPEC_SEL))] 3294 "TARGET_SVE" 3295 "@ 3296 cnot\t%0.<Vetype>, %1/m, %0.<Vetype> 3297 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3298 "&& !CONSTANT_P (operands[5])" 3299 { 3300 operands[5] = CONSTM1_RTX (<VPRED>mode); 3301 } 3302 [(set_attr "movprfx" "*,yes")] 3303) 3304 3305;; Predicated logical inverse, merging with an independent value. 3306;; 3307;; The earlyclobber isn't needed for the first alternative, but omitting 3308;; it would only help the case in which operands 2 and 6 are the same, 3309;; which is handled above rather than here. Marking all the alternatives 3310;; as earlyclobber helps to make the instruction more regular to the 3311;; register allocator. 3312(define_insn_and_rewrite "*cond_cnot<mode>_any" 3313 [(set (match_operand:SVE_I 0 "register_operand" "=&w, ?&w, ?&w") 3314 (unspec:SVE_I 3315 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3316 ;; Logical inverse of operand 2 (as above). 3317 (unspec:SVE_I 3318 [(unspec:<VPRED> 3319 [(match_operand 5) 3320 (const_int SVE_KNOWN_PTRUE) 3321 (eq:<VPRED> 3322 (match_operand:SVE_I 2 "register_operand" "w, w, w") 3323 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))] 3324 UNSPEC_PRED_Z) 3325 (match_operand:SVE_I 4 "aarch64_simd_imm_one") 3326 (match_dup 3)] 3327 UNSPEC_SEL) 3328 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3329 UNSPEC_SEL))] 3330 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])" 3331 "@ 3332 cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3333 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype> 3334 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>" 3335 "&& !CONSTANT_P (operands[5])" 3336 { 3337 operands[5] = CONSTM1_RTX (<VPRED>mode); 3338 } 3339 [(set_attr "movprfx" "*,yes,yes")] 3340) 3341 3342;; ------------------------------------------------------------------------- 3343;; ---- [FP<-INT] General unary arithmetic that maps to unspecs 3344;; ------------------------------------------------------------------------- 3345;; Includes: 3346;; - FEXPA 3347;; ------------------------------------------------------------------------- 3348 3349;; Unpredicated unary operations that take an integer and return a float. 3350(define_insn "@aarch64_sve_<optab><mode>" 3351 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 3352 (unspec:SVE_FULL_F 3353 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")] 3354 SVE_FP_UNARY_INT))] 3355 "TARGET_SVE" 3356 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" 3357) 3358 3359;; ------------------------------------------------------------------------- 3360;; ---- [FP] General unary arithmetic corresponding to unspecs 3361;; ------------------------------------------------------------------------- 3362;; Includes: 3363;; - FABS 3364;; - FNEG 3365;; - FRECPE 3366;; - FRECPX 3367;; - FRINTA 3368;; - FRINTI 3369;; - FRINTM 3370;; - FRINTN 3371;; - FRINTP 3372;; - FRINTX 3373;; - FRINTZ 3374;; - FRSQRTE 3375;; - FSQRT 3376;; ------------------------------------------------------------------------- 3377 3378;; Unpredicated floating-point unary operations. 3379(define_insn "@aarch64_sve_<optab><mode>" 3380 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 3381 (unspec:SVE_FULL_F 3382 [(match_operand:SVE_FULL_F 1 "register_operand" "w")] 3383 SVE_FP_UNARY))] 3384 "TARGET_SVE" 3385 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>" 3386) 3387 3388;; Unpredicated floating-point unary operations. 3389(define_expand "<optab><mode>2" 3390 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3391 (unspec:SVE_FULL_F 3392 [(match_dup 2) 3393 (const_int SVE_RELAXED_GP) 3394 (match_operand:SVE_FULL_F 1 "register_operand")] 3395 SVE_COND_FP_UNARY_OPTAB))] 3396 "TARGET_SVE" 3397 { 3398 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 3399 } 3400) 3401 3402;; Predicated floating-point unary operations. 3403(define_insn "@aarch64_pred_<optab><mode>" 3404 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3405 (unspec:SVE_FULL_F 3406 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3407 (match_operand:SI 3 "aarch64_sve_gp_strictness") 3408 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3409 SVE_COND_FP_UNARY))] 3410 "TARGET_SVE" 3411 "@ 3412 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3413 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3414 [(set_attr "movprfx" "*,yes")] 3415) 3416 3417;; Predicated floating-point unary arithmetic with merging. 3418(define_expand "@cond_<optab><mode>" 3419 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3420 (unspec:SVE_FULL_F 3421 [(match_operand:<VPRED> 1 "register_operand") 3422 (unspec:SVE_FULL_F 3423 [(match_dup 1) 3424 (const_int SVE_STRICT_GP) 3425 (match_operand:SVE_FULL_F 2 "register_operand")] 3426 SVE_COND_FP_UNARY) 3427 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] 3428 UNSPEC_SEL))] 3429 "TARGET_SVE" 3430) 3431 3432;; Predicated floating-point unary arithmetic, merging with the first input. 3433(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 3434 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3435 (unspec:SVE_FULL_F 3436 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3437 (unspec:SVE_FULL_F 3438 [(match_operand 3) 3439 (const_int SVE_RELAXED_GP) 3440 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3441 SVE_COND_FP_UNARY) 3442 (match_dup 2)] 3443 UNSPEC_SEL))] 3444 "TARGET_SVE" 3445 "@ 3446 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> 3447 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3448 "&& !rtx_equal_p (operands[1], operands[3])" 3449 { 3450 operands[3] = copy_rtx (operands[1]); 3451 } 3452 [(set_attr "movprfx" "*,yes")] 3453) 3454 3455(define_insn "*cond_<optab><mode>_2_strict" 3456 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 3457 (unspec:SVE_FULL_F 3458 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3459 (unspec:SVE_FULL_F 3460 [(match_dup 1) 3461 (const_int SVE_STRICT_GP) 3462 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 3463 SVE_COND_FP_UNARY) 3464 (match_dup 2)] 3465 UNSPEC_SEL))] 3466 "TARGET_SVE" 3467 "@ 3468 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype> 3469 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3470 [(set_attr "movprfx" "*,yes")] 3471) 3472 3473;; Predicated floating-point unary arithmetic, merging with an independent 3474;; value. 3475;; 3476;; The earlyclobber isn't needed for the first alternative, but omitting 3477;; it would only help the case in which operands 2 and 3 are the same, 3478;; which is handled above rather than here. Marking all the alternatives 3479;; as earlyclobber helps to make the instruction more regular to the 3480;; register allocator. 3481(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 3482 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") 3483 (unspec:SVE_FULL_F 3484 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3485 (unspec:SVE_FULL_F 3486 [(match_operand 4) 3487 (const_int SVE_RELAXED_GP) 3488 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 3489 SVE_COND_FP_UNARY) 3490 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3491 UNSPEC_SEL))] 3492 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" 3493 "@ 3494 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3495 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3496 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3497 "&& !rtx_equal_p (operands[1], operands[4])" 3498 { 3499 operands[4] = copy_rtx (operands[1]); 3500 } 3501 [(set_attr "movprfx" "*,yes,yes")] 3502) 3503 3504(define_insn "*cond_<optab><mode>_any_strict" 3505 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w") 3506 (unspec:SVE_FULL_F 3507 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 3508 (unspec:SVE_FULL_F 3509 [(match_dup 1) 3510 (const_int SVE_STRICT_GP) 3511 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 3512 SVE_COND_FP_UNARY) 3513 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 3514 UNSPEC_SEL))] 3515 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])" 3516 "@ 3517 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3518 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype> 3519 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>" 3520 [(set_attr "movprfx" "*,yes,yes")] 3521) 3522 3523;; ------------------------------------------------------------------------- 3524;; ---- [FP] Square root 3525;; ------------------------------------------------------------------------- 3526 3527(define_expand "sqrt<mode>2" 3528 [(set (match_operand:SVE_FULL_F 0 "register_operand") 3529 (unspec:SVE_FULL_F 3530 [(match_dup 2) 3531 (const_int SVE_RELAXED_GP) 3532 (match_operand:SVE_FULL_F 1 "register_operand")] 3533 UNSPEC_COND_FSQRT))] 3534 "TARGET_SVE" 3535{ 3536 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false)) 3537 DONE; 3538 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 3539}) 3540 3541;; ------------------------------------------------------------------------- 3542;; ---- [FP] Reciprocal square root 3543;; ------------------------------------------------------------------------- 3544 3545(define_expand "rsqrt<mode>2" 3546 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3547 (unspec:SVE_FULL_SDF 3548 [(match_operand:SVE_FULL_SDF 1 "register_operand")] 3549 UNSPEC_RSQRT))] 3550 "TARGET_SVE" 3551{ 3552 aarch64_emit_approx_sqrt (operands[0], operands[1], true); 3553 DONE; 3554}) 3555 3556(define_expand "@aarch64_rsqrte<mode>" 3557 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3558 (unspec:SVE_FULL_SDF 3559 [(match_operand:SVE_FULL_SDF 1 "register_operand")] 3560 UNSPEC_RSQRTE))] 3561 "TARGET_SVE" 3562) 3563 3564(define_expand "@aarch64_rsqrts<mode>" 3565 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 3566 (unspec:SVE_FULL_SDF 3567 [(match_operand:SVE_FULL_SDF 1 "register_operand") 3568 (match_operand:SVE_FULL_SDF 2 "register_operand")] 3569 UNSPEC_RSQRTS))] 3570 "TARGET_SVE" 3571) 3572 3573;; ------------------------------------------------------------------------- 3574;; ---- [PRED] Inverse 3575;; ------------------------------------------------------------------------- 3576;; Includes: 3577;; - NOT 3578;; ------------------------------------------------------------------------- 3579 3580;; Unpredicated predicate inverse. 3581(define_expand "one_cmpl<mode>2" 3582 [(set (match_operand:PRED_ALL 0 "register_operand") 3583 (and:PRED_ALL 3584 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")) 3585 (match_dup 2)))] 3586 "TARGET_SVE" 3587 { 3588 operands[2] = aarch64_ptrue_reg (<MODE>mode); 3589 } 3590) 3591 3592;; Predicated predicate inverse. 3593(define_insn "*one_cmpl<mode>3" 3594 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 3595 (and:PRED_ALL 3596 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) 3597 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] 3598 "TARGET_SVE" 3599 "not\t%0.b, %1/z, %2.b" 3600) 3601 3602;; ========================================================================= 3603;; == Binary arithmetic 3604;; ========================================================================= 3605 3606;; ------------------------------------------------------------------------- 3607;; ---- [INT] General binary arithmetic corresponding to rtx codes 3608;; ------------------------------------------------------------------------- 3609;; Includes: 3610;; - ADD (merging form only) 3611;; - AND (merging form only) 3612;; - ASR (merging form only) 3613;; - EOR (merging form only) 3614;; - LSL (merging form only) 3615;; - LSR (merging form only) 3616;; - MUL 3617;; - ORR (merging form only) 3618;; - SMAX 3619;; - SMIN 3620;; - SQADD (SVE2 merging form only) 3621;; - SQSUB (SVE2 merging form only) 3622;; - SUB (merging form only) 3623;; - UMAX 3624;; - UMIN 3625;; - UQADD (SVE2 merging form only) 3626;; - UQSUB (SVE2 merging form only) 3627;; ------------------------------------------------------------------------- 3628 3629;; Unpredicated integer binary operations that have an immediate form. 3630(define_expand "<optab><mode>3" 3631 [(set (match_operand:SVE_I 0 "register_operand") 3632 (unspec:SVE_I 3633 [(match_dup 3) 3634 (SVE_INT_BINARY_IMM:SVE_I 3635 (match_operand:SVE_I 1 "register_operand") 3636 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))] 3637 UNSPEC_PRED_X))] 3638 "TARGET_SVE" 3639 { 3640 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 3641 } 3642) 3643 3644;; Integer binary operations that have an immediate form, predicated 3645;; with a PTRUE. We don't actually need the predicate for the first 3646;; and third alternatives, but using Upa or X isn't likely to gain much 3647;; and would make the instruction seem less uniform to the register 3648;; allocator. 3649(define_insn_and_split "@aarch64_pred_<optab><mode>" 3650 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, ?&w") 3651 (unspec:SVE_I 3652 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 3653 (SVE_INT_BINARY_IMM:SVE_I 3654 (match_operand:SVE_I 2 "register_operand" "%0, 0, w, w") 3655 (match_operand:SVE_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))] 3656 UNSPEC_PRED_X))] 3657 "TARGET_SVE" 3658 "@ 3659 # 3660 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3661 # 3662 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 3663 ; Split the unpredicated form after reload, so that we don't have 3664 ; the unnecessary PTRUE. 3665 "&& reload_completed 3666 && !register_operand (operands[3], <MODE>mode)" 3667 [(set (match_dup 0) 3668 (SVE_INT_BINARY_IMM:SVE_I (match_dup 2) (match_dup 3)))] 3669 "" 3670 [(set_attr "movprfx" "*,*,yes,yes")] 3671) 3672 3673;; Unpredicated binary operations with a constant (post-RA only). 3674;; These are generated by splitting a predicated instruction whose 3675;; predicate is unused. 3676(define_insn "*post_ra_<optab><mode>3" 3677 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3678 (SVE_INT_BINARY_IMM:SVE_I 3679 (match_operand:SVE_I 1 "register_operand" "0, w") 3680 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_immediate")))] 3681 "TARGET_SVE && reload_completed" 3682 "@ 3683 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2 3684 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2" 3685 [(set_attr "movprfx" "*,yes")] 3686) 3687 3688;; Predicated integer operations with merging. 3689(define_expand "@cond_<optab><mode>" 3690 [(set (match_operand:SVE_I 0 "register_operand") 3691 (unspec:SVE_I 3692 [(match_operand:<VPRED> 1 "register_operand") 3693 (SVE_INT_BINARY:SVE_I 3694 (match_operand:SVE_I 2 "register_operand") 3695 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>")) 3696 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] 3697 UNSPEC_SEL))] 3698 "TARGET_SVE" 3699) 3700 3701;; Predicated integer operations, merging with the first input. 3702(define_insn "*cond_<optab><mode>_2" 3703 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3704 (unspec:SVE_I 3705 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3706 (SVE_INT_BINARY:SVE_I 3707 (match_operand:SVE_I 2 "register_operand" "0, w") 3708 (match_operand:SVE_I 3 "register_operand" "w, w")) 3709 (match_dup 2)] 3710 UNSPEC_SEL))] 3711 "TARGET_SVE" 3712 "@ 3713 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3714 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 3715 [(set_attr "movprfx" "*,yes")] 3716) 3717 3718;; Predicated integer operations, merging with the second input. 3719(define_insn "*cond_<optab><mode>_3" 3720 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3721 (unspec:SVE_I 3722 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3723 (SVE_INT_BINARY:SVE_I 3724 (match_operand:SVE_I 2 "register_operand" "w, w") 3725 (match_operand:SVE_I 3 "register_operand" "0, w")) 3726 (match_dup 3)] 3727 UNSPEC_SEL))] 3728 "TARGET_SVE" 3729 "@ 3730 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 3731 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 3732 [(set_attr "movprfx" "*,yes")] 3733) 3734 3735;; Predicated integer operations, merging with an independent value. 3736(define_insn_and_rewrite "*cond_<optab><mode>_any" 3737 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") 3738 (unspec:SVE_I 3739 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 3740 (SVE_INT_BINARY:SVE_I 3741 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") 3742 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w")) 3743 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 3744 UNSPEC_SEL))] 3745 "TARGET_SVE 3746 && !rtx_equal_p (operands[2], operands[4]) 3747 && !rtx_equal_p (operands[3], operands[4])" 3748 "@ 3749 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3750 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 3751 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3752 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 3753 #" 3754 "&& reload_completed 3755 && register_operand (operands[4], <MODE>mode) 3756 && !rtx_equal_p (operands[0], operands[4])" 3757 { 3758 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 3759 operands[4], operands[1])); 3760 operands[4] = operands[2] = operands[0]; 3761 } 3762 [(set_attr "movprfx" "yes")] 3763) 3764 3765;; ------------------------------------------------------------------------- 3766;; ---- [INT] Addition 3767;; ------------------------------------------------------------------------- 3768;; Includes: 3769;; - ADD 3770;; - DECB 3771;; - DECD 3772;; - DECH 3773;; - DECW 3774;; - INCB 3775;; - INCD 3776;; - INCH 3777;; - INCW 3778;; - SUB 3779;; ------------------------------------------------------------------------- 3780 3781(define_insn "add<mode>3" 3782 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w") 3783 (plus:SVE_I 3784 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w") 3785 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))] 3786 "TARGET_SVE" 3787 "@ 3788 add\t%0.<Vetype>, %0.<Vetype>, #%D2 3789 sub\t%0.<Vetype>, %0.<Vetype>, #%N2 3790 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]); 3791 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2 3792 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2 3793 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 3794 [(set_attr "movprfx" "*,*,*,yes,yes,*")] 3795) 3796 3797;; Merging forms are handled through SVE_INT_BINARY. 3798 3799;; ------------------------------------------------------------------------- 3800;; ---- [INT] Subtraction 3801;; ------------------------------------------------------------------------- 3802;; Includes: 3803;; - SUB 3804;; - SUBR 3805;; ------------------------------------------------------------------------- 3806 3807(define_insn "sub<mode>3" 3808 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") 3809 (minus:SVE_I 3810 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa") 3811 (match_operand:SVE_I 2 "register_operand" "w, 0, w")))] 3812 "TARGET_SVE" 3813 "@ 3814 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> 3815 subr\t%0.<Vetype>, %0.<Vetype>, #%D1 3816 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1" 3817 [(set_attr "movprfx" "*,*,yes")] 3818) 3819 3820;; Merging forms are handled through SVE_INT_BINARY. 3821 3822;; ------------------------------------------------------------------------- 3823;; ---- [INT] Take address 3824;; ------------------------------------------------------------------------- 3825;; Includes: 3826;; - ADR 3827;; ------------------------------------------------------------------------- 3828 3829;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD, 3830;; but the svadrb intrinsics should preserve the user's choice. 3831(define_insn "@aarch64_adr<mode>" 3832 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w") 3833 (unspec:SVE_FULL_SDI 3834 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w") 3835 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")] 3836 UNSPEC_ADR))] 3837 "TARGET_SVE" 3838 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]" 3839) 3840 3841;; Same, but with the offset being sign-extended from the low 32 bits. 3842(define_insn_and_rewrite "*aarch64_adr_sxtw" 3843 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3844 (unspec:VNx2DI 3845 [(match_operand:VNx2DI 1 "register_operand" "w") 3846 (unspec:VNx2DI 3847 [(match_operand 3) 3848 (sign_extend:VNx2DI 3849 (truncate:VNx2SI 3850 (match_operand:VNx2DI 2 "register_operand" "w")))] 3851 UNSPEC_PRED_X)] 3852 UNSPEC_ADR))] 3853 "TARGET_SVE" 3854 "adr\t%0.d, [%1.d, %2.d, sxtw]" 3855 "&& !CONSTANT_P (operands[3])" 3856 { 3857 operands[3] = CONSTM1_RTX (VNx2BImode); 3858 } 3859) 3860 3861;; Same, but with the offset being zero-extended from the low 32 bits. 3862(define_insn "*aarch64_adr_uxtw_unspec" 3863 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3864 (unspec:VNx2DI 3865 [(match_operand:VNx2DI 1 "register_operand" "w") 3866 (and:VNx2DI 3867 (match_operand:VNx2DI 2 "register_operand" "w") 3868 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))] 3869 UNSPEC_ADR))] 3870 "TARGET_SVE" 3871 "adr\t%0.d, [%1.d, %2.d, uxtw]" 3872) 3873 3874;; Same, matching as a PLUS rather than unspec. 3875(define_insn "*aarch64_adr_uxtw_and" 3876 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3877 (plus:VNx2DI 3878 (and:VNx2DI 3879 (match_operand:VNx2DI 2 "register_operand" "w") 3880 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate")) 3881 (match_operand:VNx2DI 1 "register_operand" "w")))] 3882 "TARGET_SVE" 3883 "adr\t%0.d, [%1.d, %2.d, uxtw]" 3884) 3885 3886;; ADR with a nonzero shift. 3887(define_expand "@aarch64_adr<mode>_shift" 3888 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 3889 (plus:SVE_FULL_SDI 3890 (unspec:SVE_FULL_SDI 3891 [(match_dup 4) 3892 (ashift:SVE_FULL_SDI 3893 (match_operand:SVE_FULL_SDI 2 "register_operand") 3894 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))] 3895 UNSPEC_PRED_X) 3896 (match_operand:SVE_FULL_SDI 1 "register_operand")))] 3897 "TARGET_SVE" 3898 { 3899 operands[4] = CONSTM1_RTX (<VPRED>mode); 3900 } 3901) 3902 3903(define_insn_and_rewrite "*aarch64_adr<mode>_shift" 3904 [(set (match_operand:SVE_24I 0 "register_operand" "=w") 3905 (plus:SVE_24I 3906 (unspec:SVE_24I 3907 [(match_operand 4) 3908 (ashift:SVE_24I 3909 (match_operand:SVE_24I 2 "register_operand" "w") 3910 (match_operand:SVE_24I 3 "const_1_to_3_operand"))] 3911 UNSPEC_PRED_X) 3912 (match_operand:SVE_24I 1 "register_operand" "w")))] 3913 "TARGET_SVE" 3914 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]" 3915 "&& !CONSTANT_P (operands[4])" 3916 { 3917 operands[4] = CONSTM1_RTX (<VPRED>mode); 3918 } 3919) 3920 3921;; Same, but with the index being sign-extended from the low 32 bits. 3922(define_insn_and_rewrite "*aarch64_adr_shift_sxtw" 3923 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3924 (plus:VNx2DI 3925 (unspec:VNx2DI 3926 [(match_operand 4) 3927 (ashift:VNx2DI 3928 (unspec:VNx2DI 3929 [(match_operand 5) 3930 (sign_extend:VNx2DI 3931 (truncate:VNx2SI 3932 (match_operand:VNx2DI 2 "register_operand" "w")))] 3933 UNSPEC_PRED_X) 3934 (match_operand:VNx2DI 3 "const_1_to_3_operand"))] 3935 UNSPEC_PRED_X) 3936 (match_operand:VNx2DI 1 "register_operand" "w")))] 3937 "TARGET_SVE" 3938 "adr\t%0.d, [%1.d, %2.d, sxtw %3]" 3939 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" 3940 { 3941 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode); 3942 } 3943) 3944 3945;; Same, but with the index being zero-extended from the low 32 bits. 3946(define_insn_and_rewrite "*aarch64_adr_shift_uxtw" 3947 [(set (match_operand:VNx2DI 0 "register_operand" "=w") 3948 (plus:VNx2DI 3949 (unspec:VNx2DI 3950 [(match_operand 5) 3951 (ashift:VNx2DI 3952 (and:VNx2DI 3953 (match_operand:VNx2DI 2 "register_operand" "w") 3954 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate")) 3955 (match_operand:VNx2DI 3 "const_1_to_3_operand"))] 3956 UNSPEC_PRED_X) 3957 (match_operand:VNx2DI 1 "register_operand" "w")))] 3958 "TARGET_SVE" 3959 "adr\t%0.d, [%1.d, %2.d, uxtw %3]" 3960 "&& !CONSTANT_P (operands[5])" 3961 { 3962 operands[5] = CONSTM1_RTX (VNx2BImode); 3963 } 3964) 3965 3966;; ------------------------------------------------------------------------- 3967;; ---- [INT] Absolute difference 3968;; ------------------------------------------------------------------------- 3969;; Includes: 3970;; - SABD 3971;; - UABD 3972;; ------------------------------------------------------------------------- 3973 3974;; Unpredicated integer absolute difference. 3975(define_expand "<su>abd<mode>_3" 3976 [(use (match_operand:SVE_I 0 "register_operand")) 3977 (USMAX:SVE_I 3978 (match_operand:SVE_I 1 "register_operand") 3979 (match_operand:SVE_I 2 "register_operand"))] 3980 "TARGET_SVE" 3981 { 3982 rtx pred = aarch64_ptrue_reg (<VPRED>mode); 3983 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1], 3984 operands[2])); 3985 DONE; 3986 } 3987) 3988 3989;; Predicated integer absolute difference. 3990(define_insn "@aarch64_pred_<su>abd<mode>" 3991 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 3992 (minus:SVE_I 3993 (unspec:SVE_I 3994 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 3995 (USMAX:SVE_I 3996 (match_operand:SVE_I 2 "register_operand" "%0, w") 3997 (match_operand:SVE_I 3 "register_operand" "w, w"))] 3998 UNSPEC_PRED_X) 3999 (unspec:SVE_I 4000 [(match_dup 1) 4001 (<max_opp>:SVE_I 4002 (match_dup 2) 4003 (match_dup 3))] 4004 UNSPEC_PRED_X)))] 4005 "TARGET_SVE" 4006 "@ 4007 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4008 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4009 [(set_attr "movprfx" "*,yes")] 4010) 4011 4012(define_expand "@aarch64_cond_<su>abd<mode>" 4013 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4014 (unspec:SVE_FULL_I 4015 [(match_operand:<VPRED> 1 "register_operand") 4016 (minus:SVE_FULL_I 4017 (unspec:SVE_FULL_I 4018 [(match_dup 1) 4019 (USMAX:SVE_FULL_I 4020 (match_operand:SVE_FULL_I 2 "register_operand") 4021 (match_operand:SVE_FULL_I 3 "register_operand"))] 4022 UNSPEC_PRED_X) 4023 (unspec:SVE_FULL_I 4024 [(match_dup 1) 4025 (<max_opp>:SVE_FULL_I 4026 (match_dup 2) 4027 (match_dup 3))] 4028 UNSPEC_PRED_X)) 4029 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4030 UNSPEC_SEL))] 4031 "TARGET_SVE" 4032{ 4033 if (rtx_equal_p (operands[3], operands[4])) 4034 std::swap (operands[2], operands[3]); 4035}) 4036 4037;; Predicated integer absolute difference, merging with the first input. 4038(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2" 4039 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4040 (unspec:SVE_I 4041 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4042 (minus:SVE_I 4043 (unspec:SVE_I 4044 [(match_operand 4) 4045 (USMAX:SVE_I 4046 (match_operand:SVE_I 2 "register_operand" "0, w") 4047 (match_operand:SVE_I 3 "register_operand" "w, w"))] 4048 UNSPEC_PRED_X) 4049 (unspec:SVE_I 4050 [(match_operand 5) 4051 (<max_opp>:SVE_I 4052 (match_dup 2) 4053 (match_dup 3))] 4054 UNSPEC_PRED_X)) 4055 (match_dup 2)] 4056 UNSPEC_SEL))] 4057 "TARGET_SVE" 4058 "@ 4059 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4060 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4061 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" 4062 { 4063 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode); 4064 } 4065 [(set_attr "movprfx" "*,yes")] 4066) 4067 4068;; Predicated integer absolute difference, merging with the second input. 4069(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3" 4070 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4071 (unspec:SVE_I 4072 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4073 (minus:SVE_I 4074 (unspec:SVE_I 4075 [(match_operand 4) 4076 (USMAX:SVE_I 4077 (match_operand:SVE_I 2 "register_operand" "w, w") 4078 (match_operand:SVE_I 3 "register_operand" "0, w"))] 4079 UNSPEC_PRED_X) 4080 (unspec:SVE_I 4081 [(match_operand 5) 4082 (<max_opp>:SVE_I 4083 (match_dup 2) 4084 (match_dup 3))] 4085 UNSPEC_PRED_X)) 4086 (match_dup 3)] 4087 UNSPEC_SEL))] 4088 "TARGET_SVE" 4089 "@ 4090 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4091 movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 4092 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))" 4093 { 4094 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode); 4095 } 4096 [(set_attr "movprfx" "*,yes")] 4097) 4098 4099;; Predicated integer absolute difference, merging with an independent value. 4100(define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any" 4101 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, ?&w") 4102 (unspec:SVE_I 4103 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 4104 (minus:SVE_I 4105 (unspec:SVE_I 4106 [(match_operand 5) 4107 (USMAX:SVE_I 4108 (match_operand:SVE_I 2 "register_operand" "0, w, w, w, w") 4109 (match_operand:SVE_I 3 "register_operand" "w, 0, w, w, w"))] 4110 UNSPEC_PRED_X) 4111 (unspec:SVE_I 4112 [(match_operand 6) 4113 (<max_opp>:SVE_I 4114 (match_dup 2) 4115 (match_dup 3))] 4116 UNSPEC_PRED_X)) 4117 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 4118 UNSPEC_SEL))] 4119 "TARGET_SVE 4120 && !rtx_equal_p (operands[2], operands[4]) 4121 && !rtx_equal_p (operands[3], operands[4])" 4122 "@ 4123 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4124 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4125 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4126 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4127 #" 4128 "&& 1" 4129 { 4130 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6])) 4131 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode); 4132 else if (reload_completed 4133 && register_operand (operands[4], <MODE>mode) 4134 && !rtx_equal_p (operands[0], operands[4])) 4135 { 4136 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4137 operands[4], operands[1])); 4138 operands[4] = operands[2] = operands[0]; 4139 } 4140 else 4141 FAIL; 4142 } 4143 [(set_attr "movprfx" "yes")] 4144) 4145 4146;; ------------------------------------------------------------------------- 4147;; ---- [INT] Saturating addition and subtraction 4148;; ------------------------------------------------------------------------- 4149;; - SQADD 4150;; - SQSUB 4151;; - UQADD 4152;; - UQSUB 4153;; ------------------------------------------------------------------------- 4154 4155;; Unpredicated saturating signed addition and subtraction. 4156(define_insn "@aarch64_sve_<optab><mode>" 4157 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w") 4158 (SBINQOPS:SVE_FULL_I 4159 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w") 4160 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))] 4161 "TARGET_SVE" 4162 "@ 4163 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4164 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 4165 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4166 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2 4167 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 4168 [(set_attr "movprfx" "*,*,yes,yes,*")] 4169) 4170 4171;; Unpredicated saturating unsigned addition and subtraction. 4172(define_insn "@aarch64_sve_<optab><mode>" 4173 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w") 4174 (UBINQOPS:SVE_FULL_I 4175 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w") 4176 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))] 4177 "TARGET_SVE" 4178 "@ 4179 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4180 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2 4181 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 4182 [(set_attr "movprfx" "*,yes,*")] 4183) 4184 4185;; ------------------------------------------------------------------------- 4186;; ---- [INT] Highpart multiplication 4187;; ------------------------------------------------------------------------- 4188;; Includes: 4189;; - SMULH 4190;; - UMULH 4191;; ------------------------------------------------------------------------- 4192 4193;; Unpredicated highpart multiplication. 4194(define_expand "<su>mul<mode>3_highpart" 4195 [(set (match_operand:SVE_I 0 "register_operand") 4196 (unspec:SVE_I 4197 [(match_dup 3) 4198 (unspec:SVE_I 4199 [(match_operand:SVE_I 1 "register_operand") 4200 (match_operand:SVE_I 2 "register_operand")] 4201 MUL_HIGHPART)] 4202 UNSPEC_PRED_X))] 4203 "TARGET_SVE" 4204 { 4205 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4206 } 4207) 4208 4209;; Predicated highpart multiplication. 4210(define_insn "@aarch64_pred_<optab><mode>" 4211 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4212 (unspec:SVE_I 4213 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4214 (unspec:SVE_I 4215 [(match_operand:SVE_I 2 "register_operand" "%0, w") 4216 (match_operand:SVE_I 3 "register_operand" "w, w")] 4217 MUL_HIGHPART)] 4218 UNSPEC_PRED_X))] 4219 "TARGET_SVE" 4220 "@ 4221 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4222 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4223 [(set_attr "movprfx" "*,yes")] 4224) 4225 4226;; Predicated highpart multiplications with merging. 4227(define_expand "@cond_<optab><mode>" 4228 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4229 (unspec:SVE_FULL_I 4230 [(match_operand:<VPRED> 1 "register_operand") 4231 (unspec:SVE_FULL_I 4232 [(match_operand:SVE_FULL_I 2 "register_operand") 4233 (match_operand:SVE_FULL_I 3 "register_operand")] 4234 MUL_HIGHPART) 4235 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4236 UNSPEC_SEL))] 4237 "TARGET_SVE" 4238{ 4239 /* Only target code is aware of these operations, so we don't need 4240 to handle the fully-general case. */ 4241 gcc_assert (rtx_equal_p (operands[2], operands[4]) 4242 || CONSTANT_P (operands[4])); 4243}) 4244 4245;; Predicated highpart multiplications, merging with the first input. 4246(define_insn "*cond_<optab><mode>_2" 4247 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w") 4248 (unspec:SVE_FULL_I 4249 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4250 (unspec:SVE_FULL_I 4251 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w") 4252 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 4253 MUL_HIGHPART) 4254 (match_dup 2)] 4255 UNSPEC_SEL))] 4256 "TARGET_SVE" 4257 "@ 4258 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4259 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4260 [(set_attr "movprfx" "*,yes")]) 4261 4262;; Predicated highpart multiplications, merging with zero. 4263(define_insn "*cond_<optab><mode>_z" 4264 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w") 4265 (unspec:SVE_FULL_I 4266 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4267 (unspec:SVE_FULL_I 4268 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w") 4269 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")] 4270 MUL_HIGHPART) 4271 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")] 4272 UNSPEC_SEL))] 4273 "TARGET_SVE" 4274 "@ 4275 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4276 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4277 [(set_attr "movprfx" "yes")]) 4278 4279;; ------------------------------------------------------------------------- 4280;; ---- [INT] Division 4281;; ------------------------------------------------------------------------- 4282;; Includes: 4283;; - SDIV 4284;; - SDIVR 4285;; - UDIV 4286;; - UDIVR 4287;; ------------------------------------------------------------------------- 4288 4289;; Unpredicated integer division. 4290(define_expand "<optab><mode>3" 4291 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 4292 (unspec:SVE_FULL_SDI 4293 [(match_dup 3) 4294 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4295 (match_operand:SVE_FULL_SDI 1 "register_operand") 4296 (match_operand:SVE_FULL_SDI 2 "register_operand"))] 4297 UNSPEC_PRED_X))] 4298 "TARGET_SVE" 4299 { 4300 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4301 } 4302) 4303 4304;; Integer division predicated with a PTRUE. 4305(define_insn "@aarch64_pred_<optab><mode>" 4306 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w") 4307 (unspec:SVE_FULL_SDI 4308 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 4309 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4310 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w") 4311 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))] 4312 UNSPEC_PRED_X))] 4313 "TARGET_SVE" 4314 "@ 4315 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4316 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4317 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4318 [(set_attr "movprfx" "*,*,yes")] 4319) 4320 4321;; Predicated integer division with merging. 4322(define_expand "@cond_<optab><mode>" 4323 [(set (match_operand:SVE_FULL_SDI 0 "register_operand") 4324 (unspec:SVE_FULL_SDI 4325 [(match_operand:<VPRED> 1 "register_operand") 4326 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4327 (match_operand:SVE_FULL_SDI 2 "register_operand") 4328 (match_operand:SVE_FULL_SDI 3 "register_operand")) 4329 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")] 4330 UNSPEC_SEL))] 4331 "TARGET_SVE" 4332) 4333 4334;; Predicated integer division, merging with the first input. 4335(define_insn "*cond_<optab><mode>_2" 4336 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 4337 (unspec:SVE_FULL_SDI 4338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4339 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4340 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w") 4341 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w")) 4342 (match_dup 2)] 4343 UNSPEC_SEL))] 4344 "TARGET_SVE" 4345 "@ 4346 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4347 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4348 [(set_attr "movprfx" "*,yes")] 4349) 4350 4351;; Predicated integer division, merging with the second input. 4352(define_insn "*cond_<optab><mode>_3" 4353 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 4354 (unspec:SVE_FULL_SDI 4355 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4356 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4357 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w") 4358 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")) 4359 (match_dup 3)] 4360 UNSPEC_SEL))] 4361 "TARGET_SVE" 4362 "@ 4363 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4364 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 4365 [(set_attr "movprfx" "*,yes")] 4366) 4367 4368;; Predicated integer division, merging with an independent value. 4369(define_insn_and_rewrite "*cond_<optab><mode>_any" 4370 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w") 4371 (unspec:SVE_FULL_SDI 4372 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 4373 (SVE_INT_BINARY_SD:SVE_FULL_SDI 4374 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w") 4375 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w")) 4376 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 4377 UNSPEC_SEL))] 4378 "TARGET_SVE 4379 && !rtx_equal_p (operands[2], operands[4]) 4380 && !rtx_equal_p (operands[3], operands[4])" 4381 "@ 4382 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4383 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 4384 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4385 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4386 #" 4387 "&& reload_completed 4388 && register_operand (operands[4], <MODE>mode) 4389 && !rtx_equal_p (operands[0], operands[4])" 4390 { 4391 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4392 operands[4], operands[1])); 4393 operands[4] = operands[2] = operands[0]; 4394 } 4395 [(set_attr "movprfx" "yes")] 4396) 4397 4398;; ------------------------------------------------------------------------- 4399;; ---- [INT] Binary logical operations 4400;; ------------------------------------------------------------------------- 4401;; Includes: 4402;; - AND 4403;; - EOR 4404;; - ORR 4405;; ------------------------------------------------------------------------- 4406 4407;; Unpredicated integer binary logical operations. 4408(define_insn "<optab><mode>3" 4409 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w") 4410 (LOGICAL:SVE_I 4411 (match_operand:SVE_I 1 "register_operand" "%0, w, w") 4412 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))] 4413 "TARGET_SVE" 4414 "@ 4415 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 4416 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2 4417 <logical>\t%0.d, %1.d, %2.d" 4418 [(set_attr "movprfx" "*,yes,*")] 4419) 4420 4421;; Merging forms are handled through SVE_INT_BINARY. 4422 4423;; ------------------------------------------------------------------------- 4424;; ---- [INT] Binary logical operations (inverted second input) 4425;; ------------------------------------------------------------------------- 4426;; Includes: 4427;; - BIC 4428;; ------------------------------------------------------------------------- 4429 4430;; Unpredicated BIC. 4431(define_expand "@aarch64_bic<mode>" 4432 [(set (match_operand:SVE_I 0 "register_operand") 4433 (and:SVE_I 4434 (unspec:SVE_I 4435 [(match_dup 3) 4436 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))] 4437 UNSPEC_PRED_X) 4438 (match_operand:SVE_I 1 "register_operand")))] 4439 "TARGET_SVE" 4440 { 4441 operands[3] = CONSTM1_RTX (<VPRED>mode); 4442 } 4443) 4444 4445;; Predicated BIC. 4446(define_insn_and_rewrite "*bic<mode>3" 4447 [(set (match_operand:SVE_I 0 "register_operand" "=w") 4448 (and:SVE_I 4449 (unspec:SVE_I 4450 [(match_operand 3) 4451 (not:SVE_I 4452 (match_operand:SVE_I 2 "register_operand" "w"))] 4453 UNSPEC_PRED_X) 4454 (match_operand:SVE_I 1 "register_operand" "w")))] 4455 "TARGET_SVE" 4456 "bic\t%0.d, %1.d, %2.d" 4457 "&& !CONSTANT_P (operands[3])" 4458 { 4459 operands[3] = CONSTM1_RTX (<VPRED>mode); 4460 } 4461) 4462 4463;; Predicated BIC with merging. 4464(define_expand "@cond_bic<mode>" 4465 [(set (match_operand:SVE_FULL_I 0 "register_operand") 4466 (unspec:SVE_FULL_I 4467 [(match_operand:<VPRED> 1 "register_operand") 4468 (and:SVE_FULL_I 4469 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand")) 4470 (match_operand:SVE_FULL_I 2 "register_operand")) 4471 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")] 4472 UNSPEC_SEL))] 4473 "TARGET_SVE" 4474) 4475 4476;; Predicated integer BIC, merging with the first input. 4477(define_insn "*cond_bic<mode>_2" 4478 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4479 (unspec:SVE_I 4480 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4481 (and:SVE_I 4482 (not:SVE_I 4483 (match_operand:SVE_I 3 "register_operand" "w, w")) 4484 (match_operand:SVE_I 2 "register_operand" "0, w")) 4485 (match_dup 2)] 4486 UNSPEC_SEL))] 4487 "TARGET_SVE" 4488 "@ 4489 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4490 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4491 [(set_attr "movprfx" "*,yes")] 4492) 4493 4494;; Predicated integer BIC, merging with an independent value. 4495(define_insn_and_rewrite "*cond_bic<mode>_any" 4496 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, ?&w") 4497 (unspec:SVE_I 4498 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 4499 (and:SVE_I 4500 (not:SVE_I 4501 (match_operand:SVE_I 3 "register_operand" "w, w, w, w")) 4502 (match_operand:SVE_I 2 "register_operand" "0, w, w, w")) 4503 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 4504 UNSPEC_SEL))] 4505 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4506 "@ 4507 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4508 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4509 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4510 #" 4511 "&& reload_completed 4512 && register_operand (operands[4], <MODE>mode) 4513 && !rtx_equal_p (operands[0], operands[4])" 4514 { 4515 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4516 operands[4], operands[1])); 4517 operands[4] = operands[2] = operands[0]; 4518 } 4519 [(set_attr "movprfx" "yes")] 4520) 4521 4522;; ------------------------------------------------------------------------- 4523;; ---- [INT] Shifts (rounding towards -Inf) 4524;; ------------------------------------------------------------------------- 4525;; Includes: 4526;; - ASR 4527;; - ASRR 4528;; - LSL 4529;; - LSLR 4530;; - LSR 4531;; - LSRR 4532;; ------------------------------------------------------------------------- 4533 4534;; Unpredicated shift by a scalar, which expands into one of the vector 4535;; shifts below. 4536(define_expand "<ASHIFT:optab><mode>3" 4537 [(set (match_operand:SVE_I 0 "register_operand") 4538 (ASHIFT:SVE_I 4539 (match_operand:SVE_I 1 "register_operand") 4540 (match_operand:<VEL> 2 "general_operand")))] 4541 "TARGET_SVE" 4542 { 4543 rtx amount; 4544 if (CONST_INT_P (operands[2])) 4545 { 4546 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]); 4547 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode)) 4548 amount = force_reg (<MODE>mode, amount); 4549 } 4550 else 4551 { 4552 amount = convert_to_mode (<VEL>mode, operands[2], 0); 4553 amount = expand_vector_broadcast (<MODE>mode, amount); 4554 } 4555 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount)); 4556 DONE; 4557 } 4558) 4559 4560;; Unpredicated shift by a vector. 4561(define_expand "v<optab><mode>3" 4562 [(set (match_operand:SVE_I 0 "register_operand") 4563 (unspec:SVE_I 4564 [(match_dup 3) 4565 (ASHIFT:SVE_I 4566 (match_operand:SVE_I 1 "register_operand") 4567 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))] 4568 UNSPEC_PRED_X))] 4569 "TARGET_SVE" 4570 { 4571 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4572 } 4573) 4574 4575;; Shift by a vector, predicated with a PTRUE. We don't actually need 4576;; the predicate for the first alternative, but using Upa or X isn't 4577;; likely to gain much and would make the instruction seem less uniform 4578;; to the register allocator. 4579(define_insn_and_split "@aarch64_pred_<optab><mode>" 4580 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w") 4581 (unspec:SVE_I 4582 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 4583 (ASHIFT:SVE_I 4584 (match_operand:SVE_I 2 "register_operand" "w, 0, w, w") 4585 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))] 4586 UNSPEC_PRED_X))] 4587 "TARGET_SVE" 4588 "@ 4589 # 4590 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4591 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype> 4592 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4593 "&& reload_completed 4594 && !register_operand (operands[3], <MODE>mode)" 4595 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))] 4596 "" 4597 [(set_attr "movprfx" "*,*,*,yes")] 4598) 4599 4600;; Unpredicated shift operations by a constant (post-RA only). 4601;; These are generated by splitting a predicated instruction whose 4602;; predicate is unused. 4603(define_insn "*post_ra_v<optab><mode>3" 4604 [(set (match_operand:SVE_I 0 "register_operand" "=w") 4605 (ASHIFT:SVE_I 4606 (match_operand:SVE_I 1 "register_operand" "w") 4607 (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))] 4608 "TARGET_SVE && reload_completed" 4609 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2" 4610) 4611 4612;; Predicated integer shift, merging with the first input. 4613(define_insn "*cond_<optab><mode>_2_const" 4614 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4615 (unspec:SVE_I 4616 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4617 (ASHIFT:SVE_I 4618 (match_operand:SVE_I 2 "register_operand" "0, w") 4619 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) 4620 (match_dup 2)] 4621 UNSPEC_SEL))] 4622 "TARGET_SVE" 4623 "@ 4624 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4625 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" 4626 [(set_attr "movprfx" "*,yes")] 4627) 4628 4629;; Predicated integer shift, merging with an independent value. 4630(define_insn_and_rewrite "*cond_<optab><mode>_any_const" 4631 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") 4632 (unspec:SVE_I 4633 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 4634 (ASHIFT:SVE_I 4635 (match_operand:SVE_I 2 "register_operand" "w, w, w") 4636 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")) 4637 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 4638 UNSPEC_SEL))] 4639 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4640 "@ 4641 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4642 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4643 #" 4644 "&& reload_completed 4645 && register_operand (operands[4], <MODE>mode) 4646 && !rtx_equal_p (operands[0], operands[4])" 4647 { 4648 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4649 operands[4], operands[1])); 4650 operands[4] = operands[2] = operands[0]; 4651 } 4652 [(set_attr "movprfx" "yes")] 4653) 4654 4655;; Unpredicated shifts of narrow elements by 64-bit amounts. 4656(define_insn "@aarch64_sve_<sve_int_op><mode>" 4657 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") 4658 (unspec:SVE_FULL_BHSI 4659 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w") 4660 (match_operand:VNx2DI 2 "register_operand" "w")] 4661 SVE_SHIFT_WIDE))] 4662 "TARGET_SVE" 4663 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d" 4664) 4665 4666;; Merging predicated shifts of narrow elements by 64-bit amounts. 4667(define_expand "@cond_<sve_int_op><mode>" 4668 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand") 4669 (unspec:SVE_FULL_BHSI 4670 [(match_operand:<VPRED> 1 "register_operand") 4671 (unspec:SVE_FULL_BHSI 4672 [(match_operand:SVE_FULL_BHSI 2 "register_operand") 4673 (match_operand:VNx2DI 3 "register_operand")] 4674 SVE_SHIFT_WIDE) 4675 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")] 4676 UNSPEC_SEL))] 4677 "TARGET_SVE" 4678) 4679 4680;; Predicated shifts of narrow elements by 64-bit amounts, merging with 4681;; the first input. 4682(define_insn "*cond_<sve_int_op><mode>_m" 4683 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w") 4684 (unspec:SVE_FULL_BHSI 4685 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4686 (unspec:SVE_FULL_BHSI 4687 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") 4688 (match_operand:VNx2DI 3 "register_operand" "w, w")] 4689 SVE_SHIFT_WIDE) 4690 (match_dup 2)] 4691 UNSPEC_SEL))] 4692 "TARGET_SVE" 4693 "@ 4694 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d 4695 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d" 4696 [(set_attr "movprfx" "*, yes")]) 4697 4698;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero. 4699(define_insn "*cond_<sve_int_op><mode>_z" 4700 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w") 4701 (unspec:SVE_FULL_BHSI 4702 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4703 (unspec:SVE_FULL_BHSI 4704 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w") 4705 (match_operand:VNx2DI 3 "register_operand" "w, w")] 4706 SVE_SHIFT_WIDE) 4707 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")] 4708 UNSPEC_SEL))] 4709 "TARGET_SVE" 4710 "@ 4711 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d 4712 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d" 4713 [(set_attr "movprfx" "yes")]) 4714 4715;; ------------------------------------------------------------------------- 4716;; ---- [INT] Shifts (rounding towards 0) 4717;; ------------------------------------------------------------------------- 4718;; Includes: 4719;; - ASRD 4720;; - SQSHLU (SVE2) 4721;; - SRSHR (SVE2) 4722;; - URSHR (SVE2) 4723;; ------------------------------------------------------------------------- 4724 4725;; Unpredicated ASRD. 4726(define_expand "sdiv_pow2<mode>3" 4727 [(set (match_operand:SVE_I 0 "register_operand") 4728 (unspec:SVE_I 4729 [(match_dup 3) 4730 (unspec:SVE_I 4731 [(match_operand:SVE_I 1 "register_operand") 4732 (match_operand 2 "aarch64_simd_rshift_imm")] 4733 UNSPEC_ASRD)] 4734 UNSPEC_PRED_X))] 4735 "TARGET_SVE" 4736 { 4737 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 4738 } 4739) 4740 4741;; Predicated ASRD. 4742(define_insn "*sdiv_pow2<mode>3" 4743 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4744 (unspec:SVE_I 4745 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4746 (unspec:SVE_I 4747 [(match_operand:SVE_I 2 "register_operand" "0, w") 4748 (match_operand:SVE_I 3 "aarch64_simd_rshift_imm")] 4749 UNSPEC_ASRD)] 4750 UNSPEC_PRED_X))] 4751 "TARGET_SVE" 4752 "@ 4753 asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4754 movprfx\t%0, %2\;asrd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" 4755 [(set_attr "movprfx" "*,yes")]) 4756 4757;; Predicated shift with merging. 4758(define_expand "@cond_<sve_int_op><mode>" 4759 [(set (match_operand:SVE_I 0 "register_operand") 4760 (unspec:SVE_I 4761 [(match_operand:<VPRED> 1 "register_operand") 4762 (unspec:SVE_I 4763 [(match_dup 5) 4764 (unspec:SVE_I 4765 [(match_operand:SVE_I 2 "register_operand") 4766 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] 4767 SVE_INT_SHIFT_IMM)] 4768 UNSPEC_PRED_X) 4769 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")] 4770 UNSPEC_SEL))] 4771 "TARGET_SVE" 4772 { 4773 operands[5] = aarch64_ptrue_reg (<VPRED>mode); 4774 } 4775) 4776 4777;; Predicated shift, merging with the first input. 4778(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2" 4779 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 4780 (unspec:SVE_I 4781 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4782 (unspec:SVE_I 4783 [(match_operand 4) 4784 (unspec:SVE_I 4785 [(match_operand:SVE_I 2 "register_operand" "0, w") 4786 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] 4787 SVE_INT_SHIFT_IMM)] 4788 UNSPEC_PRED_X) 4789 (match_dup 2)] 4790 UNSPEC_SEL))] 4791 "TARGET_SVE" 4792 "@ 4793 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4794 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" 4795 "&& !CONSTANT_P (operands[4])" 4796 { 4797 operands[4] = CONSTM1_RTX (<VPRED>mode); 4798 } 4799 [(set_attr "movprfx" "*,yes")]) 4800 4801;; Predicated shift, merging with an independent value. 4802(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any" 4803 [(set (match_operand:SVE_I 0 "register_operand" "=w, &w, ?&w") 4804 (unspec:SVE_I 4805 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 4806 (unspec:SVE_I 4807 [(match_operand 5) 4808 (unspec:SVE_I 4809 [(match_operand:SVE_I 2 "register_operand" "w, w, w") 4810 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")] 4811 SVE_INT_SHIFT_IMM)] 4812 UNSPEC_PRED_X) 4813 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 4814 UNSPEC_SEL))] 4815 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4816 "@ 4817 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4818 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 4819 #" 4820 "&& reload_completed 4821 && register_operand (operands[4], <MODE>mode) 4822 && !rtx_equal_p (operands[0], operands[4])" 4823 { 4824 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4825 operands[4], operands[1])); 4826 operands[4] = operands[2] = operands[0]; 4827 } 4828 [(set_attr "movprfx" "yes")] 4829) 4830 4831;; ------------------------------------------------------------------------- 4832;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs 4833;; ------------------------------------------------------------------------- 4834;; Includes: 4835;; - FSCALE 4836;; - FTSMUL 4837;; - FTSSEL 4838;; ------------------------------------------------------------------------- 4839 4840;; Unpredicated floating-point binary operations that take an integer as 4841;; their second operand. 4842(define_insn "@aarch64_sve_<optab><mode>" 4843 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 4844 (unspec:SVE_FULL_F 4845 [(match_operand:SVE_FULL_F 1 "register_operand" "w") 4846 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] 4847 SVE_FP_BINARY_INT))] 4848 "TARGET_SVE" 4849 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 4850) 4851 4852;; Predicated floating-point binary operations that take an integer 4853;; as their second operand. 4854(define_insn "@aarch64_pred_<optab><mode>" 4855 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 4856 (unspec:SVE_FULL_F 4857 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4858 (match_operand:SI 4 "aarch64_sve_gp_strictness") 4859 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 4860 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] 4861 SVE_COND_FP_BINARY_INT))] 4862 "TARGET_SVE" 4863 "@ 4864 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4865 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4866 [(set_attr "movprfx" "*,yes")] 4867) 4868 4869;; Predicated floating-point binary operations with merging, taking an 4870;; integer as their second operand. 4871(define_expand "@cond_<optab><mode>" 4872 [(set (match_operand:SVE_FULL_F 0 "register_operand") 4873 (unspec:SVE_FULL_F 4874 [(match_operand:<VPRED> 1 "register_operand") 4875 (unspec:SVE_FULL_F 4876 [(match_dup 1) 4877 (const_int SVE_STRICT_GP) 4878 (match_operand:SVE_FULL_F 2 "register_operand") 4879 (match_operand:<V_INT_EQUIV> 3 "register_operand")] 4880 SVE_COND_FP_BINARY_INT) 4881 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] 4882 UNSPEC_SEL))] 4883 "TARGET_SVE" 4884) 4885 4886;; Predicated floating-point binary operations that take an integer as their 4887;; second operand, with inactive lanes coming from the first operand. 4888(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 4889 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 4890 (unspec:SVE_FULL_F 4891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4892 (unspec:SVE_FULL_F 4893 [(match_operand 4) 4894 (const_int SVE_RELAXED_GP) 4895 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 4896 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] 4897 SVE_COND_FP_BINARY_INT) 4898 (match_dup 2)] 4899 UNSPEC_SEL))] 4900 "TARGET_SVE" 4901 "@ 4902 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4903 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4904 "&& !rtx_equal_p (operands[1], operands[4])" 4905 { 4906 operands[4] = copy_rtx (operands[1]); 4907 } 4908 [(set_attr "movprfx" "*,yes")] 4909) 4910 4911(define_insn "*cond_<optab><mode>_2_strict" 4912 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 4913 (unspec:SVE_FULL_F 4914 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 4915 (unspec:SVE_FULL_F 4916 [(match_dup 1) 4917 (const_int SVE_STRICT_GP) 4918 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 4919 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")] 4920 SVE_COND_FP_BINARY_INT) 4921 (match_dup 2)] 4922 UNSPEC_SEL))] 4923 "TARGET_SVE" 4924 "@ 4925 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4926 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 4927 [(set_attr "movprfx" "*,yes")] 4928) 4929 4930;; Predicated floating-point binary operations that take an integer as 4931;; their second operand, with the values of inactive lanes being distinct 4932;; from the other inputs. 4933(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 4934 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 4935 (unspec:SVE_FULL_F 4936 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 4937 (unspec:SVE_FULL_F 4938 [(match_operand 5) 4939 (const_int SVE_RELAXED_GP) 4940 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") 4941 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] 4942 SVE_COND_FP_BINARY_INT) 4943 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 4944 UNSPEC_SEL))] 4945 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4946 "@ 4947 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4948 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4949 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4950 #" 4951 "&& 1" 4952 { 4953 if (reload_completed 4954 && register_operand (operands[4], <MODE>mode) 4955 && !rtx_equal_p (operands[0], operands[4])) 4956 { 4957 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4958 operands[4], operands[1])); 4959 operands[4] = operands[2] = operands[0]; 4960 } 4961 else if (!rtx_equal_p (operands[1], operands[5])) 4962 operands[5] = copy_rtx (operands[1]); 4963 else 4964 FAIL; 4965 } 4966 [(set_attr "movprfx" "yes")] 4967) 4968 4969(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 4970 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 4971 (unspec:SVE_FULL_F 4972 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 4973 (unspec:SVE_FULL_F 4974 [(match_dup 1) 4975 (const_int SVE_STRICT_GP) 4976 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w") 4977 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")] 4978 SVE_COND_FP_BINARY_INT) 4979 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 4980 UNSPEC_SEL))] 4981 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 4982 "@ 4983 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4984 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4985 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 4986 #" 4987 "&& reload_completed 4988 && register_operand (operands[4], <MODE>mode) 4989 && !rtx_equal_p (operands[0], operands[4])" 4990 { 4991 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 4992 operands[4], operands[1])); 4993 operands[4] = operands[2] = operands[0]; 4994 } 4995 [(set_attr "movprfx" "yes")] 4996) 4997 4998;; ------------------------------------------------------------------------- 4999;; ---- [FP] General binary arithmetic corresponding to rtx codes 5000;; ------------------------------------------------------------------------- 5001;; Includes post-RA forms of: 5002;; - FADD 5003;; - FMUL 5004;; - FSUB 5005;; ------------------------------------------------------------------------- 5006 5007;; Unpredicated floating-point binary operations (post-RA only). 5008;; These are generated by splitting a predicated instruction whose 5009;; predicate is unused. 5010(define_insn "*post_ra_<sve_fp_op><mode>3" 5011 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 5012 (SVE_UNPRED_FP_BINARY:SVE_FULL_F 5013 (match_operand:SVE_FULL_F 1 "register_operand" "w") 5014 (match_operand:SVE_FULL_F 2 "register_operand" "w")))] 5015 "TARGET_SVE && reload_completed" 5016 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>") 5017 5018;; ------------------------------------------------------------------------- 5019;; ---- [FP] General binary arithmetic corresponding to unspecs 5020;; ------------------------------------------------------------------------- 5021;; Includes merging forms of: 5022;; - FADD (constant forms handled in the "Addition" section) 5023;; - FDIV 5024;; - FDIVR 5025;; - FMAX 5026;; - FMAXNM (including #0.0 and #1.0) 5027;; - FMIN 5028;; - FMINNM (including #0.0 and #1.0) 5029;; - FMUL (including #0.5 and #2.0) 5030;; - FMULX 5031;; - FRECPS 5032;; - FRSQRTS 5033;; - FSUB (constant forms handled in the "Addition" section) 5034;; - FSUBR (constant forms handled in the "Subtraction" section) 5035;; ------------------------------------------------------------------------- 5036 5037;; Unpredicated floating-point binary operations. 5038(define_insn "@aarch64_sve_<optab><mode>" 5039 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 5040 (unspec:SVE_FULL_F 5041 [(match_operand:SVE_FULL_F 1 "register_operand" "w") 5042 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 5043 SVE_FP_BINARY))] 5044 "TARGET_SVE" 5045 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 5046) 5047 5048;; Unpredicated floating-point binary operations that need to be predicated 5049;; for SVE. 5050(define_expand "<optab><mode>3" 5051 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5052 (unspec:SVE_FULL_F 5053 [(match_dup 3) 5054 (const_int SVE_RELAXED_GP) 5055 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>") 5056 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")] 5057 SVE_COND_FP_BINARY_OPTAB))] 5058 "TARGET_SVE" 5059 { 5060 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 5061 } 5062) 5063 5064;; Predicated floating-point binary operations that have no immediate forms. 5065(define_insn "@aarch64_pred_<optab><mode>" 5066 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") 5067 (unspec:SVE_FULL_F 5068 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5069 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5070 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w") 5071 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")] 5072 SVE_COND_FP_BINARY_REG))] 5073 "TARGET_SVE" 5074 "@ 5075 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5076 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5077 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5078 [(set_attr "movprfx" "*,*,yes")] 5079) 5080 5081;; Predicated floating-point operations with merging. 5082(define_expand "@cond_<optab><mode>" 5083 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5084 (unspec:SVE_FULL_F 5085 [(match_operand:<VPRED> 1 "register_operand") 5086 (unspec:SVE_FULL_F 5087 [(match_dup 1) 5088 (const_int SVE_STRICT_GP) 5089 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>") 5090 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")] 5091 SVE_COND_FP_BINARY) 5092 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] 5093 UNSPEC_SEL))] 5094 "TARGET_SVE" 5095) 5096 5097;; Predicated floating-point operations, merging with the first input. 5098(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 5099 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5100 (unspec:SVE_FULL_F 5101 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5102 (unspec:SVE_FULL_F 5103 [(match_operand 4) 5104 (const_int SVE_RELAXED_GP) 5105 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5106 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5107 SVE_COND_FP_BINARY) 5108 (match_dup 2)] 5109 UNSPEC_SEL))] 5110 "TARGET_SVE" 5111 "@ 5112 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5113 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5114 "&& !rtx_equal_p (operands[1], operands[4])" 5115 { 5116 operands[4] = copy_rtx (operands[1]); 5117 } 5118 [(set_attr "movprfx" "*,yes")] 5119) 5120 5121(define_insn "*cond_<optab><mode>_2_strict" 5122 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5123 (unspec:SVE_FULL_F 5124 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5125 (unspec:SVE_FULL_F 5126 [(match_dup 1) 5127 (const_int SVE_STRICT_GP) 5128 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5129 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5130 SVE_COND_FP_BINARY) 5131 (match_dup 2)] 5132 UNSPEC_SEL))] 5133 "TARGET_SVE" 5134 "@ 5135 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5136 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5137 [(set_attr "movprfx" "*,yes")] 5138) 5139 5140;; Same for operations that take a 1-bit constant. 5141(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed" 5142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") 5143 (unspec:SVE_FULL_F 5144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5145 (unspec:SVE_FULL_F 5146 [(match_operand 4) 5147 (const_int SVE_RELAXED_GP) 5148 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5149 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5150 SVE_COND_FP_BINARY_I1) 5151 (match_dup 2)] 5152 UNSPEC_SEL))] 5153 "TARGET_SVE" 5154 "@ 5155 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5156 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" 5157 "&& !rtx_equal_p (operands[1], operands[4])" 5158 { 5159 operands[4] = copy_rtx (operands[1]); 5160 } 5161 [(set_attr "movprfx" "*,yes")] 5162) 5163 5164(define_insn "*cond_<optab><mode>_2_const_strict" 5165 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") 5166 (unspec:SVE_FULL_F 5167 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5168 (unspec:SVE_FULL_F 5169 [(match_dup 1) 5170 (const_int SVE_STRICT_GP) 5171 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5172 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5173 SVE_COND_FP_BINARY_I1) 5174 (match_dup 2)] 5175 UNSPEC_SEL))] 5176 "TARGET_SVE" 5177 "@ 5178 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5179 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3" 5180 [(set_attr "movprfx" "*,yes")] 5181) 5182 5183;; Predicated floating-point operations, merging with the second input. 5184(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed" 5185 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5186 (unspec:SVE_FULL_F 5187 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5188 (unspec:SVE_FULL_F 5189 [(match_operand 4) 5190 (const_int SVE_RELAXED_GP) 5191 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 5192 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 5193 SVE_COND_FP_BINARY) 5194 (match_dup 3)] 5195 UNSPEC_SEL))] 5196 "TARGET_SVE" 5197 "@ 5198 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5199 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 5200 "&& !rtx_equal_p (operands[1], operands[4])" 5201 { 5202 operands[4] = copy_rtx (operands[1]); 5203 } 5204 [(set_attr "movprfx" "*,yes")] 5205) 5206 5207(define_insn "*cond_<optab><mode>_3_strict" 5208 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5209 (unspec:SVE_FULL_F 5210 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5211 (unspec:SVE_FULL_F 5212 [(match_dup 1) 5213 (const_int SVE_STRICT_GP) 5214 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 5215 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 5216 SVE_COND_FP_BINARY) 5217 (match_dup 3)] 5218 UNSPEC_SEL))] 5219 "TARGET_SVE" 5220 "@ 5221 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5222 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 5223 [(set_attr "movprfx" "*,yes")] 5224) 5225 5226;; Predicated floating-point operations, merging with an independent value. 5227(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 5228 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") 5229 (unspec:SVE_FULL_F 5230 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 5231 (unspec:SVE_FULL_F 5232 [(match_operand 5) 5233 (const_int SVE_RELAXED_GP) 5234 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") 5235 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] 5236 SVE_COND_FP_BINARY) 5237 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 5238 UNSPEC_SEL))] 5239 "TARGET_SVE 5240 && !rtx_equal_p (operands[2], operands[4]) 5241 && !rtx_equal_p (operands[3], operands[4])" 5242 "@ 5243 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5244 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5245 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5246 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5247 #" 5248 "&& 1" 5249 { 5250 if (reload_completed 5251 && register_operand (operands[4], <MODE>mode) 5252 && !rtx_equal_p (operands[0], operands[4])) 5253 { 5254 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5255 operands[4], operands[1])); 5256 operands[4] = operands[2] = operands[0]; 5257 } 5258 else if (!rtx_equal_p (operands[1], operands[5])) 5259 operands[5] = copy_rtx (operands[1]); 5260 else 5261 FAIL; 5262 } 5263 [(set_attr "movprfx" "yes")] 5264) 5265 5266(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 5267 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") 5268 (unspec:SVE_FULL_F 5269 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 5270 (unspec:SVE_FULL_F 5271 [(match_dup 1) 5272 (const_int SVE_STRICT_GP) 5273 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") 5274 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] 5275 SVE_COND_FP_BINARY) 5276 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 5277 UNSPEC_SEL))] 5278 "TARGET_SVE 5279 && !rtx_equal_p (operands[2], operands[4]) 5280 && !rtx_equal_p (operands[3], operands[4])" 5281 "@ 5282 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5283 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5284 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5285 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5286 #" 5287 "&& reload_completed 5288 && register_operand (operands[4], <MODE>mode) 5289 && !rtx_equal_p (operands[0], operands[4])" 5290 { 5291 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5292 operands[4], operands[1])); 5293 operands[4] = operands[2] = operands[0]; 5294 } 5295 [(set_attr "movprfx" "yes")] 5296) 5297 5298;; Same for operations that take a 1-bit constant. 5299(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed" 5300 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5301 (unspec:SVE_FULL_F 5302 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5303 (unspec:SVE_FULL_F 5304 [(match_operand 5) 5305 (const_int SVE_RELAXED_GP) 5306 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") 5307 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5308 SVE_COND_FP_BINARY_I1) 5309 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5310 UNSPEC_SEL))] 5311 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5312 "@ 5313 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5314 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5315 #" 5316 "&& 1" 5317 { 5318 if (reload_completed 5319 && register_operand (operands[4], <MODE>mode) 5320 && !rtx_equal_p (operands[0], operands[4])) 5321 { 5322 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5323 operands[4], operands[1])); 5324 operands[4] = operands[2] = operands[0]; 5325 } 5326 else if (!rtx_equal_p (operands[1], operands[5])) 5327 operands[5] = copy_rtx (operands[1]); 5328 else 5329 FAIL; 5330 } 5331 [(set_attr "movprfx" "yes")] 5332) 5333 5334(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict" 5335 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5336 (unspec:SVE_FULL_F 5337 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5338 (unspec:SVE_FULL_F 5339 [(match_dup 1) 5340 (const_int SVE_STRICT_GP) 5341 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w") 5342 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")] 5343 SVE_COND_FP_BINARY_I1) 5344 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5345 UNSPEC_SEL))] 5346 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5347 "@ 5348 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5349 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5350 #" 5351 "&& reload_completed 5352 && register_operand (operands[4], <MODE>mode) 5353 && !rtx_equal_p (operands[0], operands[4])" 5354 { 5355 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5356 operands[4], operands[1])); 5357 operands[4] = operands[2] = operands[0]; 5358 } 5359 [(set_attr "movprfx" "yes")] 5360) 5361 5362;; ------------------------------------------------------------------------- 5363;; ---- [FP] Addition 5364;; ------------------------------------------------------------------------- 5365;; Includes: 5366;; - FADD 5367;; - FSUB 5368;; ------------------------------------------------------------------------- 5369 5370;; Predicated floating-point addition. 5371(define_insn_and_split "@aarch64_pred_<optab><mode>" 5372 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w") 5373 (unspec:SVE_FULL_F 5374 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl") 5375 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1") 5376 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w") 5377 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")] 5378 SVE_COND_FP_ADD))] 5379 "TARGET_SVE" 5380 "@ 5381 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5382 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5383 # 5384 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5385 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5386 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5387 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5388 ; Split the unpredicated form after reload, so that we don't have 5389 ; the unnecessary PTRUE. 5390 "&& reload_completed 5391 && register_operand (operands[3], <MODE>mode) 5392 && INTVAL (operands[4]) == SVE_RELAXED_GP" 5393 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))] 5394 "" 5395 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] 5396) 5397 5398;; Predicated floating-point addition of a constant, merging with the 5399;; first input. 5400(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed" 5401 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") 5402 (unspec:SVE_FULL_F 5403 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 5404 (unspec:SVE_FULL_F 5405 [(match_operand 4) 5406 (const_int SVE_RELAXED_GP) 5407 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") 5408 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] 5409 UNSPEC_COND_FADD) 5410 (match_dup 2)] 5411 UNSPEC_SEL))] 5412 "TARGET_SVE" 5413 "@ 5414 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5415 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5416 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5417 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3" 5418 "&& !rtx_equal_p (operands[1], operands[4])" 5419 { 5420 operands[4] = copy_rtx (operands[1]); 5421 } 5422 [(set_attr "movprfx" "*,*,yes,yes")] 5423) 5424 5425(define_insn "*cond_add<mode>_2_const_strict" 5426 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w") 5427 (unspec:SVE_FULL_F 5428 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 5429 (unspec:SVE_FULL_F 5430 [(match_dup 1) 5431 (const_int SVE_STRICT_GP) 5432 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w") 5433 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")] 5434 UNSPEC_COND_FADD) 5435 (match_dup 2)] 5436 UNSPEC_SEL))] 5437 "TARGET_SVE" 5438 "@ 5439 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5440 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5441 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5442 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3" 5443 [(set_attr "movprfx" "*,*,yes,yes")] 5444) 5445 5446;; Predicated floating-point addition of a constant, merging with an 5447;; independent value. 5448(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed" 5449 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") 5450 (unspec:SVE_FULL_F 5451 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 5452 (unspec:SVE_FULL_F 5453 [(match_operand 5) 5454 (const_int SVE_RELAXED_GP) 5455 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") 5456 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] 5457 UNSPEC_COND_FADD) 5458 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] 5459 UNSPEC_SEL))] 5460 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5461 "@ 5462 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5463 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5464 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5465 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5466 # 5467 #" 5468 "&& 1" 5469 { 5470 if (reload_completed 5471 && register_operand (operands[4], <MODE>mode) 5472 && !rtx_equal_p (operands[0], operands[4])) 5473 { 5474 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5475 operands[4], operands[1])); 5476 operands[4] = operands[2] = operands[0]; 5477 } 5478 else if (!rtx_equal_p (operands[1], operands[5])) 5479 operands[5] = copy_rtx (operands[1]); 5480 else 5481 FAIL; 5482 } 5483 [(set_attr "movprfx" "yes")] 5484) 5485 5486(define_insn_and_rewrite "*cond_add<mode>_any_const_strict" 5487 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w") 5488 (unspec:SVE_FULL_F 5489 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 5490 (unspec:SVE_FULL_F 5491 [(match_dup 1) 5492 (const_int SVE_STRICT_GP) 5493 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w") 5494 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")] 5495 UNSPEC_COND_FADD) 5496 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")] 5497 UNSPEC_SEL))] 5498 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5499 "@ 5500 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5501 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5502 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 5503 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3 5504 # 5505 #" 5506 "&& reload_completed 5507 && register_operand (operands[4], <MODE>mode) 5508 && !rtx_equal_p (operands[0], operands[4])" 5509 { 5510 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5511 operands[4], operands[1])); 5512 operands[4] = operands[2] = operands[0]; 5513 } 5514 [(set_attr "movprfx" "yes")] 5515) 5516 5517;; Register merging forms are handled through SVE_COND_FP_BINARY. 5518 5519;; ------------------------------------------------------------------------- 5520;; ---- [FP] Complex addition 5521;; ------------------------------------------------------------------------- 5522;; Includes: 5523;; - FCADD 5524;; ------------------------------------------------------------------------- 5525 5526;; Predicated FCADD. 5527(define_insn "@aarch64_pred_<optab><mode>" 5528 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5529 (unspec:SVE_FULL_F 5530 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5531 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5532 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5533 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5534 SVE_COND_FCADD))] 5535 "TARGET_SVE" 5536 "@ 5537 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5538 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" 5539 [(set_attr "movprfx" "*,yes")] 5540) 5541 5542;; Predicated FCADD with merging. 5543(define_expand "@cond_<optab><mode>" 5544 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5545 (unspec:SVE_FULL_F 5546 [(match_operand:<VPRED> 1 "register_operand") 5547 (unspec:SVE_FULL_F 5548 [(match_dup 1) 5549 (const_int SVE_STRICT_GP) 5550 (match_operand:SVE_FULL_F 2 "register_operand") 5551 (match_operand:SVE_FULL_F 3 "register_operand")] 5552 SVE_COND_FCADD) 5553 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] 5554 UNSPEC_SEL))] 5555 "TARGET_SVE" 5556) 5557 5558;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer 5559(define_expand "@cadd<rot><mode>3" 5560 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5561 (unspec:SVE_FULL_F 5562 [(match_dup 3) 5563 (const_int SVE_RELAXED_GP) 5564 (match_operand:SVE_FULL_F 1 "register_operand") 5565 (match_operand:SVE_FULL_F 2 "register_operand")] 5566 SVE_COND_FCADD))] 5567 "TARGET_SVE" 5568{ 5569 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 5570}) 5571 5572;; Predicated FCADD, merging with the first input. 5573(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 5574 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5575 (unspec:SVE_FULL_F 5576 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5577 (unspec:SVE_FULL_F 5578 [(match_operand 4) 5579 (const_int SVE_RELAXED_GP) 5580 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5581 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5582 SVE_COND_FCADD) 5583 (match_dup 2)] 5584 UNSPEC_SEL))] 5585 "TARGET_SVE" 5586 "@ 5587 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5588 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" 5589 "&& !rtx_equal_p (operands[1], operands[4])" 5590 { 5591 operands[4] = copy_rtx (operands[1]); 5592 } 5593 [(set_attr "movprfx" "*,yes")] 5594) 5595 5596(define_insn "*cond_<optab><mode>_2_strict" 5597 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5598 (unspec:SVE_FULL_F 5599 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5600 (unspec:SVE_FULL_F 5601 [(match_dup 1) 5602 (const_int SVE_STRICT_GP) 5603 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5604 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5605 SVE_COND_FCADD) 5606 (match_dup 2)] 5607 UNSPEC_SEL))] 5608 "TARGET_SVE" 5609 "@ 5610 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5611 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>" 5612 [(set_attr "movprfx" "*,yes")] 5613) 5614 5615;; Predicated FCADD, merging with an independent value. 5616(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 5617 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 5618 (unspec:SVE_FULL_F 5619 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 5620 (unspec:SVE_FULL_F 5621 [(match_operand 5) 5622 (const_int SVE_RELAXED_GP) 5623 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") 5624 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] 5625 SVE_COND_FCADD) 5626 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 5627 UNSPEC_SEL))] 5628 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5629 "@ 5630 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5631 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5632 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5633 #" 5634 "&& 1" 5635 { 5636 if (reload_completed 5637 && register_operand (operands[4], <MODE>mode) 5638 && !rtx_equal_p (operands[0], operands[4])) 5639 { 5640 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5641 operands[4], operands[1])); 5642 operands[4] = operands[2] = operands[0]; 5643 } 5644 else if (!rtx_equal_p (operands[1], operands[5])) 5645 operands[5] = copy_rtx (operands[1]); 5646 else 5647 FAIL; 5648 } 5649 [(set_attr "movprfx" "yes")] 5650) 5651 5652(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 5653 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 5654 (unspec:SVE_FULL_F 5655 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 5656 (unspec:SVE_FULL_F 5657 [(match_dup 1) 5658 (const_int SVE_STRICT_GP) 5659 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w") 5660 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")] 5661 SVE_COND_FCADD) 5662 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 5663 UNSPEC_SEL))] 5664 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])" 5665 "@ 5666 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5667 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5668 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot> 5669 #" 5670 "&& reload_completed 5671 && register_operand (operands[4], <MODE>mode) 5672 && !rtx_equal_p (operands[0], operands[4])" 5673 { 5674 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2], 5675 operands[4], operands[1])); 5676 operands[4] = operands[2] = operands[0]; 5677 } 5678 [(set_attr "movprfx" "yes")] 5679) 5680 5681;; ------------------------------------------------------------------------- 5682;; ---- [FP] Subtraction 5683;; ------------------------------------------------------------------------- 5684;; Includes: 5685;; - FSUB 5686;; - FSUBR 5687;; ------------------------------------------------------------------------- 5688 5689;; Predicated floating-point subtraction. 5690(define_insn_and_split "@aarch64_pred_<optab><mode>" 5691 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w") 5692 (unspec:SVE_FULL_F 5693 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 5694 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1") 5695 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w") 5696 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")] 5697 SVE_COND_FP_SUB))] 5698 "TARGET_SVE" 5699 "@ 5700 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5701 # 5702 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5703 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5704 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5705 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5706 ; Split the unpredicated form after reload, so that we don't have 5707 ; the unnecessary PTRUE. 5708 "&& reload_completed 5709 && register_operand (operands[2], <MODE>mode) 5710 && INTVAL (operands[4]) == SVE_RELAXED_GP" 5711 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))] 5712 "" 5713 [(set_attr "movprfx" "*,*,*,*,yes,yes")] 5714) 5715 5716;; Predicated floating-point subtraction from a constant, merging with the 5717;; second input. 5718(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed" 5719 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") 5720 (unspec:SVE_FULL_F 5721 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5722 (unspec:SVE_FULL_F 5723 [(match_operand 4) 5724 (const_int SVE_RELAXED_GP) 5725 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") 5726 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 5727 UNSPEC_COND_FSUB) 5728 (match_dup 3)] 5729 UNSPEC_SEL))] 5730 "TARGET_SVE" 5731 "@ 5732 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5733 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" 5734 "&& !rtx_equal_p (operands[1], operands[4])" 5735 { 5736 operands[4] = copy_rtx (operands[1]); 5737 } 5738 [(set_attr "movprfx" "*,yes")] 5739) 5740 5741(define_insn "*cond_sub<mode>_3_const_strict" 5742 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w") 5743 (unspec:SVE_FULL_F 5744 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5745 (unspec:SVE_FULL_F 5746 [(match_dup 1) 5747 (const_int SVE_STRICT_GP) 5748 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") 5749 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 5750 UNSPEC_COND_FSUB) 5751 (match_dup 3)] 5752 UNSPEC_SEL))] 5753 "TARGET_SVE" 5754 "@ 5755 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5756 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2" 5757 [(set_attr "movprfx" "*,yes")] 5758) 5759 5760;; Predicated floating-point subtraction from a constant, merging with an 5761;; independent value. 5762(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed" 5763 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5764 (unspec:SVE_FULL_F 5765 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5766 (unspec:SVE_FULL_F 5767 [(match_operand 5) 5768 (const_int SVE_RELAXED_GP) 5769 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") 5770 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] 5771 UNSPEC_COND_FSUB) 5772 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5773 UNSPEC_SEL))] 5774 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" 5775 "@ 5776 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5777 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5778 #" 5779 "&& 1" 5780 { 5781 if (reload_completed 5782 && register_operand (operands[4], <MODE>mode) 5783 && !rtx_equal_p (operands[0], operands[4])) 5784 { 5785 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], 5786 operands[4], operands[1])); 5787 operands[4] = operands[3] = operands[0]; 5788 } 5789 else if (!rtx_equal_p (operands[1], operands[5])) 5790 operands[5] = copy_rtx (operands[1]); 5791 else 5792 FAIL; 5793 } 5794 [(set_attr "movprfx" "yes")] 5795) 5796 5797(define_insn_and_rewrite "*cond_sub<mode>_const_strict" 5798 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w") 5799 (unspec:SVE_FULL_F 5800 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 5801 (unspec:SVE_FULL_F 5802 [(match_dup 1) 5803 (const_int SVE_STRICT_GP) 5804 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate") 5805 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")] 5806 UNSPEC_COND_FSUB) 5807 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")] 5808 UNSPEC_SEL))] 5809 "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])" 5810 "@ 5811 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5812 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2 5813 #" 5814 "&& reload_completed 5815 && register_operand (operands[4], <MODE>mode) 5816 && !rtx_equal_p (operands[0], operands[4])" 5817 { 5818 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], 5819 operands[4], operands[1])); 5820 operands[4] = operands[3] = operands[0]; 5821 } 5822 [(set_attr "movprfx" "yes")] 5823) 5824;; Register merging forms are handled through SVE_COND_FP_BINARY. 5825 5826;; ------------------------------------------------------------------------- 5827;; ---- [FP] Absolute difference 5828;; ------------------------------------------------------------------------- 5829;; Includes: 5830;; - FABD 5831;; ------------------------------------------------------------------------- 5832 5833;; Predicated floating-point absolute difference. 5834(define_expand "@aarch64_pred_abd<mode>" 5835 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5836 (unspec:SVE_FULL_F 5837 [(match_operand:<VPRED> 1 "register_operand") 5838 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5839 (unspec:SVE_FULL_F 5840 [(match_dup 1) 5841 (match_dup 4) 5842 (match_operand:SVE_FULL_F 2 "register_operand") 5843 (match_operand:SVE_FULL_F 3 "register_operand")] 5844 UNSPEC_COND_FSUB)] 5845 UNSPEC_COND_FABS))] 5846 "TARGET_SVE" 5847) 5848 5849;; Predicated floating-point absolute difference. 5850(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed" 5851 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5852 (unspec:SVE_FULL_F 5853 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5854 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5855 (unspec:SVE_FULL_F 5856 [(match_operand 5) 5857 (const_int SVE_RELAXED_GP) 5858 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") 5859 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5860 UNSPEC_COND_FSUB)] 5861 UNSPEC_COND_FABS))] 5862 "TARGET_SVE" 5863 "@ 5864 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5865 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5866 "&& !rtx_equal_p (operands[1], operands[5])" 5867 { 5868 operands[5] = copy_rtx (operands[1]); 5869 } 5870 [(set_attr "movprfx" "*,yes")] 5871) 5872 5873(define_insn "*aarch64_pred_abd<mode>_strict" 5874 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5875 (unspec:SVE_FULL_F 5876 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5877 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5878 (unspec:SVE_FULL_F 5879 [(match_dup 1) 5880 (const_int SVE_STRICT_GP) 5881 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w") 5882 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5883 UNSPEC_COND_FSUB)] 5884 UNSPEC_COND_FABS))] 5885 "TARGET_SVE" 5886 "@ 5887 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5888 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5889 [(set_attr "movprfx" "*,yes")] 5890) 5891 5892(define_expand "@aarch64_cond_abd<mode>" 5893 [(set (match_operand:SVE_FULL_F 0 "register_operand") 5894 (unspec:SVE_FULL_F 5895 [(match_operand:<VPRED> 1 "register_operand") 5896 (unspec:SVE_FULL_F 5897 [(match_dup 1) 5898 (const_int SVE_STRICT_GP) 5899 (unspec:SVE_FULL_F 5900 [(match_dup 1) 5901 (const_int SVE_STRICT_GP) 5902 (match_operand:SVE_FULL_F 2 "register_operand") 5903 (match_operand:SVE_FULL_F 3 "register_operand")] 5904 UNSPEC_COND_FSUB)] 5905 UNSPEC_COND_FABS) 5906 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] 5907 UNSPEC_SEL))] 5908 "TARGET_SVE" 5909{ 5910 if (rtx_equal_p (operands[3], operands[4])) 5911 std::swap (operands[2], operands[3]); 5912}) 5913 5914;; Predicated floating-point absolute difference, merging with the first 5915;; input. 5916(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed" 5917 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5918 (unspec:SVE_FULL_F 5919 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5920 (unspec:SVE_FULL_F 5921 [(match_operand 4) 5922 (const_int SVE_RELAXED_GP) 5923 (unspec:SVE_FULL_F 5924 [(match_operand 5) 5925 (const_int SVE_RELAXED_GP) 5926 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5927 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5928 UNSPEC_COND_FSUB)] 5929 UNSPEC_COND_FABS) 5930 (match_dup 2)] 5931 UNSPEC_SEL))] 5932 "TARGET_SVE" 5933 "@ 5934 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5935 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5936 "&& (!rtx_equal_p (operands[1], operands[4]) 5937 || !rtx_equal_p (operands[1], operands[5]))" 5938 { 5939 operands[4] = copy_rtx (operands[1]); 5940 operands[5] = copy_rtx (operands[1]); 5941 } 5942 [(set_attr "movprfx" "*,yes")] 5943) 5944 5945(define_insn "*aarch64_cond_abd<mode>_2_strict" 5946 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5947 (unspec:SVE_FULL_F 5948 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5949 (unspec:SVE_FULL_F 5950 [(match_dup 1) 5951 (match_operand:SI 4 "aarch64_sve_gp_strictness") 5952 (unspec:SVE_FULL_F 5953 [(match_dup 1) 5954 (match_operand:SI 5 "aarch64_sve_gp_strictness") 5955 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 5956 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")] 5957 UNSPEC_COND_FSUB)] 5958 UNSPEC_COND_FABS) 5959 (match_dup 2)] 5960 UNSPEC_SEL))] 5961 "TARGET_SVE" 5962 "@ 5963 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 5964 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 5965 [(set_attr "movprfx" "*,yes")] 5966) 5967 5968;; Predicated floating-point absolute difference, merging with the second 5969;; input. 5970(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed" 5971 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 5972 (unspec:SVE_FULL_F 5973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 5974 (unspec:SVE_FULL_F 5975 [(match_operand 4) 5976 (const_int SVE_RELAXED_GP) 5977 (unspec:SVE_FULL_F 5978 [(match_operand 5) 5979 (const_int SVE_RELAXED_GP) 5980 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 5981 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 5982 UNSPEC_COND_FSUB)] 5983 UNSPEC_COND_FABS) 5984 (match_dup 3)] 5985 UNSPEC_SEL))] 5986 "TARGET_SVE" 5987 "@ 5988 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 5989 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 5990 "&& (!rtx_equal_p (operands[1], operands[4]) 5991 || !rtx_equal_p (operands[1], operands[5]))" 5992 { 5993 operands[4] = copy_rtx (operands[1]); 5994 operands[5] = copy_rtx (operands[1]); 5995 } 5996 [(set_attr "movprfx" "*,yes")] 5997) 5998 5999(define_insn "*aarch64_cond_abd<mode>_3_strict" 6000 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 6001 (unspec:SVE_FULL_F 6002 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 6003 (unspec:SVE_FULL_F 6004 [(match_dup 1) 6005 (match_operand:SI 4 "aarch64_sve_gp_strictness") 6006 (unspec:SVE_FULL_F 6007 [(match_dup 1) 6008 (match_operand:SI 5 "aarch64_sve_gp_strictness") 6009 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 6010 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")] 6011 UNSPEC_COND_FSUB)] 6012 UNSPEC_COND_FABS) 6013 (match_dup 3)] 6014 UNSPEC_SEL))] 6015 "TARGET_SVE" 6016 "@ 6017 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 6018 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>" 6019 [(set_attr "movprfx" "*,yes")] 6020) 6021 6022;; Predicated floating-point absolute difference, merging with an 6023;; independent value. 6024(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed" 6025 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") 6026 (unspec:SVE_FULL_F 6027 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 6028 (unspec:SVE_FULL_F 6029 [(match_operand 5) 6030 (const_int SVE_RELAXED_GP) 6031 (unspec:SVE_FULL_F 6032 [(match_operand 6) 6033 (const_int SVE_RELAXED_GP) 6034 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") 6035 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] 6036 UNSPEC_COND_FSUB)] 6037 UNSPEC_COND_FABS) 6038 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 6039 UNSPEC_SEL))] 6040 "TARGET_SVE 6041 && !rtx_equal_p (operands[2], operands[4]) 6042 && !rtx_equal_p (operands[3], operands[4])" 6043 "@ 6044 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6045 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 6046 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6047 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6048 #" 6049 "&& 1" 6050 { 6051 if (reload_completed 6052 && register_operand (operands[4], <MODE>mode) 6053 && !rtx_equal_p (operands[0], operands[4])) 6054 { 6055 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], 6056 operands[4], operands[1])); 6057 operands[4] = operands[3] = operands[0]; 6058 } 6059 else if (!rtx_equal_p (operands[1], operands[5]) 6060 || !rtx_equal_p (operands[1], operands[6])) 6061 { 6062 operands[5] = copy_rtx (operands[1]); 6063 operands[6] = copy_rtx (operands[1]); 6064 } 6065 else 6066 FAIL; 6067 } 6068 [(set_attr "movprfx" "yes")] 6069) 6070 6071(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict" 6072 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w") 6073 (unspec:SVE_FULL_F 6074 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 6075 (unspec:SVE_FULL_F 6076 [(match_dup 1) 6077 (match_operand:SI 5 "aarch64_sve_gp_strictness") 6078 (unspec:SVE_FULL_F 6079 [(match_dup 1) 6080 (match_operand:SI 6 "aarch64_sve_gp_strictness") 6081 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w") 6082 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")] 6083 UNSPEC_COND_FSUB)] 6084 UNSPEC_COND_FABS) 6085 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")] 6086 UNSPEC_SEL))] 6087 "TARGET_SVE 6088 && !rtx_equal_p (operands[2], operands[4]) 6089 && !rtx_equal_p (operands[3], operands[4])" 6090 "@ 6091 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6092 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype> 6093 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6094 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6095 #" 6096 "&& reload_completed 6097 && register_operand (operands[4], <MODE>mode) 6098 && !rtx_equal_p (operands[0], operands[4])" 6099 { 6100 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3], 6101 operands[4], operands[1])); 6102 operands[4] = operands[3] = operands[0]; 6103 } 6104 [(set_attr "movprfx" "yes")] 6105) 6106 6107;; ------------------------------------------------------------------------- 6108;; ---- [FP] Multiplication 6109;; ------------------------------------------------------------------------- 6110;; Includes: 6111;; - FMUL 6112;; ------------------------------------------------------------------------- 6113 6114;; Predicated floating-point multiplication. 6115(define_insn_and_split "@aarch64_pred_<optab><mode>" 6116 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w") 6117 (unspec:SVE_FULL_F 6118 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl") 6119 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1") 6120 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w") 6121 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")] 6122 SVE_COND_FP_MUL))] 6123 "TARGET_SVE" 6124 "@ 6125 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 6126 # 6127 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6128 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 6129 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 6130 ; Split the unpredicated form after reload, so that we don't have 6131 ; the unnecessary PTRUE. 6132 "&& reload_completed 6133 && register_operand (operands[3], <MODE>mode) 6134 && INTVAL (operands[4]) == SVE_RELAXED_GP" 6135 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))] 6136 "" 6137 [(set_attr "movprfx" "*,*,*,yes,yes")] 6138) 6139 6140;; Merging forms are handled through SVE_COND_FP_BINARY and 6141;; SVE_COND_FP_BINARY_I1. 6142 6143;; Unpredicated multiplication by selected lanes. 6144(define_insn "@aarch64_mul_lane_<mode>" 6145 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 6146 (mult:SVE_FULL_F 6147 (unspec:SVE_FULL_F 6148 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>") 6149 (match_operand:SI 3 "const_int_operand")] 6150 UNSPEC_SVE_LANE_SELECT) 6151 (match_operand:SVE_FULL_F 1 "register_operand" "w")))] 6152 "TARGET_SVE" 6153 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" 6154) 6155 6156;; ------------------------------------------------------------------------- 6157;; ---- [FP] Division 6158;; ------------------------------------------------------------------------- 6159;; The patterns in this section are synthetic. 6160;; ------------------------------------------------------------------------- 6161 6162(define_expand "div<mode>3" 6163 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6164 (unspec:SVE_FULL_F 6165 [(match_dup 3) 6166 (const_int SVE_RELAXED_GP) 6167 (match_operand:SVE_FULL_F 1 "nonmemory_operand") 6168 (match_operand:SVE_FULL_F 2 "register_operand")] 6169 UNSPEC_COND_FDIV))] 6170 "TARGET_SVE" 6171 { 6172 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2])) 6173 DONE; 6174 6175 operands[1] = force_reg (<MODE>mode, operands[1]); 6176 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 6177 } 6178) 6179 6180(define_expand "@aarch64_frecpe<mode>" 6181 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6182 (unspec:SVE_FULL_F 6183 [(match_operand:SVE_FULL_F 1 "register_operand")] 6184 UNSPEC_FRECPE))] 6185 "TARGET_SVE" 6186) 6187 6188(define_expand "@aarch64_frecps<mode>" 6189 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6190 (unspec:SVE_FULL_F 6191 [(match_operand:SVE_FULL_F 1 "register_operand") 6192 (match_operand:SVE_FULL_F 2 "register_operand")] 6193 UNSPEC_FRECPS))] 6194 "TARGET_SVE" 6195) 6196 6197;; ------------------------------------------------------------------------- 6198;; ---- [FP] Binary logical operations 6199;; ------------------------------------------------------------------------- 6200;; Includes 6201;; - AND 6202;; - EOR 6203;; - ORR 6204;; ------------------------------------------------------------------------- 6205 6206;; Binary logical operations on floating-point modes. We avoid subregs 6207;; by providing this, but we need to use UNSPECs since rtx logical ops 6208;; aren't defined for floating-point modes. 6209(define_insn "*<optab><mode>3" 6210 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w") 6211 (unspec:SVE_FULL_F 6212 [(match_operand:SVE_FULL_F 1 "register_operand" "w") 6213 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 6214 LOGICALF))] 6215 "TARGET_SVE" 6216 "<logicalf_op>\t%0.d, %1.d, %2.d" 6217) 6218 6219;; ------------------------------------------------------------------------- 6220;; ---- [FP] Sign copying 6221;; ------------------------------------------------------------------------- 6222;; The patterns in this section are synthetic. 6223;; ------------------------------------------------------------------------- 6224 6225(define_expand "copysign<mode>3" 6226 [(match_operand:SVE_FULL_F 0 "register_operand") 6227 (match_operand:SVE_FULL_F 1 "register_operand") 6228 (match_operand:SVE_FULL_F 2 "register_operand")] 6229 "TARGET_SVE" 6230 { 6231 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); 6232 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode); 6233 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); 6234 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; 6235 6236 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); 6237 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); 6238 6239 emit_insn (gen_and<v_int_equiv>3 6240 (sign, arg2, 6241 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 6242 HOST_WIDE_INT_M1U 6243 << bits))); 6244 emit_insn (gen_and<v_int_equiv>3 6245 (mant, arg1, 6246 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 6247 ~(HOST_WIDE_INT_M1U 6248 << bits)))); 6249 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant)); 6250 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); 6251 DONE; 6252 } 6253) 6254 6255(define_expand "xorsign<mode>3" 6256 [(match_operand:SVE_FULL_F 0 "register_operand") 6257 (match_operand:SVE_FULL_F 1 "register_operand") 6258 (match_operand:SVE_FULL_F 2 "register_operand")] 6259 "TARGET_SVE" 6260 { 6261 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode); 6262 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode); 6263 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1; 6264 6265 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode); 6266 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode); 6267 6268 emit_insn (gen_and<v_int_equiv>3 6269 (sign, arg2, 6270 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode, 6271 HOST_WIDE_INT_M1U 6272 << bits))); 6273 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign)); 6274 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res)); 6275 DONE; 6276 } 6277) 6278 6279;; ------------------------------------------------------------------------- 6280;; ---- [FP] Maximum and minimum 6281;; ------------------------------------------------------------------------- 6282;; Includes: 6283;; - FMAX 6284;; - FMAXNM 6285;; - FMIN 6286;; - FMINNM 6287;; ------------------------------------------------------------------------- 6288 6289;; Unpredicated fmax/fmin (the libm functions). The optabs for the 6290;; smax/smin rtx codes are handled in the generic section above. 6291(define_expand "<fmaxmin><mode>3" 6292 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6293 (unspec:SVE_FULL_F 6294 [(match_dup 3) 6295 (const_int SVE_RELAXED_GP) 6296 (match_operand:SVE_FULL_F 1 "register_operand") 6297 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")] 6298 SVE_COND_FP_MAXMIN_PUBLIC))] 6299 "TARGET_SVE" 6300 { 6301 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 6302 } 6303) 6304 6305;; Predicated fmax/fmin (the libm functions). The optabs for the 6306;; smax/smin rtx codes are handled in the generic section above. 6307(define_expand "cond_<fmaxmin><mode>" 6308 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6309 (unspec:SVE_FULL_F 6310 [(match_operand:<VPRED> 1 "register_operand") 6311 (unspec:SVE_FULL_F 6312 [(match_dup 1) 6313 (const_int SVE_RELAXED_GP) 6314 (match_operand:SVE_FULL_F 2 "register_operand") 6315 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")] 6316 SVE_COND_FP_MAXMIN_PUBLIC) 6317 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")] 6318 UNSPEC_SEL))] 6319 "TARGET_SVE" 6320) 6321 6322;; Predicated floating-point maximum/minimum. 6323(define_insn "@aarch64_pred_<optab><mode>" 6324 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w") 6325 (unspec:SVE_FULL_F 6326 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 6327 (match_operand:SI 4 "aarch64_sve_gp_strictness") 6328 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w") 6329 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")] 6330 SVE_COND_FP_MAXMIN))] 6331 "TARGET_SVE" 6332 "@ 6333 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 6334 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype> 6335 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3 6336 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>" 6337 [(set_attr "movprfx" "*,*,yes,yes")] 6338) 6339 6340;; Merging forms are handled through SVE_COND_FP_BINARY and 6341;; SVE_COND_FP_BINARY_I1. 6342 6343;; ------------------------------------------------------------------------- 6344;; ---- [PRED] Binary logical operations 6345;; ------------------------------------------------------------------------- 6346;; Includes: 6347;; - AND 6348;; - ANDS 6349;; - EOR 6350;; - EORS 6351;; - ORR 6352;; - ORRS 6353;; ------------------------------------------------------------------------- 6354 6355;; Predicate AND. We can reuse one of the inputs as the GP. 6356;; Doubling the second operand is the preferred implementation 6357;; of the MOV alias, so we use that instead of %1/z, %1, %2. 6358(define_insn "and<mode>3" 6359 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6360 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa") 6361 (match_operand:PRED_ALL 2 "register_operand" "Upa")))] 6362 "TARGET_SVE" 6363 "and\t%0.b, %1/z, %2.b, %2.b" 6364) 6365 6366;; Unpredicated predicate EOR and ORR. 6367(define_expand "<optab><mode>3" 6368 [(set (match_operand:PRED_ALL 0 "register_operand") 6369 (and:PRED_ALL 6370 (LOGICAL_OR:PRED_ALL 6371 (match_operand:PRED_ALL 1 "register_operand") 6372 (match_operand:PRED_ALL 2 "register_operand")) 6373 (match_dup 3)))] 6374 "TARGET_SVE" 6375 { 6376 operands[3] = aarch64_ptrue_reg (<MODE>mode); 6377 } 6378) 6379 6380;; Predicated predicate AND, EOR and ORR. 6381(define_insn "@aarch64_pred_<optab><mode>_z" 6382 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6383 (and:PRED_ALL 6384 (LOGICAL:PRED_ALL 6385 (match_operand:PRED_ALL 2 "register_operand" "Upa") 6386 (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6387 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] 6388 "TARGET_SVE" 6389 "<logical>\t%0.b, %1/z, %2.b, %3.b" 6390) 6391 6392;; Perform a logical operation on operands 2 and 3, using operand 1 as 6393;; the GP. Store the result in operand 0 and set the flags in the same 6394;; way as for PTEST. 6395(define_insn "*<optab><mode>3_cc" 6396 [(set (reg:CC_NZC CC_REGNUM) 6397 (unspec:CC_NZC 6398 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6399 (match_operand 4) 6400 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6401 (and:PRED_ALL 6402 (LOGICAL:PRED_ALL 6403 (match_operand:PRED_ALL 2 "register_operand" "Upa") 6404 (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6405 (match_dup 4))] 6406 UNSPEC_PTEST)) 6407 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6408 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3)) 6409 (match_dup 4)))] 6410 "TARGET_SVE" 6411 "<logical>s\t%0.b, %1/z, %2.b, %3.b" 6412) 6413 6414;; Same with just the flags result. 6415(define_insn "*<optab><mode>3_ptest" 6416 [(set (reg:CC_NZC CC_REGNUM) 6417 (unspec:CC_NZC 6418 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6419 (match_operand 4) 6420 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6421 (and:PRED_ALL 6422 (LOGICAL:PRED_ALL 6423 (match_operand:PRED_ALL 2 "register_operand" "Upa") 6424 (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6425 (match_dup 4))] 6426 UNSPEC_PTEST)) 6427 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 6428 "TARGET_SVE" 6429 "<logical>s\t%0.b, %1/z, %2.b, %3.b" 6430) 6431 6432;; ------------------------------------------------------------------------- 6433;; ---- [PRED] Binary logical operations (inverted second input) 6434;; ------------------------------------------------------------------------- 6435;; Includes: 6436;; - BIC 6437;; - ORN 6438;; ------------------------------------------------------------------------- 6439 6440;; Predicated predicate BIC and ORN. 6441(define_insn "aarch64_pred_<nlogical><mode>_z" 6442 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6443 (and:PRED_ALL 6444 (NLOGICAL:PRED_ALL 6445 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6446 (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6447 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] 6448 "TARGET_SVE" 6449 "<nlogical>\t%0.b, %1/z, %2.b, %3.b" 6450) 6451 6452;; Same, but set the flags as a side-effect. 6453(define_insn "*<nlogical><mode>3_cc" 6454 [(set (reg:CC_NZC CC_REGNUM) 6455 (unspec:CC_NZC 6456 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6457 (match_operand 4) 6458 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6459 (and:PRED_ALL 6460 (NLOGICAL:PRED_ALL 6461 (not:PRED_ALL 6462 (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6463 (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6464 (match_dup 4))] 6465 UNSPEC_PTEST)) 6466 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6467 (and:PRED_ALL (NLOGICAL:PRED_ALL 6468 (not:PRED_ALL (match_dup 3)) 6469 (match_dup 2)) 6470 (match_dup 4)))] 6471 "TARGET_SVE" 6472 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b" 6473) 6474 6475;; Same with just the flags result. 6476(define_insn "*<nlogical><mode>3_ptest" 6477 [(set (reg:CC_NZC CC_REGNUM) 6478 (unspec:CC_NZC 6479 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6480 (match_operand 4) 6481 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6482 (and:PRED_ALL 6483 (NLOGICAL:PRED_ALL 6484 (not:PRED_ALL 6485 (match_operand:PRED_ALL 3 "register_operand" "Upa")) 6486 (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6487 (match_dup 4))] 6488 UNSPEC_PTEST)) 6489 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 6490 "TARGET_SVE" 6491 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b" 6492) 6493 6494;; ------------------------------------------------------------------------- 6495;; ---- [PRED] Binary logical operations (inverted result) 6496;; ------------------------------------------------------------------------- 6497;; Includes: 6498;; - NAND 6499;; - NOR 6500;; ------------------------------------------------------------------------- 6501 6502;; Predicated predicate NAND and NOR. 6503(define_insn "aarch64_pred_<logical_nn><mode>_z" 6504 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6505 (and:PRED_ALL 6506 (NLOGICAL:PRED_ALL 6507 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6508 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))) 6509 (match_operand:PRED_ALL 1 "register_operand" "Upa")))] 6510 "TARGET_SVE" 6511 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b" 6512) 6513 6514;; Same, but set the flags as a side-effect. 6515(define_insn "*<logical_nn><mode>3_cc" 6516 [(set (reg:CC_NZC CC_REGNUM) 6517 (unspec:CC_NZC 6518 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6519 (match_operand 4) 6520 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6521 (and:PRED_ALL 6522 (NLOGICAL:PRED_ALL 6523 (not:PRED_ALL 6524 (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6525 (not:PRED_ALL 6526 (match_operand:PRED_ALL 3 "register_operand" "Upa"))) 6527 (match_dup 4))] 6528 UNSPEC_PTEST)) 6529 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 6530 (and:PRED_ALL (NLOGICAL:PRED_ALL 6531 (not:PRED_ALL (match_dup 2)) 6532 (not:PRED_ALL (match_dup 3))) 6533 (match_dup 4)))] 6534 "TARGET_SVE" 6535 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b" 6536) 6537 6538;; Same with just the flags result. 6539(define_insn "*<logical_nn><mode>3_ptest" 6540 [(set (reg:CC_NZC CC_REGNUM) 6541 (unspec:CC_NZC 6542 [(match_operand:VNx16BI 1 "register_operand" "Upa") 6543 (match_operand 4) 6544 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 6545 (and:PRED_ALL 6546 (NLOGICAL:PRED_ALL 6547 (not:PRED_ALL 6548 (match_operand:PRED_ALL 2 "register_operand" "Upa")) 6549 (not:PRED_ALL 6550 (match_operand:PRED_ALL 3 "register_operand" "Upa"))) 6551 (match_dup 4))] 6552 UNSPEC_PTEST)) 6553 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 6554 "TARGET_SVE" 6555 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b" 6556) 6557 6558;; ========================================================================= 6559;; == Ternary arithmetic 6560;; ========================================================================= 6561 6562;; ------------------------------------------------------------------------- 6563;; ---- [INT] MLA and MAD 6564;; ------------------------------------------------------------------------- 6565;; Includes: 6566;; - MAD 6567;; - MLA 6568;; ------------------------------------------------------------------------- 6569 6570;; Unpredicated integer addition of product. 6571(define_expand "fma<mode>4" 6572 [(set (match_operand:SVE_I 0 "register_operand") 6573 (plus:SVE_I 6574 (unspec:SVE_I 6575 [(match_dup 4) 6576 (mult:SVE_I 6577 (match_operand:SVE_I 1 "register_operand") 6578 (match_operand:SVE_I 2 "nonmemory_operand"))] 6579 UNSPEC_PRED_X) 6580 (match_operand:SVE_I 3 "register_operand")))] 6581 "TARGET_SVE" 6582 { 6583 if (aarch64_prepare_sve_int_fma (operands, PLUS)) 6584 DONE; 6585 operands[4] = aarch64_ptrue_reg (<VPRED>mode); 6586 } 6587) 6588 6589;; Predicated integer addition of product. 6590(define_insn "@aarch64_pred_fma<mode>" 6591 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") 6592 (plus:SVE_I 6593 (unspec:SVE_I 6594 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 6595 (mult:SVE_I 6596 (match_operand:SVE_I 2 "register_operand" "%0, w, w") 6597 (match_operand:SVE_I 3 "register_operand" "w, w, w"))] 6598 UNSPEC_PRED_X) 6599 (match_operand:SVE_I 4 "register_operand" "w, 0, w")))] 6600 "TARGET_SVE" 6601 "@ 6602 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6603 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6604 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 6605 [(set_attr "movprfx" "*,*,yes")] 6606) 6607 6608;; Predicated integer addition of product with merging. 6609(define_expand "cond_fma<mode>" 6610 [(set (match_operand:SVE_I 0 "register_operand") 6611 (unspec:SVE_I 6612 [(match_operand:<VPRED> 1 "register_operand") 6613 (plus:SVE_I 6614 (mult:SVE_I 6615 (match_operand:SVE_I 2 "register_operand") 6616 (match_operand:SVE_I 3 "general_operand")) 6617 (match_operand:SVE_I 4 "register_operand")) 6618 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] 6619 UNSPEC_SEL))] 6620 "TARGET_SVE" 6621 { 6622 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS)) 6623 DONE; 6624 /* Swap the multiplication operands if the fallback value is the 6625 second of the two. */ 6626 if (rtx_equal_p (operands[3], operands[5])) 6627 std::swap (operands[2], operands[3]); 6628 } 6629) 6630 6631;; Predicated integer addition of product, merging with the first input. 6632(define_insn "*cond_fma<mode>_2" 6633 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 6634 (unspec:SVE_I 6635 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 6636 (plus:SVE_I 6637 (mult:SVE_I 6638 (match_operand:SVE_I 2 "register_operand" "0, w") 6639 (match_operand:SVE_I 3 "register_operand" "w, w")) 6640 (match_operand:SVE_I 4 "register_operand" "w, w")) 6641 (match_dup 2)] 6642 UNSPEC_SEL))] 6643 "TARGET_SVE" 6644 "@ 6645 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6646 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" 6647 [(set_attr "movprfx" "*,yes")] 6648) 6649 6650;; Predicated integer addition of product, merging with the third input. 6651(define_insn "*cond_fma<mode>_4" 6652 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 6653 (unspec:SVE_I 6654 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 6655 (plus:SVE_I 6656 (mult:SVE_I 6657 (match_operand:SVE_I 2 "register_operand" "w, w") 6658 (match_operand:SVE_I 3 "register_operand" "w, w")) 6659 (match_operand:SVE_I 4 "register_operand" "0, w")) 6660 (match_dup 4)] 6661 UNSPEC_SEL))] 6662 "TARGET_SVE" 6663 "@ 6664 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6665 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 6666 [(set_attr "movprfx" "*,yes")] 6667) 6668 6669;; Predicated integer addition of product, merging with an independent value. 6670(define_insn_and_rewrite "*cond_fma<mode>_any" 6671 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") 6672 (unspec:SVE_I 6673 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 6674 (plus:SVE_I 6675 (mult:SVE_I 6676 (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") 6677 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w")) 6678 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w")) 6679 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] 6680 UNSPEC_SEL))] 6681 "TARGET_SVE 6682 && !rtx_equal_p (operands[2], operands[5]) 6683 && !rtx_equal_p (operands[3], operands[5]) 6684 && !rtx_equal_p (operands[4], operands[5])" 6685 "@ 6686 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6687 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6688 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6689 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> 6690 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6691 #" 6692 "&& reload_completed 6693 && register_operand (operands[5], <MODE>mode) 6694 && !rtx_equal_p (operands[0], operands[5])" 6695 { 6696 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 6697 operands[5], operands[1])); 6698 operands[5] = operands[4] = operands[0]; 6699 } 6700 [(set_attr "movprfx" "yes")] 6701) 6702 6703;; ------------------------------------------------------------------------- 6704;; ---- [INT] MLS and MSB 6705;; ------------------------------------------------------------------------- 6706;; Includes: 6707;; - MLS 6708;; - MSB 6709;; ------------------------------------------------------------------------- 6710 6711;; Unpredicated integer subtraction of product. 6712(define_expand "fnma<mode>4" 6713 [(set (match_operand:SVE_I 0 "register_operand") 6714 (minus:SVE_I 6715 (match_operand:SVE_I 3 "register_operand") 6716 (unspec:SVE_I 6717 [(match_dup 4) 6718 (mult:SVE_I 6719 (match_operand:SVE_I 1 "register_operand") 6720 (match_operand:SVE_I 2 "general_operand"))] 6721 UNSPEC_PRED_X)))] 6722 "TARGET_SVE" 6723 { 6724 if (aarch64_prepare_sve_int_fma (operands, MINUS)) 6725 DONE; 6726 operands[4] = aarch64_ptrue_reg (<VPRED>mode); 6727 } 6728) 6729 6730;; Predicated integer subtraction of product. 6731(define_insn "@aarch64_pred_fnma<mode>" 6732 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w") 6733 (minus:SVE_I 6734 (match_operand:SVE_I 4 "register_operand" "w, 0, w") 6735 (unspec:SVE_I 6736 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 6737 (mult:SVE_I 6738 (match_operand:SVE_I 2 "register_operand" "%0, w, w") 6739 (match_operand:SVE_I 3 "register_operand" "w, w, w"))] 6740 UNSPEC_PRED_X)))] 6741 "TARGET_SVE" 6742 "@ 6743 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6744 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6745 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 6746 [(set_attr "movprfx" "*,*,yes")] 6747) 6748 6749;; Predicated integer subtraction of product with merging. 6750(define_expand "cond_fnma<mode>" 6751 [(set (match_operand:SVE_I 0 "register_operand") 6752 (unspec:SVE_I 6753 [(match_operand:<VPRED> 1 "register_operand") 6754 (minus:SVE_I 6755 (match_operand:SVE_I 4 "register_operand") 6756 (mult:SVE_I 6757 (match_operand:SVE_I 2 "register_operand") 6758 (match_operand:SVE_I 3 "general_operand"))) 6759 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")] 6760 UNSPEC_SEL))] 6761 "TARGET_SVE" 6762 { 6763 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS)) 6764 DONE; 6765 /* Swap the multiplication operands if the fallback value is the 6766 second of the two. */ 6767 if (rtx_equal_p (operands[3], operands[5])) 6768 std::swap (operands[2], operands[3]); 6769 } 6770) 6771 6772;; Predicated integer subtraction of product, merging with the first input. 6773(define_insn "*cond_fnma<mode>_2" 6774 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 6775 (unspec:SVE_I 6776 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 6777 (minus:SVE_I 6778 (match_operand:SVE_I 4 "register_operand" "w, w") 6779 (mult:SVE_I 6780 (match_operand:SVE_I 2 "register_operand" "0, w") 6781 (match_operand:SVE_I 3 "register_operand" "w, w"))) 6782 (match_dup 2)] 6783 UNSPEC_SEL))] 6784 "TARGET_SVE" 6785 "@ 6786 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6787 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" 6788 [(set_attr "movprfx" "*,yes")] 6789) 6790 6791;; Predicated integer subtraction of product, merging with the third input. 6792(define_insn "*cond_fnma<mode>_4" 6793 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w") 6794 (unspec:SVE_I 6795 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 6796 (minus:SVE_I 6797 (match_operand:SVE_I 4 "register_operand" "0, w") 6798 (mult:SVE_I 6799 (match_operand:SVE_I 2 "register_operand" "w, w") 6800 (match_operand:SVE_I 3 "register_operand" "w, w"))) 6801 (match_dup 4)] 6802 UNSPEC_SEL))] 6803 "TARGET_SVE" 6804 "@ 6805 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6806 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 6807 [(set_attr "movprfx" "*,yes")] 6808) 6809 6810;; Predicated integer subtraction of product, merging with an 6811;; independent value. 6812(define_insn_and_rewrite "*cond_fnma<mode>_any" 6813 [(set (match_operand:SVE_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") 6814 (unspec:SVE_I 6815 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 6816 (minus:SVE_I 6817 (match_operand:SVE_I 4 "register_operand" "w, 0, w, w, w, w") 6818 (mult:SVE_I 6819 (match_operand:SVE_I 2 "register_operand" "w, w, 0, w, w, w") 6820 (match_operand:SVE_I 3 "register_operand" "w, w, w, 0, w, w"))) 6821 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] 6822 UNSPEC_SEL))] 6823 "TARGET_SVE 6824 && !rtx_equal_p (operands[2], operands[5]) 6825 && !rtx_equal_p (operands[3], operands[5]) 6826 && !rtx_equal_p (operands[4], operands[5])" 6827 "@ 6828 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6829 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6830 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 6831 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> 6832 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 6833 #" 6834 "&& reload_completed 6835 && register_operand (operands[5], <MODE>mode) 6836 && !rtx_equal_p (operands[0], operands[5])" 6837 { 6838 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 6839 operands[5], operands[1])); 6840 operands[5] = operands[4] = operands[0]; 6841 } 6842 [(set_attr "movprfx" "yes")] 6843) 6844 6845;; ------------------------------------------------------------------------- 6846;; ---- [INT] Dot product 6847;; ------------------------------------------------------------------------- 6848;; Includes: 6849;; - SDOT 6850;; - SUDOT (I8MM) 6851;; - UDOT 6852;; - USDOT (I8MM) 6853;; ------------------------------------------------------------------------- 6854 6855;; Four-element integer dot-product with accumulation. 6856(define_insn "<sur>dot_prod<vsi2qi>" 6857 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 6858 (plus:SVE_FULL_SDI 6859 (unspec:SVE_FULL_SDI 6860 [(match_operand:<VSI2QI> 1 "register_operand" "w, w") 6861 (match_operand:<VSI2QI> 2 "register_operand" "w, w")] 6862 DOTPROD) 6863 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))] 6864 "TARGET_SVE" 6865 "@ 6866 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth> 6867 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>" 6868 [(set_attr "movprfx" "*,yes")] 6869) 6870 6871;; Four-element integer dot-product by selected lanes with accumulation. 6872(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" 6873 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w") 6874 (plus:SVE_FULL_SDI 6875 (unspec:SVE_FULL_SDI 6876 [(match_operand:<VSI2QI> 1 "register_operand" "w, w") 6877 (unspec:<VSI2QI> 6878 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") 6879 (match_operand:SI 3 "const_int_operand")] 6880 UNSPEC_SVE_LANE_SELECT)] 6881 DOTPROD) 6882 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))] 6883 "TARGET_SVE" 6884 "@ 6885 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3] 6886 movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]" 6887 [(set_attr "movprfx" "*,yes")] 6888) 6889 6890(define_insn "@<sur>dot_prod<vsi2qi>" 6891 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") 6892 (plus:VNx4SI_ONLY 6893 (unspec:VNx4SI_ONLY 6894 [(match_operand:<VSI2QI> 1 "register_operand" "w, w") 6895 (match_operand:<VSI2QI> 2 "register_operand" "w, w")] 6896 DOTPROD_US_ONLY) 6897 (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))] 6898 "TARGET_SVE_I8MM" 6899 "@ 6900 <sur>dot\\t%0.s, %1.b, %2.b 6901 movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b" 6902 [(set_attr "movprfx" "*,yes")] 6903) 6904 6905(define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>" 6906 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") 6907 (plus:VNx4SI_ONLY 6908 (unspec:VNx4SI_ONLY 6909 [(match_operand:<VSI2QI> 1 "register_operand" "w, w") 6910 (unspec:<VSI2QI> 6911 [(match_operand:<VSI2QI> 2 "register_operand" "y, y") 6912 (match_operand:SI 3 "const_int_operand")] 6913 UNSPEC_SVE_LANE_SELECT)] 6914 DOTPROD_I8MM) 6915 (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))] 6916 "TARGET_SVE_I8MM" 6917 "@ 6918 <sur>dot\\t%0.s, %1.b, %2.b[%3] 6919 movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]" 6920 [(set_attr "movprfx" "*,yes")] 6921) 6922 6923;; ------------------------------------------------------------------------- 6924;; ---- [INT] Sum of absolute differences 6925;; ------------------------------------------------------------------------- 6926;; The patterns in this section are synthetic. 6927;; ------------------------------------------------------------------------- 6928 6929;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in 6930;; operands 1 and 2. The sequence also has to perform a widening reduction of 6931;; the difference into a vector and accumulate that into operand 3 before 6932;; copying that into the result operand 0. 6933;; Perform that with a sequence of: 6934;; MOV ones.b, #1 6935;; [SU]ABD diff.b, p0/m, op1.b, op2.b 6936;; MOVPRFX op0, op3 // If necessary 6937;; UDOT op0.s, diff.b, ones.b 6938(define_expand "<sur>sad<vsi2qi>" 6939 [(use (match_operand:SVE_FULL_SDI 0 "register_operand")) 6940 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand")) 6941 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL) 6942 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))] 6943 "TARGET_SVE" 6944 { 6945 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode)); 6946 rtx diff = gen_reg_rtx (<VSI2QI>mode); 6947 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2])); 6948 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3])); 6949 DONE; 6950 } 6951) 6952 6953;; ------------------------------------------------------------------------- 6954;; ---- [INT] Matrix multiply-accumulate 6955;; ------------------------------------------------------------------------- 6956;; Includes: 6957;; - SMMLA (I8MM) 6958;; - UMMLA (I8MM) 6959;; - USMMLA (I8MM) 6960;; ------------------------------------------------------------------------- 6961 6962(define_insn "@aarch64_sve_add_<optab><vsi2qi>" 6963 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") 6964 (plus:VNx4SI_ONLY 6965 (unspec:VNx4SI_ONLY 6966 [(match_operand:<VSI2QI> 2 "register_operand" "w, w") 6967 (match_operand:<VSI2QI> 3 "register_operand" "w, w")] 6968 MATMUL) 6969 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] 6970 "TARGET_SVE_I8MM" 6971 "@ 6972 <sur>mmla\\t%0.s, %2.b, %3.b 6973 movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b" 6974 [(set_attr "movprfx" "*,yes")] 6975) 6976 6977;; ------------------------------------------------------------------------- 6978;; ---- [FP] General ternary arithmetic corresponding to unspecs 6979;; ------------------------------------------------------------------------- 6980;; Includes merging patterns for: 6981;; - FMAD 6982;; - FMLA 6983;; - FMLS 6984;; - FMSB 6985;; - FNMAD 6986;; - FNMLA 6987;; - FNMLS 6988;; - FNMSB 6989;; ------------------------------------------------------------------------- 6990 6991;; Unpredicated floating-point ternary operations. 6992(define_expand "<optab><mode>4" 6993 [(set (match_operand:SVE_FULL_F 0 "register_operand") 6994 (unspec:SVE_FULL_F 6995 [(match_dup 4) 6996 (const_int SVE_RELAXED_GP) 6997 (match_operand:SVE_FULL_F 1 "register_operand") 6998 (match_operand:SVE_FULL_F 2 "register_operand") 6999 (match_operand:SVE_FULL_F 3 "register_operand")] 7000 SVE_COND_FP_TERNARY))] 7001 "TARGET_SVE" 7002 { 7003 operands[4] = aarch64_ptrue_reg (<VPRED>mode); 7004 } 7005) 7006 7007;; Predicated floating-point ternary operations. 7008(define_insn "@aarch64_pred_<optab><mode>" 7009 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w") 7010 (unspec:SVE_FULL_F 7011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl") 7012 (match_operand:SI 5 "aarch64_sve_gp_strictness") 7013 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w") 7014 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w") 7015 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")] 7016 SVE_COND_FP_TERNARY))] 7017 "TARGET_SVE" 7018 "@ 7019 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7020 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7021 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 7022 [(set_attr "movprfx" "*,*,yes")] 7023) 7024 7025;; Predicated floating-point ternary operations with merging. 7026(define_expand "@cond_<optab><mode>" 7027 [(set (match_operand:SVE_FULL_F 0 "register_operand") 7028 (unspec:SVE_FULL_F 7029 [(match_operand:<VPRED> 1 "register_operand") 7030 (unspec:SVE_FULL_F 7031 [(match_dup 1) 7032 (const_int SVE_STRICT_GP) 7033 (match_operand:SVE_FULL_F 2 "register_operand") 7034 (match_operand:SVE_FULL_F 3 "register_operand") 7035 (match_operand:SVE_FULL_F 4 "register_operand")] 7036 SVE_COND_FP_TERNARY) 7037 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] 7038 UNSPEC_SEL))] 7039 "TARGET_SVE" 7040{ 7041 /* Swap the multiplication operands if the fallback value is the 7042 second of the two. */ 7043 if (rtx_equal_p (operands[3], operands[5])) 7044 std::swap (operands[2], operands[3]); 7045}) 7046 7047;; Predicated floating-point ternary operations, merging with the 7048;; first input. 7049(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed" 7050 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7051 (unspec:SVE_FULL_F 7052 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7053 (unspec:SVE_FULL_F 7054 [(match_operand 5) 7055 (const_int SVE_RELAXED_GP) 7056 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 7057 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7058 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] 7059 SVE_COND_FP_TERNARY) 7060 (match_dup 2)] 7061 UNSPEC_SEL))] 7062 "TARGET_SVE" 7063 "@ 7064 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7065 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" 7066 "&& !rtx_equal_p (operands[1], operands[5])" 7067 { 7068 operands[5] = copy_rtx (operands[1]); 7069 } 7070 [(set_attr "movprfx" "*,yes")] 7071) 7072 7073(define_insn "*cond_<optab><mode>_2_strict" 7074 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7075 (unspec:SVE_FULL_F 7076 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7077 (unspec:SVE_FULL_F 7078 [(match_dup 1) 7079 (const_int SVE_STRICT_GP) 7080 (match_operand:SVE_FULL_F 2 "register_operand" "0, w") 7081 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7082 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")] 7083 SVE_COND_FP_TERNARY) 7084 (match_dup 2)] 7085 UNSPEC_SEL))] 7086 "TARGET_SVE" 7087 "@ 7088 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7089 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>" 7090 [(set_attr "movprfx" "*,yes")] 7091) 7092 7093;; Predicated floating-point ternary operations, merging with the 7094;; third input. 7095(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" 7096 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7097 (unspec:SVE_FULL_F 7098 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7099 (unspec:SVE_FULL_F 7100 [(match_operand 5) 7101 (const_int SVE_RELAXED_GP) 7102 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7103 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7104 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7105 SVE_COND_FP_TERNARY) 7106 (match_dup 4)] 7107 UNSPEC_SEL))] 7108 "TARGET_SVE" 7109 "@ 7110 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7111 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 7112 "&& !rtx_equal_p (operands[1], operands[5])" 7113 { 7114 operands[5] = copy_rtx (operands[1]); 7115 } 7116 [(set_attr "movprfx" "*,yes")] 7117) 7118 7119(define_insn "*cond_<optab><mode>_4_strict" 7120 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7121 (unspec:SVE_FULL_F 7122 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7123 (unspec:SVE_FULL_F 7124 [(match_dup 1) 7125 (const_int SVE_STRICT_GP) 7126 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7127 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7128 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7129 SVE_COND_FP_TERNARY) 7130 (match_dup 4)] 7131 UNSPEC_SEL))] 7132 "TARGET_SVE" 7133 "@ 7134 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7135 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>" 7136 [(set_attr "movprfx" "*,yes")] 7137) 7138 7139;; Predicated floating-point ternary operations, merging with an 7140;; independent value. 7141(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 7142 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") 7143 (unspec:SVE_FULL_F 7144 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 7145 (unspec:SVE_FULL_F 7146 [(match_operand 6) 7147 (const_int SVE_RELAXED_GP) 7148 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") 7149 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") 7150 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] 7151 SVE_COND_FP_TERNARY) 7152 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] 7153 UNSPEC_SEL))] 7154 "TARGET_SVE 7155 && !rtx_equal_p (operands[2], operands[5]) 7156 && !rtx_equal_p (operands[3], operands[5]) 7157 && !rtx_equal_p (operands[4], operands[5])" 7158 "@ 7159 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7160 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7161 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7162 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> 7163 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7164 #" 7165 "&& 1" 7166 { 7167 if (reload_completed 7168 && register_operand (operands[5], <MODE>mode) 7169 && !rtx_equal_p (operands[0], operands[5])) 7170 { 7171 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 7172 operands[5], operands[1])); 7173 operands[5] = operands[4] = operands[0]; 7174 } 7175 else if (!rtx_equal_p (operands[1], operands[6])) 7176 operands[6] = copy_rtx (operands[1]); 7177 else 7178 FAIL; 7179 } 7180 [(set_attr "movprfx" "yes")] 7181) 7182 7183(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 7184 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w") 7185 (unspec:SVE_FULL_F 7186 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 7187 (unspec:SVE_FULL_F 7188 [(match_dup 1) 7189 (const_int SVE_STRICT_GP) 7190 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w") 7191 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w") 7192 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")] 7193 SVE_COND_FP_TERNARY) 7194 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")] 7195 UNSPEC_SEL))] 7196 "TARGET_SVE 7197 && !rtx_equal_p (operands[2], operands[5]) 7198 && !rtx_equal_p (operands[3], operands[5]) 7199 && !rtx_equal_p (operands[4], operands[5])" 7200 "@ 7201 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7202 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7203 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype> 7204 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype> 7205 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype> 7206 #" 7207 "&& reload_completed 7208 && register_operand (operands[5], <MODE>mode) 7209 && !rtx_equal_p (operands[0], operands[5])" 7210 { 7211 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 7212 operands[5], operands[1])); 7213 operands[5] = operands[4] = operands[0]; 7214 } 7215 [(set_attr "movprfx" "yes")] 7216) 7217 7218;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using 7219;; (fma ...) since target-independent code won't understand the indexing. 7220(define_insn "@aarch64_<optab>_lane_<mode>" 7221 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7222 (unspec:SVE_FULL_F 7223 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w") 7224 (unspec:SVE_FULL_F 7225 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>") 7226 (match_operand:SI 3 "const_int_operand")] 7227 UNSPEC_SVE_LANE_SELECT) 7228 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7229 SVE_FP_TERNARY_LANE))] 7230 "TARGET_SVE" 7231 "@ 7232 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3] 7233 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" 7234 [(set_attr "movprfx" "*,yes")] 7235) 7236 7237;; ------------------------------------------------------------------------- 7238;; ---- [FP] Complex multiply-add 7239;; ------------------------------------------------------------------------- 7240;; Includes merging patterns for: 7241;; - FCMLA 7242;; ------------------------------------------------------------------------- 7243 7244;; Predicated FCMLA. 7245(define_insn "@aarch64_pred_<optab><mode>" 7246 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7247 (unspec:SVE_FULL_F 7248 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7249 (match_operand:SI 5 "aarch64_sve_gp_strictness") 7250 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7251 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7252 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7253 SVE_COND_FCMLA))] 7254 "TARGET_SVE" 7255 "@ 7256 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7257 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" 7258 [(set_attr "movprfx" "*,yes")] 7259) 7260 7261;; unpredicated optab pattern for auto-vectorizer 7262;; The complex mla/mls operations always need to expand to two instructions. 7263;; The first operation does half the computation and the second does the 7264;; remainder. Because of this, expand early. 7265(define_expand "cml<fcmac1><conj_op><mode>4" 7266 [(set (match_operand:SVE_FULL_F 0 "register_operand") 7267 (unspec:SVE_FULL_F 7268 [(match_dup 4) 7269 (match_dup 5) 7270 (match_operand:SVE_FULL_F 1 "register_operand") 7271 (match_operand:SVE_FULL_F 2 "register_operand") 7272 (match_operand:SVE_FULL_F 3 "register_operand")] 7273 FCMLA_OP))] 7274 "TARGET_SVE" 7275{ 7276 operands[4] = aarch64_ptrue_reg (<VPRED>mode); 7277 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode); 7278 rtx tmp = gen_reg_rtx (<MODE>mode); 7279 emit_insn 7280 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], 7281 operands[2], operands[1], 7282 operands[3], operands[5])); 7283 emit_insn 7284 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], 7285 operands[2], operands[1], 7286 tmp, operands[5])); 7287 DONE; 7288}) 7289 7290;; unpredicated optab pattern for auto-vectorizer 7291;; The complex mul operations always need to expand to two instructions. 7292;; The first operation does half the computation and the second does the 7293;; remainder. Because of this, expand early. 7294(define_expand "cmul<conj_op><mode>3" 7295 [(set (match_operand:SVE_FULL_F 0 "register_operand") 7296 (unspec:SVE_FULL_F 7297 [(match_operand:SVE_FULL_F 1 "register_operand") 7298 (match_operand:SVE_FULL_F 2 "register_operand")] 7299 FCMUL_OP))] 7300 "TARGET_SVE" 7301{ 7302 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode); 7303 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode); 7304 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode)); 7305 rtx tmp = gen_reg_rtx (<MODE>mode); 7306 emit_insn 7307 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg, 7308 operands[2], operands[1], 7309 accum, gp_mode)); 7310 emit_insn 7311 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg, 7312 operands[2], operands[1], 7313 tmp, gp_mode)); 7314 DONE; 7315}) 7316 7317;; Predicated FCMLA with merging. 7318(define_expand "@cond_<optab><mode>" 7319 [(set (match_operand:SVE_FULL_F 0 "register_operand") 7320 (unspec:SVE_FULL_F 7321 [(match_operand:<VPRED> 1 "register_operand") 7322 (unspec:SVE_FULL_F 7323 [(match_dup 1) 7324 (const_int SVE_STRICT_GP) 7325 (match_operand:SVE_FULL_F 2 "register_operand") 7326 (match_operand:SVE_FULL_F 3 "register_operand") 7327 (match_operand:SVE_FULL_F 4 "register_operand")] 7328 SVE_COND_FCMLA) 7329 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")] 7330 UNSPEC_SEL))] 7331 "TARGET_SVE" 7332) 7333 7334;; Predicated FCMLA, merging with the third input. 7335(define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed" 7336 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7337 (unspec:SVE_FULL_F 7338 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7339 (unspec:SVE_FULL_F 7340 [(match_operand 5) 7341 (const_int SVE_RELAXED_GP) 7342 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7343 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7344 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7345 SVE_COND_FCMLA) 7346 (match_dup 4)] 7347 UNSPEC_SEL))] 7348 "TARGET_SVE" 7349 "@ 7350 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7351 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" 7352 "&& !rtx_equal_p (operands[1], operands[5])" 7353 { 7354 operands[5] = copy_rtx (operands[1]); 7355 } 7356 [(set_attr "movprfx" "*,yes")] 7357) 7358 7359(define_insn "*cond_<optab><mode>_4_strict" 7360 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7361 (unspec:SVE_FULL_F 7362 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7363 (unspec:SVE_FULL_F 7364 [(match_dup 1) 7365 (const_int SVE_STRICT_GP) 7366 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7367 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 7368 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")] 7369 SVE_COND_FCMLA) 7370 (match_dup 4)] 7371 UNSPEC_SEL))] 7372 "TARGET_SVE" 7373 "@ 7374 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7375 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>" 7376 [(set_attr "movprfx" "*,yes")] 7377) 7378 7379;; Predicated FCMLA, merging with an independent value. 7380(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed" 7381 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 7382 (unspec:SVE_FULL_F 7383 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 7384 (unspec:SVE_FULL_F 7385 [(match_operand 6) 7386 (const_int SVE_RELAXED_GP) 7387 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") 7388 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") 7389 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] 7390 SVE_COND_FCMLA) 7391 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 7392 UNSPEC_SEL))] 7393 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" 7394 "@ 7395 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7396 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7397 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7398 #" 7399 "&& 1" 7400 { 7401 if (reload_completed 7402 && register_operand (operands[5], <MODE>mode) 7403 && !rtx_equal_p (operands[0], operands[5])) 7404 { 7405 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 7406 operands[5], operands[1])); 7407 operands[5] = operands[4] = operands[0]; 7408 } 7409 else if (!rtx_equal_p (operands[1], operands[6])) 7410 operands[6] = copy_rtx (operands[1]); 7411 else 7412 FAIL; 7413 } 7414 [(set_attr "movprfx" "yes")] 7415) 7416 7417(define_insn_and_rewrite "*cond_<optab><mode>_any_strict" 7418 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w") 7419 (unspec:SVE_FULL_F 7420 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl") 7421 (unspec:SVE_FULL_F 7422 [(match_dup 1) 7423 (const_int SVE_STRICT_GP) 7424 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w") 7425 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w") 7426 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")] 7427 SVE_COND_FCMLA) 7428 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")] 7429 UNSPEC_SEL))] 7430 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])" 7431 "@ 7432 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7433 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7434 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot> 7435 #" 7436 "&& reload_completed 7437 && register_operand (operands[5], <MODE>mode) 7438 && !rtx_equal_p (operands[0], operands[5])" 7439 { 7440 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4], 7441 operands[5], operands[1])); 7442 operands[5] = operands[4] = operands[0]; 7443 } 7444 [(set_attr "movprfx" "yes")] 7445) 7446 7447;; Unpredicated FCMLA with indexing. 7448(define_insn "@aarch64_<optab>_lane_<mode>" 7449 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") 7450 (unspec:SVE_FULL_HSF 7451 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w") 7452 (unspec:SVE_FULL_HSF 7453 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>") 7454 (match_operand:SI 3 "const_int_operand")] 7455 UNSPEC_SVE_LANE_SELECT) 7456 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")] 7457 FCMLA))] 7458 "TARGET_SVE" 7459 "@ 7460 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot> 7461 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>" 7462 [(set_attr "movprfx" "*,yes")] 7463) 7464 7465;; ------------------------------------------------------------------------- 7466;; ---- [FP] Trigonometric multiply-add 7467;; ------------------------------------------------------------------------- 7468;; Includes: 7469;; - FTMAD 7470;; ------------------------------------------------------------------------- 7471 7472(define_insn "@aarch64_sve_tmad<mode>" 7473 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 7474 (unspec:SVE_FULL_F 7475 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w") 7476 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 7477 (match_operand:DI 3 "const_int_operand")] 7478 UNSPEC_FTMAD))] 7479 "TARGET_SVE" 7480 "@ 7481 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3 7482 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3" 7483 [(set_attr "movprfx" "*,yes")] 7484) 7485 7486;; ------------------------------------------------------------------------- 7487;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF) 7488;; ------------------------------------------------------------------------- 7489;; Includes: 7490;; - BFDOT (BF16) 7491;; - BFMLALB (BF16) 7492;; - BFMLALT (BF16) 7493;; - BFMMLA (BF16) 7494;; ------------------------------------------------------------------------- 7495 7496(define_insn "@aarch64_sve_<sve_fp_op>vnx4sf" 7497 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") 7498 (unspec:VNx4SF 7499 [(match_operand:VNx4SF 1 "register_operand" "0, w") 7500 (match_operand:VNx8BF 2 "register_operand" "w, w") 7501 (match_operand:VNx8BF 3 "register_operand" "w, w")] 7502 SVE_BFLOAT_TERNARY_LONG))] 7503 "TARGET_SVE_BF16" 7504 "@ 7505 <sve_fp_op>\t%0.s, %2.h, %3.h 7506 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h" 7507 [(set_attr "movprfx" "*,yes")] 7508) 7509 7510;; The immediate range is enforced before generating the instruction. 7511(define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf" 7512 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w") 7513 (unspec:VNx4SF 7514 [(match_operand:VNx4SF 1 "register_operand" "0, w") 7515 (match_operand:VNx8BF 2 "register_operand" "w, w") 7516 (match_operand:VNx8BF 3 "register_operand" "y, y") 7517 (match_operand:SI 4 "const_int_operand")] 7518 SVE_BFLOAT_TERNARY_LONG_LANE))] 7519 "TARGET_SVE_BF16" 7520 "@ 7521 <sve_fp_op>\t%0.s, %2.h, %3.h[%4] 7522 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]" 7523 [(set_attr "movprfx" "*,yes")] 7524) 7525 7526;; ------------------------------------------------------------------------- 7527;; ---- [FP] Matrix multiply-accumulate 7528;; ------------------------------------------------------------------------- 7529;; Includes: 7530;; - FMMLA (F32MM,F64MM) 7531;; ------------------------------------------------------------------------- 7532 7533;; The mode iterator enforces the target requirements. 7534(define_insn "@aarch64_sve_<sve_fp_op><mode>" 7535 [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w") 7536 (unspec:SVE_MATMULF 7537 [(match_operand:SVE_MATMULF 2 "register_operand" "w, w") 7538 (match_operand:SVE_MATMULF 3 "register_operand" "w, w") 7539 (match_operand:SVE_MATMULF 1 "register_operand" "0, w")] 7540 FMMLA))] 7541 "TARGET_SVE" 7542 "@ 7543 <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype> 7544 movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>" 7545 [(set_attr "movprfx" "*,yes")] 7546) 7547 7548;; ========================================================================= 7549;; == Comparisons and selects 7550;; ========================================================================= 7551 7552;; ------------------------------------------------------------------------- 7553;; ---- [INT,FP] Select based on predicates 7554;; ------------------------------------------------------------------------- 7555;; Includes merging patterns for: 7556;; - FMOV 7557;; - MOV 7558;; - SEL 7559;; ------------------------------------------------------------------------- 7560 7561;; vcond_mask operand order: true, false, mask 7562;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR) 7563;; SEL operand order: mask, true, false 7564(define_expand "@vcond_mask_<mode><vpred>" 7565 [(set (match_operand:SVE_ALL 0 "register_operand") 7566 (unspec:SVE_ALL 7567 [(match_operand:<VPRED> 3 "register_operand") 7568 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm") 7569 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")] 7570 UNSPEC_SEL))] 7571 "TARGET_SVE" 7572 { 7573 if (register_operand (operands[1], <MODE>mode)) 7574 operands[2] = force_reg (<MODE>mode, operands[2]); 7575 } 7576) 7577 7578;; Selects between: 7579;; - two registers 7580;; - a duplicated immediate and a register 7581;; - a duplicated immediate and zero 7582;; 7583;; For unpacked vectors, it doesn't really matter whether SEL uses the 7584;; the container size or the element size. If SEL used the container size, 7585;; it would ignore undefined bits of the predicate but would copy the 7586;; upper (undefined) bits of each container along with the defined bits. 7587;; If SEL used the element size, it would use undefined bits of the predicate 7588;; to select between undefined elements in each input vector. Thus the only 7589;; difference is whether the undefined bits in a container always come from 7590;; the same input as the defined bits, or whether the choice can vary 7591;; independently of the defined bits. 7592;; 7593;; For the other instructions, using the element size is more natural, 7594;; so we do that for SEL as well. 7595(define_insn "*vcond_mask_<mode><vpred>" 7596 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w") 7597 (unspec:SVE_ALL 7598 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl") 7599 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc") 7600 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")] 7601 UNSPEC_SEL))] 7602 "TARGET_SVE 7603 && (!register_operand (operands[1], <MODE>mode) 7604 || register_operand (operands[2], <MODE>mode))" 7605 "@ 7606 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype> 7607 mov\t%0.<Vetype>, %3/m, #%I1 7608 mov\t%0.<Vetype>, %3/z, #%I1 7609 fmov\t%0.<Vetype>, %3/m, #%1 7610 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1 7611 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1 7612 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1" 7613 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")] 7614) 7615 7616;; Optimize selects between a duplicated scalar variable and another vector, 7617;; the latter of which can be a zero constant or a variable. Treat duplicates 7618;; of GPRs as being more expensive than duplicates of FPRs, since they 7619;; involve a cross-file move. 7620(define_insn "@aarch64_sel_dup<mode>" 7621 [(set (match_operand:SVE_ALL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w") 7622 (unspec:SVE_ALL 7623 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") 7624 (vec_duplicate:SVE_ALL 7625 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w")) 7626 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")] 7627 UNSPEC_SEL))] 7628 "TARGET_SVE" 7629 "@ 7630 mov\t%0.<Vetype>, %3/m, %<vwcore>1 7631 mov\t%0.<Vetype>, %3/m, %<Vetype>1 7632 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 7633 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1 7634 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1 7635 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1" 7636 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")] 7637) 7638 7639;; ------------------------------------------------------------------------- 7640;; ---- [INT,FP] Compare and select 7641;; ------------------------------------------------------------------------- 7642;; The patterns in this section are synthetic. 7643;; ------------------------------------------------------------------------- 7644 7645;; Integer (signed) vcond. Don't enforce an immediate range here, since it 7646;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. 7647(define_expand "vcond<SVE_ALL:mode><SVE_I:mode>" 7648 [(set (match_operand:SVE_ALL 0 "register_operand") 7649 (if_then_else:SVE_ALL 7650 (match_operator 3 "comparison_operator" 7651 [(match_operand:SVE_I 4 "register_operand") 7652 (match_operand:SVE_I 5 "nonmemory_operand")]) 7653 (match_operand:SVE_ALL 1 "nonmemory_operand") 7654 (match_operand:SVE_ALL 2 "nonmemory_operand")))] 7655 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>" 7656 { 7657 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands); 7658 DONE; 7659 } 7660) 7661 7662;; Integer vcondu. Don't enforce an immediate range here, since it 7663;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead. 7664(define_expand "vcondu<SVE_ALL:mode><SVE_I:mode>" 7665 [(set (match_operand:SVE_ALL 0 "register_operand") 7666 (if_then_else:SVE_ALL 7667 (match_operator 3 "comparison_operator" 7668 [(match_operand:SVE_I 4 "register_operand") 7669 (match_operand:SVE_I 5 "nonmemory_operand")]) 7670 (match_operand:SVE_ALL 1 "nonmemory_operand") 7671 (match_operand:SVE_ALL 2 "nonmemory_operand")))] 7672 "TARGET_SVE && <SVE_ALL:container_bits> == <SVE_I:container_bits>" 7673 { 7674 aarch64_expand_sve_vcond (<SVE_ALL:MODE>mode, <SVE_I:MODE>mode, operands); 7675 DONE; 7676 } 7677) 7678 7679;; Floating-point vcond. All comparisons except FCMUO allow a zero operand; 7680;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero. 7681(define_expand "vcond<mode><v_fp_equiv>" 7682 [(set (match_operand:SVE_FULL_HSD 0 "register_operand") 7683 (if_then_else:SVE_FULL_HSD 7684 (match_operator 3 "comparison_operator" 7685 [(match_operand:<V_FP_EQUIV> 4 "register_operand") 7686 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")]) 7687 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand") 7688 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))] 7689 "TARGET_SVE" 7690 { 7691 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands); 7692 DONE; 7693 } 7694) 7695 7696;; ------------------------------------------------------------------------- 7697;; ---- [INT] Comparisons 7698;; ------------------------------------------------------------------------- 7699;; Includes: 7700;; - CMPEQ 7701;; - CMPGE 7702;; - CMPGT 7703;; - CMPHI 7704;; - CMPHS 7705;; - CMPLE 7706;; - CMPLO 7707;; - CMPLS 7708;; - CMPLT 7709;; - CMPNE 7710;; ------------------------------------------------------------------------- 7711 7712;; Signed integer comparisons. Don't enforce an immediate range here, since 7713;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int 7714;; instead. 7715(define_expand "vec_cmp<mode><vpred>" 7716 [(parallel 7717 [(set (match_operand:<VPRED> 0 "register_operand") 7718 (match_operator:<VPRED> 1 "comparison_operator" 7719 [(match_operand:SVE_I 2 "register_operand") 7720 (match_operand:SVE_I 3 "nonmemory_operand")])) 7721 (clobber (reg:CC_NZC CC_REGNUM))])] 7722 "TARGET_SVE" 7723 { 7724 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), 7725 operands[2], operands[3]); 7726 DONE; 7727 } 7728) 7729 7730;; Unsigned integer comparisons. Don't enforce an immediate range here, since 7731;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int 7732;; instead. 7733(define_expand "vec_cmpu<mode><vpred>" 7734 [(parallel 7735 [(set (match_operand:<VPRED> 0 "register_operand") 7736 (match_operator:<VPRED> 1 "comparison_operator" 7737 [(match_operand:SVE_I 2 "register_operand") 7738 (match_operand:SVE_I 3 "nonmemory_operand")])) 7739 (clobber (reg:CC_NZC CC_REGNUM))])] 7740 "TARGET_SVE" 7741 { 7742 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]), 7743 operands[2], operands[3]); 7744 DONE; 7745 } 7746) 7747 7748;; Predicated integer comparisons. 7749;; 7750;; For unpacked vectors, only the lowpart element in each input container 7751;; has a defined value, and only the predicate bits associated with 7752;; those elements are defined. For example, when comparing two VNx2SIs: 7753;; 7754;; - The VNx2SIs can be seem as VNx2DIs in which the low halves of each 7755;; DI container store an SI element. The upper bits of each DI container 7756;; are undefined. 7757;; 7758;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the 7759;; even elements are defined and the odd elements are undefined. 7760;; 7761;; - The associated predicate mode is VNx2BI. This means that only the 7762;; low bit in each predicate byte is defined (on input and on output). 7763;; 7764;; - We use a .s comparison to compare VNx2SIs, under the control of a 7765;; VNx2BI governing predicate, to produce a VNx2BI result. If we view 7766;; the .s operation as operating on VNx4SIs then for odd lanes: 7767;; 7768;; - the input governing predicate bit is undefined 7769;; - the SI elements being compared are undefined 7770;; - the predicate result bit is therefore undefined, but 7771;; - the predicate result bit is in the undefined part of a VNx2BI, 7772;; so its value doesn't matter anyway. 7773(define_insn "@aarch64_pred_cmp<cmp_op><mode>" 7774 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7775 (unspec:<VPRED> 7776 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 7777 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7778 (SVE_INT_CMP:<VPRED> 7779 (match_operand:SVE_I 3 "register_operand" "w, w") 7780 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7781 UNSPEC_PRED_Z)) 7782 (clobber (reg:CC_NZC CC_REGNUM))] 7783 "TARGET_SVE" 7784 "@ 7785 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4 7786 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 7787) 7788 7789;; Predicated integer comparisons in which both the flag and predicate 7790;; results are interesting. 7791(define_insn_and_rewrite "*cmp<cmp_op><mode>_cc" 7792 [(set (reg:CC_NZC CC_REGNUM) 7793 (unspec:CC_NZC 7794 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") 7795 (match_operand 4) 7796 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7797 (unspec:<VPRED> 7798 [(match_operand 6) 7799 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7800 (SVE_INT_CMP:<VPRED> 7801 (match_operand:SVE_I 2 "register_operand" "w, w") 7802 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7803 UNSPEC_PRED_Z)] 7804 UNSPEC_PTEST)) 7805 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7806 (unspec:<VPRED> 7807 [(match_dup 6) 7808 (match_dup 7) 7809 (SVE_INT_CMP:<VPRED> 7810 (match_dup 2) 7811 (match_dup 3))] 7812 UNSPEC_PRED_Z))] 7813 "TARGET_SVE 7814 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7815 "@ 7816 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 7817 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 7818 "&& !rtx_equal_p (operands[4], operands[6])" 7819 { 7820 operands[6] = copy_rtx (operands[4]); 7821 operands[7] = operands[5]; 7822 } 7823) 7824 7825;; Predicated integer comparisons in which only the flags result is 7826;; interesting. 7827(define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest" 7828 [(set (reg:CC_NZC CC_REGNUM) 7829 (unspec:CC_NZC 7830 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl") 7831 (match_operand 4) 7832 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7833 (unspec:<VPRED> 7834 [(match_operand 6) 7835 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7836 (SVE_INT_CMP:<VPRED> 7837 (match_operand:SVE_I 2 "register_operand" "w, w") 7838 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7839 UNSPEC_PRED_Z)] 7840 UNSPEC_PTEST)) 7841 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))] 7842 "TARGET_SVE 7843 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7844 "@ 7845 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3 7846 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 7847 "&& !rtx_equal_p (operands[4], operands[6])" 7848 { 7849 operands[6] = copy_rtx (operands[4]); 7850 operands[7] = operands[5]; 7851 } 7852) 7853 7854;; Predicated integer comparisons, formed by combining a PTRUE-predicated 7855;; comparison with an AND. Split the instruction into its preferred form 7856;; at the earliest opportunity, in order to get rid of the redundant 7857;; operand 4. 7858(define_insn_and_split "*cmp<cmp_op><mode>_and" 7859 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 7860 (and:<VPRED> 7861 (unspec:<VPRED> 7862 [(match_operand 4) 7863 (const_int SVE_KNOWN_PTRUE) 7864 (SVE_INT_CMP:<VPRED> 7865 (match_operand:SVE_I 2 "register_operand" "w, w") 7866 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))] 7867 UNSPEC_PRED_Z) 7868 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl"))) 7869 (clobber (reg:CC_NZC CC_REGNUM))] 7870 "TARGET_SVE" 7871 "#" 7872 "&& 1" 7873 [(parallel 7874 [(set (match_dup 0) 7875 (unspec:<VPRED> 7876 [(match_dup 1) 7877 (const_int SVE_MAYBE_NOT_PTRUE) 7878 (SVE_INT_CMP:<VPRED> 7879 (match_dup 2) 7880 (match_dup 3))] 7881 UNSPEC_PRED_Z)) 7882 (clobber (reg:CC_NZC CC_REGNUM))])] 7883) 7884 7885;; Predicated integer wide comparisons. 7886(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide" 7887 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7888 (unspec:<VPRED> 7889 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7890 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 7891 (unspec:<VPRED> 7892 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w") 7893 (match_operand:VNx2DI 4 "register_operand" "w")] 7894 SVE_COND_INT_CMP_WIDE)] 7895 UNSPEC_PRED_Z)) 7896 (clobber (reg:CC_NZC CC_REGNUM))] 7897 "TARGET_SVE" 7898 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d" 7899) 7900 7901;; Predicated integer wide comparisons in which both the flag and 7902;; predicate results are interesting. 7903(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc" 7904 [(set (reg:CC_NZC CC_REGNUM) 7905 (unspec:CC_NZC 7906 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7907 (match_operand 4) 7908 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7909 (unspec:<VPRED> 7910 [(match_operand:VNx16BI 6 "register_operand" "Upl") 7911 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7912 (unspec:<VPRED> 7913 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") 7914 (match_operand:VNx2DI 3 "register_operand" "w")] 7915 SVE_COND_INT_CMP_WIDE)] 7916 UNSPEC_PRED_Z)] 7917 UNSPEC_PTEST)) 7918 (set (match_operand:<VPRED> 0 "register_operand" "=Upa") 7919 (unspec:<VPRED> 7920 [(match_dup 6) 7921 (match_dup 7) 7922 (unspec:<VPRED> 7923 [(match_dup 2) 7924 (match_dup 3)] 7925 SVE_COND_INT_CMP_WIDE)] 7926 UNSPEC_PRED_Z))] 7927 "TARGET_SVE 7928 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7929 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" 7930) 7931 7932;; Predicated integer wide comparisons in which only the flags result 7933;; is interesting. 7934(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest" 7935 [(set (reg:CC_NZC CC_REGNUM) 7936 (unspec:CC_NZC 7937 [(match_operand:VNx16BI 1 "register_operand" "Upl") 7938 (match_operand 4) 7939 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 7940 (unspec:<VPRED> 7941 [(match_operand:VNx16BI 6 "register_operand" "Upl") 7942 (match_operand:SI 7 "aarch64_sve_ptrue_flag") 7943 (unspec:<VPRED> 7944 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w") 7945 (match_operand:VNx2DI 3 "register_operand" "w")] 7946 SVE_COND_INT_CMP_WIDE)] 7947 UNSPEC_PRED_Z)] 7948 UNSPEC_PTEST)) 7949 (clobber (match_scratch:<VPRED> 0 "=Upa"))] 7950 "TARGET_SVE 7951 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])" 7952 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d" 7953) 7954 7955;; ------------------------------------------------------------------------- 7956;; ---- [INT] While tests 7957;; ------------------------------------------------------------------------- 7958;; Includes: 7959;; - WHILEGE (SVE2) 7960;; - WHILEGT (SVE2) 7961;; - WHILEHI (SVE2) 7962;; - WHILEHS (SVE2) 7963;; - WHILELE 7964;; - WHILELO 7965;; - WHILELS 7966;; - WHILELT 7967;; - WHILERW (SVE2) 7968;; - WHILEWR (SVE2) 7969;; ------------------------------------------------------------------------- 7970 7971;; Set element I of the result if (cmp (plus operand1 J) operand2) is 7972;; true for all J in [0, I]. 7973(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>" 7974 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 7975 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 7976 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 7977 SVE_WHILE)) 7978 (clobber (reg:CC_NZC CC_REGNUM))] 7979 "TARGET_SVE" 7980 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 7981) 7982 7983;; The WHILE instructions set the flags in the same way as a PTEST with 7984;; a PTRUE GP. Handle the case in which both results are useful. The GP 7985;; operands to the PTEST aren't needed, so we allow them to be anything. 7986(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc" 7987 [(set (reg:CC_NZC CC_REGNUM) 7988 (unspec:CC_NZC 7989 [(match_operand 3) 7990 (match_operand 4) 7991 (const_int SVE_KNOWN_PTRUE) 7992 (unspec:PRED_ALL 7993 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 7994 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 7995 SVE_WHILE)] 7996 UNSPEC_PTEST)) 7997 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 7998 (unspec:PRED_ALL [(match_dup 1) 7999 (match_dup 2)] 8000 SVE_WHILE))] 8001 "TARGET_SVE" 8002 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 8003 ;; Force the compiler to drop the unused predicate operand, so that we 8004 ;; don't have an unnecessary PTRUE. 8005 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" 8006 { 8007 operands[3] = CONSTM1_RTX (VNx16BImode); 8008 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 8009 } 8010) 8011 8012;; Same, but handle the case in which only the flags result is useful. 8013(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest" 8014 [(set (reg:CC_NZC CC_REGNUM) 8015 (unspec:CC_NZC 8016 [(match_operand 3) 8017 (match_operand 4) 8018 (const_int SVE_KNOWN_PTRUE) 8019 (unspec:PRED_ALL 8020 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ") 8021 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")] 8022 SVE_WHILE)] 8023 UNSPEC_PTEST)) 8024 (clobber (match_scratch:PRED_ALL 0 "=Upa"))] 8025 "TARGET_SVE" 8026 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2" 8027 ;; Force the compiler to drop the unused predicate operand, so that we 8028 ;; don't have an unnecessary PTRUE. 8029 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))" 8030 { 8031 operands[3] = CONSTM1_RTX (VNx16BImode); 8032 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 8033 } 8034) 8035 8036;; ------------------------------------------------------------------------- 8037;; ---- [FP] Direct comparisons 8038;; ------------------------------------------------------------------------- 8039;; Includes: 8040;; - FCMEQ 8041;; - FCMGE 8042;; - FCMGT 8043;; - FCMLE 8044;; - FCMLT 8045;; - FCMNE 8046;; - FCMUO 8047;; ------------------------------------------------------------------------- 8048 8049;; Floating-point comparisons. All comparisons except FCMUO allow a zero 8050;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO 8051;; with zero. 8052(define_expand "vec_cmp<mode><vpred>" 8053 [(set (match_operand:<VPRED> 0 "register_operand") 8054 (match_operator:<VPRED> 1 "comparison_operator" 8055 [(match_operand:SVE_FULL_F 2 "register_operand") 8056 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))] 8057 "TARGET_SVE" 8058 { 8059 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]), 8060 operands[2], operands[3], false); 8061 DONE; 8062 } 8063) 8064 8065;; Predicated floating-point comparisons. 8066(define_insn "@aarch64_pred_fcm<cmp_op><mode>" 8067 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 8068 (unspec:<VPRED> 8069 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 8070 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 8071 (match_operand:SVE_FULL_F 3 "register_operand" "w, w") 8072 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")] 8073 SVE_COND_FP_CMP_I0))] 8074 "TARGET_SVE" 8075 "@ 8076 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0 8077 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 8078) 8079 8080;; Same for unordered comparisons. 8081(define_insn "@aarch64_pred_fcmuo<mode>" 8082 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8083 (unspec:<VPRED> 8084 [(match_operand:<VPRED> 1 "register_operand" "Upl") 8085 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 8086 (match_operand:SVE_FULL_F 3 "register_operand" "w") 8087 (match_operand:SVE_FULL_F 4 "register_operand" "w")] 8088 UNSPEC_COND_FCMUO))] 8089 "TARGET_SVE" 8090 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>" 8091) 8092 8093;; Floating-point comparisons predicated on a PTRUE, with the results ANDed 8094;; with another predicate P. This does not have the same trapping behavior 8095;; as predicating the comparison itself on P, but it's a legitimate fold, 8096;; since we can drop any potentially-trapping operations whose results 8097;; are not needed. 8098;; 8099;; Split the instruction into its preferred form (below) at the earliest 8100;; opportunity, in order to get rid of the redundant operand 1. 8101(define_insn_and_split "*fcm<cmp_op><mode>_and_combine" 8102 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa") 8103 (and:<VPRED> 8104 (unspec:<VPRED> 8105 [(match_operand:<VPRED> 1) 8106 (const_int SVE_KNOWN_PTRUE) 8107 (match_operand:SVE_FULL_F 2 "register_operand" "w, w") 8108 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")] 8109 SVE_COND_FP_CMP_I0) 8110 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))] 8111 "TARGET_SVE" 8112 "#" 8113 "&& 1" 8114 [(set (match_dup 0) 8115 (unspec:<VPRED> 8116 [(match_dup 4) 8117 (const_int SVE_MAYBE_NOT_PTRUE) 8118 (match_dup 2) 8119 (match_dup 3)] 8120 SVE_COND_FP_CMP_I0))] 8121) 8122 8123;; Same for unordered comparisons. 8124(define_insn_and_split "*fcmuo<mode>_and_combine" 8125 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8126 (and:<VPRED> 8127 (unspec:<VPRED> 8128 [(match_operand:<VPRED> 1) 8129 (const_int SVE_KNOWN_PTRUE) 8130 (match_operand:SVE_FULL_F 2 "register_operand" "w") 8131 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 8132 UNSPEC_COND_FCMUO) 8133 (match_operand:<VPRED> 4 "register_operand" "Upl")))] 8134 "TARGET_SVE" 8135 "#" 8136 "&& 1" 8137 [(set (match_dup 0) 8138 (unspec:<VPRED> 8139 [(match_dup 4) 8140 (const_int SVE_MAYBE_NOT_PTRUE) 8141 (match_dup 2) 8142 (match_dup 3)] 8143 UNSPEC_COND_FCMUO))] 8144) 8145 8146;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND. 8147;; In this case, we still need a separate NOT/BIC operation, but predicating 8148;; the comparison on the BIC operand removes the need for a PTRUE. 8149(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine" 8150 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8151 (and:<VPRED> 8152 (and:<VPRED> 8153 (not:<VPRED> 8154 (unspec:<VPRED> 8155 [(match_operand:<VPRED> 1) 8156 (const_int SVE_KNOWN_PTRUE) 8157 (match_operand:SVE_FULL_F 2 "register_operand" "w") 8158 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] 8159 SVE_COND_FP_CMP_I0)) 8160 (match_operand:<VPRED> 4 "register_operand" "Upa")) 8161 (match_dup:<VPRED> 1))) 8162 (clobber (match_scratch:<VPRED> 5 "=&Upl"))] 8163 "TARGET_SVE" 8164 "#" 8165 "&& 1" 8166 [(set (match_dup 5) 8167 (unspec:<VPRED> 8168 [(match_dup 4) 8169 (const_int SVE_MAYBE_NOT_PTRUE) 8170 (match_dup 2) 8171 (match_dup 3)] 8172 SVE_COND_FP_CMP_I0)) 8173 (set (match_dup 0) 8174 (and:<VPRED> 8175 (not:<VPRED> 8176 (match_dup 5)) 8177 (match_dup 4)))] 8178{ 8179 if (can_create_pseudo_p ()) 8180 operands[5] = gen_reg_rtx (<VPRED>mode); 8181} 8182) 8183 8184;; Make sure that we expand to a nor when the operand 4 of 8185;; *fcm<cmp_op><mode>_bic_combine is a not. 8186(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine" 8187 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8188 (and:<VPRED> 8189 (and:<VPRED> 8190 (not:<VPRED> 8191 (unspec:<VPRED> 8192 [(match_operand:<VPRED> 1) 8193 (const_int SVE_KNOWN_PTRUE) 8194 (match_operand:SVE_FULL_F 2 "register_operand" "w") 8195 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] 8196 SVE_COND_FP_CMP_I0)) 8197 (not:<VPRED> 8198 (match_operand:<VPRED> 4 "register_operand" "Upa"))) 8199 (match_dup:<VPRED> 1))) 8200 (clobber (match_scratch:<VPRED> 5 "=&Upl"))] 8201 "TARGET_SVE" 8202 "#" 8203 "&& 1" 8204 [(set (match_dup 5) 8205 (unspec:<VPRED> 8206 [(match_dup 1) 8207 (const_int SVE_KNOWN_PTRUE) 8208 (match_dup 2) 8209 (match_dup 3)] 8210 SVE_COND_FP_CMP_I0)) 8211 (set (match_dup 0) 8212 (and:<VPRED> 8213 (and:<VPRED> 8214 (not:<VPRED> 8215 (match_dup 5)) 8216 (not:<VPRED> 8217 (match_dup 4))) 8218 (match_dup 1)))] 8219{ 8220 if (can_create_pseudo_p ()) 8221 operands[5] = gen_reg_rtx (<VPRED>mode); 8222} 8223) 8224 8225(define_insn_and_split "*fcmuo<mode>_bic_combine" 8226 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8227 (and:<VPRED> 8228 (and:<VPRED> 8229 (not:<VPRED> 8230 (unspec:<VPRED> 8231 [(match_operand:<VPRED> 1) 8232 (const_int SVE_KNOWN_PTRUE) 8233 (match_operand:SVE_FULL_F 2 "register_operand" "w") 8234 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 8235 UNSPEC_COND_FCMUO)) 8236 (match_operand:<VPRED> 4 "register_operand" "Upa")) 8237 (match_dup:<VPRED> 1))) 8238 (clobber (match_scratch:<VPRED> 5 "=&Upl"))] 8239 "TARGET_SVE" 8240 "#" 8241 "&& 1" 8242 [(set (match_dup 5) 8243 (unspec:<VPRED> 8244 [(match_dup 4) 8245 (const_int SVE_MAYBE_NOT_PTRUE) 8246 (match_dup 2) 8247 (match_dup 3)] 8248 UNSPEC_COND_FCMUO)) 8249 (set (match_dup 0) 8250 (and:<VPRED> 8251 (not:<VPRED> 8252 (match_dup 5)) 8253 (match_dup 4)))] 8254{ 8255 if (can_create_pseudo_p ()) 8256 operands[5] = gen_reg_rtx (<VPRED>mode); 8257} 8258) 8259 8260;; Same for unordered comparisons. 8261(define_insn_and_split "*fcmuo<mode>_nor_combine" 8262 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8263 (and:<VPRED> 8264 (and:<VPRED> 8265 (not:<VPRED> 8266 (unspec:<VPRED> 8267 [(match_operand:<VPRED> 1) 8268 (const_int SVE_KNOWN_PTRUE) 8269 (match_operand:SVE_FULL_F 2 "register_operand" "w") 8270 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 8271 UNSPEC_COND_FCMUO)) 8272 (not:<VPRED> 8273 (match_operand:<VPRED> 4 "register_operand" "Upa"))) 8274 (match_dup:<VPRED> 1))) 8275 (clobber (match_scratch:<VPRED> 5 "=&Upl"))] 8276 "TARGET_SVE" 8277 "#" 8278 "&& 1" 8279 [(set (match_dup 5) 8280 (unspec:<VPRED> 8281 [(match_dup 1) 8282 (const_int SVE_KNOWN_PTRUE) 8283 (match_dup 2) 8284 (match_dup 3)] 8285 UNSPEC_COND_FCMUO)) 8286 (set (match_dup 0) 8287 (and:<VPRED> 8288 (and:<VPRED> 8289 (not:<VPRED> 8290 (match_dup 5)) 8291 (not:<VPRED> 8292 (match_dup 4))) 8293 (match_dup 1)))] 8294{ 8295 if (can_create_pseudo_p ()) 8296 operands[5] = gen_reg_rtx (<VPRED>mode); 8297} 8298) 8299 8300;; ------------------------------------------------------------------------- 8301;; ---- [FP] Absolute comparisons 8302;; ------------------------------------------------------------------------- 8303;; Includes: 8304;; - FACGE 8305;; - FACGT 8306;; - FACLE 8307;; - FACLT 8308;; ------------------------------------------------------------------------- 8309 8310;; Predicated floating-point absolute comparisons. 8311(define_expand "@aarch64_pred_fac<cmp_op><mode>" 8312 [(set (match_operand:<VPRED> 0 "register_operand") 8313 (unspec:<VPRED> 8314 [(match_operand:<VPRED> 1 "register_operand") 8315 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 8316 (unspec:SVE_FULL_F 8317 [(match_dup 1) 8318 (match_dup 2) 8319 (match_operand:SVE_FULL_F 3 "register_operand")] 8320 UNSPEC_COND_FABS) 8321 (unspec:SVE_FULL_F 8322 [(match_dup 1) 8323 (match_dup 2) 8324 (match_operand:SVE_FULL_F 4 "register_operand")] 8325 UNSPEC_COND_FABS)] 8326 SVE_COND_FP_ABS_CMP))] 8327 "TARGET_SVE" 8328) 8329 8330(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed" 8331 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8332 (unspec:<VPRED> 8333 [(match_operand:<VPRED> 1 "register_operand" "Upl") 8334 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 8335 (unspec:SVE_FULL_F 8336 [(match_operand 5) 8337 (const_int SVE_RELAXED_GP) 8338 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 8339 UNSPEC_COND_FABS) 8340 (unspec:SVE_FULL_F 8341 [(match_operand 6) 8342 (const_int SVE_RELAXED_GP) 8343 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 8344 UNSPEC_COND_FABS)] 8345 SVE_COND_FP_ABS_CMP))] 8346 "TARGET_SVE" 8347 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 8348 "&& (!rtx_equal_p (operands[1], operands[5]) 8349 || !rtx_equal_p (operands[1], operands[6]))" 8350 { 8351 operands[5] = copy_rtx (operands[1]); 8352 operands[6] = copy_rtx (operands[1]); 8353 } 8354) 8355 8356(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict" 8357 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa") 8358 (unspec:<VPRED> 8359 [(match_operand:<VPRED> 1 "register_operand" "Upl") 8360 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 8361 (unspec:SVE_FULL_F 8362 [(match_dup 1) 8363 (match_operand:SI 5 "aarch64_sve_gp_strictness") 8364 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 8365 UNSPEC_COND_FABS) 8366 (unspec:SVE_FULL_F 8367 [(match_dup 1) 8368 (match_operand:SI 6 "aarch64_sve_gp_strictness") 8369 (match_operand:SVE_FULL_F 3 "register_operand" "w")] 8370 UNSPEC_COND_FABS)] 8371 SVE_COND_FP_ABS_CMP))] 8372 "TARGET_SVE" 8373 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>" 8374) 8375 8376;; ------------------------------------------------------------------------- 8377;; ---- [PRED] Select 8378;; ------------------------------------------------------------------------- 8379;; Includes: 8380;; - SEL 8381;; ------------------------------------------------------------------------- 8382 8383(define_insn "@vcond_mask_<mode><mode>" 8384 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8385 (ior:PRED_ALL 8386 (and:PRED_ALL 8387 (match_operand:PRED_ALL 3 "register_operand" "Upa") 8388 (match_operand:PRED_ALL 1 "register_operand" "Upa")) 8389 (and:PRED_ALL 8390 (not (match_dup 3)) 8391 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))] 8392 "TARGET_SVE" 8393 "sel\t%0.b, %3, %1.b, %2.b" 8394) 8395 8396;; ------------------------------------------------------------------------- 8397;; ---- [PRED] Test bits 8398;; ------------------------------------------------------------------------- 8399;; Includes: 8400;; - PTEST 8401;; ------------------------------------------------------------------------- 8402 8403;; Branch based on predicate equality or inequality. 8404(define_expand "cbranch<mode>4" 8405 [(set (pc) 8406 (if_then_else 8407 (match_operator 0 "aarch64_equality_operator" 8408 [(match_operand:PRED_ALL 1 "register_operand") 8409 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")]) 8410 (label_ref (match_operand 3 "")) 8411 (pc)))] 8412 "" 8413 { 8414 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>)); 8415 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue); 8416 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode); 8417 rtx pred; 8418 if (operands[2] == CONST0_RTX (<MODE>mode)) 8419 pred = operands[1]; 8420 else 8421 { 8422 pred = gen_reg_rtx (<MODE>mode); 8423 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1], 8424 operands[2])); 8425 } 8426 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred)); 8427 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM); 8428 operands[2] = const0_rtx; 8429 } 8430) 8431 8432;; See "Description of UNSPEC_PTEST" above for details. 8433(define_insn "aarch64_ptest<mode>" 8434 [(set (reg:CC_NZC CC_REGNUM) 8435 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa") 8436 (match_operand 1) 8437 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 8438 (match_operand:PRED_ALL 3 "register_operand" "Upa")] 8439 UNSPEC_PTEST))] 8440 "TARGET_SVE" 8441 "ptest\t%0, %3.b" 8442) 8443 8444;; ========================================================================= 8445;; == Reductions 8446;; ========================================================================= 8447 8448;; ------------------------------------------------------------------------- 8449;; ---- [INT,FP] Conditional reductions 8450;; ------------------------------------------------------------------------- 8451;; Includes: 8452;; - CLASTA 8453;; - CLASTB 8454;; ------------------------------------------------------------------------- 8455 8456;; Set operand 0 to the last active element in operand 3, or to tied 8457;; operand 1 if no elements are active. 8458(define_insn "@fold_extract_<last_op>_<mode>" 8459 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w") 8460 (unspec:<VEL> 8461 [(match_operand:<VEL> 1 "register_operand" "0, 0") 8462 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") 8463 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8464 CLAST))] 8465 "TARGET_SVE" 8466 "@ 8467 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype> 8468 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>" 8469) 8470 8471(define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>" 8472 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") 8473 (unspec:SVE_FULL 8474 [(match_operand:SVE_FULL 1 "register_operand" "0, w") 8475 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl") 8476 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8477 CLAST))] 8478 "TARGET_SVE" 8479 "@ 8480 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype> 8481 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>" 8482) 8483 8484;; ------------------------------------------------------------------------- 8485;; ---- [INT] Tree reductions 8486;; ------------------------------------------------------------------------- 8487;; Includes: 8488;; - ANDV 8489;; - EORV 8490;; - ORV 8491;; - SADDV 8492;; - SMAXV 8493;; - SMINV 8494;; - UADDV 8495;; - UMAXV 8496;; - UMINV 8497;; ------------------------------------------------------------------------- 8498 8499;; Unpredicated integer add reduction. 8500(define_expand "reduc_plus_scal_<mode>" 8501 [(match_operand:<VEL> 0 "register_operand") 8502 (match_operand:SVE_FULL_I 1 "register_operand")] 8503 "TARGET_SVE" 8504 { 8505 rtx pred = aarch64_ptrue_reg (<VPRED>mode); 8506 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode); 8507 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1])); 8508 if (tmp != operands[0]) 8509 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp)); 8510 DONE; 8511 } 8512) 8513 8514;; Predicated integer add reduction. The result is always 64-bits. 8515(define_insn "@aarch64_pred_reduc_<optab>_<mode>" 8516 [(set (match_operand:DI 0 "register_operand" "=w") 8517 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl") 8518 (match_operand:SVE_FULL_I 2 "register_operand" "w")] 8519 SVE_INT_ADDV))] 8520 "TARGET_SVE && <max_elem_bits> >= <elem_bits>" 8521 "<su>addv\t%d0, %1, %2.<Vetype>" 8522) 8523 8524;; Unpredicated integer reductions. 8525(define_expand "reduc_<optab>_scal_<mode>" 8526 [(set (match_operand:<VEL> 0 "register_operand") 8527 (unspec:<VEL> [(match_dup 2) 8528 (match_operand:SVE_FULL_I 1 "register_operand")] 8529 SVE_INT_REDUCTION))] 8530 "TARGET_SVE" 8531 { 8532 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 8533 } 8534) 8535 8536;; Predicated integer reductions. 8537(define_insn "@aarch64_pred_reduc_<optab>_<mode>" 8538 [(set (match_operand:<VEL> 0 "register_operand" "=w") 8539 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") 8540 (match_operand:SVE_FULL_I 2 "register_operand" "w")] 8541 SVE_INT_REDUCTION))] 8542 "TARGET_SVE" 8543 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>" 8544) 8545 8546;; ------------------------------------------------------------------------- 8547;; ---- [FP] Tree reductions 8548;; ------------------------------------------------------------------------- 8549;; Includes: 8550;; - FADDV 8551;; - FMAXNMV 8552;; - FMAXV 8553;; - FMINNMV 8554;; - FMINV 8555;; ------------------------------------------------------------------------- 8556 8557;; Unpredicated floating-point tree reductions. 8558(define_expand "reduc_<optab>_scal_<mode>" 8559 [(set (match_operand:<VEL> 0 "register_operand") 8560 (unspec:<VEL> [(match_dup 2) 8561 (match_operand:SVE_FULL_F 1 "register_operand")] 8562 SVE_FP_REDUCTION))] 8563 "TARGET_SVE" 8564 { 8565 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 8566 } 8567) 8568 8569(define_expand "reduc_<fmaxmin>_scal_<mode>" 8570 [(match_operand:<VEL> 0 "register_operand") 8571 (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")] 8572 FMAXMINNMV)] 8573 "TARGET_SVE" 8574 { 8575 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1])); 8576 DONE; 8577 } 8578) 8579 8580;; Predicated floating-point tree reductions. 8581(define_insn "@aarch64_pred_reduc_<optab>_<mode>" 8582 [(set (match_operand:<VEL> 0 "register_operand" "=w") 8583 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl") 8584 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 8585 SVE_FP_REDUCTION))] 8586 "TARGET_SVE" 8587 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>" 8588) 8589 8590;; ------------------------------------------------------------------------- 8591;; ---- [FP] Left-to-right reductions 8592;; ------------------------------------------------------------------------- 8593;; Includes: 8594;; - FADDA 8595;; ------------------------------------------------------------------------- 8596 8597;; Unpredicated in-order FP reductions. 8598(define_expand "fold_left_plus_<mode>" 8599 [(set (match_operand:<VEL> 0 "register_operand") 8600 (unspec:<VEL> [(match_dup 3) 8601 (match_operand:<VEL> 1 "register_operand") 8602 (match_operand:SVE_FULL_F 2 "register_operand")] 8603 UNSPEC_FADDA))] 8604 "TARGET_SVE" 8605 { 8606 operands[3] = aarch64_ptrue_reg (<VPRED>mode); 8607 } 8608) 8609 8610;; Predicated in-order FP reductions. 8611(define_insn "mask_fold_left_plus_<mode>" 8612 [(set (match_operand:<VEL> 0 "register_operand" "=w") 8613 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl") 8614 (match_operand:<VEL> 1 "register_operand" "0") 8615 (match_operand:SVE_FULL_F 2 "register_operand" "w")] 8616 UNSPEC_FADDA))] 8617 "TARGET_SVE" 8618 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>" 8619) 8620 8621;; ========================================================================= 8622;; == Permutes 8623;; ========================================================================= 8624 8625;; ------------------------------------------------------------------------- 8626;; ---- [INT,FP] General permutes 8627;; ------------------------------------------------------------------------- 8628;; Includes: 8629;; - TBL 8630;; ------------------------------------------------------------------------- 8631 8632(define_expand "vec_perm<mode>" 8633 [(match_operand:SVE_FULL 0 "register_operand") 8634 (match_operand:SVE_FULL 1 "register_operand") 8635 (match_operand:SVE_FULL 2 "register_operand") 8636 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")] 8637 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()" 8638 { 8639 aarch64_expand_sve_vec_perm (operands[0], operands[1], 8640 operands[2], operands[3]); 8641 DONE; 8642 } 8643) 8644 8645(define_insn "@aarch64_sve_tbl<mode>" 8646 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8647 (unspec:SVE_FULL 8648 [(match_operand:SVE_FULL 1 "register_operand" "w") 8649 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")] 8650 UNSPEC_TBL))] 8651 "TARGET_SVE" 8652 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8653) 8654 8655;; ------------------------------------------------------------------------- 8656;; ---- [INT,FP] Special-purpose unary permutes 8657;; ------------------------------------------------------------------------- 8658;; Includes: 8659;; - COMPACT 8660;; - DUP 8661;; - REV 8662;; ------------------------------------------------------------------------- 8663 8664;; Compact active elements and pad with zeros. 8665(define_insn "@aarch64_sve_compact<mode>" 8666 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w") 8667 (unspec:SVE_FULL_SD 8668 [(match_operand:<VPRED> 1 "register_operand" "Upl") 8669 (match_operand:SVE_FULL_SD 2 "register_operand" "w")] 8670 UNSPEC_SVE_COMPACT))] 8671 "TARGET_SVE" 8672 "compact\t%0.<Vetype>, %1, %2.<Vetype>" 8673) 8674 8675;; Duplicate one element of a vector. 8676(define_insn "@aarch64_sve_dup_lane<mode>" 8677 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 8678 (vec_duplicate:SVE_ALL 8679 (vec_select:<VEL> 8680 (match_operand:SVE_ALL 1 "register_operand" "w") 8681 (parallel [(match_operand:SI 2 "const_int_operand")]))))] 8682 "TARGET_SVE 8683 && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)" 8684 "dup\t%0.<Vctype>, %1.<Vctype>[%2]" 8685) 8686 8687;; Use DUP.Q to duplicate a 128-bit segment of a register. 8688;; 8689;; The vec_select:<V128> sets memory lane number N of the V128 to lane 8690;; number op2 + N of op1. (We don't need to distinguish between memory 8691;; and architectural register lane numbering for op1 or op0, since the 8692;; two numbering schemes are the same for SVE.) 8693;; 8694;; The vec_duplicate:SVE_FULL then copies memory lane number N of the 8695;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP 8696;; of op0. We therefore get the correct result for both endiannesses. 8697;; 8698;; The wrinkle is that for big-endian V128 registers, memory lane numbering 8699;; is in the opposite order to architectural register lane numbering. 8700;; Thus if we were to do this operation via a V128 temporary register, 8701;; the vec_select and vec_duplicate would both involve a reverse operation 8702;; for big-endian targets. In this fused pattern the two reverses cancel 8703;; each other out. 8704(define_insn "@aarch64_sve_dupq_lane<mode>" 8705 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8706 (vec_duplicate:SVE_FULL 8707 (vec_select:<V128> 8708 (match_operand:SVE_FULL 1 "register_operand" "w") 8709 (match_operand 2 "ascending_int_parallel"))))] 8710 "TARGET_SVE 8711 && (INTVAL (XVECEXP (operands[2], 0, 0)) 8712 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0 8713 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0)) 8714 * GET_MODE_SIZE (<VEL>mode), 0, 63)" 8715 { 8716 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0)) 8717 * GET_MODE_SIZE (<VEL>mode)); 8718 operands[2] = gen_int_mode (byte / 16, DImode); 8719 return "dup\t%0.q, %1.q[%2]"; 8720 } 8721) 8722 8723;; Reverse the order of elements within a full vector. 8724(define_insn "@aarch64_sve_rev<mode>" 8725 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 8726 (unspec:SVE_ALL 8727 [(match_operand:SVE_ALL 1 "register_operand" "w")] 8728 UNSPEC_REV))] 8729 "TARGET_SVE" 8730 "rev\t%0.<Vctype>, %1.<Vctype>") 8731 8732;; ------------------------------------------------------------------------- 8733;; ---- [INT,FP] Special-purpose binary permutes 8734;; ------------------------------------------------------------------------- 8735;; Includes: 8736;; - EXT 8737;; - SPLICE 8738;; - TRN1 8739;; - TRN2 8740;; - UZP1 8741;; - UZP2 8742;; - ZIP1 8743;; - ZIP2 8744;; ------------------------------------------------------------------------- 8745 8746;; Like EXT, but start at the first active element. 8747(define_insn "@aarch64_sve_splice<mode>" 8748 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w") 8749 (unspec:SVE_FULL 8750 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl") 8751 (match_operand:SVE_FULL 2 "register_operand" "0, w") 8752 (match_operand:SVE_FULL 3 "register_operand" "w, w")] 8753 UNSPEC_SVE_SPLICE))] 8754 "TARGET_SVE" 8755 "@ 8756 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype> 8757 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>" 8758 [(set_attr "movprfx" "*, yes")] 8759) 8760 8761;; Permutes that take half the elements from one vector and half the 8762;; elements from the other. 8763(define_insn "@aarch64_sve_<perm_insn><mode>" 8764 [(set (match_operand:SVE_ALL 0 "register_operand" "=w") 8765 (unspec:SVE_ALL 8766 [(match_operand:SVE_ALL 1 "register_operand" "w") 8767 (match_operand:SVE_ALL 2 "register_operand" "w")] 8768 PERMUTE))] 8769 "TARGET_SVE" 8770 "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>" 8771) 8772 8773;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns 8774;; doesn't depend on the mode. 8775(define_insn "@aarch64_sve_<optab><mode>" 8776 [(set (match_operand:SVE_FULL 0 "register_operand" "=w") 8777 (unspec:SVE_FULL 8778 [(match_operand:SVE_FULL 1 "register_operand" "w") 8779 (match_operand:SVE_FULL 2 "register_operand" "w")] 8780 PERMUTEQ))] 8781 "TARGET_SVE_F64MM" 8782 "<perm_insn>\t%0.q, %1.q, %2.q" 8783) 8784 8785;; Concatenate two vectors and extract a subvector. Note that the 8786;; immediate (third) operand is the lane index not the byte index. 8787(define_insn "@aarch64_sve_ext<mode>" 8788 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w") 8789 (unspec:SVE_ALL 8790 [(match_operand:SVE_ALL 1 "register_operand" "0, w") 8791 (match_operand:SVE_ALL 2 "register_operand" "w, w") 8792 (match_operand:SI 3 "const_int_operand")] 8793 UNSPEC_EXT))] 8794 "TARGET_SVE 8795 && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)" 8796 { 8797 operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8); 8798 return (which_alternative == 0 8799 ? "ext\\t%0.b, %0.b, %2.b, #%3" 8800 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3"); 8801 } 8802 [(set_attr "movprfx" "*,yes")] 8803) 8804 8805;; ------------------------------------------------------------------------- 8806;; ---- [PRED] Special-purpose unary permutes 8807;; ------------------------------------------------------------------------- 8808;; Includes: 8809;; - REV 8810;; ------------------------------------------------------------------------- 8811 8812(define_insn "@aarch64_sve_rev<mode>" 8813 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8814 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")] 8815 UNSPEC_REV))] 8816 "TARGET_SVE" 8817 "rev\t%0.<Vetype>, %1.<Vetype>") 8818 8819;; ------------------------------------------------------------------------- 8820;; ---- [PRED] Special-purpose binary permutes 8821;; ------------------------------------------------------------------------- 8822;; Includes: 8823;; - TRN1 8824;; - TRN2 8825;; - UZP1 8826;; - UZP2 8827;; - ZIP1 8828;; - ZIP2 8829;; ------------------------------------------------------------------------- 8830 8831;; Permutes that take half the elements from one vector and half the 8832;; elements from the other. 8833(define_insn "@aarch64_sve_<perm_insn><mode>" 8834 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 8835 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa") 8836 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 8837 PERMUTE))] 8838 "TARGET_SVE" 8839 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8840) 8841 8842;; Special purpose permute used by the predicate generation instructions. 8843;; Unlike the normal permute patterns, these instructions operate on VNx16BI 8844;; regardless of the element size, so that all input and output bits are 8845;; well-defined. Operand 3 then indicates the size of the permute. 8846(define_insn "@aarch64_sve_trn1_conv<mode>" 8847 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 8848 (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa") 8849 (match_operand:VNx16BI 2 "register_operand" "Upa") 8850 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")] 8851 UNSPEC_TRN1_CONV))] 8852 "TARGET_SVE" 8853 "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>" 8854) 8855 8856;; ========================================================================= 8857;; == Conversions 8858;; ========================================================================= 8859 8860;; ------------------------------------------------------------------------- 8861;; ---- [INT<-INT] Packs 8862;; ------------------------------------------------------------------------- 8863;; Includes: 8864;; - UZP1 8865;; ------------------------------------------------------------------------- 8866 8867;; Integer pack. Use UZP1 on the narrower type, which discards 8868;; the high part of each wide element. 8869(define_insn "vec_pack_trunc_<Vwide>" 8870 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w") 8871 (unspec:SVE_FULL_BHSI 8872 [(match_operand:<VWIDE> 1 "register_operand" "w") 8873 (match_operand:<VWIDE> 2 "register_operand" "w")] 8874 UNSPEC_PACK))] 8875 "TARGET_SVE" 8876 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 8877) 8878 8879;; ------------------------------------------------------------------------- 8880;; ---- [INT<-INT] Unpacks 8881;; ------------------------------------------------------------------------- 8882;; Includes: 8883;; - SUNPKHI 8884;; - SUNPKLO 8885;; - UUNPKHI 8886;; - UUNPKLO 8887;; ------------------------------------------------------------------------- 8888 8889;; Unpack the low or high half of a vector, where "high" refers to 8890;; the low-numbered lanes for big-endian and the high-numbered lanes 8891;; for little-endian. 8892(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>" 8893 [(match_operand:<VWIDE> 0 "register_operand") 8894 (unspec:<VWIDE> 8895 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)] 8896 "TARGET_SVE" 8897 { 8898 emit_insn ((<hi_lanes_optab> 8899 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode> 8900 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>) 8901 (operands[0], operands[1])); 8902 DONE; 8903 } 8904) 8905 8906(define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>" 8907 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 8908 (unspec:<VWIDE> 8909 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")] 8910 UNPACK))] 8911 "TARGET_SVE" 8912 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>" 8913) 8914 8915;; ------------------------------------------------------------------------- 8916;; ---- [INT<-FP] Conversions 8917;; ------------------------------------------------------------------------- 8918;; Includes: 8919;; - FCVTZS 8920;; - FCVTZU 8921;; ------------------------------------------------------------------------- 8922 8923;; Unpredicated conversion of floats to integers of the same size (HF to HI, 8924;; SF to SI or DF to DI). 8925(define_expand "<optab><mode><v_int_equiv>2" 8926 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand") 8927 (unspec:<V_INT_EQUIV> 8928 [(match_dup 2) 8929 (const_int SVE_RELAXED_GP) 8930 (match_operand:SVE_FULL_F 1 "register_operand")] 8931 SVE_COND_FCVTI))] 8932 "TARGET_SVE" 8933 { 8934 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 8935 } 8936) 8937 8938;; Predicated float-to-integer conversion, either to the same width or wider. 8939(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" 8940 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w") 8941 (unspec:SVE_FULL_HSDI 8942 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 8943 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8944 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")] 8945 SVE_COND_FCVTI))] 8946 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8947 "@ 8948 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 8949 movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 8950 [(set_attr "movprfx" "*,yes")] 8951) 8952 8953;; Predicated narrowing float-to-integer conversion. 8954(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 8955 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w") 8956 (unspec:VNx4SI_ONLY 8957 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") 8958 (match_operand:SI 3 "aarch64_sve_gp_strictness") 8959 (match_operand:VNx2DF_ONLY 2 "register_operand" "0, w")] 8960 SVE_COND_FCVTI))] 8961 "TARGET_SVE" 8962 "@ 8963 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 8964 movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" 8965 [(set_attr "movprfx" "*,yes")] 8966) 8967 8968;; Predicated float-to-integer conversion with merging, either to the same 8969;; width or wider. 8970(define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>" 8971 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand") 8972 (unspec:SVE_FULL_HSDI 8973 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") 8974 (unspec:SVE_FULL_HSDI 8975 [(match_dup 1) 8976 (const_int SVE_STRICT_GP) 8977 (match_operand:SVE_FULL_F 2 "register_operand")] 8978 SVE_COND_FCVTI) 8979 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")] 8980 UNSPEC_SEL))] 8981 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 8982) 8983 8984;; The first alternative doesn't need the earlyclobber, but the only case 8985;; it would help is the uninteresting one in which operands 2 and 3 are 8986;; the same register (despite having different modes). Making all the 8987;; alternatives earlyclobber makes things more consistent for the 8988;; register allocator. 8989(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed" 8990 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") 8991 (unspec:SVE_FULL_HSDI 8992 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 8993 (unspec:SVE_FULL_HSDI 8994 [(match_operand 4) 8995 (const_int SVE_RELAXED_GP) 8996 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 8997 SVE_COND_FCVTI) 8998 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 8999 UNSPEC_SEL))] 9000 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9001 "@ 9002 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 9003 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 9004 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 9005 "&& !rtx_equal_p (operands[1], operands[4])" 9006 { 9007 operands[4] = copy_rtx (operands[1]); 9008 } 9009 [(set_attr "movprfx" "*,yes,yes")] 9010) 9011 9012(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict" 9013 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w") 9014 (unspec:SVE_FULL_HSDI 9015 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 9016 (unspec:SVE_FULL_HSDI 9017 [(match_dup 1) 9018 (const_int SVE_STRICT_GP) 9019 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")] 9020 SVE_COND_FCVTI) 9021 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9022 UNSPEC_SEL))] 9023 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9024 "@ 9025 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 9026 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype> 9027 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>" 9028 [(set_attr "movprfx" "*,yes,yes")] 9029) 9030 9031;; Predicated narrowing float-to-integer conversion with merging. 9032(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 9033 [(set (match_operand:VNx4SI_ONLY 0 "register_operand") 9034 (unspec:VNx4SI_ONLY 9035 [(match_operand:VNx2BI 1 "register_operand") 9036 (unspec:VNx4SI_ONLY 9037 [(match_dup 1) 9038 (const_int SVE_STRICT_GP) 9039 (match_operand:VNx2DF_ONLY 2 "register_operand")] 9040 SVE_COND_FCVTI) 9041 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")] 9042 UNSPEC_SEL))] 9043 "TARGET_SVE" 9044) 9045 9046(define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>" 9047 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w") 9048 (unspec:VNx4SI_ONLY 9049 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") 9050 (unspec:VNx4SI_ONLY 9051 [(match_dup 1) 9052 (match_operand:SI 4 "aarch64_sve_gp_strictness") 9053 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")] 9054 SVE_COND_FCVTI) 9055 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9056 UNSPEC_SEL))] 9057 "TARGET_SVE" 9058 "@ 9059 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 9060 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype> 9061 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>" 9062 [(set_attr "movprfx" "*,yes,yes")] 9063) 9064 9065;; ------------------------------------------------------------------------- 9066;; ---- [INT<-FP] Packs 9067;; ------------------------------------------------------------------------- 9068;; The patterns in this section are synthetic. 9069;; ------------------------------------------------------------------------- 9070 9071;; Convert two vectors of DF to SI and pack the results into a single vector. 9072(define_expand "vec_pack_<su>fix_trunc_vnx2df" 9073 [(set (match_dup 4) 9074 (unspec:VNx4SI 9075 [(match_dup 3) 9076 (const_int SVE_RELAXED_GP) 9077 (match_operand:VNx2DF 1 "register_operand")] 9078 SVE_COND_FCVTI)) 9079 (set (match_dup 5) 9080 (unspec:VNx4SI 9081 [(match_dup 3) 9082 (const_int SVE_RELAXED_GP) 9083 (match_operand:VNx2DF 2 "register_operand")] 9084 SVE_COND_FCVTI)) 9085 (set (match_operand:VNx4SI 0 "register_operand") 9086 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] 9087 "TARGET_SVE" 9088 { 9089 operands[3] = aarch64_ptrue_reg (VNx2BImode); 9090 operands[4] = gen_reg_rtx (VNx4SImode); 9091 operands[5] = gen_reg_rtx (VNx4SImode); 9092 } 9093) 9094 9095;; ------------------------------------------------------------------------- 9096;; ---- [INT<-FP] Unpacks 9097;; ------------------------------------------------------------------------- 9098;; No patterns here yet! 9099;; ------------------------------------------------------------------------- 9100 9101;; ------------------------------------------------------------------------- 9102;; ---- [FP<-INT] Conversions 9103;; ------------------------------------------------------------------------- 9104;; Includes: 9105;; - SCVTF 9106;; - UCVTF 9107;; ------------------------------------------------------------------------- 9108 9109;; Unpredicated conversion of integers to floats of the same size 9110;; (HI to HF, SI to SF or DI to DF). 9111(define_expand "<optab><v_int_equiv><mode>2" 9112 [(set (match_operand:SVE_FULL_F 0 "register_operand") 9113 (unspec:SVE_FULL_F 9114 [(match_dup 2) 9115 (const_int SVE_RELAXED_GP) 9116 (match_operand:<V_INT_EQUIV> 1 "register_operand")] 9117 SVE_COND_ICVTF))] 9118 "TARGET_SVE" 9119 { 9120 operands[2] = aarch64_ptrue_reg (<VPRED>mode); 9121 } 9122) 9123 9124;; Predicated integer-to-float conversion, either to the same width or 9125;; narrower. 9126(define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" 9127 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w") 9128 (unspec:SVE_FULL_F 9129 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl") 9130 (match_operand:SI 3 "aarch64_sve_gp_strictness") 9131 (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")] 9132 SVE_COND_ICVTF))] 9133 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9134 "@ 9135 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 9136 movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 9137 [(set_attr "movprfx" "*,yes")] 9138) 9139 9140;; Predicated widening integer-to-float conversion. 9141(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 9142 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w") 9143 (unspec:VNx2DF_ONLY 9144 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl") 9145 (match_operand:SI 3 "aarch64_sve_gp_strictness") 9146 (match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")] 9147 SVE_COND_ICVTF))] 9148 "TARGET_SVE" 9149 "@ 9150 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 9151 movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" 9152 [(set_attr "movprfx" "*,yes")] 9153) 9154 9155;; Predicated integer-to-float conversion with merging, either to the same 9156;; width or narrower. 9157(define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>" 9158 [(set (match_operand:SVE_FULL_F 0 "register_operand") 9159 (unspec:SVE_FULL_F 9160 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand") 9161 (unspec:SVE_FULL_F 9162 [(match_dup 1) 9163 (const_int SVE_STRICT_GP) 9164 (match_operand:SVE_FULL_HSDI 2 "register_operand")] 9165 SVE_COND_ICVTF) 9166 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")] 9167 UNSPEC_SEL))] 9168 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9169) 9170 9171;; The first alternative doesn't need the earlyclobber, but the only case 9172;; it would help is the uninteresting one in which operands 2 and 3 are 9173;; the same register (despite having different modes). Making all the 9174;; alternatives earlyclobber makes things more consistent for the 9175;; register allocator. 9176(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed" 9177 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") 9178 (unspec:SVE_FULL_F 9179 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 9180 (unspec:SVE_FULL_F 9181 [(match_operand 4) 9182 (const_int SVE_RELAXED_GP) 9183 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] 9184 SVE_COND_ICVTF) 9185 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9186 UNSPEC_SEL))] 9187 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9188 "@ 9189 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 9190 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 9191 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 9192 "&& !rtx_equal_p (operands[1], operands[4])" 9193 { 9194 operands[4] = copy_rtx (operands[1]); 9195 } 9196 [(set_attr "movprfx" "*,yes,yes")] 9197) 9198 9199(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict" 9200 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w") 9201 (unspec:SVE_FULL_F 9202 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl") 9203 (unspec:SVE_FULL_F 9204 [(match_dup 1) 9205 (const_int SVE_STRICT_GP) 9206 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")] 9207 SVE_COND_ICVTF) 9208 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9209 UNSPEC_SEL))] 9210 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>" 9211 "@ 9212 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 9213 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype> 9214 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>" 9215 [(set_attr "movprfx" "*,yes,yes")] 9216) 9217 9218;; Predicated widening integer-to-float conversion with merging. 9219(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 9220 [(set (match_operand:VNx2DF_ONLY 0 "register_operand") 9221 (unspec:VNx2DF_ONLY 9222 [(match_operand:VNx2BI 1 "register_operand") 9223 (unspec:VNx2DF_ONLY 9224 [(match_dup 1) 9225 (const_int SVE_STRICT_GP) 9226 (match_operand:VNx4SI_ONLY 2 "register_operand")] 9227 SVE_COND_ICVTF) 9228 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")] 9229 UNSPEC_SEL))] 9230 "TARGET_SVE" 9231) 9232 9233(define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>" 9234 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w") 9235 (unspec:VNx2DF_ONLY 9236 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl") 9237 (unspec:VNx2DF_ONLY 9238 [(match_dup 1) 9239 (match_operand:SI 4 "aarch64_sve_gp_strictness") 9240 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")] 9241 SVE_COND_ICVTF) 9242 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9243 UNSPEC_SEL))] 9244 "TARGET_SVE" 9245 "@ 9246 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 9247 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype> 9248 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>" 9249 [(set_attr "movprfx" "*,yes,yes")] 9250) 9251 9252;; ------------------------------------------------------------------------- 9253;; ---- [FP<-INT] Packs 9254;; ------------------------------------------------------------------------- 9255;; No patterns here yet! 9256;; ------------------------------------------------------------------------- 9257 9258;; ------------------------------------------------------------------------- 9259;; ---- [FP<-INT] Unpacks 9260;; ------------------------------------------------------------------------- 9261;; The patterns in this section are synthetic. 9262;; ------------------------------------------------------------------------- 9263 9264;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI 9265;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the 9266;; unpacked VNx4SI to VNx2DF. 9267(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si" 9268 [(match_operand:VNx2DF 0 "register_operand") 9269 (FLOATUORS:VNx2DF 9270 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")] 9271 UNPACK_UNSIGNED))] 9272 "TARGET_SVE" 9273 { 9274 /* Use ZIP to do the unpack, since we don't care about the upper halves 9275 and since it has the nice property of not needing any subregs. 9276 If using UUNPK* turns out to be preferable, we could model it as 9277 a ZIP whose first operand is zero. */ 9278 rtx temp = gen_reg_rtx (VNx4SImode); 9279 emit_insn ((<hi_lanes_optab> 9280 ? gen_aarch64_sve_zip2vnx4si 9281 : gen_aarch64_sve_zip1vnx4si) 9282 (temp, operands[1], operands[1])); 9283 rtx ptrue = aarch64_ptrue_reg (VNx2BImode); 9284 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); 9285 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df 9286 (operands[0], ptrue, temp, strictness)); 9287 DONE; 9288 } 9289) 9290 9291;; ------------------------------------------------------------------------- 9292;; ---- [FP<-FP] Packs 9293;; ------------------------------------------------------------------------- 9294;; Includes: 9295;; - FCVT 9296;; ------------------------------------------------------------------------- 9297 9298;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack 9299;; the results into a single vector. 9300(define_expand "vec_pack_trunc_<Vwide>" 9301 [(set (match_dup 4) 9302 (unspec:SVE_FULL_HSF 9303 [(match_dup 3) 9304 (const_int SVE_RELAXED_GP) 9305 (match_operand:<VWIDE> 1 "register_operand")] 9306 UNSPEC_COND_FCVT)) 9307 (set (match_dup 5) 9308 (unspec:SVE_FULL_HSF 9309 [(match_dup 3) 9310 (const_int SVE_RELAXED_GP) 9311 (match_operand:<VWIDE> 2 "register_operand")] 9312 UNSPEC_COND_FCVT)) 9313 (set (match_operand:SVE_FULL_HSF 0 "register_operand") 9314 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))] 9315 "TARGET_SVE" 9316 { 9317 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode); 9318 operands[4] = gen_reg_rtx (<MODE>mode); 9319 operands[5] = gen_reg_rtx (<MODE>mode); 9320 } 9321) 9322 9323;; Predicated float-to-float truncation. 9324(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 9325 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w") 9326 (unspec:SVE_FULL_HSF 9327 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl") 9328 (match_operand:SI 3 "aarch64_sve_gp_strictness") 9329 (match_operand:SVE_FULL_SDF 2 "register_operand" "0, w")] 9330 SVE_COND_FCVT))] 9331 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9332 "@ 9333 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 9334 movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" 9335 [(set_attr "movprfx" "*,yes")] 9336) 9337 9338;; Predicated float-to-float truncation with merging. 9339(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 9340 [(set (match_operand:SVE_FULL_HSF 0 "register_operand") 9341 (unspec:SVE_FULL_HSF 9342 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand") 9343 (unspec:SVE_FULL_HSF 9344 [(match_dup 1) 9345 (const_int SVE_STRICT_GP) 9346 (match_operand:SVE_FULL_SDF 2 "register_operand")] 9347 SVE_COND_FCVT) 9348 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")] 9349 UNSPEC_SEL))] 9350 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9351) 9352 9353(define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>" 9354 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w") 9355 (unspec:SVE_FULL_HSF 9356 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl") 9357 (unspec:SVE_FULL_HSF 9358 [(match_dup 1) 9359 (match_operand:SI 4 "aarch64_sve_gp_strictness") 9360 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")] 9361 SVE_COND_FCVT) 9362 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9363 UNSPEC_SEL))] 9364 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9365 "@ 9366 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 9367 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype> 9368 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>" 9369 [(set_attr "movprfx" "*,yes,yes")] 9370) 9371 9372;; ------------------------------------------------------------------------- 9373;; ---- [FP<-FP] Packs (bfloat16) 9374;; ------------------------------------------------------------------------- 9375;; Includes: 9376;; - BFCVT (BF16) 9377;; - BFCVTNT (BF16) 9378;; ------------------------------------------------------------------------- 9379 9380;; Predicated BFCVT. 9381(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" 9382 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w") 9383 (unspec:VNx8BF_ONLY 9384 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl") 9385 (match_operand:SI 3 "aarch64_sve_gp_strictness") 9386 (match_operand:VNx4SF_ONLY 2 "register_operand" "0, w")] 9387 SVE_COND_FCVT))] 9388 "TARGET_SVE_BF16" 9389 "@ 9390 bfcvt\t%0.h, %1/m, %2.s 9391 movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s" 9392 [(set_attr "movprfx" "*,yes")] 9393) 9394 9395;; Predicated BFCVT with merging. 9396(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" 9397 [(set (match_operand:VNx8BF_ONLY 0 "register_operand") 9398 (unspec:VNx8BF_ONLY 9399 [(match_operand:VNx4BI 1 "register_operand") 9400 (unspec:VNx8BF_ONLY 9401 [(match_dup 1) 9402 (const_int SVE_STRICT_GP) 9403 (match_operand:VNx4SF_ONLY 2 "register_operand")] 9404 SVE_COND_FCVT) 9405 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")] 9406 UNSPEC_SEL))] 9407 "TARGET_SVE_BF16" 9408) 9409 9410(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>" 9411 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w") 9412 (unspec:VNx8BF_ONLY 9413 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl") 9414 (unspec:VNx8BF_ONLY 9415 [(match_dup 1) 9416 (match_operand:SI 4 "aarch64_sve_gp_strictness") 9417 (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")] 9418 SVE_COND_FCVT) 9419 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9420 UNSPEC_SEL))] 9421 "TARGET_SVE_BF16" 9422 "@ 9423 bfcvt\t%0.h, %1/m, %2.s 9424 movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s 9425 movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s" 9426 [(set_attr "movprfx" "*,yes,yes")] 9427) 9428 9429;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_* 9430;; pair because the even elements always have to be supplied for active 9431;; elements, even if the inactive elements don't matter. 9432;; 9433;; This instructions does not take MOVPRFX. 9434(define_insn "@aarch64_sve_cvtnt<mode>" 9435 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w") 9436 (unspec:VNx8BF_ONLY 9437 [(match_operand:VNx4BI 2 "register_operand" "Upl") 9438 (const_int SVE_STRICT_GP) 9439 (match_operand:VNx8BF_ONLY 1 "register_operand" "0") 9440 (match_operand:VNx4SF 3 "register_operand" "w")] 9441 UNSPEC_COND_FCVTNT))] 9442 "TARGET_SVE_BF16" 9443 "bfcvtnt\t%0.h, %2/m, %3.s" 9444) 9445 9446;; ------------------------------------------------------------------------- 9447;; ---- [FP<-FP] Unpacks 9448;; ------------------------------------------------------------------------- 9449;; Includes: 9450;; - FCVT 9451;; ------------------------------------------------------------------------- 9452 9453;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF. 9454;; First unpack the source without conversion, then float-convert the 9455;; unpacked source. 9456(define_expand "vec_unpacks_<perm_hilo>_<mode>" 9457 [(match_operand:<VWIDE> 0 "register_operand") 9458 (unspec:SVE_FULL_HSF 9459 [(match_operand:SVE_FULL_HSF 1 "register_operand")] 9460 UNPACK_UNSIGNED)] 9461 "TARGET_SVE" 9462 { 9463 /* Use ZIP to do the unpack, since we don't care about the upper halves 9464 and since it has the nice property of not needing any subregs. 9465 If using UUNPK* turns out to be preferable, we could model it as 9466 a ZIP whose first operand is zero. */ 9467 rtx temp = gen_reg_rtx (<MODE>mode); 9468 emit_insn ((<hi_lanes_optab> 9469 ? gen_aarch64_sve_zip2<mode> 9470 : gen_aarch64_sve_zip1<mode>) 9471 (temp, operands[1], operands[1])); 9472 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode); 9473 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode); 9474 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide> 9475 (operands[0], ptrue, temp, strictness)); 9476 DONE; 9477 } 9478) 9479 9480;; Predicated float-to-float extension. 9481(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>" 9482 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w") 9483 (unspec:SVE_FULL_SDF 9484 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl") 9485 (match_operand:SI 3 "aarch64_sve_gp_strictness") 9486 (match_operand:SVE_FULL_HSF 2 "register_operand" "0, w")] 9487 SVE_COND_FCVT))] 9488 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9489 "@ 9490 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> 9491 movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>" 9492 [(set_attr "movprfx" "*,yes")] 9493) 9494 9495;; Predicated float-to-float extension with merging. 9496(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>" 9497 [(set (match_operand:SVE_FULL_SDF 0 "register_operand") 9498 (unspec:SVE_FULL_SDF 9499 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand") 9500 (unspec:SVE_FULL_SDF 9501 [(match_dup 1) 9502 (const_int SVE_STRICT_GP) 9503 (match_operand:SVE_FULL_HSF 2 "register_operand")] 9504 SVE_COND_FCVT) 9505 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")] 9506 UNSPEC_SEL))] 9507 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9508) 9509 9510(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>" 9511 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w") 9512 (unspec:SVE_FULL_SDF 9513 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl") 9514 (unspec:SVE_FULL_SDF 9515 [(match_dup 1) 9516 (match_operand:SI 4 "aarch64_sve_gp_strictness") 9517 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")] 9518 SVE_COND_FCVT) 9519 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")] 9520 UNSPEC_SEL))] 9521 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>" 9522 "@ 9523 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> 9524 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype> 9525 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>" 9526 [(set_attr "movprfx" "*,yes,yes")] 9527) 9528 9529;; ------------------------------------------------------------------------- 9530;; ---- [PRED<-PRED] Packs 9531;; ------------------------------------------------------------------------- 9532;; Includes: 9533;; - UZP1 9534;; ------------------------------------------------------------------------- 9535 9536;; Predicate pack. Use UZP1 on the narrower type, which discards 9537;; the high part of each wide element. 9538(define_insn "vec_pack_trunc_<Vwide>" 9539 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa") 9540 (unspec:PRED_BHS 9541 [(match_operand:<VWIDE> 1 "register_operand" "Upa") 9542 (match_operand:<VWIDE> 2 "register_operand" "Upa")] 9543 UNSPEC_PACK))] 9544 "TARGET_SVE" 9545 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>" 9546) 9547 9548;; ------------------------------------------------------------------------- 9549;; ---- [PRED<-PRED] Unpacks 9550;; ------------------------------------------------------------------------- 9551;; Includes: 9552;; - PUNPKHI 9553;; - PUNPKLO 9554;; ------------------------------------------------------------------------- 9555 9556;; Unpack the low or high half of a predicate, where "high" refers to 9557;; the low-numbered lanes for big-endian and the high-numbered lanes 9558;; for little-endian. 9559(define_expand "vec_unpack<su>_<perm_hilo>_<mode>" 9560 [(match_operand:<VWIDE> 0 "register_operand") 9561 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")] 9562 UNPACK)] 9563 "TARGET_SVE" 9564 { 9565 emit_insn ((<hi_lanes_optab> 9566 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode> 9567 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>) 9568 (operands[0], operands[1])); 9569 DONE; 9570 } 9571) 9572 9573(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>" 9574 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa") 9575 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")] 9576 UNPACK_UNSIGNED))] 9577 "TARGET_SVE" 9578 "punpk<perm_hilo>\t%0.h, %1.b" 9579) 9580 9581;; ========================================================================= 9582;; == Vector partitioning 9583;; ========================================================================= 9584 9585;; ------------------------------------------------------------------------- 9586;; ---- [PRED] Unary partitioning 9587;; ------------------------------------------------------------------------- 9588;; Includes: 9589;; - BRKA 9590;; - BRKAS 9591;; - BRKB 9592;; - BRKBS 9593;; ------------------------------------------------------------------------- 9594 9595;; Note that unlike most other instructions that have both merging and 9596;; zeroing forms, these instructions don't operate elementwise and so 9597;; don't fit the IFN_COND model. 9598(define_insn "@aarch64_brk<brk_op>" 9599 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa") 9600 (unspec:VNx16BI 9601 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa") 9602 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa") 9603 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")] 9604 SVE_BRK_UNARY))] 9605 "TARGET_SVE" 9606 "@ 9607 brk<brk_op>\t%0.b, %1/z, %2.b 9608 brk<brk_op>\t%0.b, %1/m, %2.b" 9609) 9610 9611;; Same, but also producing a flags result. 9612(define_insn "*aarch64_brk<brk_op>_cc" 9613 [(set (reg:CC_NZC CC_REGNUM) 9614 (unspec:CC_NZC 9615 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9616 (match_dup 1) 9617 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9618 (unspec:VNx16BI 9619 [(match_dup 1) 9620 (match_operand:VNx16BI 2 "register_operand" "Upa") 9621 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")] 9622 SVE_BRK_UNARY)] 9623 UNSPEC_PTEST)) 9624 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9625 (unspec:VNx16BI 9626 [(match_dup 1) 9627 (match_dup 2) 9628 (match_dup 3)] 9629 SVE_BRK_UNARY))] 9630 "TARGET_SVE" 9631 "brk<brk_op>s\t%0.b, %1/z, %2.b" 9632) 9633 9634;; Same, but with only the flags result being interesting. 9635(define_insn "*aarch64_brk<brk_op>_ptest" 9636 [(set (reg:CC_NZC CC_REGNUM) 9637 (unspec:CC_NZC 9638 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9639 (match_dup 1) 9640 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9641 (unspec:VNx16BI 9642 [(match_dup 1) 9643 (match_operand:VNx16BI 2 "register_operand" "Upa") 9644 (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")] 9645 SVE_BRK_UNARY)] 9646 UNSPEC_PTEST)) 9647 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9648 "TARGET_SVE" 9649 "brk<brk_op>s\t%0.b, %1/z, %2.b" 9650) 9651 9652;; ------------------------------------------------------------------------- 9653;; ---- [PRED] Binary partitioning 9654;; ------------------------------------------------------------------------- 9655;; Includes: 9656;; - BRKN 9657;; - BRKNS 9658;; - BRKPA 9659;; - BRKPAS 9660;; - BRKPB 9661;; - BRKPBS 9662;; ------------------------------------------------------------------------- 9663 9664;; Binary BRKs (BRKN, BRKPA, BRKPB). 9665(define_insn "@aarch64_brk<brk_op>" 9666 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9667 (unspec:VNx16BI 9668 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9669 (match_operand:VNx16BI 2 "register_operand" "Upa") 9670 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")] 9671 SVE_BRK_BINARY))] 9672 "TARGET_SVE" 9673 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b" 9674) 9675 9676;; BRKN, producing both a predicate and a flags result. Unlike other 9677;; flag-setting instructions, these flags are always set wrt a ptrue. 9678(define_insn_and_rewrite "*aarch64_brkn_cc" 9679 [(set (reg:CC_NZC CC_REGNUM) 9680 (unspec:CC_NZC 9681 [(match_operand:VNx16BI 4) 9682 (match_operand:VNx16BI 5) 9683 (const_int SVE_KNOWN_PTRUE) 9684 (unspec:VNx16BI 9685 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9686 (match_operand:VNx16BI 2 "register_operand" "Upa") 9687 (match_operand:VNx16BI 3 "register_operand" "0")] 9688 UNSPEC_BRKN)] 9689 UNSPEC_PTEST)) 9690 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9691 (unspec:VNx16BI 9692 [(match_dup 1) 9693 (match_dup 2) 9694 (match_dup 3)] 9695 UNSPEC_BRKN))] 9696 "TARGET_SVE" 9697 "brkns\t%0.b, %1/z, %2.b, %0.b" 9698 "&& (operands[4] != CONST0_RTX (VNx16BImode) 9699 || operands[5] != CONST0_RTX (VNx16BImode))" 9700 { 9701 operands[4] = CONST0_RTX (VNx16BImode); 9702 operands[5] = CONST0_RTX (VNx16BImode); 9703 } 9704) 9705 9706;; Same, but with only the flags result being interesting. 9707(define_insn_and_rewrite "*aarch64_brkn_ptest" 9708 [(set (reg:CC_NZC CC_REGNUM) 9709 (unspec:CC_NZC 9710 [(match_operand:VNx16BI 4) 9711 (match_operand:VNx16BI 5) 9712 (const_int SVE_KNOWN_PTRUE) 9713 (unspec:VNx16BI 9714 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9715 (match_operand:VNx16BI 2 "register_operand" "Upa") 9716 (match_operand:VNx16BI 3 "register_operand" "0")] 9717 UNSPEC_BRKN)] 9718 UNSPEC_PTEST)) 9719 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9720 "TARGET_SVE" 9721 "brkns\t%0.b, %1/z, %2.b, %0.b" 9722 "&& (operands[4] != CONST0_RTX (VNx16BImode) 9723 || operands[5] != CONST0_RTX (VNx16BImode))" 9724 { 9725 operands[4] = CONST0_RTX (VNx16BImode); 9726 operands[5] = CONST0_RTX (VNx16BImode); 9727 } 9728) 9729 9730;; BRKPA and BRKPB, producing both a predicate and a flags result. 9731(define_insn "*aarch64_brk<brk_op>_cc" 9732 [(set (reg:CC_NZC CC_REGNUM) 9733 (unspec:CC_NZC 9734 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9735 (match_dup 1) 9736 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9737 (unspec:VNx16BI 9738 [(match_dup 1) 9739 (match_operand:VNx16BI 2 "register_operand" "Upa") 9740 (match_operand:VNx16BI 3 "register_operand" "Upa")] 9741 SVE_BRKP)] 9742 UNSPEC_PTEST)) 9743 (set (match_operand:VNx16BI 0 "register_operand" "=Upa") 9744 (unspec:VNx16BI 9745 [(match_dup 1) 9746 (match_dup 2) 9747 (match_dup 3)] 9748 SVE_BRKP))] 9749 "TARGET_SVE" 9750 "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b" 9751) 9752 9753;; Same, but with only the flags result being interesting. 9754(define_insn "*aarch64_brk<brk_op>_ptest" 9755 [(set (reg:CC_NZC CC_REGNUM) 9756 (unspec:CC_NZC 9757 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9758 (match_dup 1) 9759 (match_operand:SI 4 "aarch64_sve_ptrue_flag") 9760 (unspec:VNx16BI 9761 [(match_dup 1) 9762 (match_operand:VNx16BI 2 "register_operand" "Upa") 9763 (match_operand:VNx16BI 3 "register_operand" "Upa")] 9764 SVE_BRKP)] 9765 UNSPEC_PTEST)) 9766 (clobber (match_scratch:VNx16BI 0 "=Upa"))] 9767 "TARGET_SVE" 9768 "brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b" 9769) 9770 9771;; ------------------------------------------------------------------------- 9772;; ---- [PRED] Scalarization 9773;; ------------------------------------------------------------------------- 9774;; Includes: 9775;; - PFIRST 9776;; - PNEXT 9777;; ------------------------------------------------------------------------- 9778 9779(define_insn "@aarch64_sve_<sve_pred_op><mode>" 9780 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 9781 (unspec:PRED_ALL 9782 [(match_operand:PRED_ALL 1 "register_operand" "Upa") 9783 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 9784 (match_operand:PRED_ALL 3 "register_operand" "0")] 9785 SVE_PITER)) 9786 (clobber (reg:CC_NZC CC_REGNUM))] 9787 "TARGET_SVE && <max_elem_bits> >= <elem_bits>" 9788 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9789) 9790 9791;; Same, but also producing a flags result. 9792(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc" 9793 [(set (reg:CC_NZC CC_REGNUM) 9794 (unspec:CC_NZC 9795 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9796 (match_operand 2) 9797 (match_operand:SI 3 "aarch64_sve_ptrue_flag") 9798 (unspec:PRED_ALL 9799 [(match_operand 4) 9800 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 9801 (match_operand:PRED_ALL 6 "register_operand" "0")] 9802 SVE_PITER)] 9803 UNSPEC_PTEST)) 9804 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa") 9805 (unspec:PRED_ALL 9806 [(match_dup 4) 9807 (match_dup 5) 9808 (match_dup 6)] 9809 SVE_PITER))] 9810 "TARGET_SVE 9811 && <max_elem_bits> >= <elem_bits> 9812 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" 9813 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9814 "&& !rtx_equal_p (operands[2], operands[4])" 9815 { 9816 operands[4] = operands[2]; 9817 operands[5] = operands[3]; 9818 } 9819) 9820 9821;; Same, but with only the flags result being interesting. 9822(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest" 9823 [(set (reg:CC_NZC CC_REGNUM) 9824 (unspec:CC_NZC 9825 [(match_operand:VNx16BI 1 "register_operand" "Upa") 9826 (match_operand 2) 9827 (match_operand:SI 3 "aarch64_sve_ptrue_flag") 9828 (unspec:PRED_ALL 9829 [(match_operand 4) 9830 (match_operand:SI 5 "aarch64_sve_ptrue_flag") 9831 (match_operand:PRED_ALL 6 "register_operand" "0")] 9832 SVE_PITER)] 9833 UNSPEC_PTEST)) 9834 (clobber (match_scratch:PRED_ALL 0 "=Upa"))] 9835 "TARGET_SVE 9836 && <max_elem_bits> >= <elem_bits> 9837 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])" 9838 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>" 9839 "&& !rtx_equal_p (operands[2], operands[4])" 9840 { 9841 operands[4] = operands[2]; 9842 operands[5] = operands[3]; 9843 } 9844) 9845 9846;; ========================================================================= 9847;; == Counting elements 9848;; ========================================================================= 9849 9850;; ------------------------------------------------------------------------- 9851;; ---- [INT] Count elements in a pattern (scalar) 9852;; ------------------------------------------------------------------------- 9853;; Includes: 9854;; - CNTB 9855;; - CNTD 9856;; - CNTH 9857;; - CNTW 9858;; ------------------------------------------------------------------------- 9859 9860;; Count the number of elements in an svpattern. Operand 1 is the pattern, 9861;; operand 2 is the number of elements that fit in a 128-bit block, and 9862;; operand 3 is a multiplier in the range [1, 16]. 9863;; 9864;; Note that this pattern isn't used for SV_ALL (but would work for that too). 9865(define_insn "aarch64_sve_cnt_pat" 9866 [(set (match_operand:DI 0 "register_operand" "=r") 9867 (zero_extend:DI 9868 (unspec:SI [(match_operand:DI 1 "const_int_operand") 9869 (match_operand:DI 2 "const_int_operand") 9870 (match_operand:DI 3 "const_int_operand")] 9871 UNSPEC_SVE_CNT_PAT)))] 9872 "TARGET_SVE" 9873 { 9874 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1); 9875 } 9876) 9877 9878;; ------------------------------------------------------------------------- 9879;; ---- [INT] Increment by the number of elements in a pattern (scalar) 9880;; ------------------------------------------------------------------------- 9881;; Includes: 9882;; - INC 9883;; - SQINC 9884;; - UQINC 9885;; ------------------------------------------------------------------------- 9886 9887;; Increment a DImode register by the number of elements in an svpattern. 9888;; See aarch64_sve_cnt_pat for the counting behavior. 9889(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9890 [(set (match_operand:DI 0 "register_operand" "=r") 9891 (ANY_PLUS:DI (zero_extend:DI 9892 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9893 (match_operand:DI 3 "const_int_operand") 9894 (match_operand:DI 4 "const_int_operand")] 9895 UNSPEC_SVE_CNT_PAT)) 9896 (match_operand:DI_ONLY 1 "register_operand" "0")))] 9897 "TARGET_SVE" 9898 { 9899 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0", 9900 operands + 2); 9901 } 9902) 9903 9904;; Increment an SImode register by the number of elements in an svpattern 9905;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting 9906;; behavior. 9907(define_insn "*aarch64_sve_incsi_pat" 9908 [(set (match_operand:SI 0 "register_operand" "=r") 9909 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand") 9910 (match_operand:DI 3 "const_int_operand") 9911 (match_operand:DI 4 "const_int_operand")] 9912 UNSPEC_SVE_CNT_PAT) 9913 (match_operand:SI 1 "register_operand" "0")))] 9914 "TARGET_SVE" 9915 { 9916 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2); 9917 } 9918) 9919 9920;; Increment an SImode register by the number of elements in an svpattern 9921;; using saturating arithmetic, extending the result to 64 bits. 9922;; 9923;; See aarch64_sve_cnt_pat for the counting behavior. 9924(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9925 [(set (match_operand:DI 0 "register_operand" "=r") 9926 (<paired_extend>:DI 9927 (SAT_PLUS:SI 9928 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9929 (match_operand:DI 3 "const_int_operand") 9930 (match_operand:DI 4 "const_int_operand")] 9931 UNSPEC_SVE_CNT_PAT) 9932 (match_operand:SI_ONLY 1 "register_operand" "0"))))] 9933 "TARGET_SVE" 9934 { 9935 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0"); 9936 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers, 9937 operands + 2); 9938 } 9939) 9940 9941;; ------------------------------------------------------------------------- 9942;; ---- [INT] Increment by the number of elements in a pattern (vector) 9943;; ------------------------------------------------------------------------- 9944;; Includes: 9945;; - INC 9946;; - SQINC 9947;; - UQINC 9948;; ------------------------------------------------------------------------- 9949 9950;; Increment a vector of DIs by the number of elements in an svpattern. 9951;; See aarch64_sve_cnt_pat for the counting behavior. 9952(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9953 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 9954 (ANY_PLUS:VNx2DI 9955 (vec_duplicate:VNx2DI 9956 (zero_extend:DI 9957 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9958 (match_operand:DI 3 "const_int_operand") 9959 (match_operand:DI 4 "const_int_operand")] 9960 UNSPEC_SVE_CNT_PAT))) 9961 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] 9962 "TARGET_SVE" 9963 { 9964 if (which_alternative == 1) 9965 output_asm_insn ("movprfx\t%0, %1", operands); 9966 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9967 operands + 2); 9968 } 9969 [(set_attr "movprfx" "*,yes")] 9970) 9971 9972;; Increment a vector of SIs by the number of elements in an svpattern. 9973;; See aarch64_sve_cnt_pat for the counting behavior. 9974(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 9975 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 9976 (ANY_PLUS:VNx4SI 9977 (vec_duplicate:VNx4SI 9978 (unspec:SI [(match_operand:DI 2 "const_int_operand") 9979 (match_operand:DI 3 "const_int_operand") 9980 (match_operand:DI 4 "const_int_operand")] 9981 UNSPEC_SVE_CNT_PAT)) 9982 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] 9983 "TARGET_SVE" 9984 { 9985 if (which_alternative == 1) 9986 output_asm_insn ("movprfx\t%0, %1", operands); 9987 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 9988 operands + 2); 9989 } 9990 [(set_attr "movprfx" "*,yes")] 9991) 9992 9993;; Increment a vector of HIs by the number of elements in an svpattern. 9994;; See aarch64_sve_cnt_pat for the counting behavior. 9995(define_expand "@aarch64_sve_<inc_dec><mode>_pat" 9996 [(set (match_operand:VNx8HI 0 "register_operand") 9997 (ANY_PLUS:VNx8HI 9998 (vec_duplicate:VNx8HI 9999 (truncate:HI 10000 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10001 (match_operand:DI 3 "const_int_operand") 10002 (match_operand:DI 4 "const_int_operand")] 10003 UNSPEC_SVE_CNT_PAT))) 10004 (match_operand:VNx8HI_ONLY 1 "register_operand")))] 10005 "TARGET_SVE" 10006) 10007 10008(define_insn "*aarch64_sve_<inc_dec><mode>_pat" 10009 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10010 (ANY_PLUS:VNx8HI 10011 (vec_duplicate:VNx8HI 10012 (match_operator:HI 5 "subreg_lowpart_operator" 10013 [(unspec:SI [(match_operand:DI 2 "const_int_operand") 10014 (match_operand:DI 3 "const_int_operand") 10015 (match_operand:DI 4 "const_int_operand")] 10016 UNSPEC_SVE_CNT_PAT)])) 10017 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] 10018 "TARGET_SVE" 10019 { 10020 if (which_alternative == 1) 10021 output_asm_insn ("movprfx\t%0, %1", operands); 10022 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 10023 operands + 2); 10024 } 10025 [(set_attr "movprfx" "*,yes")] 10026) 10027 10028;; ------------------------------------------------------------------------- 10029;; ---- [INT] Decrement by the number of elements in a pattern (scalar) 10030;; ------------------------------------------------------------------------- 10031;; Includes: 10032;; - DEC 10033;; - SQDEC 10034;; - UQDEC 10035;; ------------------------------------------------------------------------- 10036 10037;; Decrement a DImode register by the number of elements in an svpattern. 10038;; See aarch64_sve_cnt_pat for the counting behavior. 10039(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 10040 [(set (match_operand:DI 0 "register_operand" "=r") 10041 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0") 10042 (zero_extend:DI 10043 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10044 (match_operand:DI 3 "const_int_operand") 10045 (match_operand:DI 4 "const_int_operand")] 10046 UNSPEC_SVE_CNT_PAT))))] 10047 "TARGET_SVE" 10048 { 10049 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0", 10050 operands + 2); 10051 } 10052) 10053 10054;; Decrement an SImode register by the number of elements in an svpattern 10055;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting 10056;; behavior. 10057(define_insn "*aarch64_sve_decsi_pat" 10058 [(set (match_operand:SI 0 "register_operand" "=r") 10059 (minus:SI (match_operand:SI 1 "register_operand" "0") 10060 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10061 (match_operand:DI 3 "const_int_operand") 10062 (match_operand:DI 4 "const_int_operand")] 10063 UNSPEC_SVE_CNT_PAT)))] 10064 "TARGET_SVE" 10065 { 10066 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2); 10067 } 10068) 10069 10070;; Decrement an SImode register by the number of elements in an svpattern 10071;; using saturating arithmetic, extending the result to 64 bits. 10072;; 10073;; See aarch64_sve_cnt_pat for the counting behavior. 10074(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 10075 [(set (match_operand:DI 0 "register_operand" "=r") 10076 (<paired_extend>:DI 10077 (SAT_MINUS:SI 10078 (match_operand:SI_ONLY 1 "register_operand" "0") 10079 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10080 (match_operand:DI 3 "const_int_operand") 10081 (match_operand:DI 4 "const_int_operand")] 10082 UNSPEC_SVE_CNT_PAT))))] 10083 "TARGET_SVE" 10084 { 10085 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0"); 10086 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers, 10087 operands + 2); 10088 } 10089) 10090 10091;; ------------------------------------------------------------------------- 10092;; ---- [INT] Decrement by the number of elements in a pattern (vector) 10093;; ------------------------------------------------------------------------- 10094;; Includes: 10095;; - DEC 10096;; - SQDEC 10097;; - UQDEC 10098;; ------------------------------------------------------------------------- 10099 10100;; Decrement a vector of DIs by the number of elements in an svpattern. 10101;; See aarch64_sve_cnt_pat for the counting behavior. 10102(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 10103 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 10104 (ANY_MINUS:VNx2DI 10105 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") 10106 (vec_duplicate:VNx2DI 10107 (zero_extend:DI 10108 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10109 (match_operand:DI 3 "const_int_operand") 10110 (match_operand:DI 4 "const_int_operand")] 10111 UNSPEC_SVE_CNT_PAT)))))] 10112 "TARGET_SVE" 10113 { 10114 if (which_alternative == 1) 10115 output_asm_insn ("movprfx\t%0, %1", operands); 10116 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 10117 operands + 2); 10118 } 10119 [(set_attr "movprfx" "*,yes")] 10120) 10121 10122;; Decrement a vector of SIs by the number of elements in an svpattern. 10123;; See aarch64_sve_cnt_pat for the counting behavior. 10124(define_insn "@aarch64_sve_<inc_dec><mode>_pat" 10125 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 10126 (ANY_MINUS:VNx4SI 10127 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") 10128 (vec_duplicate:VNx4SI 10129 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10130 (match_operand:DI 3 "const_int_operand") 10131 (match_operand:DI 4 "const_int_operand")] 10132 UNSPEC_SVE_CNT_PAT))))] 10133 "TARGET_SVE" 10134 { 10135 if (which_alternative == 1) 10136 output_asm_insn ("movprfx\t%0, %1", operands); 10137 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 10138 operands + 2); 10139 } 10140 [(set_attr "movprfx" "*,yes")] 10141) 10142 10143;; Decrement a vector of HIs by the number of elements in an svpattern. 10144;; See aarch64_sve_cnt_pat for the counting behavior. 10145(define_expand "@aarch64_sve_<inc_dec><mode>_pat" 10146 [(set (match_operand:VNx8HI 0 "register_operand") 10147 (ANY_MINUS:VNx8HI 10148 (match_operand:VNx8HI_ONLY 1 "register_operand") 10149 (vec_duplicate:VNx8HI 10150 (truncate:HI 10151 (unspec:SI [(match_operand:DI 2 "const_int_operand") 10152 (match_operand:DI 3 "const_int_operand") 10153 (match_operand:DI 4 "const_int_operand")] 10154 UNSPEC_SVE_CNT_PAT)))))] 10155 "TARGET_SVE" 10156) 10157 10158(define_insn "*aarch64_sve_<inc_dec><mode>_pat" 10159 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10160 (ANY_MINUS:VNx8HI 10161 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") 10162 (vec_duplicate:VNx8HI 10163 (match_operator:HI 5 "subreg_lowpart_operator" 10164 [(unspec:SI [(match_operand:DI 2 "const_int_operand") 10165 (match_operand:DI 3 "const_int_operand") 10166 (match_operand:DI 4 "const_int_operand")] 10167 UNSPEC_SVE_CNT_PAT)]))))] 10168 "TARGET_SVE" 10169 { 10170 if (which_alternative == 1) 10171 output_asm_insn ("movprfx\t%0, %1", operands); 10172 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>", 10173 operands + 2); 10174 } 10175 [(set_attr "movprfx" "*,yes")] 10176) 10177 10178;; ------------------------------------------------------------------------- 10179;; ---- [INT] Count elements in a predicate (scalar) 10180;; ------------------------------------------------------------------------- 10181;; Includes: 10182;; - CNTP 10183;; ------------------------------------------------------------------------- 10184 10185;; Count the number of set bits in a predicate. Operand 3 is true if 10186;; operand 1 is known to be all-true. 10187(define_insn "@aarch64_pred_cntp<mode>" 10188 [(set (match_operand:DI 0 "register_operand" "=r") 10189 (zero_extend:DI 10190 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl") 10191 (match_operand:SI 2 "aarch64_sve_ptrue_flag") 10192 (match_operand:PRED_ALL 3 "register_operand" "Upa")] 10193 UNSPEC_CNTP)))] 10194 "TARGET_SVE" 10195 "cntp\t%x0, %1, %3.<Vetype>") 10196 10197;; ------------------------------------------------------------------------- 10198;; ---- [INT] Increment by the number of elements in a predicate (scalar) 10199;; ------------------------------------------------------------------------- 10200;; Includes: 10201;; - INCP 10202;; - SQINCP 10203;; - UQINCP 10204;; ------------------------------------------------------------------------- 10205 10206;; Increment a DImode register by the number of set bits in a predicate. 10207;; See aarch64_sve_cntp for a description of the operands. 10208(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10209 [(set (match_operand:DI 0 "register_operand") 10210 (ANY_PLUS:DI 10211 (zero_extend:DI 10212 (unspec:SI [(match_dup 3) 10213 (const_int SVE_KNOWN_PTRUE) 10214 (match_operand:PRED_ALL 2 "register_operand")] 10215 UNSPEC_CNTP)) 10216 (match_operand:DI_ONLY 1 "register_operand")))] 10217 "TARGET_SVE" 10218 { 10219 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10220 } 10221) 10222 10223(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10224 [(set (match_operand:DI 0 "register_operand" "=r") 10225 (ANY_PLUS:DI 10226 (zero_extend:DI 10227 (unspec:SI [(match_operand 3) 10228 (const_int SVE_KNOWN_PTRUE) 10229 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10230 UNSPEC_CNTP)) 10231 (match_operand:DI_ONLY 1 "register_operand" "0")))] 10232 "TARGET_SVE" 10233 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>" 10234 "&& !CONSTANT_P (operands[3])" 10235 { 10236 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10237 } 10238) 10239 10240;; Increment an SImode register by the number of set bits in a predicate 10241;; using modular arithmetic. See aarch64_sve_cntp for a description of 10242;; the operands. 10243(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp" 10244 [(set (match_operand:SI 0 "register_operand" "=r") 10245 (plus:SI 10246 (unspec:SI [(match_operand 3) 10247 (const_int SVE_KNOWN_PTRUE) 10248 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10249 UNSPEC_CNTP) 10250 (match_operand:SI 1 "register_operand" "0")))] 10251 "TARGET_SVE" 10252 "incp\t%x0, %2.<Vetype>" 10253 "&& !CONSTANT_P (operands[3])" 10254 { 10255 operands[3] = CONSTM1_RTX (<MODE>mode); 10256 } 10257) 10258 10259;; Increment an SImode register by the number of set bits in a predicate 10260;; using saturating arithmetic, extending the result to 64 bits. 10261;; 10262;; See aarch64_sve_cntp for a description of the operands. 10263(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10264 [(set (match_operand:DI 0 "register_operand") 10265 (<paired_extend>:DI 10266 (SAT_PLUS:SI 10267 (unspec:SI [(match_dup 3) 10268 (const_int SVE_KNOWN_PTRUE) 10269 (match_operand:PRED_ALL 2 "register_operand")] 10270 UNSPEC_CNTP) 10271 (match_operand:SI_ONLY 1 "register_operand"))))] 10272 "TARGET_SVE" 10273 { 10274 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10275 } 10276) 10277 10278(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10279 [(set (match_operand:DI 0 "register_operand" "=r") 10280 (<paired_extend>:DI 10281 (SAT_PLUS:SI 10282 (unspec:SI [(match_operand 3) 10283 (const_int SVE_KNOWN_PTRUE) 10284 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10285 UNSPEC_CNTP) 10286 (match_operand:SI_ONLY 1 "register_operand" "0"))))] 10287 "TARGET_SVE" 10288 { 10289 if (<CODE> == SS_PLUS) 10290 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0"; 10291 else 10292 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>"; 10293 } 10294 "&& !CONSTANT_P (operands[3])" 10295 { 10296 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10297 } 10298) 10299 10300;; ------------------------------------------------------------------------- 10301;; ---- [INT] Increment by the number of elements in a predicate (vector) 10302;; ------------------------------------------------------------------------- 10303;; Includes: 10304;; - INCP 10305;; - SQINCP 10306;; - UQINCP 10307;; ------------------------------------------------------------------------- 10308 10309;; Increment a vector of DIs by the number of set bits in a predicate. 10310;; See aarch64_sve_cntp for a description of the operands. 10311(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10312 [(set (match_operand:VNx2DI 0 "register_operand") 10313 (ANY_PLUS:VNx2DI 10314 (vec_duplicate:VNx2DI 10315 (zero_extend:DI 10316 (unspec:SI 10317 [(match_dup 3) 10318 (const_int SVE_KNOWN_PTRUE) 10319 (match_operand:<VPRED> 2 "register_operand")] 10320 UNSPEC_CNTP))) 10321 (match_operand:VNx2DI_ONLY 1 "register_operand")))] 10322 "TARGET_SVE" 10323 { 10324 operands[3] = CONSTM1_RTX (<VPRED>mode); 10325 } 10326) 10327 10328(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10329 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 10330 (ANY_PLUS:VNx2DI 10331 (vec_duplicate:VNx2DI 10332 (zero_extend:DI 10333 (unspec:SI 10334 [(match_operand 3) 10335 (const_int SVE_KNOWN_PTRUE) 10336 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10337 UNSPEC_CNTP))) 10338 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))] 10339 "TARGET_SVE" 10340 "@ 10341 <inc_dec>p\t%0.d, %2 10342 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2" 10343 "&& !CONSTANT_P (operands[3])" 10344 { 10345 operands[3] = CONSTM1_RTX (<VPRED>mode); 10346 } 10347 [(set_attr "movprfx" "*,yes")] 10348) 10349 10350;; Increment a vector of SIs by the number of set bits in a predicate. 10351;; See aarch64_sve_cntp for a description of the operands. 10352(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10353 [(set (match_operand:VNx4SI 0 "register_operand") 10354 (ANY_PLUS:VNx4SI 10355 (vec_duplicate:VNx4SI 10356 (unspec:SI 10357 [(match_dup 3) 10358 (const_int SVE_KNOWN_PTRUE) 10359 (match_operand:<VPRED> 2 "register_operand")] 10360 UNSPEC_CNTP)) 10361 (match_operand:VNx4SI_ONLY 1 "register_operand")))] 10362 "TARGET_SVE" 10363 { 10364 operands[3] = CONSTM1_RTX (<VPRED>mode); 10365 } 10366) 10367 10368(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10369 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 10370 (ANY_PLUS:VNx4SI 10371 (vec_duplicate:VNx4SI 10372 (unspec:SI 10373 [(match_operand 3) 10374 (const_int SVE_KNOWN_PTRUE) 10375 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10376 UNSPEC_CNTP)) 10377 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))] 10378 "TARGET_SVE" 10379 "@ 10380 <inc_dec>p\t%0.s, %2 10381 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2" 10382 "&& !CONSTANT_P (operands[3])" 10383 { 10384 operands[3] = CONSTM1_RTX (<VPRED>mode); 10385 } 10386 [(set_attr "movprfx" "*,yes")] 10387) 10388 10389;; Increment a vector of HIs by the number of set bits in a predicate. 10390;; See aarch64_sve_cntp for a description of the operands. 10391(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10392 [(set (match_operand:VNx8HI 0 "register_operand") 10393 (ANY_PLUS:VNx8HI 10394 (vec_duplicate:VNx8HI 10395 (truncate:HI 10396 (unspec:SI 10397 [(match_dup 3) 10398 (const_int SVE_KNOWN_PTRUE) 10399 (match_operand:<VPRED> 2 "register_operand")] 10400 UNSPEC_CNTP))) 10401 (match_operand:VNx8HI_ONLY 1 "register_operand")))] 10402 "TARGET_SVE" 10403 { 10404 operands[3] = CONSTM1_RTX (<VPRED>mode); 10405 } 10406) 10407 10408(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10409 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10410 (ANY_PLUS:VNx8HI 10411 (vec_duplicate:VNx8HI 10412 (match_operator:HI 3 "subreg_lowpart_operator" 10413 [(unspec:SI 10414 [(match_operand 4) 10415 (const_int SVE_KNOWN_PTRUE) 10416 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10417 UNSPEC_CNTP)])) 10418 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))] 10419 "TARGET_SVE" 10420 "@ 10421 <inc_dec>p\t%0.h, %2 10422 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2" 10423 "&& !CONSTANT_P (operands[4])" 10424 { 10425 operands[4] = CONSTM1_RTX (<VPRED>mode); 10426 } 10427 [(set_attr "movprfx" "*,yes")] 10428) 10429 10430;; ------------------------------------------------------------------------- 10431;; ---- [INT] Decrement by the number of elements in a predicate (scalar) 10432;; ------------------------------------------------------------------------- 10433;; Includes: 10434;; - DECP 10435;; - SQDECP 10436;; - UQDECP 10437;; ------------------------------------------------------------------------- 10438 10439;; Decrement a DImode register by the number of set bits in a predicate. 10440;; See aarch64_sve_cntp for a description of the operands. 10441(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10442 [(set (match_operand:DI 0 "register_operand") 10443 (ANY_MINUS:DI 10444 (match_operand:DI_ONLY 1 "register_operand") 10445 (zero_extend:DI 10446 (unspec:SI [(match_dup 3) 10447 (const_int SVE_KNOWN_PTRUE) 10448 (match_operand:PRED_ALL 2 "register_operand")] 10449 UNSPEC_CNTP))))] 10450 "TARGET_SVE" 10451 { 10452 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10453 } 10454) 10455 10456(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp" 10457 [(set (match_operand:DI 0 "register_operand" "=r") 10458 (ANY_MINUS:DI 10459 (match_operand:DI_ONLY 1 "register_operand" "0") 10460 (zero_extend:DI 10461 (unspec:SI [(match_operand 3) 10462 (const_int SVE_KNOWN_PTRUE) 10463 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10464 UNSPEC_CNTP))))] 10465 "TARGET_SVE" 10466 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>" 10467 "&& !CONSTANT_P (operands[3])" 10468 { 10469 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10470 } 10471) 10472 10473;; Decrement an SImode register by the number of set bits in a predicate 10474;; using modular arithmetic. See aarch64_sve_cntp for a description of the 10475;; operands. 10476(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp" 10477 [(set (match_operand:SI 0 "register_operand" "=r") 10478 (minus:SI 10479 (match_operand:SI 1 "register_operand" "0") 10480 (unspec:SI [(match_operand 3) 10481 (const_int SVE_KNOWN_PTRUE) 10482 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10483 UNSPEC_CNTP)))] 10484 "TARGET_SVE" 10485 "decp\t%x0, %2.<Vetype>" 10486 "&& !CONSTANT_P (operands[3])" 10487 { 10488 operands[3] = CONSTM1_RTX (<MODE>mode); 10489 } 10490) 10491 10492;; Decrement an SImode register by the number of set bits in a predicate 10493;; using saturating arithmetic, extending the result to 64 bits. 10494;; 10495;; See aarch64_sve_cntp for a description of the operands. 10496(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10497 [(set (match_operand:DI 0 "register_operand") 10498 (<paired_extend>:DI 10499 (SAT_MINUS:SI 10500 (match_operand:SI_ONLY 1 "register_operand") 10501 (unspec:SI [(match_dup 3) 10502 (const_int SVE_KNOWN_PTRUE) 10503 (match_operand:PRED_ALL 2 "register_operand")] 10504 UNSPEC_CNTP))))] 10505 "TARGET_SVE" 10506 { 10507 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10508 } 10509) 10510 10511(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp" 10512 [(set (match_operand:DI 0 "register_operand" "=r") 10513 (<paired_extend>:DI 10514 (SAT_MINUS:SI 10515 (match_operand:SI_ONLY 1 "register_operand" "0") 10516 (unspec:SI [(match_operand 3) 10517 (const_int SVE_KNOWN_PTRUE) 10518 (match_operand:PRED_ALL 2 "register_operand" "Upa")] 10519 UNSPEC_CNTP))))] 10520 "TARGET_SVE" 10521 { 10522 if (<CODE> == SS_MINUS) 10523 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0"; 10524 else 10525 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>"; 10526 } 10527 "&& !CONSTANT_P (operands[3])" 10528 { 10529 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode); 10530 } 10531) 10532 10533;; ------------------------------------------------------------------------- 10534;; ---- [INT] Decrement by the number of elements in a predicate (vector) 10535;; ------------------------------------------------------------------------- 10536;; Includes: 10537;; - DECP 10538;; - SQDECP 10539;; - UQDECP 10540;; ------------------------------------------------------------------------- 10541 10542;; Decrement a vector of DIs by the number of set bits in a predicate. 10543;; See aarch64_sve_cntp for a description of the operands. 10544(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10545 [(set (match_operand:VNx2DI 0 "register_operand") 10546 (ANY_MINUS:VNx2DI 10547 (match_operand:VNx2DI_ONLY 1 "register_operand") 10548 (vec_duplicate:VNx2DI 10549 (zero_extend:DI 10550 (unspec:SI 10551 [(match_dup 3) 10552 (const_int SVE_KNOWN_PTRUE) 10553 (match_operand:<VPRED> 2 "register_operand")] 10554 UNSPEC_CNTP)))))] 10555 "TARGET_SVE" 10556 { 10557 operands[3] = CONSTM1_RTX (<VPRED>mode); 10558 } 10559) 10560 10561(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10562 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w") 10563 (ANY_MINUS:VNx2DI 10564 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w") 10565 (vec_duplicate:VNx2DI 10566 (zero_extend:DI 10567 (unspec:SI 10568 [(match_operand 3) 10569 (const_int SVE_KNOWN_PTRUE) 10570 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10571 UNSPEC_CNTP)))))] 10572 "TARGET_SVE" 10573 "@ 10574 <inc_dec>p\t%0.d, %2 10575 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2" 10576 "&& !CONSTANT_P (operands[3])" 10577 { 10578 operands[3] = CONSTM1_RTX (<VPRED>mode); 10579 } 10580 [(set_attr "movprfx" "*,yes")] 10581) 10582 10583;; Decrement a vector of SIs by the number of set bits in a predicate. 10584;; See aarch64_sve_cntp for a description of the operands. 10585(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10586 [(set (match_operand:VNx4SI 0 "register_operand") 10587 (ANY_MINUS:VNx4SI 10588 (match_operand:VNx4SI_ONLY 1 "register_operand") 10589 (vec_duplicate:VNx4SI 10590 (unspec:SI 10591 [(match_dup 3) 10592 (const_int SVE_KNOWN_PTRUE) 10593 (match_operand:<VPRED> 2 "register_operand")] 10594 UNSPEC_CNTP))))] 10595 "TARGET_SVE" 10596 { 10597 operands[3] = CONSTM1_RTX (<VPRED>mode); 10598 } 10599) 10600 10601(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10602 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w") 10603 (ANY_MINUS:VNx4SI 10604 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w") 10605 (vec_duplicate:VNx4SI 10606 (unspec:SI 10607 [(match_operand 3) 10608 (const_int SVE_KNOWN_PTRUE) 10609 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10610 UNSPEC_CNTP))))] 10611 "TARGET_SVE" 10612 "@ 10613 <inc_dec>p\t%0.s, %2 10614 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2" 10615 "&& !CONSTANT_P (operands[3])" 10616 { 10617 operands[3] = CONSTM1_RTX (<VPRED>mode); 10618 } 10619 [(set_attr "movprfx" "*,yes")] 10620) 10621 10622;; Decrement a vector of HIs by the number of set bits in a predicate. 10623;; See aarch64_sve_cntp for a description of the operands. 10624(define_expand "@aarch64_sve_<inc_dec><mode>_cntp" 10625 [(set (match_operand:VNx8HI 0 "register_operand") 10626 (ANY_MINUS:VNx8HI 10627 (match_operand:VNx8HI_ONLY 1 "register_operand") 10628 (vec_duplicate:VNx8HI 10629 (truncate:HI 10630 (unspec:SI 10631 [(match_dup 3) 10632 (const_int SVE_KNOWN_PTRUE) 10633 (match_operand:<VPRED> 2 "register_operand")] 10634 UNSPEC_CNTP)))))] 10635 "TARGET_SVE" 10636 { 10637 operands[3] = CONSTM1_RTX (<VPRED>mode); 10638 } 10639) 10640 10641(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp" 10642 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w") 10643 (ANY_MINUS:VNx8HI 10644 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w") 10645 (vec_duplicate:VNx8HI 10646 (match_operator:HI 3 "subreg_lowpart_operator" 10647 [(unspec:SI 10648 [(match_operand 4) 10649 (const_int SVE_KNOWN_PTRUE) 10650 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")] 10651 UNSPEC_CNTP)]))))] 10652 "TARGET_SVE" 10653 "@ 10654 <inc_dec>p\t%0.h, %2 10655 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2" 10656 "&& !CONSTANT_P (operands[4])" 10657 { 10658 operands[4] = CONSTM1_RTX (<VPRED>mode); 10659 } 10660 [(set_attr "movprfx" "*,yes")] 10661) 10662