;; Copyright (C) 2016-2020 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;;- See file "rtl.def" for documentation on define_insn, match_*, et. al.

(include "predicates.md")
(include "constraints.md")

;; {{{ Constants and enums

; Named registers
(define_constants
  [(FIRST_SGPR_REG       0)
   (CC_SAVE_REG          22)
   (LAST_SGPR_REG        101)
   (FLAT_SCRATCH_REG     102)
   (FLAT_SCRATCH_LO_REG  102)
   (FLAT_SCRATCH_HI_REG  103)
   (XNACK_MASK_REG       104)
   (XNACK_MASK_LO_REG    104)
   (XNACK_MASK_HI_REG    105)
   (VCC_REG              106)
   (VCC_LO_REG           106)
   (VCC_HI_REG           107)
   (VCCZ_REG             108)
   (TBA_REG              109)
   (TBA_LO_REG           109)
   (TBA_HI_REG           110)
   (TMA_REG              111)
   (TMA_LO_REG           111)
   (TMA_HI_REG           112)
   (TTMP0_REG            113)
   (TTMP11_REG           124)
   (M0_REG               125)
   (EXEC_REG             126)
   (EXEC_LO_REG          126)
   (EXEC_HI_REG          127)
   (EXECZ_REG            128)
   (SCC_REG              129)
   (FIRST_VGPR_REG       160)
   (LAST_VGPR_REG        415)])

(define_constants
  [(SP_REGNUM 16)
   (LR_REGNUM 18)
   (AP_REGNUM 416)
   (FP_REGNUM 418)])

(define_c_enum "unspecv" [
  UNSPECV_PROLOGUE_USE
  UNSPECV_KERNEL_RETURN
  UNSPECV_BARRIER
  UNSPECV_ATOMIC
  UNSPECV_ICACHE_INV])

(define_c_enum "unspec" [
  UNSPEC_VECTOR
  UNSPEC_BPERMUTE
  UNSPEC_SGPRBASE
  UNSPEC_MEMORY_BARRIER
  UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
  UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
  UNSPEC_PLUS_DPP_SHR
  UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
  UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
  UNSPEC_MOV_DPP_SHR
  UNSPEC_MOV_FROM_LANE63
  UNSPEC_GATHER
  UNSPEC_SCATTER])

;; }}}
;; {{{ Attributes

; Instruction type (encoding) as described in the ISA specification.
; The following table summarizes possible operands of individual instruction
; types and corresponding constraints.
;
; sop2 - scalar, two inputs, one output
;        ssrc0/ssrc1: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
;                     vccz,execz,scc,inline immediate,fp inline immediate
;        sdst: sgpr 0-102; flat_scratch,xnack,vcc,tba,tma,ttmp0-11,exec
;
;        Constraints "=SD, SD", "SSA,SSB","SSB,SSA"
;
; sopk - scalar, inline constant input, one output
;        simm16: 16bit inline constant
;        sdst: same as sop2/ssrc0
;
;        Constraints "=SD", "J"
;
; sop1 - scalar, one input, one output
;        ssrc0: same as sop2/ssrc0.  FIXME: manual omit VCCZ
;        sdst: same as sop2/sdst
;
;        Constraints "=SD", "SSA"
;
; sopc - scalar, two inputs, one comparison
;        ssrc0: same as sop2/ssc0.
;
;        Constraints "SSI,SSA","SSA,SSI"
;
; sopp - scalar, one constant input, one special
;        simm16
;
; smem - scalar memory
;        sbase: aligned pair of sgprs.  Specify {size[15:0], base[47:0]} in
;               dwords
;        sdata: sgpr0-102, flat_scratch, xnack, vcc, tba, tma
;        offset: sgpr or 20bit unsigned byte offset
;
; vop2 - vector, two inputs, one output
;        vsrc0: sgpr0-102,flat_scratch,xnack,vcc,tba,ttmp0-11,m0,exec,
;               inline constant -16 to -64, fp inline immediate, vccz, execz,
;               scc, lds, literal constant, vgpr0-255
;        vsrc1: vgpr0-255
;        vdst: vgpr0-255
;        Limitations: At most one SGPR, at most one constant
;                     if constant is used, SGPR must be M0
;                     Only SRC0 can be LDS_DIRECT
;
;        constraints: "=v", "vBSv", "v"
;
; vop1 - vector, one input, one output
;        vsrc0: same as vop2/src0
;        vdst: vgpr0-255
;
;        constraints: "=v", "vBSv"
;
; vopc - vector, two inputs, one comparison output;
;        vsrc0: same as vop2/src0
;        vsrc1: vgpr0-255
;        vdst:
;
;        constraints: "vASv", "v"
;
; vop3a - vector, three inputs, one output
;        vdst: vgpr0-255, for v_cmp sgpr or vcc
;        abs,clamp
;        vsrc0: sgpr0-102,vcc,tba,ttmp0-11,m0,exec,
;               inline constant -16 to -64, fp inline immediate, vccz, execz,
;               scc, lds_direct
;               FIXME: really missing 1/pi?  really 104 SGPRs
;
; vop3b - vector, three inputs, one vector output, one scalar output
;        vsrc0,vsrc1,vsrc2: same as vop3a vsrc0
;        vdst: vgpr0-255
;        sdst: sgpr0-103/vcc/tba/tma/ttmp0-11
;
; vop_sdwa - second dword for vop1/vop2/vopc for specifying sub-dword address
;        src0: vgpr0-255
;        dst_sel: BYTE_0-3, WORD_0-1, DWORD
;        dst_unused: UNUSED_PAD, UNUSED_SEXT, UNUSED_PRESERVE
;        clamp: true/false
;        src0_sel: BYTE_0-3, WORD_0-1, DWORD
;        flags: src0_sext, src0_neg, src0_abs, src1_sel, src1_sext, src1_neg,
;               src1_abs
;
; vop_dpp - second dword for vop1/vop2/vopc for specifying data-parallel ops
;        src0: vgpr0-255
;        dpp_ctrl: quad_perm, row_sl0-15, row_sr0-15, row_rr0-15, wf_sl1,
;                  wf_rl1, wf_sr1, wf_rr1, row_mirror, row_half_mirror,
;                  bcast15, bcast31
;        flags: src0_neg, src0_abs, src1_neg, src1_abs
;        bank_mask: 4-bit mask
;        row_mask: 4-bit mask
;
; ds - Local and global data share instructions.
;        offset0: 8-bit constant
;        offset1: 8-bit constant
;        flag: gds
;        addr: vgpr0-255
;        data0: vgpr0-255
;        data1: vgpr0-255
;        vdst: vgpr0-255
;
; mubuf - Untyped memory buffer operation.  First word with LDS, second word
;         non-LDS.
;        offset: 12-bit constant
;        vaddr: vgpr0-255
;        vdata: vgpr0-255
;        srsrc: sgpr0-102
;        soffset: sgpr0-102
;        flags: offen, idxen, glc, lds, slc, tfe
;
; mtbuf - Typed memory buffer operation.  Two words
;        offset: 12-bit constant
;        dfmt: 4-bit constant
;        nfmt: 3-bit constant
;        vaddr: vgpr0-255
;        vdata: vgpr0-255
;        srsrc: sgpr0-102
;        soffset: sgpr0-102
;        flags: offen, idxen, glc, lds, slc, tfe
;
; flat - flat or global memory operations
;        flags: glc, slc
;        addr: vgpr0-255
;        data: vgpr0-255
;        vdst: vgpr0-255
;
; mult - expands to multiple instructions (pseudo encoding)
;
; vmult - as mult, when a vector instruction is used.

(define_attr "type"
             "unknown,sop1,sop2,sopk,sopc,sopp,smem,ds,vop2,vop1,vopc,
              vop3a,vop3b,vop_sdwa,vop_dpp,mubuf,mtbuf,flat,mult,vmult"
             (const_string "unknown"))

; Set if instruction is executed in scalar or vector unit

(define_attr "unit" "unknown,scalar,vector"
  (cond [(eq_attr "type" "sop1,sop2,sopk,sopc,sopp,smem,mult")
            (const_string "scalar")
         (eq_attr "type" "vop2,vop1,vopc,vop3a,vop3b,ds,
                          vop_sdwa,vop_dpp,flat,vmult")
            (const_string "vector")]
        (const_string "unknown")))

; All vector instructions run as 64 threads as predicated by the EXEC
; register.  Scalar operations in vector register require a single lane
; enabled, vector moves require a full set of lanes enabled, and most vector
; operations handle the lane masking themselves.
; The md_reorg pass is responsible for ensuring that EXEC is set appropriately
; according to the following settings:
;   auto   - md_reorg will inspect def/use to determine what to do.
;   none   - exec is not needed.
;   single - disable all but lane zero.
;   full   - enable all lanes.

(define_attr "exec" "auto,none,single,full"
  (const_string "auto"))

; Infer the (worst-case) length from the instruction type by default.  Many
; types can have an optional immediate word following, which we include here.
; "Multiple" types are counted as two 64-bit instructions.  This is just a
; default fallback: it can be overridden per-alternative in insn patterns for
; greater accuracy.

(define_attr "length" ""
  (cond [(eq_attr "type" "sop1") (const_int 8)
         (eq_attr "type" "sop2") (const_int 8)
         (eq_attr "type" "sopk") (const_int 8)
         (eq_attr "type" "sopc") (const_int 8)
         (eq_attr "type" "sopp") (const_int 4)
         (eq_attr "type" "smem") (const_int 8)
         (eq_attr "type" "ds")   (const_int 8)
         (eq_attr "type" "vop1") (const_int 8)
         (eq_attr "type" "vop2") (const_int 8)
         (eq_attr "type" "vopc") (const_int 8)
         (eq_attr "type" "vop3a") (const_int 8)
         (eq_attr "type" "vop3b") (const_int 8)
         (eq_attr "type" "vop_sdwa") (const_int 8)
         (eq_attr "type" "vop_dpp") (const_int 8)
         (eq_attr "type" "flat") (const_int 8)
         (eq_attr "type" "mult") (const_int 16)
         (eq_attr "type" "vmult") (const_int 16)]
        (const_int 4)))

; Disable alternatives that only apply to specific ISA variants.

(define_attr "gcn_version" "gcn3,gcn5" (const_string "gcn3"))

(define_attr "enabled" ""
  (cond [(eq_attr "gcn_version" "gcn3") (const_int 1)
         (and (eq_attr "gcn_version" "gcn5")
              (ne (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
           (const_int 1)]
        (const_int 0)))

; We need to be able to identify v_readlane and v_writelane with
; SGPR lane selection in order to handle "Manually Inserted Wait States".

(define_attr "laneselect" "yes,no" (const_string "no"))

; Identify instructions that require a "Manually Inserted Wait State" if
; their inputs are overwritten by subsequent instructions.

(define_attr "delayeduse" "yes,no" (const_string "no"))

;; }}}
;; {{{ Iterators useful across the whole machine description

(define_mode_iterator SIDI [SI DI])
(define_mode_iterator SFDF [SF DF])
(define_mode_iterator SISF [SI SF])
(define_mode_iterator QIHI [QI HI])
(define_mode_iterator DIDF [DI DF])
(define_mode_iterator FP [HF SF DF])
(define_mode_iterator FP_1REG [HF SF])

;; }}}
;; {{{ Attributes.

; Translate RTX code into GCN instruction mnemonics with and without
; suffixes such as _b32, etc.

(define_code_attr mnemonic
  [(minus "sub%i")
   (plus "add%i")
   (ashift "lshl%b")
   (lshiftrt "lshr%b")
   (ashiftrt "ashr%i")
   (and "and%B")
   (ior "or%B")
   (xor "xor%B")
   (mult "mul%i")
   (smin "min%i")
   (smax "max%i")
   (umin "min%u")
   (umax "max%u")
   (not "not%B")
   (popcount "bcnt_u32%b")])

(define_code_attr bare_mnemonic
  [(plus "add")
   (minus "sub")
   (and "and")
   (ior "or")
   (xor "xor")])

(define_code_attr s_mnemonic
  [(not "not%b")
   (popcount "bcnt1_i32%b")
   (clz "flbit_i32%b")
   (ctz "ff1_i32%b")])

(define_code_attr revmnemonic
  [(minus "subrev%i")
   (ashift "lshlrev%b")
   (lshiftrt "lshrrev%b")
   (ashiftrt "ashrrev%i")])

; Translate RTX code into corresponding expander name.

(define_code_attr expander
  [(and "and")
   (ior "ior")
   (xor "xor")
   (plus "add")
   (minus "sub")
   (ashift "ashl")
   (lshiftrt "lshr")
   (ashiftrt "ashr")
   (mult "mul")
   (smin "smin")
   (smax "smax")
   (umin "umin")
   (umax "umax")
   (not "one_cmpl")
   (popcount "popcount")
   (clz "clz")
   (ctz "ctz")
   (sign_extend "extend")
   (zero_extend "zero_extend")])

;; }}}
;; {{{ Miscellaneous instructions

(define_insn "nop"
  [(const_int 0)]
  ""
  "s_nop\t0x0"
  [(set_attr "type" "sopp")])

; FIXME: What should the value of the immediate be?  Zero is disallowed, so
; pick 1 for now.
(define_insn "trap"
  [(trap_if (const_int 1) (const_int 0))]
  ""
  "s_trap\t1"
  [(set_attr "type" "sopp")])

;; }}}
;; {{{ Moves

;; All scalar modes we support moves in.
(define_mode_iterator MOV_MODE [BI QI HI SI DI TI SF DF])

; This is the entry point for creating all kinds of scalar moves,
; including reloads and symbols.

(define_expand "mov<mode>"
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
        (match_operand:MOV_MODE 1 "general_operand"))]
  ""
  {
    if (SUBREG_P (operands[1])
        && GET_MODE (operands[1]) == SImode
        && GET_MODE (SUBREG_REG (operands[1])) == BImode)
      {
        /* (reg:BI VCC) has nregs==2 to ensure it gets clobbered as a whole,
           but (subreg:SI (reg:BI VCC)) doesn't, which causes the LRA liveness
           checks to assert.  Transform this:
             (set (reg:SI) (subreg:SI (reg:BI)))
           to this:
             (set (subreg:BI (reg:SI)) (reg:BI))  */
        operands[0] = gen_rtx_SUBREG (BImode, operands[0], 0);
        operands[1] = SUBREG_REG (operands[1]);
      }
    if (SUBREG_P (operands[0])
        && GET_MODE (operands[0]) == SImode
        && GET_MODE (SUBREG_REG (operands[0])) == BImode)
      {
        /* Likewise, transform this:
             (set (subreg:SI (reg:BI)) (reg:SI))
           to this:
             (set (reg:BI) (subreg:BI (reg:SI)))  */
        operands[0] = SUBREG_REG (operands[0]);
        operands[1] = gen_rtx_SUBREG (BImode, operands[1], 0);
      }

    if (MEM_P (operands[0]))
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (!lra_in_progress && !reload_completed
        && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1]))
      {
        /* Something is probably trying to generate a move
           which can only work indirectly.
           E.g. Move from LDS memory to SGPR hardreg
             or MEM:QI to SGPR.  */
        rtx tmpreg = gen_reg_rtx (<MODE>mode);
        emit_insn (gen_mov<mode> (tmpreg, operands[1]));
        emit_insn (gen_mov<mode> (operands[0], tmpreg));
        DONE;
      }

    if (<MODE>mode == DImode
        && (GET_CODE (operands[1]) == SYMBOL_REF
            || GET_CODE (operands[1]) == LABEL_REF))
      {
        if (lra_in_progress)
          emit_insn (gen_movdi_symbol_save_scc (operands[0], operands[1]));
        else
          emit_insn (gen_movdi_symbol (operands[0], operands[1]));
        DONE;
      }
  })

; Split invalid moves into two valid moves

(define_split
  [(set (match_operand:MOV_MODE 0 "nonimmediate_operand")
        (match_operand:MOV_MODE 1 "general_operand"))]
  "!reload_completed && !lra_in_progress
   && !gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  [(set (match_dup 2) (match_dup 1))
   (set (match_dup 0) (match_dup 2))]
  {
    operands[2] = gen_reg_rtx(<MODE>mode);
  })

; We need BImode move so we can reload flags registers.

(define_insn "*movbi"
  [(set (match_operand:BI 0 "nonimmediate_operand"
                          "=Sg,   v,Sg,cs,cV,cV,Sm,RS, v,RF, v,RM")
        (match_operand:BI 1 "gcn_load_operand"
                          "SSA,vSvA, v,SS, v,SS,RS,Sm,RF, v,RM, v"))]
  ""
  {
    /* SCC as an operand is currently not accepted by the LLVM assembler, so
       we emit bytes directly as a workaround.  */
    switch (which_alternative) {
    case 0:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
        return "; s_mov_b32\t%0,%1 is not supported by the assembler.\;"
               ".byte\t0xfd\;"
               ".byte\t0x0\;"
               ".byte\t0x80|%R0\;"
               ".byte\t0xbe";
      else
        return "s_mov_b32\t%0, %1";
    case 1:
      if (REG_P (operands[1]) && REGNO (operands[1]) == SCC_REG)
        return "; v_mov_b32\t%0, %1\;"
               ".byte\t0xfd\;"
               ".byte\t0x2\;"
               ".byte\t((%V0<<1)&0xff)\;"
               ".byte\t0x7e|(%V0>>7)";
      else
        return "v_mov_b32\t%0, %1";
    case 2:
      return "v_readlane_b32\t%0, %1, 0";
    case 3:
      return "s_cmpk_lg_u32\t%1, 0";
    case 4:
      return "v_cmp_ne_u32\tvcc, 0, %1";
    case 5:
      if (REGNO (operands[1]) == SCC_REG)
        return "; s_mov_b32\t%0, %1 is not supported by the assembler.\;"
               ".byte\t0xfd\;"
               ".byte\t0x0\;"
               ".byte\t0xea\;"
               ".byte\t0xbe\;"
               "s_mov_b32\tvcc_hi, 0";
      else
        return "s_mov_b32\tvcc_lo, %1\;"
               "s_mov_b32\tvcc_hi, 0";
    case 6:
      return "s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)";
    case 7:
      return "s_store_dword\t%1, %A0";
    case 8:
      return "flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0";
    case 9:
      return "flat_store_dword\t%A0, %1%O0%g0";
    case 10:
      return "global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)";
    case 11:
      return "global_store_dword\t%A0, %1%O0%g0";
    default:
      gcc_unreachable ();
    }
  }
  [(set_attr "type" "sop1,vop1,vop3a,sopk,vopc,mult,smem,smem,flat,flat,
                     flat,flat")
   (set_attr "exec" "*,*,none,*,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,4,4,4,8,12,12,12,12,12,12")])

; 32bit move pattern

(define_insn "*mov<mode>_insn"
  [(set (match_operand:SISF 0 "nonimmediate_operand"
                  "=SD,SD,SD,SD,RB,Sm,RS,v,Sg, v, v,RF,v,RLRG,   v,SD, v,RM")
        (match_operand:SISF 1 "gcn_load_operand"
                  "SSA, J, B,RB,Sm,RS,Sm,v, v,Sv,RF, v,B,   v,RLRG, Y,RM, v"))]
  ""
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  s_buffer_load%s0\t%0, s[0:3], %1\;s_waitcnt\tlgkmcnt(0)
  s_buffer_store%s1\t%1, s[0:3], %0
  s_load_dword\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  s_store_dword\t%1, %A0
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dword\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write_b32\t%A0, %1%O0
  ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  s_mov_b32\t%0, %1
  global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dword\t%A0, %1%O0%g0"
  [(set_attr "type" "sop1,sopk,sop1,smem,smem,smem,smem,vop1,vop3a,vop3a,flat,
                     flat,vop1,ds,ds,sop1,flat,flat")
   (set_attr "exec" "*,*,*,*,*,*,*,*,none,none,*,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,12,12,12,12,4,8,8,12,12,8,12,12,8,12,12")])

; 8/16bit move pattern

(define_insn "*mov<mode>_insn"
  [(set (match_operand:QIHI 0 "nonimmediate_operand"
                            "=SD,SD,SD,v,Sg, v, v,RF,v,RLRG, v, v,RM")
        (match_operand:QIHI 1 "gcn_load_operand"
                            "SSA, J, B,v, v,Sv,RF, v,B,   v,RLRG,RM, v"))]
  "gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
  "@
  s_mov_b32\t%0, %1
  s_movk_i32\t%0, %1
  s_mov_b32\t%0, %1
  v_mov_b32\t%0, %1
  v_readlane_b32\t%0, %1, 0
  v_writelane_b32\t%0, %1, 0
  flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store%s0\t%A0, %1%O0%g0
  v_mov_b32\t%0, %1
  ds_write%b0\t%A0, %1%O0
  ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store%s0\t%A0, %1%O0%g0"
  [(set_attr "type"
             "sop1,sopk,sop1,vop1,vop3a,vop3a,flat,flat,vop1,ds,ds,flat,flat")
   (set_attr "exec" "*,*,*,*,none,none,*,*,*,*,*,*,*")
   (set_attr "length" "4,4,8,4,4,4,12,12,8,12,12,12,12")])

; 64bit move pattern

(define_insn_and_split "*mov<mode>_insn"
  [(set (match_operand:DIDF 0 "nonimmediate_operand"
                            "=SD,SD,SD,RS,Sm,v, v,Sg, v, v,RF,RLRG, v, v,RM")
        (match_operand:DIDF 1 "general_operand"
                            "SSA, C,DB,Sm,RS,v,DB, v,Sv,RF, v,   v,RLRG,RM, v"))]
  "GET_CODE(operands[1]) != SYMBOL_REF"
  "@
  s_mov_b64\t%0, %1
  s_mov_b64\t%0, %1
  #
  s_store_dwordx2\t%1, %A0
  s_load_dwordx2\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  #
  #
  #
  #
  flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0
  flat_store_dwordx2\t%A0, %1%O0%g0
  ds_write_b64\t%A0, %1%O0
  ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)
  global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  global_store_dwordx2\t%A0, %1%O0%g0"
  "reload_completed
   && ((!MEM_P (operands[0]) && !MEM_P (operands[1])
        && !gcn_sgpr_move_p (operands[0], operands[1]))
       || (GET_CODE (operands[1]) == CONST_INT
           && !gcn_constant64_p (operands[1])))"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))]
  {
    rtx inlo = gen_lowpart (SImode, operands[1]);
    rtx inhi = gen_highpart_mode (SImode, <MODE>mode, operands[1]);
    rtx outlo = gen_lowpart (SImode, operands[0]);
    rtx outhi = gen_highpart_mode (SImode, <MODE>mode, operands[0]);

    /* Ensure that overlapping registers aren't corrupted.  */
    if (reg_overlap_mentioned_p (outlo, inhi))
      {
        operands[0] = outhi;
        operands[1] = inhi;
        operands[2] = outlo;
        operands[3] = inlo;
      }
    else
      {
        operands[0] = outlo;
        operands[1] = inlo;
        operands[2] = outhi;
        operands[3] = inhi;
      }
  }
  [(set_attr "type" "sop1,sop1,mult,smem,smem,vmult,vmult,vmult,vmult,flat,
                     flat,ds,ds,flat,flat")
   (set_attr "length" "4,8,*,12,12,*,*,*,*,12,12,12,12,12,12")])

; 128-bit move.

(define_insn_and_split "*movti_insn"
  [(set (match_operand:TI 0 "nonimmediate_operand"
                                      "=SD,RS,Sm,RF, v,v, v,SD,RM, v,RL, v")
        (match_operand:TI 1 "general_operand"
                                      "SSB,Sm,RS, v,RF,v,Sv, v, v,RM, v,RL"))]
  ""
  "@
  #
  s_store_dwordx4\t%1, %A0
  s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
  flat_store_dwordx4\t%A0, %1%O0%g0
  flat_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\t0
  #
  #
  #
  global_store_dwordx4\t%A0, %1%O0%g0
  global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0)
  ds_write_b128\t%A0, %1%O0
  ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)"
  "reload_completed
   && REG_P (operands[0])
   && (REG_P (operands[1]) || GET_CODE (operands[1]) == CONST_INT)"
  [(set (match_dup 0) (match_dup 1))
   (set (match_dup 2) (match_dup 3))
   (set (match_dup 4) (match_dup 5))
   (set (match_dup 6) (match_dup 7))]
  {
    gcc_assert (rtx_equal_p (operands[0], operands[1])
                || !reg_overlap_mentioned_p (operands[0], operands[1]));
    operands[6] = gcn_operand_part (TImode, operands[0], 3);
    operands[7] = gcn_operand_part (TImode, operands[1], 3);
    operands[4] = gcn_operand_part (TImode, operands[0], 2);
    operands[5] = gcn_operand_part (TImode, operands[1], 2);
    operands[2] = gcn_operand_part (TImode, operands[0], 1);
    operands[3] = gcn_operand_part (TImode, operands[1], 1);
    operands[0] = gcn_operand_part (TImode, operands[0], 0);
    operands[1] = gcn_operand_part (TImode, operands[1], 0);
  }
  [(set_attr "type" "mult,smem,smem,flat,flat,vmult,vmult,vmult,flat,flat,\
                     ds,ds")
   (set_attr "delayeduse" "*,*,yes,*,*,*,*,*,yes,*,*,*")
   (set_attr "length" "*,12,12,12,12,*,*,*,12,12,12,12")])

;; }}}
;; {{{ Prologue/Epilogue

(define_insn "prologue_use"
  [(unspec_volatile [(match_operand 0)] UNSPECV_PROLOGUE_USE)]
  ""
  ""
  [(set_attr "length" "0")])

(define_expand "prologue"
  [(const_int 0)]
  ""
  {
    gcn_expand_prologue ();
    DONE;
  })

(define_expand "epilogue"
  [(const_int 0)]
  ""
  {
    gcn_expand_epilogue ();
    DONE;
  })

;; }}}
;; {{{ Control flow

; This pattern must satisfy simplejump_p, which means it cannot be a parallel
; that clobbers SCC.  Thus, we must preserve SCC if we're generating a long
; branch sequence.

(define_insn "jump"
  [(set (pc)
        (label_ref (match_operand 0)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_branch\t%0";
    else
      /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG.  */
      return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
             ".long\t0xbe9600fd\;"
             "s_getpc_b64\ts[20:21]\;"
             "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
             "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
             "s_cmpk_lg_u32\ts22, 0\;"
             "s_setpc_b64\ts[20:21]";
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
        (if_then_else (and (ge (minus (match_dup 0) (pc))
                               (const_int -131072))
                           (lt (minus (match_dup 0) (pc))
                               (const_int 131072)))
                      (const_int 4)
                      (const_int 32)))])

(define_insn "indirect_jump"
  [(set (pc)
        (match_operand:DI 0 "register_operand" "Sg"))]
  ""
  "s_setpc_b64\t%0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

(define_insn "cjump"
  [(set (pc)
        (if_then_else
          (match_operator:BI 1 "gcn_conditional_operator"
            [(match_operand:BI 2 "gcn_conditional_register_operand" "ca,cV")
             (const_int 0)])
          (label_ref (match_operand 0))
          (pc)))]
  ""
  {
    if (get_attr_length (insn) == 4)
      return "s_cbranch%C1\t%0";
    else
      {
        /* !!! This sequence clobbers EXEC_SAVE_REG and CC_SAVE_REG but
           restores SCC.  */
        if (REGNO (operands[2]) == SCC_REG)
          {
            if (GET_CODE (operands[1]) == EQ)
              return "s_cbranch%c1\t.Lskip%=\;"
                     "s_getpc_b64\ts[20:21]\;"
                     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
                     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
                     "s_cmp_lg_u32\t0, 0\;"
                     "s_setpc_b64\ts[20:21]\n"
                     ".Lskip%=:";
            else
              return "s_cbranch%c1\t.Lskip%=\;"
                     "s_getpc_b64\ts[20:21]\;"
                     "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
                     "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
                     "s_cmp_eq_u32\t0, 0\;"
                     "s_setpc_b64\ts[20:21]\n"
                     ".Lskip%=:";
          }
        else
          return "s_cbranch%c1\t.Lskip%=\;"
                 "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
                 ".byte\t0xfd\;"
                 ".byte\t0x0\;"
                 ".byte\t0x80|22\;"
                 ".byte\t0xbe\;"
                 "s_getpc_b64\ts[20:21]\;"
                 "s_add_u32\ts20, s20, %0@rel32@lo+4\;"
                 "s_addc_u32\ts21, s21, %0@rel32@hi+4\;"
                 "s_cmpk_lg_u32\ts22, 0\;"
                 "s_setpc_b64\ts[20:21]\n"
                 ".Lskip%=:";
      }
  }
  [(set_attr "type" "sopp")
   (set (attr "length")
        (if_then_else (and (ge (minus (match_dup 0) (pc))
                               (const_int -131072))
                           (lt (minus (match_dup 0) (pc))
                               (const_int 131072)))
                      (const_int 4)
                      (const_int 36)))])

; Returning from a normal function is different to returning from a
; kernel function.

(define_insn "gcn_return"
  [(return)]
  ""
  {
    if (cfun && cfun->machine && cfun->machine->normal_function)
      return "s_setpc_b64\ts[18:19]";
    else
      return "s_waitcnt\tlgkmcnt(0)\;s_dcache_wb\;s_endpgm";
  }
  [(set_attr "type" "sop1")
   (set_attr "length" "12")])

(define_expand "call"
  [(parallel [(call (match_operand 0 "")
                    (match_operand 1 ""))
              (clobber (reg:DI LR_REGNUM))
              (clobber (match_scratch:DI 2))])]
  ""
  {})

(define_insn "gcn_simple_call"
  [(call (mem (match_operand 0 "immediate_operand" "Y,B"))
         (match_operand 1 "const_int_operand"))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%2\;s_add_u32\t%L2, %L2, %0@rel32@lo+4\;s_addc_u32\t%H2, %H2, %0@rel32@hi+4\;s_swappc_b64\ts[18:19], %2
  s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "mult,sop1")
   (set_attr "length" "24,4")])

(define_insn "movdi_symbol"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI SCC_REG))]
 "GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF"
  {
    if (SYMBOL_REF_P (operands[1])
        && SYMBOL_REF_WEAK (operands[1]))
        return "s_getpc_b64\t%0\;"
               "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
               "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
               "s_load_dwordx2\t%0, %0\;"
               "s_waitcnt\tlgkmcnt(0)";

    return "s_getpc_b64\t%0\;"
           "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
           "s_addc_u32\t%H0, %H0, %1@rel32@hi+4";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "32")])

(define_insn "movdi_symbol_save_scc"
 [(set (match_operand:DI 0 "nonimmediate_operand" "=Sg")
       (match_operand:DI 1 "general_operand" "Y"))
  (clobber (reg:BI CC_SAVE_REG))]
 "(GET_CODE (operands[1]) == SYMBOL_REF || GET_CODE (operands[1]) == LABEL_REF)
  && (lra_in_progress || reload_completed)"
  {
    /* !!! These sequences clobber CC_SAVE_REG.  */

    if (SYMBOL_REF_P (operands[1])
        && SYMBOL_REF_WEAK (operands[1]))
        return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
               ".long\t0xbe9600fd\;"
               "s_getpc_b64\t%0\;"
               "s_add_u32\t%L0, %L0, %1@gotpcrel32@lo+4\;"
               "s_addc_u32\t%H0, %H0, %1@gotpcrel32@hi+4\;"
               "s_load_dwordx2\t%0, %0\;"
               "s_cmpk_lg_u32\ts22, 0\;"
               "s_waitcnt\tlgkmcnt(0)";

    return "; s_mov_b32\ts22, scc is not supported by the assembler.\;"
           ".long\t0xbe9600fd\;"
           "s_getpc_b64\t%0\;"
           "s_add_u32\t%L0, %L0, %1@rel32@lo+4\;"
           "s_addc_u32\t%H0, %H0, %1@rel32@hi+4\;"
           "s_cmpk_lg_u32\ts22, 0";
  }
 [(set_attr "type" "mult")
  (set_attr "length" "40")])


(define_insn "gcn_indirect_call"
  [(call (mem (match_operand:DI 0 "register_operand" "Sg"))
         (match_operand 1 "" ""))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 2 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %0"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

(define_expand "call_value"
  [(parallel [(set (match_operand 0 "")
                   (call (match_operand 1 "")
                         (match_operand 2 "")))
              (clobber (reg:DI LR_REGNUM))
              (clobber (match_scratch:DI 3))])]
  ""
  {})

(define_insn "gcn_call_value"
  [(set (match_operand 0 "register_operand" "=Sg,Sg")
        (call (mem (match_operand 1 "immediate_operand" "Y,B"))
              (match_operand 2 "const_int_operand")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=&Sg,X"))]
  ""
  "@
  s_getpc_b64\t%3\;s_add_u32\t%L3, %L3, %1@rel32@lo+4\;s_addc_u32\t%H3, %H3, %1@rel32@hi+4\;s_swappc_b64\ts[18:19], %3
  s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "24")])

(define_insn "gcn_call_value_indirect"
  [(set (match_operand 0 "register_operand" "=Sg")
        (call (mem (match_operand:DI 1 "register_operand" "Sg"))
              (match_operand 2 "" "")))
   (clobber (reg:DI LR_REGNUM))
   (clobber (match_scratch:DI 3 "=X"))]
  ""
  "s_swappc_b64\ts[18:19], %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4")])

; GCN does not have an instruction to clear only part of the instruction
; cache, so the operands are ignored.

(define_insn "clear_icache"
  [(unspec_volatile
     [(match_operand 0 "") (match_operand 1 "")]
     UNSPECV_ICACHE_INV)]
  ""
  "s_icache_inv"
  [(set_attr "type" "sopp")
   (set_attr "length" "4")])

;; }}}
;; {{{ Conditionals

; 32-bit compare, scalar unit only

(define_insn "cstoresi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand"
                                                     "=cs, cs, cs, cs")
        (match_operator:BI 1 "gcn_compare_operator"
          [(match_operand:SI 2 "gcn_alu_operand"     "SSA,SSA,SSB, SS")
           (match_operand:SI 3 "gcn_alu_operand"     "SSA,SSL, SS,SSB")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   s_cmpk%D1\t%2, %3
   s_cmp%D1\t%2, %3
   s_cmp%D1\t%2, %3"
  [(set_attr "type" "sopc,sopk,sopk,sopk")
   (set_attr "length" "4,4,8,8")])

(define_expand "cbranchsi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:SI 1 "gcn_alu_operand")
      (match_operand:SI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoresi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
                               gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; 64-bit compare; either unit, but scalar allows limited operators

(define_expand "cstoredi4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand")
        (match_operator:BI 1 "gcn_compare_operator"
          [(match_operand:DI 2 "gcn_alu_operand")
           (match_operand:DI 3 "gcn_alu_operand")]))]
  ""
  {})

(define_insn "cstoredi4_vec_and_scalar"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cs,  cV")
        (match_operator:BI 1 "gcn_compare_64bit_operator"
          [(match_operand:DI 2 "gcn_alu_operand"               "%SSA,vSvC")
           (match_operand:DI 3 "gcn_alu_operand"               " SSC,   v")]))]
  ""
  "@
   s_cmp%D1\t%2, %3
   v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "sopc,vopc")
   (set_attr "length" "8")])

(define_insn "cstoredi4_vector"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "= cV")
        (match_operator:BI 1 "gcn_compare_operator"
          [(match_operand:DI 2 "gcn_alu_operand"               "vSvB")
           (match_operand:DI 3 "gcn_alu_operand"               "   v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

(define_expand "cbranchdi4"
  [(match_operator 0 "gcn_compare_operator"
     [(match_operand:DI 1 "gcn_alu_operand")
      (match_operand:DI 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstoredi4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
                               gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

; FP compare; vector unit only

(define_insn "cstore<mode>4"
  [(set (match_operand:BI 0 "gcn_conditional_register_operand" "=cV")
        (match_operator:BI 1 "gcn_fp_compare_operator"
          [(match_operand:SFDF 2 "gcn_alu_operand" "vB")
           (match_operand:SFDF 3 "gcn_alu_operand" "v")]))]
  ""
  "v_cmp%E1\tvcc, %2, %3"
  [(set_attr "type" "vopc")
   (set_attr "length" "8")])

(define_expand "cbranch<mode>4"
  [(match_operator 0 "gcn_fp_compare_operator"
     [(match_operand:SFDF 1 "gcn_alu_operand")
      (match_operand:SFDF 2 "gcn_alu_operand")])
   (match_operand 3)]
  ""
  {
    rtx cc = gen_reg_rtx (BImode);
    emit_insn (gen_cstore<mode>4 (cc, operands[0], operands[1], operands[2]));
    emit_jump_insn (gen_cjump (operands[3],
                               gen_rtx_NE (BImode, cc, const0_rtx), cc));
    DONE;
  })

;; }}}
;; {{{ ALU special cases: Plus

(define_insn "addsi3"
  [(set (match_operand:SI 0 "register_operand"         "= Sg, Sg, Sg,   v")
        (plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA,  0,SgA,   v")
                 (match_operand:SI 2 "gcn_alu_operand" " SgA,SgJ,  B,vBSv")))
   (clobber (match_scratch:BI 3                        "= cs, cs, cs,   X"))
   (clobber (match_scratch:DI 4                        "=  X,  X,  X,  cV"))]
  ""
  "@
   s_add_i32\t%0, %1, %2
   s_addk_i32\t%0, %2
   s_add_i32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,sopk,sop2,vop2")
   (set_attr "length" "4,4,8,8")])

(define_expand "addsi3_scc"
  [(parallel [(set (match_operand:SI 0 "register_operand")
                   (plus:SI (match_operand:SI 1 "gcn_alu_operand")
                            (match_operand:SI 2 "gcn_alu_operand")))
              (clobber (reg:BI SCC_REG))
              (clobber (scratch:DI))])]
  ""
  {})

; Having this as an insn_and_split allows us to keep together DImode adds
; through some RTL optimisation passes, and means the CC reg we set isn't
; dependent on the constraint alternative (which doesn't seem to work well).

; If v_addc_u32 is used to add with carry, a 32-bit literal constant cannot be
; used as an operand due to the read of VCC, so we restrict constants to the
; inlinable range for that alternative.

(define_insn_and_split "adddi3"
  [(set (match_operand:DI 0 "register_operand"           "=Sg, v")
        (plus:DI (match_operand:DI 1 "register_operand"  " Sg, v")
                 (match_operand:DI 2 "nonmemory_operand" "SgB,vA")))
   (clobber (match_scratch:BI 3                          "=cs, X"))
   (clobber (match_scratch:DI 4                          "= X,cV"))]
  ""
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    rtx cc = gen_rtx_REG (BImode, gcn_vgpr_register_operand (operands[1],
                                                             DImode)
                          ? VCC_REG : SCC_REG);

    emit_insn (gen_addsi3_scalar_carry
               (gcn_operand_part (DImode, operands[0], 0),
                gcn_operand_part (DImode, operands[1], 0),
                gcn_operand_part (DImode, operands[2], 0),
                cc));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    if (val != const0_rtx)
      emit_insn (gen_addcsi3_scalar
                 (gcn_operand_part (DImode, operands[0], 1),
                  gcn_operand_part (DImode, operands[1], 1),
                  gcn_operand_part (DImode, operands[2], 1),
                  cc, cc));
    else
      emit_insn (gen_addcsi3_scalar_zero
                 (gcn_operand_part (DImode, operands[0], 1),
                  gcn_operand_part (DImode, operands[1], 1),
                  cc));
    DONE;
  }
  [(set_attr "type" "mult,vmult")
   (set_attr "length" "8")])

(define_expand "adddi3_scc"
  [(parallel [(set (match_operand:DI 0 "register_operand")
                   (plus:DI (match_operand:DI 1 "register_operand")
                            (match_operand:DI 2 "nonmemory_operand")))
              (clobber (reg:BI SCC_REG))
              (clobber (scratch:DI))])]
  ""
  {})

;; Add with carry.
; Low-part add: also sets the carry-out register (operand 3: SCC for the
; scalar alternative, VCC for the vector one), computed as an unsigned
; overflow test (sum < addend).
(define_insn "addsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgB,vB")))
   (set (match_operand:BI 3 "register_operand"	       "= cs,cV")
	(ltu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,8")])

; As above but for a constant addend; operand 3 must be the negated constant
; (enforced by the insn condition) so the carry test can use GEU.
(define_insn "addsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"	       "=Sg, v")
	(plus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA, v")
		 (match_operand:SI 2 "const_int_operand" " n, n")))
   (set (match_operand:BI 4 "register_operand"	       "=cs,cV")
	(geu:BI (plus:SI (match_dup 1)
			 (match_dup 2))
		(match_operand:SI 3 "const_int_operand" " n, n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "@
   s_add_u32\t%0, %1, %2
   v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; High-part add with both carry-in (operand 3) and carry-out (operand 4,
; tied to operand 3 via the "3" constraint).
(define_insn "addcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"			   "= Sg, v")
	(plus:SI (plus:SI (zero_extend:SI
			    (match_operand:BI 3 "register_operand" "= cs,cV"))
			  (match_operand:SI 1 "gcn_alu_operand"	   "%SgA, v"))
		 (match_operand:SI 2 "gcn_alu_operand"		   " SgB,vA")))
   (set (match_operand:BI 4 "register_operand"			   "=  3, 3")
	(ior:BI (ltu:BI (plus:SI
			  (plus:SI
			    (zero_extend:SI (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:BI (plus:SI (zero_extend:SI (match_dup 3)) (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   s_addc_u32\t%0, %1, %2
   v_addc%^_u32\t%0, vcc, %2, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8,4")])

; High-part add when the high word of the addend is zero: just propagate
; the carry (operand 2 is both carry-in and carry-out).
(define_insn "addcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		  "=Sg, v")
	(plus:SI (zero_extend:SI
		   (match_operand:BI 2 "register_operand" "=cs,cV"))
		 (match_operand:SI 1 "gcn_alu_operand"	  "SgA, v")))
   (set (match_dup 2)
	(ltu:BI (plus:SI (zero_extend:SI (match_dup 2))
			 (match_dup 1))
		(match_dup 1)))]
  ""
  "@
   s_addc_u32\t%0, %1, 0
   v_addc%^_u32\t%0, vcc, 0, %1, vcc"
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "4")])

; "addptr" is the same as "add" except that it must not write to VCC or SCC
; as a side-effect.  Unfortunately GCN does not have a suitable instruction
; for this, so we use a custom VOP3 add with CC_SAVE_REG as a temp.
; Note that it is not safe to save/clobber/restore SCC because doing so will
; break data-flow analysis, so this must use vector registers.
;
; The "v0" should be just "v", but somehow the "0" helps LRA not loop forever
; on testcase pr54713-2.c with -O0.  It's only an optimization hint anyway.

(define_insn "addptrdi3"
  [(set (match_operand:DI 0 "register_operand"		 "= v")
	(plus:DI (match_operand:DI 1 "register_operand"	 " v0")
		 (match_operand:DI 2 "nonmemory_operand" "vDA")))]
  ""
  {
    rtx new_operands[4] = { operands[0], operands[1], operands[2],
			    gen_rtx_REG (DImode, CC_SAVE_REG) };

    output_asm_insn ("v_add%^_u32 %L0, %3, %L2, %L1", new_operands);
    output_asm_insn ("v_addc%^_u32 %H0, %3, %H2, %H1, %3", new_operands);

    return "";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

;; }}}
;; {{{ ALU special cases: Minus

; SImode subtract; scalar alternatives clobber SCC, vector ones clobber VCC.
; The v_subrev alternative is used when only the subtrahend can take the
; wider "vBSv" operand.
(define_insn "subsi3"
  [(set (match_operand:SI 0 "register_operand"		"=Sg, Sg,    v,   v")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgA,    v,vBSv")
		  (match_operand:SI 2 "gcn_alu_operand" "SgA,  B, vBSv,   v")))
   (clobber (match_scratch:BI 3				"=cs, cs,    X,   X"))
   (clobber (match_scratch:DI 4				"= X,  X,   cV,  cV"))]
  ""
  "@
   s_sub_i32\t%0, %1, %2
   s_sub_i32\t%0, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1
   v_sub%^_u32\t%0, vcc, %1, %2"
  [(set_attr "type" "sop2,sop2,vop2,vop2")
   (set_attr "length" "4,8,8,8")])

; Scalar-only DImode subtract; split after reload into a borrow-setting
; low-part subtract plus a subtract-with-borrow for the high part.
(define_insn_and_split "subdi3"
  [(set (match_operand:DI 0 "register_operand"	      "=Sg, Sg")
	(minus:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgA,SgB")
	  (match_operand:DI 2 "gcn_alu_operand" "SgB,SgA")))
   (clobber (reg:BI SCC_REG))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    emit_insn (gen_subsi3_scalar_carry
	       (gcn_operand_part (DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (DImode, operands[2], 0)));
    rtx val = gcn_operand_part (DImode, operands[2], 1);
    if (val != const0_rtx)
      emit_insn (gen_subcsi3_scalar
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1),
		  gcn_operand_part (DImode, operands[2], 1)));
    else
      emit_insn (gen_subcsi3_scalar_zero
		 (gcn_operand_part (DImode, operands[0], 1),
		  gcn_operand_part (DImode, operands[1], 1)));
    DONE;
  }
  [(set_attr "length" "8")])

; Low-part subtract that sets SCC to the borrow (difference > minuend).
(define_insn "subsi3_scalar_carry"
  [(set (match_operand:SI 0 "register_operand"		"=Sg, Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB")
		  (match_operand:SI 2 "gcn_alu_operand" "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_dup 1)))]
  ""
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; As above for a constant subtrahend; operand 3 is its negation (see the
; insn condition), allowing the borrow test to use LEU.
(define_insn "subsi3_scalar_carry_cst"
  [(set (match_operand:SI 0 "register_operand"		  "=Sg")
	(minus:SI (match_operand:SI 1 "gcn_alu_operand"	  "SgA")
		  (match_operand:SI 2 "const_int_operand" "  n")))
   (set (reg:BI SCC_REG)
	(leu:BI (minus:SI (match_dup 1)
			  (match_dup 2))
		(match_operand:SI 3 "const_int_operand"	  "  n")))]
  "INTVAL (operands[2]) == -INTVAL (operands[3])"
  "s_sub_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

; High-part subtract with borrow-in and borrow-out, both through SCC.
(define_insn "subcsi3_scalar"
  [(set (match_operand:SI 0 "register_operand"			  "=Sg, Sg")
	(minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
			    (match_operand:SI 1 "gcn_alu_operand" "SgA,SgB"))
		  (match_operand:SI 2 "gcn_alu_operand"		  "SgB,SgA")))
   (set (reg:BI SCC_REG)
	(ior:BI (gtu:BI (minus:SI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
					    (match_dup 1))
				  (match_dup 2))
			(match_dup 1))
		(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG))
				  (match_dup 1))
			(match_dup 1))))]
  ""
  "s_subb_u32\t%0, %1, %2"
  [(set_attr "type" "sop2")
   (set_attr "length" "8")])

; High-part subtract when the high word of the subtrahend is zero: just
; subtract the borrow.
(define_insn "subcsi3_scalar_zero"
  [(set (match_operand:SI 0 "register_operand"		"=Sg")
	(minus:SI (zero_extend:SI (reg:BI SCC_REG))
		  (match_operand:SI 1 "gcn_alu_operand" "SgA")))
   (set (reg:BI SCC_REG)
	(gtu:BI (minus:SI (zero_extend:SI (reg:BI SCC_REG)) (match_dup 1))
		(match_dup 1)))]
  ""
  "s_subb_u32\t%0, %1, 0"
  [(set_attr "type" "sop2")
   (set_attr "length" "4")])

;; }}}
;; {{{ ALU: mult

; Vector multiply has vop3a encoding, but no corresponding vop2a, so no long
; immediate.
(define_insn "mulsi3"
  [(set (match_operand:SI 0 "register_operand"	       "= Sg,Sg, Sg,   v")
	(mult:SI (match_operand:SI 1 "gcn_alu_operand" "%SgA, 0,SgA,   v")
		 (match_operand:SI 2 "gcn_alu_operand" " SgA, J,  B,vASv")))]
  ""
  "@
   s_mul_i32\t%0, %1, %2
   s_mulk_i32\t%0, %2
   s_mul_i32\t%0, %1, %2
   v_mul_lo_i32\t%0, %1, %2"
  [(set_attr "type" "sop2,sopk,sop2,vop3a")
   (set_attr "length" "4,4,8,4")])

; Iterator and attributes shared by the signed/unsigned widening multiplies
; below; <sgnsuffix> expands to an "%i"/"%u" output modifier on operand 0.
(define_code_iterator any_extend [sign_extend zero_extend])
(define_code_attr sgnsuffix [(sign_extend "%i") (zero_extend "%u")])
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
(define_code_attr u [(sign_extend "") (zero_extend "u")])
(define_code_attr iu [(sign_extend "i") (zero_extend "u")])
(define_code_attr e [(sign_extend "e") (zero_extend "")])

; High 32 bits of a 32x32->64 multiply, on the vector unit.
(define_insn "<su>mulsi3_highpart"
  [(set (match_operand:SI 0 "register_operand"	       "= v")
	(truncate:SI
	  (lshiftrt:DI
	    (mult:DI
	      (any_extend:DI
		(match_operand:SI 1 "register_operand" "% v"))
	      (any_extend:DI
		(match_operand:SI 2 "register_operand" "vSv")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; HImode widening multiply via the 24-bit multiplier, selecting the low
; word of each source with SDWA operand selects.
(define_insn "<u>mulhisi3"
  [(set (match_operand:SI 0 "register_operand"			"=v")
	(mult:SI
	  (any_extend:SI (match_operand:HI 1 "register_operand" "%v"))
	  (any_extend:SI (match_operand:HI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:WORD_0 src1_sel:WORD_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

; QImode widening multiply, selecting the low byte of each source.
(define_insn "<u>mulqihi3_scalar"
  [(set (match_operand:HI 0 "register_operand"			"=v")
	(mult:HI
	  (any_extend:HI (match_operand:QI 1 "register_operand" "%v"))
	  (any_extend:HI (match_operand:QI 2 "register_operand" " v"))))]
  ""
  "v_mul_<iu>32_<iu>24_sdwa\t%0, %<e>1, %<e>2 src0_sel:BYTE_0 src1_sel:BYTE_0"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU: generic 32-bit unop

(define_code_iterator bitunop [not popcount])
(define_code_attr popcount_extra_op [(not "") (popcount ", 0")])

; NOT and POPCOUNT; the scalar alternative clobbers SCC.
(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"	"=Sg,   v")
	(bitunop:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,vSvB")))
   (clobber (match_scratch:BI 2			"=cs,   X"))]
  ""
  "@
   s_<s_mnemonic>0\t%0, %1
   v_<mnemonic>0\t%0, %1<popcount_extra_op>"
  [(set_attr "type" "sop1,vop1")
   (set_attr "length" "8")])

(define_code_iterator countzeros [clz ctz])

; CLZ/CTZ; scalar unit only.
(define_insn "<expander>si2"
  [(set (match_operand:SI 0 "register_operand"	  "=Sg,Sg")
	(countzeros:SI
	  (match_operand:SI 1 "gcn_alu_operand"	  "SgA, B")))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

; The truncate ensures that a constant passed to operand 1 is treated as DImode
(define_insn "<expander>di2"
  [(set (match_operand:SI 0 "register_operand"	    "=Sg,Sg")
	(truncate:SI
	  (countzeros:DI
	    (match_operand:DI 1 "gcn_alu_operand"   "SgA, B"))))]
  ""
  "s_<s_mnemonic>1\t%0, %1"
  [(set_attr "type" "sop1")
   (set_attr "length" "4,8")])

;; }}}
;; {{{ ALU: generic 32-bit binop

; No plus and mult - they have variant with 16bit immediate
; and thus are defined later.
(define_code_iterator binop [and ior xor smin smax umin umax
			     ashift lshiftrt ashiftrt])
(define_code_iterator vec_and_scalar_com [and ior xor smin smax umin umax])
(define_code_iterator vec_and_scalar_nocom [ashift lshiftrt ashiftrt])

; Commutative binops: scalar (clobbers SCC), vector, or LDS ("RD"/ds_*)
; alternatives.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "gcn_valu_dst_operand"    "= Sg,   v,RD")
	(vec_and_scalar_com:SI
	  (match_operand:SI 1 "gcn_valu_src0_operand" "%SgA,vSvB, 0")
	  (match_operand:SI 2 "gcn_alu_operand"	      " SgB,   v, v")))
   (clobber (match_scratch:BI 3			      "= cs,   X, X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   v_<mnemonic>0\t%0, %1, %2
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "sop2,vop2,ds")
   (set_attr "length" "8")])

; Non-commutative binops (shifts); the vector alternative uses the
; reversed-operand mnemonic.
(define_insn "<expander>si3"
  [(set (match_operand:SI 0 "register_operand"	"=Sg, Sg,   v")
	(vec_and_scalar_nocom:SI
	  (match_operand:SI 1 "gcn_alu_operand" "SgB,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgB,vSvB")))
   (clobber (match_scratch:BI 3			"=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

; Named variant with an explicit SCC clobber, covering all binops.
(define_expand "<expander>si3_scc"
  [(parallel [(set (match_operand:SI 0 "gcn_valu_dst_operand")
		   (binop:SI
		     (match_operand:SI 1 "gcn_valu_src0_operand")
		     (match_operand:SI 2 "gcn_alu_operand")))
	      (clobber (reg:BI SCC_REG))])]
  ""
  {})

;; }}}
;; {{{ ALU: generic 64-bit

(define_code_iterator vec_and_scalar64_com [and ior xor])

; DImode commutative bitops: a single scalar instruction, or (for VGPRs)
; split after reload into two SImode vector operations on the word halves.
(define_insn_and_split "<expander>di3"
   [(set (match_operand:DI 0 "register_operand"	 "= Sg,    v")
	 (vec_and_scalar64_com:DI
	  (match_operand:DI 1 "gcn_alu_operand"	 "%SgA,vSvDB")
	  (match_operand:DI 2 "gcn_alu_operand"	 " SgC,    v")))
   (clobber (match_scratch:BI 3			 "= cs,    X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   #"
  "reload_completed && gcn_vgpr_register_operand (operands[0], DImode)"
  [(parallel [(set (match_dup 4)
		   (vec_and_scalar64_com:SI (match_dup 5) (match_dup 6)))
	      (clobber (match_dup 3))])
   (parallel [(set (match_dup 7)
		   (vec_and_scalar64_com:SI (match_dup 8) (match_dup 9)))
	      (clobber (match_dup 3))])]
  {
    operands[4] = gcn_operand_part (DImode, operands[0], 0);
    operands[5] = gcn_operand_part (DImode, operands[1], 0);
    operands[6] = gcn_operand_part (DImode, operands[2], 0);
    operands[7] = gcn_operand_part (DImode, operands[0], 1);
    operands[8] = gcn_operand_part (DImode, operands[1], 1);
    operands[9] = gcn_operand_part (DImode, operands[2], 1);
  }
  [(set_attr "type" "sop2,vop2")
   (set_attr "length" "8")])

; DImode shifts; note the shift amount (operand 2) is SImode.
(define_insn "<expander>di3"
  [(set (match_operand:DI 0 "register_operand"	"=Sg, Sg,   v")
	(vec_and_scalar_nocom:DI
	  (match_operand:DI 1 "gcn_alu_operand" "SgC,SgA,   v")
	  (match_operand:SI 2 "gcn_alu_operand" "SgA,SgC,vSvC")))
   (clobber (match_scratch:BI 3			"=cs, cs,   X"))]
  ""
  "@
   s_<mnemonic>0\t%0, %1, %2
   s_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "sop2,sop2,vop2")
   (set_attr "length" "8")])

;; }}}
;; {{{ Atomics

; Each compute unit has its own L1 cache.  The L2 cache is shared between
; all the compute units.  Any load or store instruction can skip L1 and
; access L2 directly using the "glc" flag.  Atomic instructions also skip
; L1.  The L1 cache can be flushed and invalidated using instructions.
;
; Therefore, in order for "acquire" and "release" atomic modes to work
; correctly across compute units we must flush before each "release"
; and invalidate the cache after each "acquire".
; It might seem like
; invalidation could be safely done before an "acquire", but since each
; compute unit can run up to 40 threads simultaneously, all reading values
; into the L1 cache, this is not actually safe.
;
; Additionally, scalar flat instructions access L2 via a different cache
; (the "constant cache"), so they have separate control instructions.  We
; do not attempt to invalidate both caches at once; instead, atomics
; operating on scalar flat pointers will flush the constant cache, and
; atomics operating on flat or global pointers will flush L1.  It is up to
; the programmer to get this right.

(define_code_iterator atomicops [plus minus and ior xor])
(define_mode_attr X [(SI "") (DI "_X2")])

;; TODO compare_and_swap test_and_set inc dec
;; Hardware also supports min and max, but GCC does not.

; Full memory barrier, represented as an unspec on a scratch BLKmode MEM.
(define_expand "memory_barrier"
  [(set (match_dup 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })

(define_insn "*memory_barrier"
  [(set (match_operand:BLK 0)
	(unspec:BLK [(match_dup 0)] UNSPEC_MEMORY_BARRIER))]
  ""
  "buffer_wbinvl1_vol"
  [(set_attr "type" "mubuf")
   (set_attr "length" "4")])

; FIXME: These patterns have been disabled as they do not seem to work
; reliably - they can cause hangs or incorrect results.
; TODO: flush caches according to memory model
(define_insn "atomic_fetch_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "register_operand"      "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"	      "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	     (match_dup 1)
	     (match_operand:SIDI 2 "register_operand" " Sm, v, v"))]
	   UNSPECV_ATOMIC))
   (use (match_operand 3 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; FIXME: These patterns are disabled because the instructions don't
; seem to work as advertised.  Specifically, OMP "team distribute"
; reductions apparently "lose" some of the writes, similar to what
; you might expect from a concurrent non-atomic read-modify-write.
; TODO: flush caches according to memory model
(define_insn "atomic_<bare_mnemonic><mode>"
  [(set (match_operand:SIDI 0 "memory_operand"	      "+RS,RF,RM")
	(unspec_volatile:SIDI
	  [(atomicops:SIDI
	     (match_dup 0)
	     (match_operand:SIDI 1 "register_operand" " Sm, v, v"))]
	  UNSPECV_ATOMIC))
   (use (match_operand 2 "const_int_operand"))]
  "0 /* Disabled.  */"
  "@
   s_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
   global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

(define_mode_attr x2 [(SI "DI") (DI "TI")])
(define_mode_attr size [(SI "4") (DI "8")])
(define_mode_attr bitsize [(SI "32") (DI "64")])

; LDS pointers get a dedicated ds_cmpst pattern; otherwise the compare and
; source values are packed into one double-width register pair for cmpswap.
(define_expand "sync_compare_and_swap<mode>"
  [(match_operand:SIDI 0 "register_operand")
   (match_operand:SIDI 1 "memory_operand")
   (match_operand:SIDI 2 "register_operand")
   (match_operand:SIDI 3 "register_operand")]
  ""
  {
    if (MEM_ADDR_SPACE (operands[1]) == ADDR_SPACE_LDS)
      {
	emit_insn (gen_sync_compare_and_swap<mode>_lds_insn (operands[0],
							     operands[1],
							     operands[2],
							     operands[3]));
	DONE;
      }

    /* Operands 2 and 3 must be placed in consecutive registers, and passed
       as a combined value.  */
    rtx src_cmp = gen_reg_rtx (<x2>mode);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, 0), operands[3]);
    emit_move_insn (gen_rtx_SUBREG (<MODE>mode, src_cmp, <size>), operands[2]);
    emit_insn (gen_sync_compare_and_swap<mode>_insn (operands[0],
						     operands[1],
						     src_cmp));
    DONE;
  })

; Operand 2 is the combined source/compare pair built by the expander above.
(define_insn "sync_compare_and_swap<mode>_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"	    "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:<x2> 2 "register_operand" " Sm, v, v")]
	  UNSPECV_ATOMIC))]
  ""
  "@
   s_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)
   flat_atomic_cmpswap<X>\t%0, %1, %2 glc\;s_waitcnt\t0
   global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "12")
   (set_attr "gcn_version" "gcn5,*,gcn5")
   (set_attr "delayeduse" "*,yes,yes")])

(define_insn "sync_compare_and_swap<mode>_lds_insn"
  [(set (match_operand:SIDI 0 "register_operand"    "= v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand"   "+RL")]
	  UNSPECV_ATOMIC))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand" " v")
	   (match_operand:SIDI 3 "register_operand" " v")]
	  UNSPECV_ATOMIC))]
  ""
  "ds_cmpst_rtn_b<bitsize> %0, %1, %2, %3\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Atomic load.  Operand 2 selects the memory model: relaxed just bypasses
; L1 ("glc"); acquire variants invalidate the relevant cache after the load;
; seq-cst variants also write back/invalidate before it.
(define_insn "atomic_load<mode>"
  [(set (match_operand:SIDI 0 "register_operand"  "=Sm, v, v")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "memory_operand" " RS,RF,RM")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand" "  i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol";
	  case 1:
	    return "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;"
		   "buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; Atomic store.  Operand 2 selects the memory model: release variants write
; back/invalidate the cache before the store; seq-cst also invalidates after.
(define_insn "atomic_store<mode>"
  [(set (match_operand:SIDI 0 "memory_operand"	     "=RS,RF,RM")
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 1 "register_operand"  " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand:SIDI 2 "immediate_operand"    "  i, i, i"))]
  ""
  {
    switch (INTVAL (operands[2]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_store%o1\t%1, %A0 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_store%o1\t%A0, %1%O0 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_store%o1\t%A0, %1%O0 glc\;s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

; Atomic swap.  Operand 3 selects the memory model, with cache maintenance
; placed before and/or after the swap as for atomic_load/atomic_store.
(define_insn "atomic_exchange<mode>"
  [(set (match_operand:SIDI 0 "register_operand"      "=Sm, v, v")
	(match_operand:SIDI 1 "memory_operand"	      "+RS,RF,RM"))
   (set (match_dup 1)
	(unspec_volatile:SIDI
	  [(match_operand:SIDI 2 "register_operand"   " Sm, v, v")]
	  UNSPECV_ATOMIC))
   (use (match_operand 3 "immediate_operand"))]
  ""
  {
    switch (INTVAL (operands[3]))
      {
      case MEMMODEL_RELAXED:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_CONSUME:
      case MEMMODEL_ACQUIRE:
      case MEMMODEL_SYNC_ACQUIRE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;"
		   "s_dcache_wb_vol\;s_dcache_inv_vol";
	  case 1:
	    return "flat_atomic_swap<X>\t%0, %1, %2 glc\;s_waitcnt\t0\;"
		   "buffer_wbinvl1_vol";
	  case 2:
	    return "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      case MEMMODEL_RELEASE:
      case MEMMODEL_SYNC_RELEASE:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)";
	  }
	break;
      case MEMMODEL_ACQ_REL:
      case MEMMODEL_SEQ_CST:
      case MEMMODEL_SYNC_SEQ_CST:
	switch (which_alternative)
	  {
	  case 0:
	    return "s_dcache_wb_vol\;s_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol";
	  case 1:
	    return "buffer_wbinvl1_vol\;flat_atomic_swap<X>\t%0, %1, %2 glc\;"
		   "s_waitcnt\t0\;buffer_wbinvl1_vol";
	  case 2:
	    return "buffer_wbinvl1_vol\;"
		   "global_atomic_swap<X>\t%0, %A1, %2%O1 glc\;"
		   "s_waitcnt\tvmcnt(0)\;buffer_wbinvl1_vol";
	  }
	break;
      }
    gcc_unreachable ();
  }
  [(set_attr "type" "smem,flat,flat")
   (set_attr "length" "20")
   (set_attr "gcn_version" "gcn5,*,gcn5")])

;; }}}
;; {{{ OpenACC / OpenMP

; Size of an OpenACC compute dimension, via the gcn_oacc_dim_size helper.
(define_expand "oacc_dim_size"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    rtx tmp = gcn_oacc_dim_size (INTVAL (operands[1]));
    emit_move_insn (operands[0], gen_lowpart (SImode, tmp));
    DONE;
  })

; Position within an OpenACC compute dimension.
(define_expand "oacc_dim_pos"
  [(match_operand:SI 0 "register_operand")
   (match_operand:SI 1 "const_int_operand")]
  ""
  {
    emit_move_insn (operands[0], gcn_oacc_dim_pos (INTVAL (operands[1])));
    DONE;
  })

; Wavefront barrier, modelled as a volatile unspec on a scratch BLKmode MEM
; so it is not moved or deleted by the optimisers.
(define_expand "gcn_wavefront_barrier"
  [(set (match_dup 0)
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  {
    operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
    MEM_VOLATILE_P (operands[0]) = 1;
  })

(define_insn "*gcn_wavefront_barrier"
  [(set (match_operand:BLK 0 "")
	(unspec_volatile:BLK [(match_dup 0)] UNSPECV_BARRIER))]
  ""
  "s_barrier"
  [(set_attr "type" "sopp")])

(define_expand "oacc_fork"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    /* We need to have oacc_fork/oacc_join named patterns as a pair,
       but the fork isn't actually used.  */
    gcc_unreachable ();
  })

; oacc_join is implemented as a wavefront barrier.
(define_expand "oacc_join"
  [(set (match_operand:SI 0 "")
	(match_operand:SI 1 ""))
   (use (match_operand:SI 2 ""))]
  ""
  {
    emit_insn (gen_gcn_wavefront_barrier ());
    DONE;
  })

;; }}}

(include "gcn-valu.md")