;; Copyright (C) 2016-2019 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
		      [V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
		      [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
		      [V64QI V64HI V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
		      [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
		      [V64DI V64DF])

; All of above
(define_mode_iterator VEC_REG_MODE
		      [V64QI V64HI V64SI V64HF V64SF    ; Single reg
		       V64DI V64DF])			; Double reg

; Map a vector mode to its element mode (lower/upper case variants).
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

;; }}}
;; {{{ Substitutions

; Each subst below generates a "_exec" variant of a pattern, adding an
; explicit EXEC-mask operand (and, for vec_merge, the merged "previous
; value" operand with the "U0" constraint).

(define_subst_attr "exec" "vec_merge"
		   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
		   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
		   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
		   "" "_exec")

(define_subst "vec_merge"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (set (match_operand:DI 2)
	(match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
	   (vec_merge:VEC_REG_MODE
	     (match_dup 1)
	     (match_operand:VEC_REG_MODE 4
					 "gcn_register_or_unspec_operand" "U0")
	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
	   (and:DI (match_dup 3)
		   (reg:DI EXEC_REG)))])])

(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand 0)
	   (match_operand 1)
	   (match_operand 2)
	   (match_operand 3)]
	  UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 0)
	   (match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
	  UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(match_operand:VEC_REG_MODE 1 "general_operand"))]
  ""
  {
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
	DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[1],
							scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
	DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
	gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (V64DImode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
	DONE;
      }
  })

; A pseudo instruction that helps LRA use the "U0" constraint.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_REG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
							 "=v, v, v, v, v, m")
	(vec_merge:VEC_1REG_MODE
	  (match_operand:VEC_1REG_MODE 1 "general_operand"
							 "vA, B, v,vA, m, v")
	  (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
							 "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"	 " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4			 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
;	(vec_merge:VEC_1REG_MODE
;	  (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])

; Double-register moves are emitted as two 32-bit moves; the halves are
; ordered so that an overlapping register-to-register copy is not corrupted.

(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						       "= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						       "vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						       " U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		       "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])

; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;	(vec_merge:VEC_2REG_MODE
;	  (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
;   (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;     else \
;       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;  #
;  #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])

; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])

; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_REG_MODE 1 "memory_operand"	 "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"	 "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"	  "= m")
	(match_operand:VEC_REG_MODE 1 "register_operand"  "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"	  "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; Expand scalar addresses into gather/scatter patterns

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "general_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
			      (mem:BLK (scratch))]
			     UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "memory_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				(mem:BLK (scratch))]
			       UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates

(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	   "= v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	   " Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								   " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		   "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	   "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	   " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								   " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		   "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(vec_merge:VEC_REG_MODE
	  (vec_duplicate:VEC_REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")

(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	    "=v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	    "Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								    "U0")
	  (match_operand:SI 2 "const_int_operand"		    " i")))]
  "((unsigned) exact_log2 (INTVAL
(operands[2])) < 64)" 509 { 510 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); 511 return "v_writelane_b32 %0, %1, %2"; 512 } 513 [(set_attr "type" "vop3a") 514 (set_attr "length" "8") 515 (set_attr "exec" "none") 516 (set_attr "laneselect" "yes")]) 517 518(define_insn "*vec_set<mode>_1" 519 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v") 520 (vec_merge:VEC_2REG_MODE 521 (vec_duplicate:VEC_2REG_MODE 522 (match_operand:<SCALAR_MODE> 1 "register_operand" "Sv")) 523 (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand" 524 "U0") 525 (match_operand:SI 2 "const_int_operand" " i")))] 526 "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)" 527 { 528 operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2]))); 529 return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"; 530 } 531 [(set_attr "type" "vmult") 532 (set_attr "length" "16") 533 (set_attr "exec" "none") 534 (set_attr "laneselect" "yes")]) 535 536(define_insn "vec_duplicate<mode><exec>" 537 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") 538 (vec_duplicate:VEC_1REG_MODE 539 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))] 540 "" 541 "v_mov_b32\t%0, %1" 542 [(set_attr "type" "vop3a") 543 (set_attr "length" "8")]) 544 545(define_insn "vec_duplicate<mode><exec>" 546 [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v") 547 (vec_duplicate:VEC_2REG_MODE 548 (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))] 549 "" 550 "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1" 551 [(set_attr "type" "vop3a") 552 (set_attr "length" "16")]) 553 554(define_insn "vec_extract<mode><scalar_mode>" 555 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg") 556 (vec_select:<SCALAR_MODE> 557 (match_operand:VEC_1REG_MODE 1 "register_operand" " v") 558 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))] 559 "" 560 "v_readlane_b32 %0, %1, %2" 561 [(set_attr "type" "vop3a") 562 (set_attr "length" "8") 563 (set_attr "exec" "none") 564 (set_attr 
"laneselect" "yes")]) 565 566(define_insn "vec_extract<mode><scalar_mode>" 567 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg") 568 (vec_select:<SCALAR_MODE> 569 (match_operand:VEC_2REG_MODE 1 "register_operand" " v") 570 (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))] 571 "" 572 "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2" 573 [(set_attr "type" "vmult") 574 (set_attr "length" "16") 575 (set_attr "exec" "none") 576 (set_attr "laneselect" "yes")]) 577 578(define_expand "vec_init<mode><scalar_mode>" 579 [(match_operand:VEC_REG_MODE 0 "register_operand") 580 (match_operand 1)] 581 "" 582 { 583 gcn_expand_vector_init (operands[0], operands[1]); 584 DONE; 585 }) 586 587;; }}} 588;; {{{ Scatter / Gather 589 590;; GCN does not have an instruction for loading a vector from contiguous 591;; memory so *all* loads and stores are eventually converted to scatter 592;; or gather. 593;; 594;; GCC does not permit MEM to hold vectors of addresses, so we must use an 595;; unspec. The unspec formats are as follows: 596;; 597;; (unspec:V64?? 598;; [(<address expression>) 599;; (<addr_space_t>) 600;; (<use_glc>) 601;; (mem:BLK (scratch))] 602;; UNSPEC_GATHER) 603;; 604;; (unspec:BLK 605;; [(<address expression>) 606;; (<source register>) 607;; (<addr_space_t>) 608;; (<use_glc>) 609;; (<exec>)] 610;; UNSPEC_SCATTER) 611;; 612;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>. 613;; - The mem:BLK does not contain any real information, but indicates that an 614;; unknown memory read is taking place. Stores are expected to use a similar 615;; mem:BLK outside the unspec. 616;; - The address space and glc (volatile) fields are there to replace the 617;; fields normally found in a MEM. 618;; - Multiple forms of address expression are supported, below. 
619 620(define_expand "gather_load<mode>" 621 [(match_operand:VEC_REG_MODE 0 "register_operand") 622 (match_operand:DI 1 "register_operand") 623 (match_operand 2 "register_operand") 624 (match_operand 3 "immediate_operand") 625 (match_operand:SI 4 "gcn_alu_operand")] 626 "" 627 { 628 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], 629 operands[2], operands[4], 630 INTVAL (operands[3]), NULL); 631 632 if (GET_MODE (addr) == V64DImode) 633 emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx, 634 const0_rtx, const0_rtx)); 635 else 636 emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1], 637 addr, const0_rtx, const0_rtx, 638 const0_rtx)); 639 DONE; 640 }) 641 642(define_expand "gather<mode>_exec" 643 [(match_operand:VEC_REG_MODE 0 "register_operand") 644 (match_operand:DI 1 "register_operand") 645 (match_operand:V64SI 2 "register_operand") 646 (match_operand 3 "immediate_operand") 647 (match_operand:SI 4 "gcn_alu_operand") 648 (match_operand:DI 5 "gcn_exec_reg_operand")] 649 "" 650 { 651 rtx undefmode = gcn_gen_undef (<MODE>mode); 652 653 rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1], 654 operands[2], operands[4], 655 INTVAL (operands[3]), operands[5]); 656 657 if (GET_MODE (addr) == V64DImode) 658 emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr, 659 const0_rtx, const0_rtx, 660 const0_rtx, undefmode, 661 operands[5])); 662 else 663 emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1], 664 addr, const0_rtx, 665 const0_rtx, const0_rtx, 666 undefmode, operands[5])); 667 DONE; 668 }) 669 670; Allow any address expression 671(define_expand "gather<mode>_expr<exec>" 672 [(set (match_operand:VEC_REG_MODE 0 "register_operand") 673 (unspec:VEC_REG_MODE 674 [(match_operand 1 "") 675 (match_operand 2 "immediate_operand") 676 (match_operand 3 "immediate_operand") 677 (mem:BLK (scratch))] 678 UNSPEC_GATHER))] 679 "" 680 {}) 681 682(define_insn 
"gather<mode>_insn_1offset<exec>" 683 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v") 684 (unspec:VEC_REG_MODE 685 [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v") 686 (vec_duplicate:V64DI 687 (match_operand 2 "immediate_operand" " n"))) 688 (match_operand 3 "immediate_operand" " n") 689 (match_operand 4 "immediate_operand" " n") 690 (mem:BLK (scratch))] 691 UNSPEC_GATHER))] 692 "(AS_FLAT_P (INTVAL (operands[3])) 693 && ((TARGET_GCN3 && INTVAL(operands[2]) == 0) 694 || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))) 695 || (AS_GLOBAL_P (INTVAL (operands[3])) 696 && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))" 697 { 698 addr_space_t as = INTVAL (operands[3]); 699 const char *glc = INTVAL (operands[4]) ? " glc" : ""; 700 701 static char buf[200]; 702 if (AS_FLAT_P (as)) 703 { 704 if (TARGET_GCN5_PLUS) 705 sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", 706 glc); 707 else 708 sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc); 709 } 710 else if (AS_GLOBAL_P (as)) 711 sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;" 712 "s_waitcnt\tvmcnt(0)", glc); 713 else 714 gcc_unreachable (); 715 716 return buf; 717 } 718 [(set_attr "type" "flat") 719 (set_attr "length" "12")]) 720 721(define_insn "gather<mode>_insn_1offset_ds<exec>" 722 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v") 723 (unspec:VEC_REG_MODE 724 [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v") 725 (vec_duplicate:V64SI 726 (match_operand 2 "immediate_operand" " n"))) 727 (match_operand 3 "immediate_operand" " n") 728 (match_operand 4 "immediate_operand" " n") 729 (mem:BLK (scratch))] 730 UNSPEC_GATHER))] 731 "(AS_ANY_DS_P (INTVAL (operands[3])) 732 && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))" 733 { 734 addr_space_t as = INTVAL (operands[3]); 735 static char buf[200]; 736 sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)", 737 (AS_GDS_P (as) ? 
" gds" : "")); 738 return buf; 739 } 740 [(set_attr "type" "ds") 741 (set_attr "length" "12")]) 742 743(define_insn "gather<mode>_insn_2offsets<exec>" 744 [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v") 745 (unspec:VEC_REG_MODE 746 [(plus:V64DI 747 (plus:V64DI 748 (vec_duplicate:V64DI 749 (match_operand:DI 1 "register_operand" "Sv")) 750 (sign_extend:V64DI 751 (match_operand:V64SI 2 "register_operand" " v"))) 752 (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n"))) 753 (match_operand 4 "immediate_operand" " n") 754 (match_operand 5 "immediate_operand" " n") 755 (mem:BLK (scratch))] 756 UNSPEC_GATHER))] 757 "(AS_GLOBAL_P (INTVAL (operands[4])) 758 && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))" 759 { 760 addr_space_t as = INTVAL (operands[4]); 761 const char *glc = INTVAL (operands[5]) ? " glc" : ""; 762 763 static char buf[200]; 764 if (AS_GLOBAL_P (as)) 765 { 766 /* Work around assembler bug in which a 64-bit register is expected, 767 but a 32-bit value would be correct. 
*/
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; Expand a vector scatter store: build the per-lane addresses from the
; base/offset operands, then emit the 1-offset form (fully-computed V64DI
; addresses) or the 2-offsets form (scalar base + vector offset).
(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })

; As scatter_store<mode>, but with an explicit EXEC mask (operand 5)
; selecting the active lanes.
(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })

; Allow any address expression
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_REG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})

; Scatter store to (per-lane 64-bit address + constant offset); operand 3
; is the address space, operand 4 selects the "glc" cache-coherence flag.
; The insn condition limits the constant offset to what the flat/global
; instruction encodings accept on the selected target.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
		       (vec_duplicate:V64DI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	/* Pre-GCN5 flat stores have no offset field.  */
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;"
		   "s_waitcnt\texpcnt(0)", glc);
	else
	  sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)",
		   glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
	       "s_waitcnt\texpcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; As above, but for the DS (LDS/GDS) address spaces, which use 32-bit
; per-lane addresses and a 16-bit unsigned offset field.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
		       (vec_duplicate:V64SI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])

; Scatter store to (scalar 64-bit base + sign-extended 32-bit vector offset
; + constant offset); global address space only.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 0 "register_operand"		   "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 1 "register_operand"	   " v")))
	     (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
								   " n")))
	   (match_operand:VEC_REG_MODE 3 "register_operand"	   " v")
	   (match_operand 4 "immediate_operand"			   " n")
	   (match_operand 5 "immediate_operand"			   " n")]
	  UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
	sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
		 "s_waitcnt\texpcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

;; }}}
;; {{{ Permutations

; Backwards-permute the lanes of a single-register vector; the data moves
; through LDS, hence the lgkmcnt wait after the instruction.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])

; Double-register variant: after reload, split into two 32-bit bpermutes,
; one per register half, sharing the same lane-index vector.
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))]
  {
    /* Low and high halves of the destination and source.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])

;; }}}
;; {{{ ALU special case: add/sub

; 32-bit vector add; the hardware instruction always writes a carry-out,
; so VCC is clobbered even when the carry is unused.
(define_insn "addv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "=   v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%   v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  " vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; As addv64si3, with a broadcast scalar addend.
(define_insn "addv64si3_dup<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"   "SvB"))
	  (match_operand:V64SI 1 "register_operand" "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; 32-bit vector add that exposes the per-lane carry-out in operand 3
; (VCC or an SGPR pair).
(define_insn "addv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "=   v,	v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%   v,	v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  " vSvB, vSvB")))
   (set (match_operand:DI 3 "register_operand"	    "=  cV,   Sg")
	(ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])

; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.

(define_insn "addv64si3_vcc_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v,  v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 1 "gcn_alu_operand"   "SvB, SvB"))
	  (match_operand:V64SI 2 "register_operand" "  v,   v")))
   (set (match_operand:DI 3 "register_operand"	    "=cV,  Sg")
	(ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
			    (match_dup 1))
		(vec_duplicate:V64SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])

; This pattern does not accept SGPR because VCC read already counts as an
; SGPR use and number of SGPR operands is limited to 1.
; 32-bit vector add-with-carry: adds the incoming carry (operand 3,
; described as a 0/1 vec_merge) plus two addends, and produces the
; outgoing per-lane carry in operand 4.  The carry-out is the union of
; the two places an unsigned overflow can occur, hence the ior of two
; ltu tests.
(define_insn "addcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"	 "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
	  (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,Sg")
	(ior:DI (ltu:DI (plus:V64SI
			  (plus:V64SI
			    (vec_merge:V64SI
			      (vec_duplicate:V64SI (const_int 1))
			      (vec_duplicate:V64SI (const_int 0))
			      (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (plus:V64SI
			  (vec_merge:V64SI
			    (vec_duplicate:V64SI (const_int 1))
			    (vec_duplicate:V64SI (const_int 0))
			    (match_dup 3))
			  (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

; As addcv64si3, with a broadcast scalar for the second addend.
(define_insn "addcv64si3_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"	 "=v,  v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV, Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"    "SvB,SvB"))))
   (set (match_operand:DI 4 "register_operand"	     "=cV, Sg")
	(ior:DI (ltu:DI (plus:V64SI (plus:V64SI
				      (vec_merge:V64SI
					(vec_duplicate:V64SI (const_int 1))
					(vec_duplicate:V64SI (const_int 0))
					(match_dup 3))
				      (match_dup 1))
				    (vec_duplicate:V64SI
				      (match_dup 2)))
			(vec_duplicate:V64SI
			  (match_dup 2)))
		(ltu:DI (plus:V64SI (vec_merge:V64SI
				      (vec_duplicate:V64SI (const_int 1))
				      (vec_duplicate:V64SI (const_int 0))
				      (match_dup 3))
				    (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])

; 32-bit vector subtract; the alternatives cover either operand being a
; non-register, using v_sub or the reversed-operand v_subrev.
(define_insn "subv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

; 32-bit vector subtract that exposes the per-lane borrow-out in
; operand 3 (VCC or an SGPR pair).
(define_insn "subv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v,   v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	   "= cV,  Sg,  cV,  Sg")
	(gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; This pattern does not accept SGPR because VCC read already counts
; as a SGPR use and number of SGPR operands is limited to 1.
; 32-bit vector subtract-with-borrow: subtracts the incoming borrow
; (operand 3, a 0/1 vec_merge) and operand 1 from operand 2, producing
; the outgoing per-lane borrow in operand 4 as the union of the two
; places an unsigned underflow can occur.
(define_insn "subcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"	  "= v, v, v, v")
	(minus:V64SI
	  (minus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "gcn_alu_operand"	  " cV,Sv,cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand"	  " vA,vA,vB,vB"))
	  (match_operand:V64SI 2 "gcn_alu_operand"	  " vB,vB,vA,vA")))
   (set (match_operand:DI 4 "register_operand"		  "=cV,Sg,cV,Sg")
	(ior:DI (gtu:DI (minus:V64SI (minus:V64SI
				       (vec_merge:V64SI
					 (vec_duplicate:V64SI (const_int 1))
					 (vec_duplicate:V64SI (const_int 0))
					 (match_dup 3))
				       (match_dup 1))
				     (match_dup 2))
			(match_dup 2))
		(ltu:DI (minus:V64SI (vec_merge:V64SI
				       (vec_duplicate:V64SI (const_int 1))
				       (vec_duplicate:V64SI (const_int 0))
				       (match_dup 3))
				     (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])

; 64-bit vector add: split into a low-part add (producing the carry in
; VCC) followed by a high-part add-with-carry.
(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand"   "=  &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "%  v0")
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As addv64di3, but with an EXEC mask (operand 4) and a merge source
; (operand 3) for the inactive lanes.
(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=  &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		    "%  v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   "   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "    e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  ; NOTE(review): the last test checks operands[4] (the DImode EXEC
  ; operand); operands[3] (the V64DI merge source) looks intended here,
  ; matching subv64di3_exec below -- confirm against upstream.
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; 64-bit vector subtract: low-part subtract (borrow in VCC) followed by
; high-part subtract-with-borrow.
(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "=  &v,   &v")
	(minus:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0,   v0")
	  (match_operand:V64DI 2 "gcn_alu_operand" "   v0,vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_subcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

; As subv64di3, with EXEC mask and merge source.
(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=  &v,   &v")
	(vec_merge:V64DI
	  (minus:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand"		    "vSvB0,   v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "   v0,vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
								    "   U0,   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "    e,    e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_subcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

; 64-bit vector add of a broadcast 64-bit scalar.
(define_insn_and_split "addv64di3_dup"
  [(set (match_operand:V64DI 0 "register_operand"   "= &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "  v0")
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"   "SvDB"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc));
    emit_insn (gen_addcv64si3_dup
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As addv64di3_dup, with EXEC mask and merge source.
(define_insn_and_split "addv64di3_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "= &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		    "  v0")
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand"		    "SvDB")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; 64-bit add of a zero-extended 32-bit vector: the high part only needs
; an add-with-carry of zero.
(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"   "=&v,&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "0vB,0vA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

; As addv64di3_zext, with EXEC mask and merge source.
(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=&v,&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		    "0vA,0vB"))
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "0vB,0vA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])

; 64-bit add of a zero-extended broadcast 32-bit scalar.
(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (vec_duplicate:V64SI
	      (match_operand:SI 1 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vA0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As addv64di3_zext_dup, with EXEC mask and merge source.
(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (vec_duplicate:V64SI
		(match_operand:SI 1 "gcn_alu_operand"		    "BSv")))
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "vA0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Zero-extended 32-bit vector plus broadcast 64-bit scalar: the high part
; is the broadcast scalar's high word plus the carry.
(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As addv64di3_zext_dup2, with EXEC mask and merge source.
(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								      " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
								      "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"     " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		      "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; Sign-extended variant of addv64di3_zext_dup2: the scratch holds the
; sign bits (arithmetic shift right by 31) added into the high part.
(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (match_scratch:V64SI 3				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

; As addv64di3_sext_dup2, with EXEC mask and merge source.
(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								      " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"
								      "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"     " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		      "  e")))
   (clobber (match_scratch:V64SI 5				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
				    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, operands[5], vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])

;; }}}
;; {{{ DS memory ALU: add/sub

(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])

;; FIXME: the vector patterns probably need RD expanded to a vector of
;; addresses.  For now, the only way a vector can get into LDS is
;; if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;; checked to see if anything can ever use them.

; Read-modify-write add into DS (LDS/GDS) memory; source and destination
; memory operands must be the same location.
(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(plus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode variant of add<mode>3_ds.
(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(plus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      "%RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Read-modify-write subtract (memory - register) into DS memory.
(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode variant of sub<mode>3_ds.
(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      " RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Reverse subtract (register - memory) into DS memory, via ds_rsub.
(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

; Scalar-mode variant of subr<mode>3_ds.
(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])

;; }}}
;; {{{ ALU special case: mult

; High 32 bits of a signed/unsigned 32x32->64 multiply.
(define_insn "<su>mulv64si3_highpart<exec>"
  [(set (match_operand:V64SI 0 "register_operand"	 "=  v")
	(truncate:V64SI
	  (lshiftrt:V64DI
	    (mult:V64DI
	      (any_extend:V64DI
		(match_operand:V64SI 1 "gcn_alu_operand" "  %v"))
	      (any_extend:V64DI
		(match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Low 32 bits of a 32x32 multiply (sign-agnostic).
(define_insn "mulv64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=   v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; As mulv64si3, with a broadcast scalar multiplier.
(define_insn "mulv64si3_dup<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=   v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"  "  SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; 64-bit vector multiply, split after reload into the schoolbook sum of
; 32-bit partial products: lo*lo (low and high parts), hi*lo, lo*hi and
; hi*hi folded into the high word.
(define_insn_and_split "mulv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "=&v")
	(mult:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "% v")
	  (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:V64SI 3		   "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })

; As mulv64di3, with EXEC mask and merge source.
(define_insn_and_split "mulv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand"		    "% v")
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "  e")))
   (clobber (match_scratch:V64SI 5				    "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* When the merge source is an undef unspec, both halves merge with
       undef; otherwise split it like the other V64DI operands.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })

; 64-bit multiply with a zero-extended 32-bit left operand: the left
; high word is zero, so two partial products drop out.
(define_insn_and_split "mulv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"  "=&v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "  v"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "vDA")))
   (clobber (match_scratch:V64SI 3		     "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })

; As mulv64di3_zext, with EXEC mask and merge source.
(define_insn_and_split "mulv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"		    "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		    "  v"))
	    (match_operand:V64DI 2 "gcn_alu_operand"		    "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"   " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		    "  e")))
   (clobber (match_scratch:V64SI 5				    "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })

; Zero-extended 32-bit vector times broadcast 64-bit scalar.
(define_insn_and_split "mulv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"      "= &v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand"   "   v"))
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"      "SvDA"))))
   (clobber (match_scratch:V64SI 3		       "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo =
gcn_operand_part (V64DImode, operands[0], 0); 1913 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1); 1914 rtx left = operands[1]; 1915 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0); 1916 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1); 1917 rtx tmp = operands[3]; 1918 1919 emit_insn (gen_mulv64si3 (out_lo, left, right_lo)); 1920 emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo)); 1921 emit_insn (gen_mulv64si3 (tmp, left, right_hi)); 1922 emit_insn (gen_addv64si3 (out_hi, out_hi, tmp)); 1923 DONE; 1924 }) 1925 1926(define_insn_and_split "mulv64di3_zext_dup2_exec" 1927 [(set (match_operand:V64DI 0 "register_operand" "= &v") 1928 (vec_merge:V64DI 1929 (mult:V64DI 1930 (zero_extend:V64DI 1931 (match_operand:V64SI 1 "gcn_alu_operand" " v")) 1932 (vec_duplicate:V64DI 1933 (match_operand:DI 2 "gcn_alu_operand" "SvDA"))) 1934 (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0") 1935 (match_operand:DI 4 "gcn_exec_reg_operand" " e"))) 1936 (clobber (match_scratch:V64SI 5 "= &v"))] 1937 "" 1938 "#" 1939 "reload_completed" 1940 [(const_int 0)] 1941 { 1942 rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0); 1943 rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1); 1944 rtx left = operands[1]; 1945 rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0); 1946 rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1); 1947 rtx exec = operands[4]; 1948 rtx tmp = operands[5]; 1949 1950 rtx old_lo, old_hi; 1951 if (GET_CODE (operands[3]) == UNSPEC) 1952 { 1953 old_lo = old_hi = gcn_gen_undef (V64SImode); 1954 } 1955 else 1956 { 1957 old_lo = gcn_operand_part (V64DImode, operands[3], 0); 1958 old_hi = gcn_operand_part (V64DImode, operands[3], 1); 1959 } 1960 1961 rtx undef = gcn_gen_undef (V64SImode); 1962 1963 emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec)); 1964 emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo, 1965 old_hi, exec)); 1966 emit_insn (gen_mulv64si3_exec (tmp, 
left, right_hi, undef, exec)); 1967 emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec)); 1968 DONE; 1969 }) 1970 1971;; }}} 1972;; {{{ ALU generic case 1973 1974(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI]) 1975 1976(define_code_iterator bitop [and ior xor]) 1977(define_code_iterator shiftop [ashift lshiftrt ashiftrt]) 1978(define_code_iterator minmaxop [smin smax umin umax]) 1979 1980(define_insn "<expander><mode>2<exec>" 1981 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v") 1982 (bitunop:VEC_1REG_INT_MODE 1983 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))] 1984 "" 1985 "v_<mnemonic>0\t%0, %1" 1986 [(set_attr "type" "vop1") 1987 (set_attr "length" "8")]) 1988 1989(define_insn "<expander><mode>3<exec>" 1990 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD") 1991 (bitop:VEC_1REG_INT_MODE 1992 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" 1993 "% v, 0") 1994 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" 1995 "vSvB, v")))] 1996 "" 1997 "@ 1998 v_<mnemonic>0\t%0, %2, %1 1999 ds_<mnemonic>0\t%A0, %2%O0" 2000 [(set_attr "type" "vop2,ds") 2001 (set_attr "length" "8,8")]) 2002 2003(define_insn_and_split "<expander>v64di3" 2004 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") 2005 (bitop:V64DI 2006 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") 2007 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))] 2008 "" 2009 "@ 2010 # 2011 ds_<mnemonic>0\t%A0, %2%O0" 2012 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))" 2013 [(set (match_dup 3) 2014 (bitop:V64SI (match_dup 5) (match_dup 7))) 2015 (set (match_dup 4) 2016 (bitop:V64SI (match_dup 6) (match_dup 8)))] 2017 { 2018 operands[3] = gcn_operand_part (V64DImode, operands[0], 0); 2019 operands[4] = gcn_operand_part (V64DImode, operands[0], 1); 2020 operands[5] = gcn_operand_part (V64DImode, operands[1], 0); 2021 operands[6] = gcn_operand_part 
(V64DImode, operands[1], 1); 2022 operands[7] = gcn_operand_part (V64DImode, operands[2], 0); 2023 operands[8] = gcn_operand_part (V64DImode, operands[2], 1); 2024 } 2025 [(set_attr "type" "vmult,ds") 2026 (set_attr "length" "16,8")]) 2027 2028(define_insn_and_split "<expander>v64di3_exec" 2029 [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD") 2030 (vec_merge:V64DI 2031 (bitop:V64DI 2032 (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD") 2033 (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")) 2034 (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand" 2035 " U0,U0") 2036 (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))] 2037 "!memory_operand (operands[0], VOIDmode) 2038 || (rtx_equal_p (operands[0], operands[1]) 2039 && register_operand (operands[2], VOIDmode))" 2040 "@ 2041 # 2042 ds_<mnemonic>0\t%A0, %2%O0" 2043 "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))" 2044 [(set (match_dup 5) 2045 (vec_merge:V64SI 2046 (bitop:V64SI (match_dup 7) (match_dup 9)) 2047 (match_dup 11) 2048 (match_dup 4))) 2049 (set (match_dup 6) 2050 (vec_merge:V64SI 2051 (bitop:V64SI (match_dup 8) (match_dup 10)) 2052 (match_dup 12) 2053 (match_dup 4)))] 2054 { 2055 operands[5] = gcn_operand_part (V64DImode, operands[0], 0); 2056 operands[6] = gcn_operand_part (V64DImode, operands[0], 1); 2057 operands[7] = gcn_operand_part (V64DImode, operands[1], 0); 2058 operands[8] = gcn_operand_part (V64DImode, operands[1], 1); 2059 operands[9] = gcn_operand_part (V64DImode, operands[2], 0); 2060 operands[10] = gcn_operand_part (V64DImode, operands[2], 1); 2061 operands[11] = gcn_operand_part (V64DImode, operands[3], 0); 2062 operands[12] = gcn_operand_part (V64DImode, operands[3], 1); 2063 } 2064 [(set_attr "type" "vmult,ds") 2065 (set_attr "length" "16,8")]) 2066 2067(define_insn "<expander>v64si3<exec>" 2068 [(set (match_operand:V64SI 0 "register_operand" "= v") 2069 (shiftop:V64SI 2070 (match_operand:V64SI 1 "gcn_alu_operand" " v") 2071 
(vec_duplicate:V64SI 2072 (match_operand:SI 2 "gcn_alu_operand" "SvB"))))] 2073 "" 2074 "v_<revmnemonic>0\t%0, %2, %1" 2075 [(set_attr "type" "vop2") 2076 (set_attr "length" "8")]) 2077 2078(define_insn "v<expander>v64si3<exec>" 2079 [(set (match_operand:V64SI 0 "register_operand" "=v") 2080 (shiftop:V64SI 2081 (match_operand:V64SI 1 "gcn_alu_operand" " v") 2082 (match_operand:V64SI 2 "gcn_alu_operand" "vB")))] 2083 "" 2084 "v_<revmnemonic>0\t%0, %2, %1" 2085 [(set_attr "type" "vop2") 2086 (set_attr "length" "8")]) 2087 2088(define_insn "<expander><mode>3<exec>" 2089 [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD") 2090 (minmaxop:VEC_1REG_INT_MODE 2091 (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" 2092 "% v, 0") 2093 (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand" 2094 "vSvB, v")))] 2095 "" 2096 "@ 2097 v_<mnemonic>0\t%0, %2, %1 2098 ds_<mnemonic>0\t%A0, %2%O0" 2099 [(set_attr "type" "vop2,ds") 2100 (set_attr "length" "8,8")]) 2101 2102;; }}} 2103;; {{{ FP binops - special cases 2104 2105; GCN does not directly provide a DFmode subtract instruction, so we do it by 2106; adding the negated second operand to the first. 

; Vector DFmode subtraction.  GCN has no v_sub_f64, so subtract by adding
; the negated second operand (see the comment above).  Two alternatives
; allow the B-class (broadcast/immediate) operand on either side.
(define_insn "subv64df3<exec>"
  [(set (match_operand:V64DF 0 "register_operand"  "=  v,   v")
	(minus:V64DF
	  (match_operand:V64DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])

; Scalar DFmode subtraction, using the same add-the-negation trick.
;
; Bug fix: this pattern was previously named "subdf".  The optab machinery
; only looks for the standard pattern name "subdf3", so under the old name
; the pattern could never be used to expand a scalar DFmode subtraction.
; Nothing referenced gen_subdf, so the rename is backward-compatible.
(define_insn "subdf3"
  [(set (match_operand:DF 0 "register_operand"  "=  v,   v")
	(minus:DF
	  (match_operand:DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])

;; }}}
;; {{{ FP binops - generic

(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
(define_mode_iterator FP_MODE [HF SF DF])
(define_mode_iterator FP_1REG_MODE [HF SF])

(define_code_iterator comm_fp [plus mult smin smax])
(define_code_iterator nocomm_fp [minus])
(define_code_iterator all_fp [plus mult minus smin smax])

; Commutative FP binops, vector form.  The '%' on operand 1 lets the
; compiler swap the operands to satisfy the constraints.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(comm_fp:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "%  v")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])

; Commutative FP binops, scalar form.  The second alternative is a
; read-modify-write on an LDS location (RL), emitted as a ds op.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand"    "=  v,  RL")
	(comm_fp:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "%  v,   0")
	  (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
  v_<mnemonic>0\t%0, %2, %1
  v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])

; Non-commutative FP binops, vector form (HF/SF only; DFmode subtraction
; is handled by subv64df3 above).  The second alternative uses the
; reversed-operand mnemonic so the B-class operand can appear on either
; side.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:VEC_FP_1REG_MODE
	  (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

; Non-commutative FP binops, scalar form (HF/SF only).
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:FP_1REG_MODE
	  (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])

;; }}}
;; {{{ FP unops

; FP absolute value via add-with-source-modifier: |%1| + 0.
(define_insn "abs<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"		 "=v")
	(abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

(define_insn "abs<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(abs:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; FP negation via add-with-source-modifier: -%1 + 0.
(define_insn "neg<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(neg:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Vector square root.  Gated on -funsafe-math-optimizations; NOTE(review):
; presumably because v_sqrt's result does not meet the accuracy the
; languages require -- confirm against the ISA manual.
(define_insn "sqrt<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v")
	(sqrt:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Scalar square root; same -funsafe-math-optimizations gate as the vector
; form above.
(define_insn "sqrt<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(sqrt:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

;; }}}
;; {{{ FP fused multiply and add

; Fused multiply-add, vector form.  The alternatives permit the single
; allowed scalar/immediate (SvA) operand in either the multiplier or the
; addend position; '%' marks operands 1 and 2 as commutative.
(define_insn "fma<mode>4<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; FMA with negated second multiplicand, implemented with the VOP3 source
; negation modifier (-%2).  Used by the div<mode>3 Newton-Raphson step.
(define_insn "fma<mode>4_negop2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	   "=  v,   v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Scalar FMA; mirrors the vector form above.
(define_insn "fma<mode>4"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Scalar FMA with negated second multiplicand.
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:FP_MODE 0 "register_operand"    "=  v,   v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:FP_MODE
	    (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

;; }}}
;; {{{ FP division

; Approximate reciprocal, vector form: matches 1.0/x written in RTL as
; (div (vec_duplicate (float 1)) x) and emits v_rcp.
(define_insn "recip<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v")
	(div:VEC_FP_MODE
	  (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Approximate reciprocal, scalar form.
(define_insn "recip<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(div:FP_MODE
	  (float:FP_MODE (const_int 1))
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

;; Do division via a = b * 1/c
;; The v_rcp_* instructions are not sufficiently accurate on their own,
;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
;; which the ISA manual says is enough to improve the reciprocal accuracy.
;;
;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.

; Vector FP division under -freciprocal-math: compute 1/c with v_rcp,
; refine it with one Newton-Raphson round (two FMAs folded into
; fma..._negop2 + mul), then multiply by b.
(define_expand "div<mode>3"
  [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* If the dividend is a constant vector of -1, the refined reciprocal
       is written straight to operands[0] and the final multiply is
       skipped.  NOTE(review): this tests dconstm1 (-1.0), not dconst1;
       if a plain reciprocal (1/c) is intended the sign handling must come
       from elsewhere -- confirm.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
		   && real_identical
		        (CONST_DOUBLE_REAL_VALUE
			  (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })

; Scalar FP division; same scheme as the vector expander above.
(define_expand "div<mode>3"
  [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = const_double_from_real_value (dconst2, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* Same dconstm1 check as the vector case; see the note there.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
				      &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })

;; }}}
;; {{{ Int/FP conversions

(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])

(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])

(define_code_iterator cvt_op [fix unsigned_fix
			      float unsigned_float
			      float_extend float_truncate])
; Maps each conversion code to its standard optab name prefix.
(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
			    (float "float") (unsigned_float "floatuns")
			    (float_extend "extend") (float_truncate "trunc")])
; Maps each conversion code to the %-directives that print the
; source/destination type suffixes of the v_cvt mnemonic.
(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
				(float "%i0%i1") (unsigned_float "%i0%u1")
				(float_extend "%i0%i1")
				(float_truncate "%i0%i1")])

; Scalar conversions.  The iterators generate all from/to mode pairs;
; gcn_valid_cvt_p rejects the combinations the hardware cannot do.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set (match_operand:CVT_TO_MODE 0 "register_operand"	   "=  v")
	(cvt_op:CVT_TO_MODE
	  (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

; Vector conversions; same structure as the scalar form above.
(define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
  [(set (match_operand:VCVT_TO_MODE 0 "register_operand"    "=  v")
	(cvt_op:VCVT_TO_MODE
	  (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])

;; }}}
;; {{{ Int/int conversions

;; GCC can already do these for scalar types, but not for vector types.
;; Unfortunately you can't just do SUBREG on a vector to select the low part,
;; so there must be a few tricks here.

; Truncate V64DI to V64SI: after reload this is just a move from the low
; SImode part of each lane (length 0 when source and destination already
; overlap, i.e. the "0" alternative).
(define_insn_and_split "vec_truncatev64div64si"
  [(set (match_operand:V64SI 0 "register_operand"   "=v,&v")
	(truncate:V64SI
	  (match_operand:V64DI 1 "register_operand" " 0, v")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])

; Truncate with execution-mask merge; splits to a vec_merge of the low
; part under the same EXEC mask.
(define_insn_and_split "vec_truncatev64div64si_exec"
  [(set (match_operand:V64SI 0 "register_operand"	     "=v,&v")
	(vec_merge:V64SI
	  (truncate:V64SI
	    (match_operand:V64DI 1 "register_operand"	     " 0, v"))
	  (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
	  (match_operand:DI 3 "gcn_exec_operand"	     " e, e")))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 0)
		   (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
	      (clobber (scratch:V64DI))])]
  {
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])

;; }}}
;; {{{ Vector comparison/merge

; Lane-wise vector comparison producing a 64-bit lane mask in VCC (cV),
; EXEC (e, via v_cmpx) or an SGPR pair (Sg, via the VOP3a encoding).
; %E1 prints the comparison code's mnemonic suffix.
; NOTE(review): the cV scratch on the v_cmpx alternatives appears to model
; v_cmpx also writing VCC -- confirm against the ISA manual.
(define_insn "vec_cmp<mode>di"
  [(set (match_operand:DI 0 "register_operand"	      "=cV,cV,  e, e,Sg,Sg")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						      "vSv, B,vSv, B, v,vA")
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						      "  v, v,  v, v,vA, v")]))
   (clobber (match_scratch:DI 4		      "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])

(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "comparison_operator"
     [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.  */
    emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
				    operands[3]));
    DONE;
  })

; As vec_cmp<mode>di, but the result is ANDed with an existing EXEC-style
; mask so inactive lanes compare as false.
(define_insn "vec_cmp<mode>di_exec"
  [(set (match_operand:DI 0 "register_operand"	       "=cV,cV,  e, e,Sg,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						       "vSv, B,vSv, B, v,vA")
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						       "  v, v,  v, v,vA, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"   "  e, e,  e, e, e, e")))
   (clobber (match_scratch:DI 5		       "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])

; Comparison against a broadcast scalar in operand 2.
(define_insn "vec_cmp<mode>di_dup"
  [(set (match_operand:DI 0 "register_operand"		   "=cV,cV, e,e,Sg")
	(match_operator 1 "comparison_operator"
	  [(vec_duplicate:VEC_1REG_MODE
	     (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							   " Sv, B,Sv,B, A"))
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							   "  v, v, v,v, v")]))
   (clobber (match_scratch:DI 4			   "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])

; Broadcast-scalar comparison masked by EXEC, as above.
(define_insn "vec_cmp<mode>di_dup_exec"
  [(set (match_operand:DI 0 "register_operand"		    "=cV,cV, e,e,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(vec_duplicate:VEC_1REG_MODE
	       (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							    " Sv, B,Sv,B, A"))
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							    "  v, v, v,v, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "  e, e, e,e, e")))
   (clobber (match_scratch:DI 5			    "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])

; Select between two vectors under a DImode lane mask; expands straight
; to the vec_merge pattern (with a V64DI scratch for the double-reg case).
(define_expand "vcond_mask_<mode>di"
  [(parallel
    [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
	  (vec_merge:VEC_REG_MODE
	    (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
	    (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
	    (match_operand:DI 3 "register_operand" "")))
     (clobber (scratch:V64DI))])]
  ""
  "")

; vcond: compare operands 4/5, then merge operands 1/2 under the result.
(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
  [(match_operand:VEC_1REG_MODE 0 "register_operand")
   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
				    operands[5]));
    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
					tmp));
    DONE;
  })

; As vcond, with the comparison masked by an EXEC operand.
(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
  [(match_operand:VEC_1REG_MODE 0 "register_operand")
   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
					 operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
					tmp));
    DONE;
  })

; Unsigned vcond; relies on the operator in operands[3] being the unsigned
; variant (see the vec_cmpu comment above).
(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
				    operands[5]));
    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
					tmp));
    DONE;
  })

(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
					 operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
					tmp));
    DONE;
  })

;; }}}
;; {{{ Fully masked loop support

; while_ult: build a lane mask with bit i set iff op1 + i < op2.
; Constant bounds fold to an immediate mask; otherwise the lane-id
; register (VGPR 1, preloaded with 0,1,2,...) is offset by op1 and
; compared against op2.
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    if (GET_CODE (operands[1]) != CONST_INT
	|| GET_CODE (operands[2]) != CONST_INT)
      {
	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
	rtx tmp = _0_1_2_3;
	if (GET_CODE (operands[1]) != CONST_INT
	    || INTVAL (operands[1]) != 0)
	  {
	    tmp = gen_reg_rtx (V64SImode);
	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
	  }
	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
					   gen_rtx_GT (VOIDmode, 0, 0),
					   operands[2], tmp));
      }
    else
      {
	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
	HOST_WIDE_INT mask = (diff >= 64 ? -1
			      : ~((unsigned HOST_WIDE_INT)-1 << diff));
	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })

; Masked load: implemented as a gather from a scalar base address with the
; mask as EXEC; inactive lanes keep an undefined value.
(define_expand "maskload<mode>di"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:VEC_REG_MODE 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
					   exec));
    DONE;
  })

; Masked store: the scatter counterpart of maskload above.
(define_expand "maskstore<mode>di"
  [(match_operand:VEC_REG_MODE 0 "memory_operand")
   (match_operand:VEC_REG_MODE 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
    DONE;
  })

; Masked gather load: V64DI offset vectors are first truncated to V64SI
; (under the mask) before handing off to the gather pattern.
(define_expand "mask_gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[2]) == V64DImode)
      {
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[2] = tmp;
      }

    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
				      operands[3], operands[4], exec));
    DONE;
  })

; Masked scatter store; mirrors mask_gather_load above.
(define_expand "mask_scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);

    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[1]) == V64DImode)
      {
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[1] = tmp;
      }

    emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
				       operands[3], operands[4], exec));
    DONE;
  })

; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
2756(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF]) 2757(define_mode_iterator COND_INT_MODE [V64SI V64DI]) 2758 2759(define_code_iterator cond_op [plus minus]) 2760 2761(define_expand "cond_<expander><mode>" 2762 [(match_operand:COND_MODE 0 "register_operand") 2763 (match_operand:DI 1 "register_operand") 2764 (cond_op:COND_MODE 2765 (match_operand:COND_MODE 2 "gcn_alu_operand") 2766 (match_operand:COND_MODE 3 "gcn_alu_operand")) 2767 (match_operand:COND_MODE 4 "register_operand")] 2768 "" 2769 { 2770 operands[1] = force_reg (DImode, operands[1]); 2771 operands[2] = force_reg (<MODE>mode, operands[2]); 2772 2773 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], 2774 operands[3], operands[4], 2775 operands[1])); 2776 DONE; 2777 }) 2778 2779(define_code_iterator cond_bitop [and ior xor]) 2780 2781(define_expand "cond_<expander><mode>" 2782 [(match_operand:COND_INT_MODE 0 "register_operand") 2783 (match_operand:DI 1 "register_operand") 2784 (cond_bitop:COND_INT_MODE 2785 (match_operand:COND_INT_MODE 2 "gcn_alu_operand") 2786 (match_operand:COND_INT_MODE 3 "gcn_alu_operand")) 2787 (match_operand:COND_INT_MODE 4 "register_operand")] 2788 "" 2789 { 2790 operands[1] = force_reg (DImode, operands[1]); 2791 operands[2] = force_reg (<MODE>mode, operands[2]); 2792 2793 emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2], 2794 operands[3], operands[4], 2795 operands[1])); 2796 DONE; 2797 }) 2798 2799;; }}} 2800;; {{{ Vector reductions 2801 2802(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR 2803 UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR 2804 UNSPEC_PLUS_DPP_SHR 2805 UNSPEC_AND_DPP_SHR 2806 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) 2807 2808(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR 2809 UNSPEC_AND_DPP_SHR 2810 UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR]) 2811 2812; FIXME: Isn't there a better way of doing this? 
2813(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR") 2814 (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR") 2815 (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR") 2816 (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR") 2817 (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR") 2818 (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR") 2819 (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR") 2820 (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")]) 2821 2822(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin") 2823 (UNSPEC_SMAX_DPP_SHR "smax") 2824 (UNSPEC_UMIN_DPP_SHR "umin") 2825 (UNSPEC_UMAX_DPP_SHR "umax") 2826 (UNSPEC_PLUS_DPP_SHR "plus") 2827 (UNSPEC_AND_DPP_SHR "and") 2828 (UNSPEC_IOR_DPP_SHR "ior") 2829 (UNSPEC_XOR_DPP_SHR "xor")]) 2830 2831(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0") 2832 (UNSPEC_SMAX_DPP_SHR "v_max%i0") 2833 (UNSPEC_UMIN_DPP_SHR "v_min%u0") 2834 (UNSPEC_UMAX_DPP_SHR "v_max%u0") 2835 (UNSPEC_PLUS_DPP_SHR "v_add%u0") 2836 (UNSPEC_AND_DPP_SHR "v_and%b0") 2837 (UNSPEC_IOR_DPP_SHR "v_or%b0") 2838 (UNSPEC_XOR_DPP_SHR "v_xor%b0")]) 2839 2840(define_expand "reduc_<reduc_op>_scal_<mode>" 2841 [(set (match_operand:<SCALAR_MODE> 0 "register_operand") 2842 (unspec:<SCALAR_MODE> 2843 [(match_operand:VEC_1REG_MODE 1 "register_operand")] 2844 REDUC_UNSPEC))] 2845 "" 2846 { 2847 rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1], 2848 <reduc_unspec>); 2849 2850 /* The result of the reduction is in lane 63 of tmp. */ 2851 emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp)); 2852 2853 DONE; 2854 }) 2855 2856(define_expand "reduc_<reduc_op>_scal_v64di" 2857 [(set (match_operand:DI 0 "register_operand") 2858 (unspec:DI 2859 [(match_operand:V64DI 1 "register_operand")] 2860 REDUC_2REG_UNSPEC))] 2861 "" 2862 { 2863 rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1], 2864 <reduc_unspec>); 2865 2866 /* The result of the reduction is in lane 63 of tmp. 
*/ 2867 emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp)); 2868 2869 DONE; 2870 }) 2871 2872(define_insn "*<reduc_op>_dpp_shr_<mode>" 2873 [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v") 2874 (unspec:VEC_1REG_MODE 2875 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v") 2876 (match_operand:VEC_1REG_MODE 2 "register_operand" "v") 2877 (match_operand:SI 3 "const_int_operand" "n")] 2878 REDUC_UNSPEC))] 2879 "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode) 2880 && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)" 2881 { 2882 return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>", 2883 <reduc_unspec>, INTVAL (operands[3])); 2884 } 2885 [(set_attr "type" "vop_dpp") 2886 (set_attr "length" "8")]) 2887 2888(define_insn_and_split "*<reduc_op>_dpp_shr_v64di" 2889 [(set (match_operand:V64DI 0 "register_operand" "=&v") 2890 (unspec:V64DI 2891 [(match_operand:V64DI 1 "register_operand" "v0") 2892 (match_operand:V64DI 2 "register_operand" "v0") 2893 (match_operand:SI 3 "const_int_operand" "n")] 2894 REDUC_2REG_UNSPEC))] 2895 "" 2896 "#" 2897 "reload_completed" 2898 [(set (match_dup 4) 2899 (unspec:V64SI 2900 [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC)) 2901 (set (match_dup 5) 2902 (unspec:V64SI 2903 [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))] 2904 { 2905 operands[4] = gcn_operand_part (V64DImode, operands[0], 0); 2906 operands[5] = gcn_operand_part (V64DImode, operands[0], 1); 2907 operands[6] = gcn_operand_part (V64DImode, operands[1], 0); 2908 operands[7] = gcn_operand_part (V64DImode, operands[1], 1); 2909 operands[8] = gcn_operand_part (V64DImode, operands[2], 0); 2910 operands[9] = gcn_operand_part (V64DImode, operands[2], 1); 2911 } 2912 [(set_attr "type" "vmult") 2913 (set_attr "length" "16")]) 2914 2915; Special cases for addition. 
2916 2917(define_insn "*plus_carry_dpp_shr_<mode>" 2918 [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand" "=v") 2919 (unspec:VEC_1REG_INT_MODE 2920 [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v") 2921 (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v") 2922 (match_operand:SI 3 "const_int_operand" "n")] 2923 UNSPEC_PLUS_CARRY_DPP_SHR)) 2924 (clobber (reg:DI VCC_REG))] 2925 "" 2926 { 2927 const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0"; 2928 return gcn_expand_dpp_shr_insn (<MODE>mode, insn, 2929 UNSPEC_PLUS_CARRY_DPP_SHR, 2930 INTVAL (operands[3])); 2931 } 2932 [(set_attr "type" "vop_dpp") 2933 (set_attr "length" "8")]) 2934 2935(define_insn "*plus_carry_in_dpp_shr_v64si" 2936 [(set (match_operand:V64SI 0 "register_operand" "=v") 2937 (unspec:V64SI 2938 [(match_operand:V64SI 1 "register_operand" "v") 2939 (match_operand:V64SI 2 "register_operand" "v") 2940 (match_operand:SI 3 "const_int_operand" "n") 2941 (match_operand:DI 4 "register_operand" "cV")] 2942 UNSPEC_PLUS_CARRY_IN_DPP_SHR)) 2943 (clobber (reg:DI VCC_REG))] 2944 "" 2945 { 2946 const char *insn = TARGET_GCN3 ? 
"v_addc%u0" : "v_addc_co%u0"; 2947 return gcn_expand_dpp_shr_insn (V64SImode, insn, 2948 UNSPEC_PLUS_CARRY_IN_DPP_SHR, 2949 INTVAL (operands[3])); 2950 } 2951 [(set_attr "type" "vop_dpp") 2952 (set_attr "length" "8")]) 2953 2954(define_insn_and_split "*plus_carry_dpp_shr_v64di" 2955 [(set (match_operand:V64DI 0 "register_operand" "=&v") 2956 (unspec:V64DI 2957 [(match_operand:V64DI 1 "register_operand" "v0") 2958 (match_operand:V64DI 2 "register_operand" "v0") 2959 (match_operand:SI 3 "const_int_operand" "n")] 2960 UNSPEC_PLUS_CARRY_DPP_SHR)) 2961 (clobber (reg:DI VCC_REG))] 2962 "" 2963 "#" 2964 "reload_completed" 2965 [(parallel [(set (match_dup 4) 2966 (unspec:V64SI 2967 [(match_dup 6) (match_dup 8) (match_dup 3)] 2968 UNSPEC_PLUS_CARRY_DPP_SHR)) 2969 (clobber (reg:DI VCC_REG))]) 2970 (parallel [(set (match_dup 5) 2971 (unspec:V64SI 2972 [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)] 2973 UNSPEC_PLUS_CARRY_IN_DPP_SHR)) 2974 (clobber (reg:DI VCC_REG))])] 2975 { 2976 operands[4] = gcn_operand_part (V64DImode, operands[0], 0); 2977 operands[5] = gcn_operand_part (V64DImode, operands[0], 1); 2978 operands[6] = gcn_operand_part (V64DImode, operands[1], 0); 2979 operands[7] = gcn_operand_part (V64DImode, operands[1], 1); 2980 operands[8] = gcn_operand_part (V64DImode, operands[2], 0); 2981 operands[9] = gcn_operand_part (V64DImode, operands[2], 1); 2982 } 2983 [(set_attr "type" "vmult") 2984 (set_attr "length" "16")]) 2985 2986; Instructions to move a scalar value from lane 63 of a vector register. 
2987(define_insn "mov_from_lane63_<mode>" 2988 [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v") 2989 (unspec:<SCALAR_MODE> 2990 [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")] 2991 UNSPEC_MOV_FROM_LANE63))] 2992 "" 2993 "@ 2994 v_readlane_b32\t%0, %1, 63 2995 v_mov_b32\t%0, %1 wave_ror:1" 2996 [(set_attr "type" "vop3a,vop_dpp") 2997 (set_attr "exec" "none,*") 2998 (set_attr "length" "8")]) 2999 3000(define_insn "mov_from_lane63_v64di" 3001 [(set (match_operand:DI 0 "register_operand" "=Sg,v") 3002 (unspec:DI 3003 [(match_operand:V64DI 1 "register_operand" "v,v")] 3004 UNSPEC_MOV_FROM_LANE63))] 3005 "" 3006 "@ 3007 v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63 3008 * if (REGNO (operands[0]) <= REGNO (operands[1])) \ 3009 return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \ 3010 \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \ 3011 else \ 3012 return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \ 3013 \"v_mov_b32\t%L0, %L1 wave_ror:1\";" 3014 [(set_attr "type" "vop3a,vop_dpp") 3015 (set_attr "exec" "none,*") 3016 (set_attr "length" "8")]) 3017 3018;; }}} 3019;; {{{ Miscellaneous 3020 3021(define_expand "vec_seriesv64si" 3022 [(match_operand:V64SI 0 "register_operand") 3023 (match_operand:SI 1 "gcn_alu_operand") 3024 (match_operand:SI 2 "gcn_alu_operand")] 3025 "" 3026 { 3027 rtx tmp = gen_reg_rtx (V64SImode); 3028 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); 3029 3030 emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2])); 3031 emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1])); 3032 DONE; 3033 }) 3034 3035(define_expand "vec_seriesv64di" 3036 [(match_operand:V64DI 0 "register_operand") 3037 (match_operand:DI 1 "gcn_alu_operand") 3038 (match_operand:DI 2 "gcn_alu_operand")] 3039 "" 3040 { 3041 rtx tmp = gen_reg_rtx (V64DImode); 3042 rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); 3043 3044 emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2])); 3045 emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1])); 3046 DONE; 3047 }) 
3048 3049;; }}} 3050