;; Copyright (C) 2016-2019 Free Software Foundation, Inc.
;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.
;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
;; for more details.
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
;; {{{ Vector iterators
; Vector modes for one vector register
; Vector modes whose elements occupy a single 32-bit vector register per
; lane.  All GCN vectors have 64 lanes.
(define_mode_iterator VEC_1REG_MODE
[V64QI V64HI V64SI V64HF V64SF])
; Identical to VEC_1REG_MODE; a distinct iterator name lets one pattern
; iterate over two vector modes independently (e.g. for conversions).
(define_mode_iterator VEC_1REG_ALT
[V64QI V64HI V64SI V64HF V64SF])
; Integer-only subset of the single-register vector modes.
(define_mode_iterator VEC_1REG_INT_MODE
[V64QI V64HI V64SI])
; Independent copy of VEC_1REG_INT_MODE, as with VEC_1REG_ALT above.
(define_mode_iterator VEC_1REG_INT_ALT
[V64QI V64HI V64SI])
; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
[V64DI V64DF])
; All of above
(define_mode_iterator VEC_REG_MODE
[V64QI V64HI V64SI V64HF V64SF ; Single reg
V64DI V64DF]) ; Double reg
; Lower-case name of each vector mode's element mode, for use as a suffix
; in pattern names.
(define_mode_attr scalar_mode
[(V64QI "qi") (V64HI "hi") (V64SI "si")
(V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
; Upper-case name of each vector mode's element mode, for use where a
; machine mode is required (e.g. match_operand mode fields).
(define_mode_attr SCALAR_MODE
[(V64QI "QI") (V64HI "HI") (V64SI "SI")
(V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
;; }}}
;; {{{ Substitutions
; Each subst attribute links a name usable inside a pattern name to one of
; the define_subst transformations below.  Expanding the attribute produces
; two variants of the pattern: the plain one (empty suffix) and a masked
; one whose name gains the "_exec" suffix.
(define_subst_attr "exec" "vec_merge"
"" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
"" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
"" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
"" "_exec")
; Transform a plain (set dst src) vector pattern into a masked variant:
; the result is merged with a previous-value operand (3, which may be an
; unspec meaning "undefined", constraint "U0") under control of an EXEC
; mask register (operand 4).
; NOTE(review): operand number 2 is skipped, presumably so that operand
; numbering stays consistent with vec_merge_with_clobber below -- confirm.
(define_subst "vec_merge"
[(set (match_operand:VEC_REG_MODE 0)
(match_operand:VEC_REG_MODE 1))]
""
[(set (match_dup 0)
(vec_merge:VEC_REG_MODE
(match_dup 1)
(match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
; As "vec_merge", but for patterns that also clobber a register; the
; clobber is carried over unchanged into the masked variant.
(define_subst "vec_merge_with_clobber"
[(set (match_operand:VEC_REG_MODE 0)
(match_operand:VEC_REG_MODE 1))
(clobber (match_operand 2))]
""
[(set (match_dup 0)
(vec_merge:VEC_REG_MODE
(match_dup 1)
(match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
(match_operand:DI 4 "gcn_exec_reg_operand" "e")))
(clobber (match_dup 2))])
; As "vec_merge", but for patterns whose second SET produces a DImode
; condition output (e.g. a carry bitmask in VCC).  In the masked variant
; the vector result is merged under EXEC, and the scalar output is also
; ANDed with EXEC so that lanes outside the mask do not contribute.
(define_subst "vec_merge_with_vcc"
[(set (match_operand:VEC_REG_MODE 0)
(match_operand:VEC_REG_MODE 1))
(set (match_operand:DI 2)
(match_operand:DI 3))]
""
[(parallel
[(set (match_dup 0)
(vec_merge:VEC_REG_MODE
(match_dup 1)
(match_operand:VEC_REG_MODE 4
"gcn_register_or_unspec_operand" "U0")
(match_operand:DI 5 "gcn_exec_reg_operand" "e")))
(set (match_dup 2)
(and:DI (match_dup 3)
(reg:DI EXEC_REG)))])])
; Transform an unmasked scatter-store pattern into its masked variant by
; appending an EXEC mask register (new operand 4) to the store unspec.
(define_subst "scatter_store"
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_operand 0)
(match_operand 1)
(match_operand 2)
(match_operand 3)]
UNSPEC_SCATTER))]
""
[(set (mem:BLK (scratch))
(unspec:BLK
[(match_dup 0)
(match_dup 1)
(match_dup 2)
(match_dup 3)
(match_operand:DI 4 "gcn_exec_reg_operand" "e")]
UNSPEC_SCATTER))])
;; }}}
;; {{{ Vector moves
; This is the entry point for all vector register moves. Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.
; Defect fixed: the "<mode>"/"<MODE>" iterator substitutions had been
; stripped, leaving an unsuffixed (and thus duplicate) pattern name and
; invalid C such as "force_reg (mode, ...)".  Restored throughout.
(define_expand "mov<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(match_operand:VEC_REG_MODE 1 "general_operand"))]
  ""
  {
    /* Before register allocation, stores become scatters and loads become
       gathers, via the any-address "expr" patterns.  */
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
	DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[1],
							scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
	DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
	/* During/after reload, memory moves go via the SGPR-base pattern
	   with an explicit V64DI scratch for the address calculation.  */
	gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (V64DImode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
	DONE;
      }
  })
; A pseudo instruction that helps LRA use the "U0" constraint.
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; A zero-length pseudo insn that "loads" an undefined value, letting LRA
; satisfy the "U0" (unspec-or-same-register) constraint.
(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_REG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])
; Defect fixed: restored the stripped "<mode>" suffix (the name otherwise
; collides with the two-register variant below).
; Plain single-register vector move; alternative 1 allows a 32-bit literal
; constant (constraint "B"), hence the 8-byte length.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; Masked single-register vector move.  Uses v_mov under EXEC, v_cndmask
; when merging with live previous values, and splits (via '#') for the
; memory alternatives, which need the V64DI address scratch.
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
							 "=v, v, v, v, v, m")
	(vec_merge:VEC_1REG_MODE
	  (match_operand:VEC_1REG_MODE 1 "general_operand"
							 "vA, B, v,vA, m, v")
	  (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
							 "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"	 " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4			 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov<mode>_exec_match"
; [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
; (vec_merge:VEC_1REG_MODE
; (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; v_mov_b32\t%0, %1
; v_mov_b32\t%0, %1
; #
; #"
; [(set_attr "type" "vop1,vop1,*,*")
; (set_attr "length" "4,8,16,16")])
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; Plain two-register vector move, emitted as two 32-bit moves.  The halves
; are ordered low-then-high or high-then-low to avoid corrupting the
; source when the register ranges overlap.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"     "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; Masked two-register vector move; as the single-register mov<mode>_exec,
; but each alternative emits a pair of 32-bit operations, ordered to cope
; with overlapping source/destination register ranges.
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						      "= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						      "vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						      " U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand"      "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		      "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}
    /* Memory alternatives are split later.  */
    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.
;(define_insn "*mov<mode>_exec_match"
; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
; (vec_merge:VEC_2REG_MODE
; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
; (match_dup 0)
; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
; "!MEM_P (operands[0]) || REG_P (operands[1])"
; "@
; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
; else \
; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
; #
; #"
; [(set_attr "type" "vmult,*,*")
; (set_attr "length" "16,16,16")])
; A SGPR-base load looks like:
; v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
; vT = v1 << log2(element-size)
; vT += Sv
; flat_load v, vT
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; Single-register move with an SGPR base address and a V64DI scratch for
; address expansion (see the comment above); memory alternatives are split
; into gather/scatter later.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])
; Defect fixed: restored the stripped "<mode>" suffix in the pattern name.
; Two-register variant of mov<mode>_sgprbase; the register alternative
; orders the two half-moves to survive overlapping register ranges.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload. It allows a reload with a scratch register.
; Defect fixed: restored the stripped "<mode>" substitutions in the pattern
; name and the gen_mov<mode>_sgprbase call.
(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_REG_MODE 1 "memory_operand"	 "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"	 "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })
; reload_out is similar to reload_in, above.
; Defect fixed: restored the stripped "<mode>" substitutions, as in
; reload_in<mode> above.
(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"	 "= m")
	(match_operand:VEC_REG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"	 "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })
; Expand scalar addresses into gather/scatter patterns
; Defect fixed: restored the stripped "<MODE>mode" argument in the call to
; gcn_expand_scalar_to_vector_address.
; Split an SGPR-base store into an unmasked scatter once the scratch
; register is known.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })
; Defect fixed: restored the stripped "<MODE>mode" argument in the call to
; gcn_expand_scalar_to_vector_address.
; Split a masked (vec_merge) store into an EXEC-masked scatter.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "general_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })
; Defect fixed: restored the stripped "<MODE>mode" argument in the call to
; gcn_expand_scalar_to_vector_address.
; Split an SGPR-base load into an unmasked gather.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
			      (mem:BLK (scratch))]
			     UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })
; Defect fixed: restored the stripped "<MODE>mode" argument in the call to
; gcn_expand_scalar_to_vector_address.
; Split a masked (vec_merge) load into a gather wrapped in a vec_merge.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "memory_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				(mem:BLK (scratch))]
			       UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })
; TODO: Add zero/sign extending variants.
;; }}}
;; {{{ Lane moves
; v_writelane and v_readlane work regardless of exec flags.
; We allow source to be scratch.
;
; FIXME these should take A immediates
; Defect fixed: restored the stripped "<mode>" name suffix and the
; "<SCALAR_MODE>" element mode of operand 1.
; Write a scalar into one lane selected by (1 << operand2), via
; v_writelane (ignores EXEC; see the section comment above).
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	   "= v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	   " Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								   " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		   "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
; Defect fixed: restored the stripped "<mode>" name suffix and the
; "<SCALAR_MODE>" element mode of operand 1.
; Two-register variant: writes the low and high halves with two
; v_writelane instructions.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	   "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	   " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								   " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		   "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; Defect fixed: restored the stripped "<mode>" name suffix and the
; "<SCALAR_MODE>" element mode of operand 1.
; Standard vec_set pattern; matched by the *vec_set<mode> insns above.
(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(vec_merge:VEC_REG_MODE
	  (vec_duplicate:VEC_REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")
; Defect fixed: restored the stripped "<mode>" name suffix and the
; "<SCALAR_MODE>" element mode of operand 1.
; As *vec_set<mode>, but with the lane mask pre-computed as a power-of-two
; constant; the lane number is recovered with exact_log2.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	     "=v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     "Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								     "U0")
	  (match_operand:SI 2 "const_int_operand"		     " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; Defect fixed: restored the stripped "<mode>" name suffix and the
; "<SCALAR_MODE>" element mode of operand 1.
; Two-register variant of *vec_set<mode>_1.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	     "=v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     "Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								     "U0")
	  (match_operand:SI 2 "const_int_operand"		     " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; Defect fixed: restored the stripped "<mode><exec>" name suffixes and the
; "<SCALAR_MODE>" element mode (the "<exec>" subst attr generates the
; masked "_exec" variant of this broadcast).
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"  "=v")
	(vec_duplicate:VEC_1REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
; Defect fixed: restored the stripped "<mode><exec>" name suffixes and the
; "<SCALAR_MODE>" element mode.  Two-register broadcast: both halves are
; moved separately.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"   "= v")
	(vec_duplicate:VEC_2REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])
; Defect fixed: restored the stripped "<mode><scalar_mode>" name suffixes
; and the "<SCALAR_MODE>" result mode.
; Read one lane into a scalar register via v_readlane (ignores EXEC).
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_1REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; Defect fixed: restored the stripped "<mode><scalar_mode>" name suffixes
; and the "<SCALAR_MODE>" result mode.  Two-register variant: reads both
; halves of the lane.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_2REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
; Defect fixed: restored the stripped "<mode>" name suffix.
; NOTE(review): the extraction that mangled this file removed all <...>
; spans, so the exact original suffix cannot be recovered from this copy;
; "<mode>" matches the optab naming used elsewhere here -- confirm against
; upstream gcn-valu.md.
(define_expand "vec_init<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })
;; }}}
;; {{{ Scatter / Gather
;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec. The unspec formats are as follows:
;;
;; (unspec:V64??
;;   [(<address expression>)
;;    (<addr_space_t>)
;;    (<glc flag>)
;;    (mem:BLK (scratch))]
;;   UNSPEC_GATHER)
;;
;; (unspec:BLK
;;    [(<address expression>)
;;     (<source register>)
;;     (<addr_space_t>)
;;     (<glc flag>)
;;     (<exec mask>)]
;;   UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;; unknown memory read is taking place. Stores are expected to use a similar
;; mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;; fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.
; Defect fixed: restored the stripped "<mode>" substitutions in the pattern
; name and the gen_gather<mode>_* calls.
; Standard gather_load optab: base (1), offsets (2), scale (4), signedness
; flag (3).  The offsets either fit V64SI (2-offset form) or are widened
; to full V64DI addresses (1-offset form).
(define_expand "gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
						const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
						 addr, const0_rtx, const0_rtx,
						 const0_rtx));
    DONE;
  })
; Defect fixed: restored the stripped "<mode>"/"<MODE>" substitutions.
; EXEC-masked gather; unselected lanes of the result are undefined
; (gcn_gen_undef supplies the vec_merge "previous value").
(define_expand "gather<mode>_exec"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
						     const0_rtx, const0_rtx,
						     const0_rtx, undefmode,
						     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
						      addr, const0_rtx,
						      const0_rtx, const0_rtx,
						      undefmode, operands[5]));
    DONE;
  })
; Allow any address expression
; Defect fixed: restored the stripped "<mode><exec>" name suffixes (the
; <exec> subst provides the masked variant used by masked loads).
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand 1 "")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  ""
  {})
; Defect fixed: restored the stripped "<mode><exec>" name suffixes (the
; _exec variant is generated from <exec> and called by gather<mode>_exec).
; Gather via full V64DI per-lane addresses plus a constant offset.
; Operand 3 is the address space, operand 4 the glc (volatile) flag.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
		       (vec_duplicate:V64DI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
	|| ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();
    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
; Defect fixed: restored the stripped "<mode><exec>" name suffixes.
; Gather from LDS/GDS scratchpad memory: 32-bit per-lane addresses plus a
; 16-bit constant offset, using ds_read.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
		       (vec_duplicate:V64SI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
; Defect fixed: restored the stripped "<mode><exec>" name suffixes.
; Gather using a scalar base (1), sign-extended 32-bit per-lane offsets
; (2) and a constant offset (3); GLOBAL address space only.
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	    "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 1 "register_operand"		    "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 2 "register_operand"	    " v")))
	     (vec_duplicate:V64DI (match_operand 3 "immediate_operand"
								    " n")))
	   (match_operand 4 "immediate_operand"			    " n")
	   (match_operand 5 "immediate_operand"			    " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		 "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
; Defect fixed: restored the stripped "<mode>" substitutions in the pattern
; name and the gen_scatter<mode>_* calls.
; Standard scatter_store optab; mirrors gather_load<mode> above.
(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })
; Defect fixed: restored the stripped "<mode>" substitutions.
; EXEC-masked scatter: only lanes selected by operand 5 are stored.
(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })
; Allow any address expression
; Defect fixed: restored the stripped "<mode><exec_scatter>" name suffixes
; (the subst provides the masked variant used by masked stores).
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_REG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})
; Defect fixed: restored the stripped "<mode><exec_scatter>" name suffixes
; (the _exec variant is called by scatter<mode>_exec above).
; Scatter via full V64DI per-lane addresses plus a constant offset.
; Operand 3 is the address space, operand 4 the glc (volatile) flag.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
		       (vec_duplicate:V64DI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
   || (AS_GLOBAL_P (INTVAL (operands[3]))
       && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;"
		   "s_waitcnt\texpcnt(0)", glc);
	else
	  sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)",
		   glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
	       "s_waitcnt\texpcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
; Defect fixed: restored the stripped "<mode><exec_scatter>" name suffixes.
; Scatter to LDS/GDS scratchpad memory using ds_write; mirrors the
; gather _ds variant above.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
		       (vec_duplicate:V64SI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
; Defect fixed: restored the stripped "<mode><exec_scatter>" name suffixes.
; Scatter using a scalar base (0), sign-extended 32-bit per-lane offsets
; (1) and a constant offset (2); GLOBAL address space only.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 0 "register_operand"		       "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 1 "register_operand"	       " v")))
	     (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
								       " n")))
	   (match_operand:VEC_REG_MODE 3 "register_operand"	       " v")
	   (match_operand 4 "immediate_operand"			       " n")
	   (match_operand 5 "immediate_operand"			       " n")]
	  UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	   but a 32-bit value would be correct.  */
	int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
	sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
		 "s_waitcnt\texpcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
;; }}}
;; {{{ Permutations
; Backwards lane-permute for single-register vector modes: each enabled lane
; of the result takes operand 2's value from the lane addressed (in bytes,
; i.e. lane*4) by operand 1.  Uses the LDS permute hardware, hence the
; lgkmcnt wait.
; FIX(review): restored the "<mode>" suffix lost from the insn name; without
; it this pattern's name collides with the VEC_2REG_MODE variant below, and
; one define_insn per VEC_1REG_MODE mode cannot be generated.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])
; Backwards lane-permute for double-register vector modes: split after reload
; into two single-register ds_bpermute operations, one per 32-bit half.
; The earlyclobber ("=&v") plus the "v0" tie keeps the halves from
; overwriting the source before both permutes have read it.
; FIX(review): restored the "<mode>" name suffix (the un-suffixed name
; duplicated the VEC_1REG_MODE pattern above) and the "<MODE>mode"
; substitutions in the split code -- a bare "mode" is not valid C here.
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
;; }}}
;; {{{ ALU special case: add/sub
; Vector 32-bit add; the hardware always writes the per-lane carry to VCC,
; so VCC is clobbered even though the result is unused here.
; The "%" on operand 1 marks the operation commutative.
; NOTE(review): the name plausibly lost an "<exec_clobber>" subst suffix
; (the splitters below call gen_addv64si3 and rely on _exec variants).
(define_insn "addv64si3"
  [(set (match_operand:V64SI 0 "register_operand" "= v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "% v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
; Vector 32-bit add with a broadcast scalar addend (operand 2 duplicated
; into every lane); VCC is clobbered by the implicit carry-out.
(define_insn "addv64si3_dup"
  [(set (match_operand:V64SI 0 "register_operand" "= v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand" "SvB"))
	  (match_operand:V64SI 1 "register_operand" " v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
; Vector 32-bit add that also delivers the per-lane unsigned carry-out
; (modelled as (ltu (plus a b) a)) into operand 3: either VCC (vop2 form)
; or an arbitrary SGPR pair (vop3b form).  Used as the low half of the
; 64-bit add splitters below.
(define_insn "addv64si3_vcc"
  [(set (match_operand:V64SI 0 "register_operand" "= v, v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "% v, v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand" "= cV, Sg")
	(ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])
; NOTE(review): this comment describes the addc patterns below, whose
; carry-out set is written as an ior; the immediately following
; addv64si3_vcc_dup writes its carry with a plain ltu set.  Confirm the
; intended placement against upstream gcn-valu.md.
; This pattern only changes the VCC bits when the corresponding lane is
; enabled, so the set must be described as an ior.
; Broadcast-scalar variant of addv64si3_vcc: operand 1 is a scalar added to
; every lane of operand 2, with the per-lane carry-out written to operand 3.
; NOTE(review): the carry rtl duplicates operand 2 (a vector) and compares
; against it, while the sum duplicates operand 1 -- the operand roles in the
; second set look swapped relative to the first; verify against upstream.
(define_insn "addv64si3_vcc_dup"
  [(set (match_operand:V64SI 0 "register_operand" "= v, v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
	  (match_operand:V64SI 2 "register_operand" " v, v")))
   (set (match_operand:DI 3 "register_operand" "=cV, Sg")
	(ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 2))
			    (match_dup 1))
		(vec_duplicate:V64SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])
; This pattern does not accept SGPR because VCC read already counts as an
; SGPR use and number of SGPR operands is limited to 1.
; Add-with-carry-in: per lane, adds operand 1, operand 2, and the carry bit
; from operand 3 (the vec_merge of 1/0 models "1 where the carry mask is
; set").  Operand 4 receives the combined carry-out; it is written as an
; ior of the two places a carry can arise (the three-way sum overflowing
; past operand 2, or the partial sum overflowing past operand 1), and
; because only enabled lanes update VCC.
(define_insn "addcv64si3"
  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
	  (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
   (set (match_operand:DI 4 "register_operand" "=cV,Sg")
	(ior:DI (ltu:DI (plus:V64SI
			  (plus:V64SI
			    (vec_merge:V64SI
			      (vec_duplicate:V64SI (const_int 1))
			      (vec_duplicate:V64SI (const_int 0))
			      (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (plus:V64SI
			  (vec_merge:V64SI
			    (vec_duplicate:V64SI (const_int 1))
			    (vec_duplicate:V64SI (const_int 0))
			    (match_dup 3))
			  (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
; Add-with-carry-in where the second addend is a broadcast scalar
; (operand 2).  Same carry-out structure as addcv64si3: an ior of the two
; possible overflow points, since only enabled lanes update VCC.
(define_insn "addcv64si3_dup"
  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV, Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
   (set (match_operand:DI 4 "register_operand" "=cV, Sg")
	(ior:DI (ltu:DI (plus:V64SI (plus:V64SI
				      (vec_merge:V64SI
					(vec_duplicate:V64SI (const_int 1))
					(vec_duplicate:V64SI (const_int 0))
					(match_dup 3))
				      (match_dup 1))
			  (vec_duplicate:V64SI
			    (match_dup 2)))
			(vec_duplicate:V64SI
			  (match_dup 2)))
		(ltu:DI (plus:V64SI (vec_merge:V64SI
				      (vec_duplicate:V64SI (const_int 1))
				      (vec_duplicate:V64SI (const_int 0))
				      (match_dup 3))
				    (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
; Vector 32-bit subtract (borrow-out clobbers VCC).  Two alternatives let
; either operand be the scalar/immediate one: v_sub when operand 2 must be
; a VGPR, v_subrev (reversed operands) when operand 1 must be.
(define_insn "subv64si3"
  [(set (match_operand:V64SI 0 "register_operand" "= v, v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB, v")
	  (match_operand:V64SI 2 "gcn_alu_operand" " v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
; Vector 32-bit subtract that also delivers the per-lane borrow-out
; (modelled as (gtu (minus a b) a)) into operand 3, which may be VCC (vop2)
; or an SGPR pair (vop3b).  Four alternatives cover both operand orders
; (sub vs. subrev) crossed with both carry destinations.
(define_insn "subv64si3_vcc"
  [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
	  (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
	(gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
; This pattern does not accept SGPR because VCC read already counts
; as a SGPR use and number of SGPR operands is limited to 1.
; Subtract-with-borrow-in: per lane computes (borrow - op1) - op2 as
; written, with the borrow bit from operand 3 modelled by the 1/0
; vec_merge.  Operand 4 receives the combined borrow-out as an ior of the
; two places a borrow can arise.  Alternatives cover sub/subrev crossed
; with VCC/SGPR carry destinations.
(define_insn "subcv64si3"
  [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
	(minus:V64SI
	  (minus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
	  (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
   (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
	(ior:DI (gtu:DI (minus:V64SI (minus:V64SI
				       (vec_merge:V64SI
					 (vec_duplicate:V64SI (const_int 1))
					 (vec_duplicate:V64SI (const_int 0))
					 (match_dup 3))
				       (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (minus:V64SI (vec_merge:V64SI
				       (vec_duplicate:V64SI (const_int 1))
				       (vec_duplicate:V64SI (const_int 0))
				       (match_dup 3))
				     (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
; Vector 64-bit add, split after the operands can be subdivided into a
; 32-bit low-half add producing VCC and a high-half add-with-carry
; consuming it.  The earlyclobber plus "0"-ties keep the output halves from
; clobbering inputs mid-sequence.
(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "% v0")
	  (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    /* Low halves: plain add, carry-out to VCC.  */
    emit_insn (gen_addv64si3_vcc
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc));
    /* High halves: add with carry-in from VCC, carry-out back to VCC.  */
    emit_insn (gen_addcv64si3
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; EXEC-masked variant of addv64di3: lanes outside the mask (operand 4) keep
; the prior value from operand 3.  Splits into the _exec forms of the
; low-half add and high-half addc.
; NOTE(review): the final split condition tests operands[4] (the DI exec
; mask) with V64DImode; the parallel subv64di3_exec pattern tests
; operands[3] instead -- presumably operands[3] was intended here too;
; verify against upstream gcn-valu.md.
(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand" "% v0")
	    (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[4])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    emit_insn (gen_addcv64si3_exec
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; Vector 64-bit subtract: split into a low-half subtract producing the
; borrow in VCC and a high-half subtract-with-borrow.  Two alternatives
; allow the non-VGPR operand on either side (sub vs. subrev in the
; underlying 32-bit patterns).
(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
	(minus:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
	  (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc));
    emit_insn (gen_subcv64si3
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
; EXEC-masked 64-bit subtract; as subv64di3 but inactive lanes keep the
; value from operand 3.  The insn condition requires at least one register
; operand so the splitter's 32-bit patterns can be satisfied.
(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
	(vec_merge:V64DI
	  (minus:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
	    (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
							   " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    emit_insn (gen_subcv64si3_exec
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
; Vector 64-bit add of a broadcast 64-bit scalar (operand 2): split into
; low-half vcc_dup add (scalar low word broadcast) and high-half
; addc_dup (scalar high word broadcast, carry-in from VCC).
(define_insn_and_split "addv64di3_dup"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" " v0")
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		vcc));
    emit_insn (gen_addcv64si3_dup
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (DImode, operands[2], 1),
		vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_dup; inactive lanes keep operand 3.
(define_insn_and_split "addv64di3_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand" " v0")
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    /* Low halves first so the carry is live in VCC for the high halves.  */
    emit_insn (gen_addv64si3_vcc_dup_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		gcn_operand_part (V64DImode, operands[1], 0),
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    emit_insn (gen_addcv64si3_dup_exec
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[1], 1),
		gcn_operand_part (DImode, operands[2], 1),
		vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; 64-bit add where one addend is a zero-extended 32-bit vector: the high
; half reduces to "high(op2) + carry", i.e. addc with a zero addend.
(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
	  (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
	       (gcn_operand_part (V64DImode, operands[0], 0),
		operands[1],
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc));
    /* The zero-extended addend contributes nothing to the high half
       except the carry.  */
    emit_insn (gen_addcv64si3
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
; EXEC-masked variant of addv64di3_zext; inactive lanes keep operand 3.
(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
	    (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		operands[1],
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    emit_insn (gen_addcv64si3_exec
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		const0_rtx, vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
; 64-bit add of a broadcast zero-extended 32-bit scalar (operand 1): the
; low half uses the vcc_dup add with the scalar's low word; the high half
; is just op2's high word plus the carry.
(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (vec_duplicate:V64SI
	      (match_operand:SI 1 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    ; /* NOTE(review): operands[1] is SImode; taking its DImode part 0 here
	 presumably yields the operand itself -- confirm gcn_operand_part's
	 handling of scalar modes.  */
    emit_insn (gen_addv64si3_vcc_dup
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc));
    emit_insn (gen_addcv64si3
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup; inactive lanes keep operand 3.
(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (vec_duplicate:V64SI
		(match_operand:SI 1 "gcn_alu_operand" "BSv")))
	    (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[1], 0),
		gcn_operand_part (V64DImode, operands[2], 0),
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    emit_insn (gen_addcv64si3_exec
	       (gcn_operand_part (V64DImode, operands[0], 1),
		gcn_operand_part (V64DImode, operands[2], 1),
		const0_rtx, vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; 64-bit add of a zero-extended 32-bit vector and a broadcast 64-bit
; scalar: the high half first materializes the scalar's high word into the
; destination, then adds the carry in place.
(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand" "= v")
	(plus:V64DI
	  (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		operands[1],
		vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    /* Broadcast the scalar's high word, then fold in the carry.  */
    emit_insn (gen_vec_duplicatev64si
	       (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_zext_dup2; inactive lanes keep
; operand 3.  The intermediate broadcast uses an undef merge value since
; the following addc_exec supplies the real inactive-lane values.
(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								  " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		operands[1],
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
	       (dsthi, gcn_operand_part (DImode, operands[2], 1),
		gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
	       (dsthi, dsthi, const0_rtx, vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; Sign-extended counterpart of addv64di3_zext_dup2: the extension's high
; half is materialized explicitly as op1 >> 31 (arithmetic) in a scratch
; register, then added into the high word along with the carry.
(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand" "= v")
	(plus:V64DI
	  (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
   (clobber (match_scratch:V64SI 3 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    /* operands[3] = all-ones or all-zeros per lane, i.e. the sign bits.  */
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		operands[1],
		vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
	       (dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
; EXEC-masked variant of addv64di3_sext_dup2; inactive lanes keep
; operand 3 (the merge value); operand 5 is the sign-bits scratch.
(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								  " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:V64SI 5 "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
				    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
	       (gcn_operand_part (V64DImode, operands[0], 0),
		gcn_operand_part (DImode, operands[2], 0),
		operands[1],
		vcc,
		gcn_operand_part (V64DImode, operands[3], 0),
		operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
	       (dsthi, gcn_operand_part (DImode, operands[2], 1),
		gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
	       (dsthi, dsthi, operands[5], vcc, vcc,
		gcn_operand_part (V64DImode, operands[3], 1),
		operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
;; }}}
;; {{{ DS memory ALU: add/sub
; Vector and scalar modes for which the LDS/GDS hardware has native
; add/sub operations (ds_add/ds_sub/ds_rsub support b32/u32/f32/u64 data).
(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
;; FIXME: the vector patterns probably need RD expanded to a vector of
;; addresses. For now, the only way a vector can get into LDS is
;; if the user puts it there manually.
;;
;; FIXME: the scalar patterns are probably fine in themselves, but need to be
;; checked to see if anything can ever use them.
; Read-modify-write add directly in DS (LDS/GDS) memory; the condition
; requires operands 0 and 1 to be the same location.
; NOTE(review): the name appears to have lost a "<mode>" substitution
; (likely "add<mode>3_ds", possibly with an "<exec>" suffix too) -- as
; written it cannot expand once per DS_ARITH_MODE mode; verify upstream.
(define_insn "add3_ds"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(plus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
; Scalar-mode counterpart of the DS memory add (see the FIXME above about
; whether anything can ever use these scalar patterns).
; NOTE(review): name likely lost "<mode>" ("add<mode>3_ds_scalar").
(define_insn "add3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(plus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								   "%RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
; Read-modify-write subtract in DS memory (memory minus register);
; operands 0 and 1 must be the same location.
; NOTE(review): name likely lost "<mode>" ("sub<mode>3_ds").
(define_insn "sub3_ds"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
; Scalar-mode DS memory subtract (memory minus register).
; NOTE(review): name likely lost "<mode>" ("sub<mode>3_ds_scalar").
(define_insn "sub3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								   " RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
; Reverse subtract in DS memory: register minus memory, using ds_rsub;
; operands 0 and 1 must be the same location.
; NOTE(review): name likely lost "<mode>" ("subr<mode>3_ds").
(define_insn "subr3_ds"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
; Scalar-mode reverse subtract in DS memory (register minus memory).
; NOTE(review): name likely lost "<mode>" ("subr<mode>3_ds_scalar").
(define_insn "subr3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								   " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
;; }}}
;; {{{ ALU special case: mult
; High 32 bits of a widening 32x32 multiply; any_extend makes this serve
; both signed and unsigned forms.
; NOTE(review): both the insn name (likely "<su>mulv64si3_highpart<exec>")
; and the mnemonic (likely "v_mul_hi<sgnsuffix>0" or similar, selecting
; v_mul_hi_i32/v_mul_hi_u32) appear to have lost angle-bracket
; substitutions -- as written the template emits a bare "v_mul_hi0";
; verify against upstream gcn-valu.md.
(define_insn "mulv64si3_highpart"
  [(set (match_operand:V64SI 0 "register_operand" "= v")
	(truncate:V64SI
	  (lshiftrt:V64DI
	    (mult:V64DI
	      (any_extend:V64DI
		(match_operand:V64SI 1 "gcn_alu_operand" " %v"))
	      (any_extend:V64DI
		(match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
	    (const_int 32))))]
  ""
  "v_mul_hi0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
; Low 32 bits of a 32x32 multiply (sign-agnostic, hence one pattern).
(define_insn "mulv64si3"
  [(set (match_operand:V64SI 0 "register_operand" "= v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
; Low 32 bits of a 32x32 multiply with a broadcast scalar multiplier.
(define_insn "mulv64si3_dup"
  [(set (match_operand:V64SI 0 "register_operand" "= v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand" " SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
; 64-bit multiply via schoolbook decomposition after reload:
;   lo  = lo1*lo2 (low part)
;   hi  = umulhi(lo1,lo2) + lo(hi1*lo2) + lo(lo1*hi2) + lo(hi1*hi2)
; (The hi1*hi2 term only contributes its low 32 bits to the high word;
; its higher bits fall outside a 64-bit result.)
(define_insn_and_split "mulv64di3"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(mult:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "% v")
	  (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:V64SI 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];
    emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    emit_insn (gen_mulv64si3 (tmp, left_hi, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
; EXEC-masked 64-bit multiply; same decomposition as mulv64di3.  The first
; write to each destination half merges with operand 3 (or undef when
; operand 3 is the "do not care" unspec); subsequent accumulating adds
; merge with the half's own previous value so inactive lanes are kept.
(define_insn_and_split "mulv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand" "% v")
	    (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:V64SI 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	/* Merge value is "undefined": inactive lanes need not be preserved.  */
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }
    rtx undef = gcn_gen_undef (V64SImode);
    emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
; 64-bit multiply where the left factor is a zero-extended 32-bit vector:
; the hi1 terms vanish, leaving lo*lo, umulhi(lo,lo2), and lo*hi2.
(define_insn_and_split "mulv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" " v"))
	  (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:V64SI 3 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];
    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
; EXEC-masked variant of mulv64di3_zext; merge-value handling mirrors
; mulv64di3_exec.
(define_insn_and_split "mulv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand" "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand" " v"))
	    (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:V64SI 5 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }
    rtx undef = gcn_gen_undef (V64SImode);
    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
; As mulv64di3_zext, but the 64-bit factor is a broadcast scalar;
; gcn_operand_part extracts the scalar's lo/hi words for the same
; three-instruction decomposition.
(define_insn_and_split "mulv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" " v"))
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
   (clobber (match_scratch:V64SI 3 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];
    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
; EXEC-masked variant of mulv64di3_zext_dup2; merge-value handling mirrors
; mulv64di3_exec.
(define_insn_and_split "mulv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand" "= &v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand" " v"))
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
   (clobber (match_scratch:V64SI 5 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }
    rtx undef = gcn_gen_undef (V64SImode);
    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
;; }}}
;; {{{ ALU generic case
; Iterators shared by the generic ALU patterns below.
; NOTE(review): the pattern at the end of this section uses a "bitunop"
; code iterator that is not defined here -- its definition (in upstream,
; [not popcount]) appears to have been lost from this chunk; verify.
(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
(define_code_iterator bitop [and ior xor])
(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
(define_code_iterator minmaxop [smin smax umin umax])
;; Single-source bitwise ALU operations on 1-register integer vectors.
;; The HTML-stripped "<...>" iterator substitutions are restored here:
;; the name and mnemonic come from code attributes of the bitunop iterator.
(define_insn "<expander><mode>2"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v")
	(bitunop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; Commutative two-source bitwise ops: alternative 0 is a VALU vop2,
;; alternative 1 operates directly on LDS memory (ds_* instruction).
;; Restored "<expander>/<mnemonic>" substitutions lost to HTML stripping.
(define_insn "<expander><mode>3"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
	(bitop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
					   "%  v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
					   "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
;; 64-bit (double-register) bitwise ops.  The register form is split after
;; reload into two independent 32-bit operations on the low/high halves;
;; the LDS form is a single 64-bit ds_* instruction.
(define_insn_and_split "<expander>v64di3"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
	(bitop:V64DI
	  (match_operand:V64DI 1 "gcn_valu_src0_operand" "%  v,RD")
	  (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 3)
	(bitop:V64SI (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
	(bitop:V64SI (match_dup 6) (match_dup 8)))]
  {
    /* Low/high 32-bit halves of destination and both sources.  */
    operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
;; EXEC-masked variant of the 64-bit bitwise ops; split into two masked
;; 32-bit vec_merge operations after reload.
(define_insn_and_split "<expander>v64di3_exec"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
	(vec_merge:V64DI
	  (bitop:V64DI
	    (match_operand:V64DI 1 "gcn_valu_src0_operand" "%  v,RD")
	    (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
	  (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
							    "  U0,U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "   e, e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1])
       && register_operand (operands[2], VOIDmode))"
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 5)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 7) (match_dup 9))
	  (match_dup 11)
	  (match_dup 4)))
   (set (match_dup 6)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 8) (match_dup 10))
	  (match_dup 12)
	  (match_dup 4)))]
  {
    /* Halves of the destination, sources, and merge source.  */
    operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
    operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
    operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
;; Vector shift by a broadcast scalar amount.
(define_insn "<expander>v64si3"
  [(set (match_operand:V64SI 0 "register_operand"  "= v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "  v")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"  "SvB"))))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
;; Vector shift with per-lane shift amounts ("v<expander>..." naming
;; distinguishes it from the broadcast-amount pattern above).
(define_insn "v<expander>v64si3"
  [(set (match_operand:V64SI 0 "register_operand"  "=v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" " v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
;; Signed/unsigned min/max; VALU form plus direct-on-LDS form.
(define_insn "<expander><mode>3"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
	(minmaxop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
					   "%  v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
					   "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
;; }}}
;; {{{ FP binops - special cases
; GCN does not directly provide a DFmode subtract instruction, so we do it by
; adding the negated second operand to the first.
;; Vector DFmode subtract: GCN has no v_sub_f64, so emit v_add_f64 with the
;; second operand negated via the VOP3 source-negation modifier.  The two
;; alternatives allow the non-VGPR operand on either side.
(define_insn "subv64df3"
  [(set (match_operand:V64DF 0 "register_operand"  "=  v,   v")
	(minus:V64DF
	  (match_operand:V64DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
;; Scalar DFmode subtract, implemented as v_add_f64 with a negated source.
;; Renamed from "subdf" to the standard optab name "subdf3" so the middle
;; end can actually find and use this pattern (standard names for binary
;; ops are "<op><mode>3"); "subdf" was silently unused.
(define_insn "subdf3"
  [(set (match_operand:DF 0 "register_operand"  "=  v,   v")
	(minus:DF
	  (match_operand:DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
;; }}}
;; {{{ FP binops - generic
; Floating-point mode iterators: vector (1- and 2-register) and scalar.
(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
(define_mode_iterator FP_MODE [HF SF DF])
(define_mode_iterator FP_1REG_MODE [HF SF])
; FP operations grouped by commutativity (affects operand constraints).
(define_code_iterator comm_fp [plus mult smin smax])
(define_code_iterator nocomm_fp [minus])
(define_code_iterator all_fp [plus mult minus smin smax])
;; Commutative FP binops on vectors (add/mul/min/max).
(define_insn "<expander><mode>3"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(comm_fp:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "%  v")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
;; Commutative FP binops on scalars; the second alternative operates
;; directly on an LDS destination.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand"    "=  v,  RL")
	(comm_fp:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "%  v,   0")
	  (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])
;; Non-commutative FP binops (subtract) on 1-register vectors; the second
;; alternative uses the reversed-operand mnemonic (e.g. v_subrev) so the
;; constant/scalar operand can sit in either source slot.
(define_insn "<expander><mode>3"
  [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:VEC_FP_1REG_MODE
	  (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
;; Scalar counterpart of the non-commutative FP binop pattern above.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:FP_1REG_MODE
	  (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
;; }}}
;; {{{ FP unops
;; Scalar FP absolute value: add 0 with the source-absolute (|%1|) VOP3
;; modifier.  Name restored to the standard "abs<mode>2".
(define_insn "abs<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"		 "=v")
	(abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Vector FP absolute value (same add-zero-with-|src| trick as the scalar).
(define_insn "abs<mode>2"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(abs:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Vector FP negation: add 0 with the source-negate (-%1) VOP3 modifier.
(define_insn "neg<mode>2"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(neg:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Vector square root via the fast v_sqrt instruction; only valid under
;; -funsafe-math-optimizations because of reduced accuracy.
(define_insn "sqrt<mode>2"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v")
	(sqrt:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; Scalar counterpart of the unsafe-math square root above.
(define_insn "sqrt<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(sqrt:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; }}}
;; {{{ FP fused multiply and add
;; Vector fused multiply-add; alternatives allow the non-VGPR operand in
;; either multiplicand slot or the addend.
(define_insn "fma<mode>4"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Vector FMA with negated second multiplicand (a*-b + c), using the VOP3
;; source-negate modifier; used e.g. by the reciprocal refinement below.
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	   "=  v,   v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Scalar fused multiply-add.
(define_insn "fma<mode>4"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; Scalar FMA with negated second multiplicand (a*-b + c).
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:FP_MODE 0 "register_operand"    "=  v,   v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:FP_MODE
	    (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
;; }}}
;; {{{ FP division
;; Vector reciprocal (1.0/x) via the fast v_rcp instruction.  The stripped
;; "<SCALAR_MODE>" on the inner float and the "recip<mode>2" name are
;; restored.
(define_insn "recip<mode>2"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v")
	(div:VEC_FP_MODE
	  (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; Scalar reciprocal (1.0/x).
(define_insn "recip<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(div:FP_MODE
	  (float:FP_MODE (const_int 1))
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; Do division via a = b * 1/c
;; The v_rcp_* instructions are not sufficiently accurate on their own,
;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
;; which the ISA manual says is enough to improve the reciprocal accuracy.
;;
;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
;; Vector division a/b computed as a * (1/b), with one Newton-Raphson
;; refinement step (rcp' = rcp * (2 - b*rcp)) done via two FMAs.
;; Stripped "<MODE>mode"/"<SCALAR_MODE>mode"/"<mode>" substitutions restored.
(define_expand "div<mode>3"
  [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    /* NOTE(review): dconstm1 is -1.0, yet the shortcut below skips the final
       multiply, producing 1/b rather than -1/b -- confirm whether dconst1
       was intended here.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
		   && real_identical
		      (CONST_DOUBLE_REAL_VALUE
			(CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
    DONE;
  })
;; Scalar division via reciprocal + one Newton-Raphson step, mirroring the
;; vector expansion above.
(define_expand "div<mode>3"
  [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = const_double_from_real_value (dconst2, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;
    /* NOTE(review): as in the vector version, dconstm1 (-1.0) with the
       skipped final multiply looks like it yields the wrong sign; confirm.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
				      &dconstm1));
    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
    DONE;
  })
;; }}}
;; {{{ Int/FP conversions
; Source/destination mode pairs for int/float conversions, scalar and vector.
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
; Conversion RTL codes and their pattern-name / operand-suffix attributes.
(define_code_iterator cvt_op [fix unsigned_fix
			      float unsigned_float
			      float_extend float_truncate])
(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
			    (float "float") (unsigned_float "floatuns")
			    (float_extend "extend") (float_truncate "trunc")])
; %i/%u print-operand codes select signed/unsigned type suffixes by mode.
(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
				(float "%i0%i1") (unsigned_float "%i0%u1")
				(float_extend "%i0%i1")
				(float_truncate "%i0%i1")])
;; Scalar conversions (e.g. "fix_truncsfsi2"); gcn_valid_cvt_p filters the
;; mode combinations the hardware supports.  Stripped substitutions restored.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set (match_operand:CVT_TO_MODE 0 "register_operand"	   "=  v")
	(cvt_op:CVT_TO_MODE
	  (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; Vector conversions, same structure as the scalar pattern above.
(define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2"
  [(set (match_operand:VCVT_TO_MODE 0 "register_operand"    "=  v")
	(cvt_op:VCVT_TO_MODE
	  (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
;; }}}
;; {{{ Int/int conversions
;; GCC can already do these for scalar types, but not for vector types.
;; Unfortunately you can't just do SUBREG on a vector to select the low part,
;; so there must be a few tricks here.
;; Truncate V64DI to V64SI: after reload this is just a move from the low
;; register of each 64-bit pair (a no-op when the registers already match,
;; hence length 0 for the tied alternative).
(define_insn_and_split "vec_truncatev64div64si"
  [(set (match_operand:V64SI 0 "register_operand"   "=v,&v")
	(truncate:V64SI
	  (match_operand:V64DI 1 "register_operand" " 0, v")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    /* Replace the V64DI source by its low 32-bit half.  */
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])
;; EXEC-masked variant of the V64DI->V64SI truncation; becomes a masked
;; move of the low halves after reload.
(define_insn_and_split "vec_truncatev64div64si_exec"
  [(set (match_operand:V64SI 0 "register_operand"	"=v,&v")
	(vec_merge:V64SI
	  (truncate:V64SI
	    (match_operand:V64DI 1 "register_operand"	" 0, v"))
	  (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
	  (match_operand:DI 3 "gcn_exec_operand"	" e, e")))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 0)
		   (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
	      (clobber (scratch:V64DI))])]
  {
    /* Replace the V64DI source by its low 32-bit half.  */
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])
;; }}}
;; {{{ Vector comparison/merge
;; Vector comparison producing a 64-bit lane mask in VCC, EXEC (v_cmpx),
;; or an SGPR pair.  %E1 prints the comparison-specific suffix.
;; Name restored to "vec_cmp<mode>di" (one pattern per element mode).
(define_insn "vec_cmp<mode>di"
  [(set (match_operand:DI 0 "register_operand"	      "=cV,cV,  e, e, Sg,Sg")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						      "vSv, B,vSv, B,  v,vA")
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						      "  v, v,  v, v, vA, v")]))
   (clobber (match_scratch:DI 4			      "= X, X, cV,cV,  X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
;; Unsigned vector comparison: delegates to the signed pattern, relying on
;; the unsigned RTL codes (LTU etc.) reaching the %E1 output directive.
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "comparison_operator"
     [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.  */
    emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
				    operands[3]));
    DONE;
  })
;; EXEC-masked vector comparison: the raw comparison result is ANDed with
;; the EXEC mask operand.
(define_insn "vec_cmp<mode>di_exec"
  [(set (match_operand:DI 0 "register_operand"	       "=cV,cV,  e, e, Sg,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						       "vSv, B,vSv, B,  v,vA")
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						       "  v, v,  v, v, vA, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"   "  e, e,  e, e,  e, e")))
   (clobber (match_scratch:DI 5			       "= X, X, cV,cV,  X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
;; Vector comparison against a broadcast scalar; the stripped
;; "<SCALAR_MODE>" on operand 2 and the "<mode>" in the name are restored.
(define_insn "vec_cmp<mode>di_dup"
  [(set (match_operand:DI 0 "register_operand"		   "=cV,cV, e,e,Sg")
	(match_operator 1 "comparison_operator"
	  [(vec_duplicate:VEC_1REG_MODE
	     (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							   " Sv, B,Sv,B, A"))
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							   "  v, v, v,v, v")]))
   (clobber (match_scratch:DI 4				   "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
;; EXEC-masked variant of the broadcast-scalar comparison.
(define_insn "vec_cmp<mode>di_dup_exec"
  [(set (match_operand:DI 0 "register_operand"		    "=cV,cV, e,e,Sg")
	(and:DI
	  (match_operator 1 "comparison_operator"
	    [(vec_duplicate:VEC_1REG_MODE
	       (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
							    " Sv, B,Sv,B, A"))
	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
							    "  v, v, v,v, v")])
	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "  e, e, e,e, e")))
   (clobber (match_scratch:DI 5				    "= X,X,cV,cV, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
   (set_attr "length" "4,8,4,8,8")])
;; Standard vcond_mask pattern: select lanes of operand 1/2 by the DI mask.
;; Name restored to "vcond_mask_<mode>di".
(define_expand "vcond_mask_<mode>di"
  [(parallel
    [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
	  (vec_merge:VEC_REG_MODE
	    (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
	    (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
	    (match_operand:DI 3 "register_operand" "")))
     (clobber (scratch:V64DI))])]
  ""
  "")
;; Standard vcond pattern: compare operands 4/5, then merge operands 1/2 by
;; the resulting mask.  The data and comparison element modes may differ,
;; hence the two iterator name segments restored below.
(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
  [(match_operand:VEC_1REG_MODE 0 "register_operand")
   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di (tmp, operands[3], operands[4],
						 operands[5]));
    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di (operands[0], operands[1],
						      operands[2], tmp));
    DONE;
  })
;; EXEC-masked vcond: the comparison is restricted to active lanes.
(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
  [(match_operand:VEC_1REG_MODE 0 "register_operand")
   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec (tmp, operands[3],
						      operands[4], operands[5],
						      operands[6]));
    emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di (operands[0], operands[1],
						      operands[2], tmp));
    DONE;
  })
;; Unsigned vcond; the unsigned RTL codes in operator 3 make the delegated
;; signed pattern emit the right instruction (via %E1).
(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di
	       (tmp, operands[3], operands[4], operands[5]));
    emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
;; EXEC-masked unsigned vcond.
(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
   (match_operator 3 "comparison_operator"
     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
  ""
  {
    rtx tmp = gen_reg_rtx (DImode);
    emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec
	       (tmp, operands[3], operands[4], operands[5], operands[6]));
    emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di
	       (operands[0], operands[1], operands[2], tmp));
    DONE;
  })
;; }}}
;; {{{ Fully masked loop support
;; Compute a fully-masked-loop mask: bit I of operand 0 is set iff
;; op1 + I < op2 (unsigned), for the 64 lanes.
(define_expand "while_ultsidi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:SI 1 "")
   (match_operand:SI 2 "")]
  ""
  {
    /* Non-constant bounds: compare op2 against the lane-id vector plus op1.
       NOTE(review): VGPR 1 appears to hold the 0..63 lane-id vector set up
       by the prologue -- confirm that convention.  */
    if (GET_CODE (operands[1]) != CONST_INT
	|| GET_CODE (operands[2]) != CONST_INT)
      {
	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
	rtx tmp = _0_1_2_3;
	if (GET_CODE (operands[1]) != CONST_INT
	    || INTVAL (operands[1]) != 0)
	  {
	    tmp = gen_reg_rtx (V64SImode);
	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
	  }
	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
					   gen_rtx_GT (VOIDmode, 0, 0),
					   operands[2], tmp));
      }
    else
      {
	/* Constant bounds: materialize the mask directly; a trip count of
	   64 or more enables every lane.  */
	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
	HOST_WIDE_INT mask = (diff >= 64 ? -1
			      : ~((unsigned HOST_WIDE_INT)-1 << diff));
	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
      }
    DONE;
  })
;; Standard maskload pattern: implemented as a masked gather from a scalar
;; base address.  Stripped "<mode>"/"<MODE>mode" substitutions restored.
(define_expand "maskload<mode>di"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:VEC_REG_MODE 1 "memory_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
    /* Carry the address space and volatility of the original MEM.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
    rtx undef = gcn_gen_undef (<MODE>mode);
    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
					   exec));
    DONE;
  })
;; Standard maskstore pattern: implemented as a masked scatter.
(define_expand "maskstore<mode>di"
  [(match_operand:VEC_REG_MODE 0 "memory_operand")
   (match_operand:VEC_REG_MODE 1 "register_operand")
   (match_operand 2 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[2]);
    rtx addr = gcn_expand_scalar_to_vector_address
		(<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
    /* Carry the address space and volatility of the original MEM.  */
    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
    DONE;
  })
;; Standard masked gather: base (op1) + offset vector (op2), scaled by op4,
;; with op3 selecting signed/unsigned offset extension.
(define_expand "mask_gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);
    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[2]) == V64DImode)
      {
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[2] = tmp;
      }
    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
				      operands[3], operands[4], exec));
    DONE;
  })
;; Standard masked scatter, mirroring mask_gather_load above.
(define_expand "mask_scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "")]
  ""
  {
    rtx exec = force_reg (DImode, operands[5]);
    /* TODO: more conversions will be needed when more types are vectorized. */
    if (GET_MODE (operands[1]) == V64DImode)
      {
	rtx tmp = gen_reg_rtx (V64SImode);
	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
						    gcn_gen_undef (V64SImode),
						    exec));
	operands[1] = tmp;
      }
    emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
				       operands[3], operands[4], exec));
    DONE;
  })
; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
; Modes and codes supported by the conditional-operation expanders below.
(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
(define_mode_iterator COND_INT_MODE [V64SI V64DI])
(define_code_iterator cond_op [plus minus])
;; Conditional arithmetic (cond_add/cond_sub): perform the operation on the
;; lanes selected by the mask, taking untouched lanes from operand 4.
;; Stripped "<expander>"/"<mode>"/"<MODE>mode" substitutions restored.
(define_expand "cond_<expander>"
  [(match_operand:COND_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_op:COND_MODE
     (match_operand:COND_MODE 2 "gcn_alu_operand")
     (match_operand:COND_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_MODE 4 "register_operand")]
  ""
  {
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);
    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
; Bitwise codes for the conditional-bitop expanders that follow.
(define_code_iterator cond_bitop [and ior xor])
(define_expand "cond_