1;; Copyright (C) 2016-2020 Free Software Foundation, Inc.
2
3;; This file is free software; you can redistribute it and/or modify it under
4;; the terms of the GNU General Public License as published by the Free
5;; Software Foundation; either version 3 of the License, or (at your option)
6;; any later version.
7
8;; This file is distributed in the hope that it will be useful, but WITHOUT
9;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
11;; for more details.
12
13;; You should have received a copy of the GNU General Public License
14;; along with GCC; see the file COPYING3.  If not see
15;; <http://www.gnu.org/licenses/>.
16
17;; {{{ Vector iterators
18
; Vector modes for specific types: one iterator per element type, each
; currently listing a single 64-lane mode.
; (This will make more sense when there are multiple vector sizes)
(define_mode_iterator V_QI [V64QI])
(define_mode_iterator V_HI [V64HI])
(define_mode_iterator V_HF [V64HF])
(define_mode_iterator V_SI [V64SI])
(define_mode_iterator V_SF [V64SF])
(define_mode_iterator V_DI [V64DI])
(define_mode_iterator V_DF [V64DF])
35
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI [V64QI V64HI])

; Vector modes for one vector register
(define_mode_iterator V_1REG [V64QI V64HI V64SI V64HF V64SF])

; Integer-only and floating-point-only subsets of V_1REG.  V_INT_1REG_ALT
; lists exactly the same modes as V_INT_1REG under a second name.
(define_mode_iterator V_INT_1REG     [V64QI V64HI V64SI])
(define_mode_iterator V_INT_1REG_ALT [V64QI V64HI V64SI])
(define_mode_iterator V_FP_1REG      [V64HF V64SF])
50
; Vector modes for two vector registers
(define_mode_iterator V_2REG [V64DI V64DF])

; Vector modes with native support.  V_noQI omits the QI vector mode;
; V_noHI omits both the QI and the HI vector modes (HF stays in both lists).
(define_mode_iterator V_noQI [V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_noHI [V64HF V64SI V64SF V64DI V64DF])

; Integer vector modes other than QI.
(define_mode_iterator V_INT_noQI [V64HI V64SI V64DI])
63
; All of the above.  V_ALL_ALT lists the same modes as V_ALL under a second
; name.
(define_mode_iterator V_ALL     [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
(define_mode_iterator V_ALL_ALT [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])

; Every integer vector mode, and every floating-point vector mode.
(define_mode_iterator V_INT [V64QI V64HI V64SI V64DI])
(define_mode_iterator V_FP  [V64HF V64SF V64DF])
74
; Element mode of each vector mode, spelled in lower case.
(define_mode_attr scalar_mode
  [(V64QI "qi")
   (V64HI "hi")
   (V64SI "si")
   (V64HF "hf")
   (V64SF "sf")
   (V64DI "di")
   (V64DF "df")])

; Element mode of each vector mode, spelled in upper case.
(define_mode_attr SCALAR_MODE
  [(V64QI "QI")
   (V64HI "HI")
   (V64SI "SI")
   (V64HF "HF")
   (V64SF "SF")
   (V64DI "DI")
   (V64DF "DF")])
82
; For any vector mode, the SImode vector with the same number of lanes
; (lower-case and upper-case spellings).  Every entry currently maps to the
; 64-lane mode.
(define_mode_attr vnsi
  [(V64QI "v64si")
   (V64HI "v64si")
   (V64HF "v64si")
   (V64SI "v64si")
   (V64SF "v64si")
   (V64DI "v64si")
   (V64DF "v64si")])

(define_mode_attr VnSI
  [(V64QI "V64SI")
   (V64HI "V64SI")
   (V64HF "V64SI")
   (V64SI "V64SI")
   (V64SF "V64SI")
   (V64DI "V64SI")
   (V64DF "V64SI")])

; Likewise for the DImode vector with the same number of lanes.
(define_mode_attr vndi
  [(V64QI "v64di")
   (V64HI "v64di")
   (V64HF "v64di")
   (V64SI "v64di")
   (V64SF "v64di")
   (V64DI "v64di")
   (V64DF "v64di")])

(define_mode_attr VnDI
  [(V64QI "V64DI")
   (V64HI "V64DI")
   (V64HF "V64DI")
   (V64SI "V64DI")
   (V64SF "V64DI")
   (V64DI "V64DI")
   (V64DF "V64DI")])
98
; Per-mode selector string; only the three integer single-register modes
; have an entry.
(define_mode_attr sdwa
  [(V64QI "BYTE_0")
   (V64HI "WORD_0")
   (V64SI "DWORD")])
100
101;; }}}
102;; {{{ Substitutions
103
; Name-suffix attributes, one per substitution defined below.  Each expands
; to "" in the original pattern and to "_exec" in the substituted copy.
(define_subst_attr "exec"         "vec_merge"              "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber" "" "_exec")
(define_subst_attr "exec_vcc"     "vec_merge_with_vcc"     "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"          "" "_exec")
112
; Rewrite a plain vector set so that the source (operand 1) is wrapped in a
; vec_merge with a new vector operand 3, selected by a new DImode operand 4.
(define_subst "vec_merge"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
122
; As "vec_merge", but for patterns that also carry a clobber of operand 2;
; the clobber is reproduced unchanged after the merged set.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (match_dup 1)
          (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])
134
; As "vec_merge", but for patterns that also set a DImode operand 2 from a
; DImode operand 3.  In the substituted form the vector result is merged
; with new operand 4 under new operand 5, and the DImode value written to
; operand 2 is ANDed with EXEC_REG.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:V_ALL 0)
        (match_operand:V_ALL 1))
   (set (match_operand:DI 2)
        (match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
           (vec_merge:V_ALL
             (match_dup 1)
             (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
             (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
           (and:DI (match_dup 3)
                   (reg:DI EXEC_REG)))])])
150
; Extend a four-element UNSPEC_SCATTER store with a fifth element: a new
; DImode operand 4 appended after the original operands 0-3.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand 0)
           (match_operand 1)
           (match_operand 2)
           (match_operand 3)]
          UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 0)
           (match_dup 1)
           (match_dup 2)
           (match_dup 3)
           (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
          UNSPEC_SCATTER))])
168
169;; }}}
170;; {{{ Vector moves
171
; Entry point for all vector register moves.  Memory accesses can also come
; this way, but will more usually use the reload_in/out, gather/scatter,
; maskload/store, etc.
;
; Three memory cases are expanded by hand; everything else falls through to
; the plain set in the pattern.

(define_expand "mov<mode>"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (match_operand:V_ALL 1 "general_operand"))]
  ""
  {
    /* True while neither LRA is running nor reload has completed.  */
    const bool before_reload = !lra_in_progress && !reload_completed;

    if (MEM_P (operands[0]) && before_reload)
      {
        /* Store: force the source into a register and emit a scatter
           through an address vector built from the MEM.  */
        operands[1] = force_reg (<MODE>mode, operands[1]);
        rtx addr_tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (operands[0]));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (operands[0]));
        rtx vec_addr
          = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                 operands[0], addr_tmp);
        emit_insn (gen_scatter<mode>_expr (vec_addr, operands[1],
                                           addr_space, is_volatile));
        DONE;
      }
    else if (MEM_P (operands[1]) && before_reload)
      {
        /* Load: emit a gather through an address vector built from the
           MEM.  */
        rtx addr_tmp = gen_rtx_SCRATCH (<VnDI>mode);
        rtx addr_space = GEN_INT (MEM_ADDR_SPACE (operands[1]));
        rtx is_volatile = GEN_INT (MEM_VOLATILE_P (operands[1]));
        rtx vec_addr
          = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                 operands[1], addr_tmp);
        emit_insn (gen_gather<mode>_expr (operands[0], vec_addr,
                                          addr_space, is_volatile));
        DONE;
      }
    else if (MEM_P (operands[0]) || MEM_P (operands[1]))
      {
        /* A memory move while LRA is in progress: use the sgprbase form
           with a fresh address-vector register.  This must not happen once
           reload has completed.  */
        gcc_assert (!reload_completed);
        rtx addr_reg = gen_reg_rtx (<VnDI>mode);
        emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1],
                                           addr_reg));
        DONE;
      }
  })
212
; A pseudo instruction that helps LRA use the "U0" constraint.  It has an
; empty output template and a declared length of zero.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
        (match_operand:V_ALL 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type"   "unknown")
   (set_attr "length" "0")])
222
; Unconditional move for the single-register vector modes.  Both
; alternatives print the same v_mov_b32; they differ only in the source
; constraint and in the declared length (4 versus 8).
(define_insn "*mov<mode>"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v")
        (match_operand:V_1REG 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type"   "vop1,vop1")
   (set_attr "length" "4,8")])
230
; Merging move for the single-register vector modes: operand 1 is merged
; with operand 2 under the DImode operand 3.
;   alternatives 0-1: v_mov_b32
;   alternatives 2-3: v_cndmask_b32 (alternative 2 prints "vcc", alternative
;                     3 prints operand 3)
;   alternatives 4-5: memory load/store, emitted as "#"; these are the only
;                     ones that request the operand 4 scratch.
; The insn condition rejects a memory destination unless the source is a
; register.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand"
                                                 "=v, v, v, v, v, m")
        (vec_merge:V_1REG
          (match_operand:V_1REG 1 "general_operand"
                                                 "vA, B, v,vA, m, v")
          (match_operand:V_1REG 2 "gcn_alu_or_unspec_operand"
                                                 "U0,U0,vA,vA,U0,U0")
          (match_operand:DI 3 "register_operand"
                                                 " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4              "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %2, %1, vcc
   v_cndmask_b32\t%0, %2, %1, %3
   #
   #"
  [(set_attr "type"   "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])
249
250; This variant does not accept an unspec, but does permit MEM
251; read/modify/write which is necessary for maskstore.
252
253;(define_insn "*mov<mode>_exec_match"
254;  [(set (match_operand:V_1REG 0 "nonimmediate_operand" "=v,v, v, m")
255;	(vec_merge:V_1REG
256;	  (match_operand:V_1REG 1 "general_operand"	"vA,B, m, v")
257;	  (match_dup 0)
258;	  (match_operand:DI 2 "gcn_exec_reg_operand"	" e,e, e, e")))
259;   (clobber (match_scratch:<VnDI> 3			"=X,X,&v,&v"))]
260;  "!MEM_P (operands[0]) || REG_P (operands[1])"
261;  "@
262;  v_mov_b32\t%0, %1
263;  v_mov_b32\t%0, %1
264;  #
265;  #"
266;  [(set_attr "type" "vop1,vop1,*,*")
267;   (set_attr "length" "4,8,16,16")])
268
; Unconditional move for the two-register vector modes, printed as two
; v_mov_b32 instructions (one per half).
(define_insn "*mov<mode>"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v")
        (match_operand:V_2REG 1 "general_operand"      "vDB"))]
  ""
  {
    /* When the source is a register numbered below the destination, move
       the high half first; otherwise move the low half first.  */
    const bool high_half_first
      = REG_P (operands[1]) && REGNO (operands[0]) > REGNO (operands[1]);

    return (high_half_first
            ? "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1"
            : "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1");
  }
  [(set_attr "type"   "vmult")
   (set_attr "length" "16")])
281
; Merging move for the two-register vector modes: operand 1 is merged with
; operand 2 under the DImode operand 3.
;   alternative 0:    two v_mov_b32
;   alternatives 1-2: two v_cndmask_b32 (alternative 1 prints "vcc",
;                     alternative 2 prints operand 3)
;   alternatives 3-4: memory load/store, emitted as "#"; these are the only
;                     ones that request the operand 4 scratch.
; The insn condition rejects a memory destination unless the source is a
; register.
(define_insn "mov<mode>_exec"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v,   v,   v, v, m")
        (vec_merge:V_2REG
          (match_operand:V_2REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
          (match_operand:V_2REG 2 "gcn_alu_or_unspec_operand"
                                                       " U0,vDA0,vDA0,U0,U0")
          (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:<VnDI> 4                    "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    /* The memory alternatives have no direct assembly form.  Return before
       the register-number comparison below: in alternative 4 operand 0 is
       a MEM, so REGNO must not be applied to it.  */
    if (which_alternative > 2)
      return "#";

    /* When the source is a register numbered below the destination, write
       the high half first; otherwise write the low half first.  */
    const bool low_half_first
      = !REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]);

    if (low_half_first)
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
        case 1:
          return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, vcc";
        case 2:
          return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
                 "v_cndmask_b32\t%H0, %H2, %H1, %3";
        }
    else
      switch (which_alternative)
        {
        case 0:
          return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
        case 1:
          return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, vcc";
        case 2:
          return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
                 "v_cndmask_b32\t%L0, %L2, %L1, %3";
        }

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
321
322; This variant does not accept an unspec, but does permit MEM
323; read/modify/write which is necessary for maskstore.
324
325;(define_insn "*mov<mode>_exec_match"
326;  [(set (match_operand:V_2REG 0 "nonimmediate_operand" "=v, v, m")
327;	(vec_merge:V_2REG
328;	  (match_operand:V_2REG 1 "general_operand"	"vDB, m, v")
329;	  (match_dup 0)
330;	  (match_operand:DI 2 "gcn_exec_reg_operand"	" e, e, e")))
331;   (clobber (match_scratch:<VnDI> 3			"=X,&v,&v"))]
332;  "!MEM_P (operands[0]) || REG_P (operands[1])"
333;  "@
334;   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
335;       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
336;     else \
337;       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
338;   #
339;   #"
340;  [(set_attr "type" "vmult,*,*")
341;   (set_attr "length" "16,16,16")])
342
343; A SGPR-base load looks like:
344;   <load> v, Sv
345;
346; There's no hardware instruction that corresponds to this, but vector base
347; addresses are placed in an SGPR because it is easier to add to a vector.
348; We also have a temporary vT, and the vector v1 holding numbered lanes.
349;
350; Rewrite as:
351;   vT = v1 << log2(element-size)
352;   vT += Sv
353;   flat_load v, vT
354
; SGPR-base move for the single-register vector modes.  Only valid while
; LRA is in progress or after reload has completed.  The register-source
; alternatives print a v_mov_b32; the memory alternatives are emitted as
; "#".  Operand 2 is an early-clobbered address-vector register.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_1REG 0 "nonimmediate_operand"  "= v, v, v, m")
        (unspec:V_1REG
          [(match_operand:V_1REG 1 "general_operand"    " vA,vB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type"   "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])
369
; SGPR-base move for the two-register vector modes.  Only valid while LRA is
; in progress or after reload has completed.  Alternative 0 prints two
; v_mov_b32 instructions; the memory alternatives are emitted as "#".
; Operand 2 is an early-clobbered address-vector register.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:V_2REG 0 "nonimmediate_operand"  "= v, v, m")
        (unspec:V_2REG
          [(match_operand:V_2REG 1 "general_operand"    "vDB, m, v")]
          UNSPEC_SGPRBASE))
   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  {
    /* Only alternative 0 has a direct assembly form.  */
    if (which_alternative != 0)
      return "#";

    /* When the source is a register numbered below the destination, move
       the high half first; otherwise move the low half first.  */
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type"   "vmult,*,*")
   (set_attr "length" "8,12,12")])
386
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register: the
; memory-to-register move is emitted in its sgprbase form, with operand 2 as
; the address-vector scratch.

(define_expand "reload_in<mode>"
  [(set (match_operand:V_ALL 0 "register_operand"      "= v")
        (match_operand:V_ALL 1 "memory_operand"        "  m"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    rtx dest_reg = operands[0];
    rtx src_mem = operands[1];
    rtx addr_scratch = operands[2];

    emit_insn (gen_mov<mode>_sgprbase (dest_reg, src_mem, addr_scratch));
    DONE;
  })
399
; reload_out is the store counterpart of reload_in: the register-to-memory
; move is emitted in its sgprbase form, with operand 2 as the address-vector
; scratch.

(define_expand "reload_out<mode>"
  [(set (match_operand:V_ALL 0 "memory_operand"        "= m")
        (match_operand:V_ALL 1 "register_operand"      "  v"))
   (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
  ""
  {
    rtx dest_mem = operands[0];
    rtx src_reg = operands[1];
    rtx addr_scratch = operands[2];

    emit_insn (gen_mov<mode>_sgprbase (dest_mem, src_reg, addr_scratch));
    DONE;
  })
411
412; Expand scalar addresses into gather/scatter patterns
413
; SGPR-base store -> four-element UNSPEC_SCATTER.
;   operand 5: address expression built from the MEM and the scratch
;   operand 6: the MEM's address space
;   operand 7: the MEM's volatile flag
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "general_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_dup 1)
           (match_dup 6)
           (match_dup 7)]
          UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];

    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       dest_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })
431
; Merging store -> five-element UNSPEC_SCATTER whose last element is the
; DImode operand 3.  Operand 2 of the vec_merge does not appear in the
; replacement.
;   operand 5: address expression built from operand 3, the MEM and the
;              scratch
;   operand 6: the MEM's address space
;   operand 7: the MEM's volatile flag
(define_split
  [(set (match_operand:V_ALL 0 "memory_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "general_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_dup 1)
           (match_dup 6)
           (match_dup 7)
           (match_dup 3)]
          UNSPEC_SCATTER))]
  {
    rtx dest_mem = operands[0];

    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], dest_mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (dest_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (dest_mem));
  })
452
; SGPR-base load -> UNSPEC_GATHER.
;   operand 5: address expression built from the MEM and the scratch
;   operand 6: the MEM's address space
;   operand 7: the MEM's volatile flag
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (unspec:V_ALL
          [(match_operand:V_ALL 1 "memory_operand")]
          UNSPEC_SGPRBASE))
   (clobber (match_scratch:<VnDI> 2))]
  ""
  [(set (match_dup 0)
        (unspec:V_ALL
          [(match_dup 5)
           (match_dup 6)
           (match_dup 7)
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  {
    rtx src_mem = operands[1];

    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
                                                       src_mem, operands[2]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })
471
; Merging load -> UNSPEC_GATHER wrapped in the same vec_merge (operands 2
; and 3 are carried over unchanged).
;   operand 5: address expression built from operand 3, the MEM and the
;              scratch
;   operand 6: the MEM's address space
;   operand 7: the MEM's volatile flag
(define_split
  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
        (vec_merge:V_ALL
          (match_operand:V_ALL 1 "memory_operand")
          (match_operand:V_ALL 2 "")
          (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:<VnDI> 4))]
  ""
  [(set (match_dup 0)
        (vec_merge:V_ALL
          (unspec:V_ALL
            [(match_dup 5)
             (match_dup 6)
             (match_dup 7)
             (mem:BLK (scratch))]
            UNSPEC_GATHER)
          (match_dup 2)
          (match_dup 3)))]
  {
    rtx src_mem = operands[1];

    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
                                                       operands[3], src_mem,
                                                       operands[4]);
    operands[6] = GEN_INT (MEM_ADDR_SPACE (src_mem));
    operands[7] = GEN_INT (MEM_VOLATILE_P (src_mem));
  })
495
496; TODO: Add zero/sign extending variants.
497
498;; }}}
499;; {{{ Lane moves
500
501; v_writelane and v_readlane work regardless of exec flags.
502; We allow source to be scratch.
503;
504; FIXME these should take A immediates
505
; Write scalar operand 1 into one lane of a single-register vector.  The
; lane is selected by the mask (1 << operand 2); the remaining lanes come
; from operand 3.  Printed as one v_writelane_b32.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"                 "= v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"      " Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"            "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
520
; Two-register variant of the lane write: one v_writelane_b32 for each
; half, both using the same lane operand.
; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"                 "= v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"      " Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" " U0")
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"            "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type"       "vmult")
   (set_attr "length"     "16")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
537
; Named expander for the lane write.  It has no preparation code: it emits
; the vec_merge form with the destination itself as the "other lanes"
; input and (1 << operand 2) as the lane mask.
(define_expand "vec_set<mode>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (vec_merge:V_ALL
          (vec_duplicate:V_ALL
            (match_operand:<SCALAR_MODE> 1 "register_operand"))
          (match_dup 0)
          (ashift (const_int 1)
                  (match_operand:SI 2 "gcn_alu_operand"))))]
  "")
546
; Lane write where the lane mask is a constant integer.  The insn condition
; requires exact_log2 of the mask to be a valid lane index for the mode.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_1REG 0 "register_operand"                 "=v")
        (vec_merge:V_1REG
          (vec_duplicate:V_1REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"      "Sv"))
          (match_operand:V_1REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand"                  " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Replace the mask by its bit index so that %2 prints the lane
       number.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
563
; Two-register variant of the constant-mask lane write: one v_writelane_b32
; for each half.  The insn condition requires exact_log2 of the mask to be a
; valid lane index for the mode.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:V_2REG 0 "register_operand"                 "=v")
        (vec_merge:V_2REG
          (vec_duplicate:V_2REG
            (match_operand:<SCALAR_MODE> 1 "register_operand"      "Sv"))
          (match_operand:V_2REG 3 "gcn_register_or_unspec_operand" "U0")
          (match_operand:SI 2 "const_int_operand"                  " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < GET_MODE_NUNITS (<MODE>mode))"
  {
    /* Replace the mask by its bit index so that %2 prints the lane
       number.  */
    const int lane = exact_log2 (INTVAL (operands[2]));
    operands[2] = GEN_INT (lane);
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type"       "vmult")
   (set_attr "length"     "16")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
580
; Broadcast scalar operand 1 to every lane of a single-register vector,
; printed as one v_mov_b32.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_1REG 0 "register_operand"         "=v")
        (vec_duplicate:V_1REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type"   "vop3a")
   (set_attr "length" "8")])
589
; Broadcast scalar operand 1 to every lane of a two-register vector,
; printed as one v_mov_b32 for each half.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:V_2REG 0 "register_operand"         "=  v")
        (vec_duplicate:V_2REG
          (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type"   "vop3a")
   (set_attr "length" "16")])
598
; Read the lane selected by operand 2 from a single-register vector into a
; scalar register, printed as one v_readlane_b32.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_1REG 1 "register_operand"       "  v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type"       "vop3a")
   (set_attr "length"     "8")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
610
; Read the lane selected by operand 2 from a two-register vector, printed
; as one v_readlane_b32 for each half.  The destination is marked
; early-clobber.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=&Sg")
        (vec_select:<SCALAR_MODE>
          (match_operand:V_2REG 1 "register_operand"       "   v")
          (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type"       "vmult")
   (set_attr "length"     "16")
   (set_attr "exec"       "none")
   (set_attr "laneselect" "yes")])
622
; Extract the element of vector operand 2 at lane (63 - clz (operand 1)),
; where operand 1 is a DImode mask, into scalar operand 0.
(define_expand "extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:DI 1 "gcn_alu_operand")
   (match_operand:V_ALL 2 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx lane = gen_reg_rtx (SImode);

    /* lane = clz (mask), then lane = 63 - lane.  */
    emit_insn (gen_clzdi2 (lane, operands[1]));
    emit_insn (gen_subsi3 (lane, GEN_INT (63), lane));

    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], operands[2],
                                                   lane));
    DONE;
  })
639
; As extract_last, but with a fallback: when the DImode mask (operand 2) is
; zero, operand 0 receives the default value (operand 1) instead of an
; element of vector operand 3.
(define_expand "fold_extract_last_<mode>"
  [(match_operand:<SCALAR_MODE> 0 "register_operand")
   (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_ALL 3 "register_operand")]
  "can_create_pseudo_p ()"
  {
    rtx result = operands[0];
    rtx lane_mask = operands[2];
    rtx default_label = gen_label_rtx ();
    rtx join_label = gen_label_rtx ();

    /* if (mask == 0) goto default_label;  */
    rtx mask_is_zero = gen_rtx_EQ (VOIDmode, lane_mask, const0_rtx);
    emit_jump_insn (gen_cbranchdi4 (mask_is_zero, lane_mask, const0_rtx,
                                    default_label));

    /* Non-zero mask: extract from the vector, then skip the default.  */
    emit_insn (gen_extract_last_<mode> (result, lane_mask, operands[3]));
    emit_jump_insn (gen_jump (join_label));
    emit_barrier ();

    /* Zero mask: copy the default value.  */
    emit_label (default_label);
    emit_move_insn (result, operands[1]);

    emit_label (join_label);
    DONE;
  })
664
; Vector initialisation; all of the work is delegated to
; gcn_expand_vector_init.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand 1)]
  ""
  {
    rtx target = operands[0];
    rtx initializer = operands[1];

    gcn_expand_vector_init (target, initializer);
    DONE;
  })
673
674;; }}}
675;; {{{ Scatter / Gather
676
677;; GCN does not have an instruction for loading a vector from contiguous
678;; memory so *all* loads and stores are eventually converted to scatter
679;; or gather.
680;;
681;; GCC does not permit MEM to hold vectors of addresses, so we must use an
682;; unspec.  The unspec formats are as follows:
683;;
684;;     (unspec:V??
685;;	 [(<address expression>)
686;;	  (<addr_space_t>)
687;;	  (<use_glc>)
688;;	  (mem:BLK (scratch))]
689;;	 UNSPEC_GATHER)
690;;
691;;     (unspec:BLK
692;;	  [(<address expression>)
693;;	   (<source register>)
694;;	   (<addr_space_t>)
695;;	   (<use_glc>)
696;;	   (<exec>)]
697;;	  UNSPEC_SCATTER)
698;;
699;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
700;; - The mem:BLK does not contain any real information, but indicates that an
701;;   unknown memory read is taking place.  Stores are expected to use a similar
702;;   mem:BLK outside the unspec.
703;; - The address space and glc (volatile) fields are there to replace the
704;;   fields normally found in a MEM.
705;; - Multiple forms of address expression are supported, below.
706
; Gather-load expander.  The address is built by gcn_expand_scaled_offsets
; from operands 1, 2 and 4, with INTVAL of operand 3 passed as a further
; argument.  The mode of the result selects the insn form: a DImode address
; vector uses the 1offset form, anything else uses the 2offsets form
; together with operand 1.  The remaining insn operands are all zero.
(define_expand "gather_load<mode><vnsi>"
  [(match_operand:V_ALL 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:<VnSI> 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx dest = operands[0];
    rtx base = operands[1];
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[2], operands[4],
                                          INTVAL (operands[3]), NULL);
    rtx load;

    if (GET_MODE (addr) != <VnDI>mode)
      load = gen_gather<mode>_insn_2offsets (dest, base, addr, const0_rtx,
                                             const0_rtx, const0_rtx);
    else
      load = gen_gather<mode>_insn_1offset (dest, addr, const0_rtx,
                                            const0_rtx, const0_rtx);

    emit_insn (load);
    DONE;
  })
728
; Gather with an arbitrary address expression as operand 1 (no predicate).
; Operands 2 and 3 are immediates.  There is no preparation code, so the
; UNSPEC_GATHER pattern is emitted as written.
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:V_ALL 0 "register_operand")
        (unspec:V_ALL
          [(match_operand 1 "")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  ""
  {})
740
; Gather from (address vector operand 1) + (constant offset operand 2).
; Operand 3 is the address space and operand 4 the glc flag.
;
; Accepted offsets, matching scatter<mode>_insn_1offset:
;   flat:   zero; or below 0x1000 when TARGET_GCN5_PLUS.  The output code
;           only prints an "offset:" field when TARGET_GCN5_PLUS, so any
;           other target must not match a nonzero offset.
;   global: -0x1000 <= offset < 0x1000.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:V_ALL 0 "register_operand"		   "=v")
	(unspec:V_ALL
	  [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v")
			(vec_duplicate:<VnDI>
			  (match_operand 2 "immediate_operand"	   " n")))
	   (match_operand 3 "immediate_operand"			   " n")
	   (match_operand 4 "immediate_operand"			   " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[2]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
	&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    /* The template is built at output time because the glc suffix depends
       on an operand value.  */
    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  /* No offset field is printed here; the insn condition guarantees
	     that the offset is zero.  */
	  sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
779
; Gather for the DS address spaces: an SImode address vector (operand 1)
; plus a constant offset (operand 2) that must be below 0x10000.  Operand 3
; is the address space; " gds" is appended when AS_GDS_P holds for it.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:V_ALL 0 "register_operand"                  "=v")
        (unspec:V_ALL
          [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
                        (vec_duplicate:<VnSI>
                          (match_operand 2 "immediate_operand"     " n")))
           (match_operand 3 "immediate_operand"                    " n")
           (match_operand 4 "immediate_operand"                    " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *gds_suffix = AS_GDS_P (as) ? " gds" : "";

    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
             gds_suffix);
    return buf;
  }
  [(set_attr "type"   "ds")
   (set_attr "length" "12")])
801
; Gather from (scalar DImode base operand 1) + (sign-extended SImode offset
; vector operand 2) + (constant offset operand 3).  Only the global address
; space is accepted, with -0x1000 <= operand 3 < 0x1000.  Operand 4 is the
; address space and operand 5 the glc flag.
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:V_ALL 0 "register_operand"                       "=v")
        (unspec:V_ALL
          [(plus:<VnDI>
             (plus:<VnDI>
               (vec_duplicate:<VnDI>
                 (match_operand:DI 1 "register_operand"                 "Sv"))
               (sign_extend:<VnDI>
                 (match_operand:<VnSI> 2 "register_operand"             " v")))
             (vec_duplicate:<VnDI> (match_operand 3 "immediate_operand" " n")))
           (match_operand 4 "immediate_operand"                         " n")
           (match_operand 5 "immediate_operand"                         " n")
           (mem:BLK (scratch))]
          UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[4]);
    const char *glc_suffix = INTVAL (operands[5]) ? " glc" : "";

    /* The insn condition only admits the global address space.  */
    if (!AS_GLOBAL_P (as))
      gcc_unreachable ();

    /* Work around an assembler bug: it expects a 64-bit register pair for
       the offset operand although a 32-bit register would be correct, so
       the pair is printed by hand from the register number.  */
    int vgpr = REGNO (operands[2]) - FIRST_VGPR_REG;
    sprintf (buf, "global_load%%o0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
                  "s_waitcnt\tvmcnt(0)", vgpr, vgpr + 1, glc_suffix);

    return buf;
  }
  [(set_attr "type"   "flat")
   (set_attr "length" "12")])
838
; Scatter-store expander.  The address is built by gcn_expand_scaled_offsets
; from operands 0, 1 and 3, with INTVAL of operand 2 passed as a further
; argument; operand 4 is the vector to store.  The mode of the result
; selects the insn form: a DImode address vector uses the 1offset form,
; anything else uses the 2offsets form together with operand 0.  The
; remaining insn operands are all zero.
(define_expand "scatter_store<mode><vnsi>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:<VnSI> 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:V_ALL 4 "register_operand")]
  ""
  {
    rtx base = operands[0];
    rtx value = operands[4];
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, base,
                                          operands[1], operands[3],
                                          INTVAL (operands[2]), NULL);
    rtx store;

    if (GET_MODE (addr) != <VnDI>mode)
      store = gen_scatter<mode>_insn_2offsets (base, addr, const0_rtx, value,
                                               const0_rtx, const0_rtx);
    else
      store = gen_scatter<mode>_insn_1offset (addr, const0_rtx, value,
                                              const0_rtx, const0_rtx);

    emit_insn (store);
    DONE;
  })
860
; Scatter with an arbitrary DImode-vector address expression as operand 0
; (no predicate).  Operand 1 is the vector to store; operands 2 and 3 are
; immediates.  There is no preparation code, so the UNSPEC_SCATTER pattern
; is emitted as written.
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VnDI> 0 "")
           (match_operand:V_ALL 1 "register_operand")
           (match_operand 2 "immediate_operand")
           (match_operand 3 "immediate_operand")]
          UNSPEC_SCATTER))]
  ""
  {})
872
; Scatter to (address vector operand 0) + (constant offset operand 1).
; Operand 2 is the vector to store, operand 3 the address space and operand
; 4 the glc flag.
;
; Accepted offsets:
;   flat:   zero; or below 0x1000 when TARGET_GCN5_PLUS.
;   global: -0x1000 <= offset < 0x1000.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
                        (vec_duplicate:<VnDI>
                          (match_operand 1 "immediate_operand"     "n")))
           (match_operand:V_ALL 2 "register_operand"               "v")
           (match_operand 3 "immediate_operand"                    "n")
           (match_operand 4 "immediate_operand"                    "n")]
          UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
	&& (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *glc_suffix = INTVAL (operands[4]) ? " glc" : "";

    if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s",
               glc_suffix);
    else if (!AS_FLAT_P (as))
      gcc_unreachable ();
    else if (TARGET_GCN5_PLUS)
      sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc_suffix);
    else
      /* No offset field is printed here.  */
      sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc_suffix);

    return buf;
  }
  [(set_attr "type"   "flat")
   (set_attr "length" "12")])
910
; Scatter for the DS address spaces: an SImode address vector (operand 0)
; plus a constant offset (operand 1) that must be below 0x10000.  Operand 2
; is the vector to store and operand 3 the address space; " gds" is appended
; when AS_GDS_P holds for it.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
                        (vec_duplicate:<VnSI>
                          (match_operand 1 "immediate_operand"     "n")))
           (match_operand:V_ALL 2 "register_operand"               "v")
           (match_operand 3 "immediate_operand"                    "n")
           (match_operand 4 "immediate_operand"                    "n")]
          UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    static char buf[200];
    addr_space_t as = INTVAL (operands[3]);
    const char *gds_suffix = AS_GDS_P (as) ? " gds" : "";

    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s", gds_suffix);
    return buf;
  }
  [(set_attr "type"   "ds")
   (set_attr "length" "12")])
932
; Scatter store to the global address space using a scalar 64-bit base
; (operand 0), a sign-extended 32-bit offset vector (operand 1) and a
; constant offset (operand 2).  Operand 3 is the data vector, operand 4 the
; address space and operand 5 a flag that requests the " glc" modifier.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:<VnDI>
	     (plus:<VnDI>
	       (vec_duplicate:<VnDI>
		 (match_operand:DI 0 "register_operand"			"Sv"))
	       (sign_extend:<VnDI>
		 (match_operand:<VnSI> 1 "register_operand"		" v")))
	     (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
	   (match_operand:V_ALL 3 "register_operand"			" v")
	   (match_operand 4 "immediate_operand"				" n")
	   (match_operand 5 "immediate_operand"				" n")]
	  UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    static char asm_text[200];
    const addr_space_t space = INTVAL (operands[4]);

    /* The insn condition admits the global address space only.  */
    if (!AS_GLOBAL_P (space))
      gcc_unreachable ();

    const char *glc_suffix = INTVAL (operands[5]) ? " glc" : "";

    /* Work around assembler bug in which a 64-bit register is expected,
       but a 32-bit value would be correct: print the offset register as
       an explicit register pair.  */
    const int first_vgpr = REGNO (operands[1]) - FIRST_VGPR_REG;
    sprintf (asm_text, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s",
	     first_vgpr, first_vgpr + 1, glc_suffix);

    return asm_text;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
969
970;; }}}
971;; {{{ Permutations
972
; UNSPEC_BPERMUTE on a single-register vector.  Operand 2 is the data
; vector and operand 1 a <VnSI> vector; note that the assembly prints
; operand 1 before operand 2.  Operand 3 is the EXEC register, listed as an
; input of the unspec.  The template also emits "s_waitcnt lgkmcnt(0)"
; directly after the permute.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:V_1REG 0 "register_operand"    "=v")
	(unspec:V_1REG
	  [(match_operand:V_1REG 2 "register_operand" " v")
	   (match_operand:<VnSI> 1 "register_operand" " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])
984
; UNSPEC_BPERMUTE on a two-register vector.  There is no single
; instruction ("#"); after reload the insn is split into two <VnSI>
; permutes, one on part 0 and one on part 1 of the destination and of the
; data operand, both reusing operand 1 and the EXEC operand 3.
; The destination is earlyclobber ("=&v") but operand 2 may still be tied
; to it through the "0" alternative.
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:V_2REG 0 "register_operand"    "=&v")
	(unspec:V_2REG
	  [(match_operand:V_2REG 2 "register_operand" " v0")
	   (match_operand:<VnSI> 1 "register_operand" "  v")
	   (match_operand:DI 3 "gcn_exec_reg_operand" "  e")]
	  UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:<VnSI>
			[(match_dup 6) (match_dup 1) (match_dup 3)]
			UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:<VnSI>
			[(match_dup 7) (match_dup 1) (match_dup 3)]
			UNSPEC_BPERMUTE))]
  {
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
1009
; UNSPEC_MOV_DPP_SHR move for the V_noHI modes.  Operand 1 is the source
; vector and operand 2 a constant integer; the assembly text is produced by
; gcn_expand_dpp_shr_insn, which is given the "v_mov_b32" mnemonic, the
; unspec code and the value of operand 2.  The leading "@" in the name
; requests the mode-parameterised generator interface.
(define_insn "@dpp_move<mode>"
  [(set (match_operand:V_noHI 0 "register_operand"    "=v")
	(unspec:V_noHI
	  [(match_operand:V_noHI 1 "register_operand" " v")
	   (match_operand:SI 2 "const_int_operand"    " n")]
	  UNSPEC_MOV_DPP_SHR))]
  ""
  {
    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_mov_b32",
				    UNSPEC_MOV_DPP_SHR, INTVAL (operands[2]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "16")])
1023
1024;; }}}
1025;; {{{ ALU special case: add/sub
1026
; Lane-wise addition of two single-register integer vectors.  The
; instruction names vcc as its carry destination, hence the VCC_REG
; clobber.  Operand 2 (constraint "vSvB") is printed as the first source and
; the VGPR-only operand 1 as the second.
(define_insn "add<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"   "=  v")
	(plus:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "register_operand" "%  v")
	  (match_operand:V_INT_1REG 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
1037
; Lane-wise addition of a broadcast scalar (operand 2, wrapped in
; vec_duplicate) and a vector register (operand 1).  The scalar is printed
; as the first source; vcc is named as the carry destination and is
; therefore clobbered.
(define_insn "add<mode>3_dup<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"	     "= v")
	(plus:V_INT_1REG
	  (vec_duplicate:V_INT_1REG
	    (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "SvB"))
	  (match_operand:V_INT_1REG 1 "register_operand"     "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
1049
; V_SI addition that also produces a carry-out.  Operand 3 (DI) is set to
; the unsigned comparison "sum < operand 1", which is the overflow test for
; the addition.  The two alternatives differ only in where operand 3 lives
; ("cV" with type vop2, "Sg" with type vop3b).
(define_insn "add<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"   "=  v,   v")
	(plus:V_SI
	  (match_operand:V_SI 1 "register_operand" "%  v,   v")
	  (match_operand:V_SI 2 "gcn_alu_operand"  "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	   "= cV,  Sg")
	(ltu:DI (plus:V_SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])
1062
1063; This pattern only changes the VCC bits when the corresponding lane is
1064; enabled, so the set must be described as an ior.
1065
; V_SI addition of a broadcast scalar (operand 1) and a vector register
; (operand 2) that also writes a carry-out mask to operand 3 (DI).
;
; The scalar is printed as the first source and the vector as the second.
; Operand 1 has constraint "SvB" while operand 2 is VGPR-only, and every
; other add pattern in this file places the scalar/constant-capable operand
; in the first source position and the VGPR-only operand last.
;
; NOTE(review): the carry expression applies vec_duplicate to (match_dup 2),
; which is already a vector, and adds the scalar (match_dup 1) unwrapped;
; the operand numbers look swapped relative to the first set.  Left as is
; because changing the RTL shape affects pattern matching -- confirm.
(define_insn "add<mode>3_vcc_dup<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"   "= v,  v")
	(plus:V_SI
	  (vec_duplicate:V_SI
	    (match_operand:SI 1 "gcn_alu_operand"  "SvB,SvB"))
	  (match_operand:V_SI 2 "register_operand" "  v,  v")))
   (set (match_operand:DI 3 "register_operand"	   "=cV, Sg")
	(ltu:DI (plus:V_SI (vec_duplicate:V_SI (match_dup 2))
			   (match_dup 1))
		(vec_duplicate:V_SI (match_dup 2))))]
  ""
  "v_add%^_u32\t%0, %3, %1, %2"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])
1080
1081; v_addc does not accept an SGPR because the VCC read already counts as an
1082; SGPR use and the number of SGPR operands is limited to 1.  It does not
1083; accept "B" immediate constants due to a related bus conflict.
1084
; V_SI add with carry-in and carry-out.  The carry-in mask (operand 3, DI)
; is turned into a vector of 1/0 by the vec_merge and added to operands 1
; and 2.  The carry-out (operand 4, DI) is described as the ior of two
; unsigned-overflow tests: one on the full three-way sum against operand 2
; and one on "carry + operand 1" against operand 1.
; The first alternative is the 4-byte vop2 form, the second the 8-byte
; vop3b form with wider operand choices.
(define_insn "addc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"     "=v,   v")
	(plus:V_SI
	  (plus:V_SI
	    (vec_merge:V_SI
	      (vec_duplicate:V_SI (const_int 1))
	      (vec_duplicate:V_SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV,cVSv"))
	    (match_operand:V_SI 1 "gcn_alu_operand"  "% v,  vA"))
	  (match_operand:V_SI 2 "gcn_alu_operand"    " vA,  vA")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,cVSg")
	(ior:DI (ltu:DI (plus:V_SI
			  (plus:V_SI
			    (vec_merge:V_SI
			      (vec_duplicate:V_SI (const_int 1))
			      (vec_duplicate:V_SI (const_int 0))
			      (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (plus:V_SI
			  (vec_merge:V_SI
			    (vec_duplicate:V_SI (const_int 1))
			    (vec_duplicate:V_SI (const_int 0))
			    (match_dup 3))
			  (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
1116
; Lane-wise subtraction of single-register integer vectors, clobbering
; vcc.  Subtraction is not commutative, so there are two alternatives: when
; operand 2 is the VGPR, v_sub is used; when operand 1 is the VGPR, v_subrev
; is used with the sources printed in reverse order.
(define_insn "sub<mode>3<exec_clobber>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"  "=  v,   v")
	(minus:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V_INT_1REG 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
1129
; V_SI subtraction that also produces a borrow-out.  Operand 3 (DI) is set
; to the unsigned comparison "difference > operand 1".  The four
; alternatives are the product of {v_sub, v_subrev} (depending on which
; source is the VGPR) and the two locations allowed for operand 3
; ("cV" as vop2, "Sg" as vop3b).
(define_insn "sub<mode>3_vcc<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"  "=  v,   v,   v,   v")
	(minus:V_SI
	  (match_operand:V_SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
	  (match_operand:V_SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	  "= cV,  Sg,  cV,  Sg")
	(gtu:DI (minus:V_SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
1146
1147; v_subb does not accept an SGPR because the VCC read already counts as an
1148; SGPR use and the number of SGPR operands is limited to 1.  It does not
1149; accept "B" immediate constants due to a related bus conflict.
1150
; V_SI subtract with borrow-in (operand 3, DI mask expanded to a 1/0 vector
; by vec_merge) and borrow-out (operand 4, DI), the latter described as the
; ior of two unsigned comparisons.  As with the plain subtract there are
; v_subb and v_subbrev alternatives, each in a 4-byte vop2 and an 8-byte
; vop3b form.
(define_insn "subc<mode>3<exec_vcc>"
  [(set (match_operand:V_SI 0 "register_operand"    "= v, v, v, v")
	(minus:V_SI
	  (minus:V_SI
	    (vec_merge:V_SI
	      (vec_duplicate:V_SI (const_int 1))
	      (vec_duplicate:V_SI (const_int 0))
	      (match_operand:DI 3 "gcn_alu_operand" " cV,cVSv,cV,cVSv"))
	    (match_operand:V_SI 1 "gcn_alu_operand" " vA,  vA, v,  vA"))
	  (match_operand:V_SI 2 "gcn_alu_operand"   "  v,  vA,vA,  vA")))
   (set (match_operand:DI 4 "register_operand"	    "=cV,cVSg,cV,cVSg")
	(ior:DI (gtu:DI (minus:V_SI (minus:V_SI
				      (vec_merge:V_SI
					(vec_duplicate:V_SI (const_int 1))
					(vec_duplicate:V_SI (const_int 0))
					(match_dup 3))
				       (match_dup 1))
				     (match_dup 2))
			(match_dup 2))
		(ltu:DI (minus:V_SI (vec_merge:V_SI
				      (vec_duplicate:V_SI (const_int 1))
				      (vec_duplicate:V_SI (const_int 0))
				      (match_dup 3))
				    (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "4,8,4,8")])
1184
; 64-bit vector addition.  There is no single instruction ("#"); once all
; three operands can be split, the insn becomes an add of the low halves
; that writes its carry to vcc, followed by an add-with-carry of the high
; halves that consumes and rewrites vcc.
(define_insn_and_split "add<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"   "=  v")
	(plus:V_DI
	  (match_operand:V_DI 1 "register_operand" "%vDb")
	  (match_operand:V_DI 2 "gcn_alu_operand"  " vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx lhs_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx lhs_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx rhs_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx rhs_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    /* Low words first: this produces the carry used by the high words.  */
    emit_insn (gen_add<vnsi>3_vcc (dest_lo, lhs_lo, rhs_lo, carry));
    emit_insn (gen_addc<vnsi>3 (dest_hi, lhs_hi, rhs_hi, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1213
; 64-bit vector addition under an explicit EXEC mask (operand 4): lanes
; selected by the mask receive the sum, the others take operand 3.
;
; The insn is split into a low-half add that writes its carry to vcc and a
; high-half add-with-carry, each merged with the matching half of
; operand 3.  The split condition therefore has to cover every operand that
; is taken apart with gcn_operand_part: operands 0, 1, 2 and 3.  Operand 4
; is the DImode EXEC mask, which is passed on whole and never split.
(define_insn_and_split "add<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "=  v")
	(vec_merge:V_DI
	  (plus:V_DI
	    (match_operand:V_DI 1 "register_operand"		 "%vDb")
	    (match_operand:V_DI 2 "gcn_alu_operand"		 " vDb"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    /* Low halves: plain add, carry-out to vcc.  */
    emit_insn (gen_add<vnsi>3_vcc_exec
		(gcn_operand_part (<MODE>mode, operands[0], 0),
		 gcn_operand_part (<MODE>mode, operands[1], 0),
		 gcn_operand_part (<MODE>mode, operands[2], 0),
		 vcc,
		 gcn_operand_part (<MODE>mode, operands[3], 0),
		 operands[4]));
    /* High halves: add with vcc as both carry-in and carry-out.  */
    emit_insn (gen_addc<vnsi>3_exec
		(gcn_operand_part (<MODE>mode, operands[0], 1),
		 gcn_operand_part (<MODE>mode, operands[1], 1),
		 gcn_operand_part (<MODE>mode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (<MODE>mode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1250
; 64-bit vector subtraction.  Split, once all three operands can be split,
; into a low-half subtract that writes its borrow to vcc and a high-half
; subtract-with-borrow that consumes and rewrites vcc.
(define_insn_and_split "sub<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"  "= v,  v")
	(minus:V_DI
	  (match_operand:V_DI 1 "gcn_alu_operand" "vDb,  v")
	  (match_operand:V_DI 2 "gcn_alu_operand" "  v,vDb")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx borrow = gen_rtx_REG (DImode, VCC_REG);
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx lhs_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx lhs_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx rhs_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx rhs_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    /* Low words first: this produces the borrow used by the high words.  */
    emit_insn (gen_sub<vnsi>3_vcc (dest_lo, lhs_lo, rhs_lo, borrow));
    emit_insn (gen_subc<vnsi>3 (dest_hi, lhs_hi, rhs_hi, borrow, borrow));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1279
; 64-bit vector subtraction under an explicit EXEC mask (operand 4); lanes
; not selected take operand 3.  The insn condition requires at least one
; of the two sources to be a register.  Split into a low-half subtract and
; a high-half subtract-with-borrow, each merged with the matching half of
; operand 3.
(define_insn_and_split "sub<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "=  v,   v")
	(vec_merge:V_DI
	  (minus:V_DI
	    (match_operand:V_DI 1 "gcn_alu_operand"		 "vSvB,   v")
	    (match_operand:V_DI 2 "gcn_alu_operand"		 "   v,vSvB"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0,  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "   e,   e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[1])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx borrow = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx lhs_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx lhs_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx rhs_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx rhs_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx merge_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    rtx merge_hi = gcn_operand_part (<MODE>mode, operands[3], 1);

    emit_insn (gen_sub<vnsi>3_vcc_exec (dest_lo, lhs_lo, rhs_lo, borrow,
					merge_lo, exec));
    emit_insn (gen_subc<vnsi>3_exec (dest_hi, lhs_hi, rhs_hi, borrow, borrow,
				     merge_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1317
; Add a zero-extended 32-bit vector (operand 1) to a 64-bit vector
; (operand 2).  Split into a low-half add of operand 1 and the low half of
; operand 2, then an add-with-carry of the high half of operand 2 and
; constant zero.
(define_insn_and_split "add<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand"      "= v,  v")
	(plus:V_DI
	  (zero_extend:V_DI
	    (match_operand:<VnSI> 1 "gcn_alu_operand" " vA, vB"))
	  (match_operand:V_DI 2 "gcn_alu_operand"     "vDb,vDA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    emit_insn (gen_add<vnsi>3_vcc (dest_lo, operands[1], wide_lo, carry));
    /* The zero-extended operand contributes nothing to the high word.  */
    emit_insn (gen_addc<vnsi>3 (dest_hi, wide_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1345
; EXEC-masked form of the zero-extending 64-bit add: lanes selected by
; operand 4 receive zext (operand 1) + operand 2, the others take
; operand 3.  Split into masked low-half add and masked high-half
; add-with-carry against constant zero.
(define_insn_and_split "add<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "= v,  v")
	(vec_merge:V_DI
	  (plus:V_DI
	    (zero_extend:V_DI
	      (match_operand:<VnSI> 1 "gcn_alu_operand"		 " vA, vB"))
	    (match_operand:V_DI 2 "gcn_alu_operand"		 "vDb,vDA"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx merge_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
    rtx merge_hi = gcn_operand_part (<MODE>mode, operands[3], 1);

    emit_insn (gen_add<vnsi>3_vcc_exec (dest_lo, operands[1], wide_lo, carry,
					merge_lo, exec));
    emit_insn (gen_addc<vnsi>3_exec (dest_hi, wide_hi, const0_rtx,
				     carry, carry, merge_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1381
; Add a zero-extended, broadcast 32-bit scalar (operand 1) to a 64-bit
; vector (operand 2), with the carry register given explicitly as
; operand 3 (earlyclobber).  Split into a low-half broadcast add followed
; by a high-half add-with-carry against constant zero.
(define_insn_and_split "add<mode>3_vcc_zext_dup"
  [(set (match_operand:V_DI 0 "register_operand"    "=    v,    v")
	(plus:V_DI
	  (zero_extend:V_DI
	    (vec_duplicate:<VnSI>
	      (match_operand:SI 1 "gcn_alu_operand" "   BSv,  ASv")))
	  (match_operand:V_DI 2 "gcn_alu_operand"   "   vDA,  vDb")))
   (set (match_operand:DI 3 "register_operand"	    "=&SgcV,&SgcV")
	(ltu:DI (plus:V_DI
		  (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
		  (match_dup 2))
		(match_dup 1)))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[1], 0);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);

    emit_insn (gen_add<vnsi>3_vcc_dup (dest_lo, scalar_lo, wide_lo, carry));
    emit_insn (gen_addc<vnsi>3 (dest_hi, wide_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1413
; Convenience expander: same operation as add<mode>3_vcc_zext_dup with the
; carry operand fixed to the hard VCC register.
(define_expand "add<mode>3_zext_dup"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")]
  ""
  {
    rtx carry_reg = gen_rtx_REG (DImode, VCC_REG);
    rtx pattern = gen_add<mode>3_vcc_zext_dup (operands[0], operands[1],
					       operands[2], carry_reg);
    emit_insn (pattern);
    DONE;
  })
1425
; EXEC-masked form of add<mode>3_vcc_zext_dup.  Operand 4 supplies the
; value of unselected lanes and operand 5 is the EXEC mask; the carry
; result (operand 3) is and-ed with that mask.  Split into masked low-half
; broadcast add and masked high-half add-with-carry against zero.
(define_insn_and_split "add<mode>3_vcc_zext_dup_exec"
  [(set (match_operand:V_DI 0 "register_operand"	      "=    v,    v")
	(vec_merge:V_DI
	  (plus:V_DI
	    (zero_extend:V_DI
	      (vec_duplicate:<VnSI>
		(match_operand:SI 1 "gcn_alu_operand"	      "   ASv,  BSv")))
	    (match_operand:V_DI 2 "gcn_alu_operand"	      "   vDb,  vDA"))
	  (match_operand:V_DI 4 "gcn_register_or_unspec_operand" " U0,   U0")
	  (match_operand:DI 5 "gcn_exec_reg_operand"	      "     e,    e")))
   (set (match_operand:DI 3 "register_operand"		      "=&SgcV,&SgcV")
	(and:DI
	  (ltu:DI (plus:V_DI
		    (zero_extend:V_DI (vec_duplicate:<VnSI> (match_dup 1)))
		    (match_dup 2))
		  (match_dup 1))
	  (match_dup 5)))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[2])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx exec = operands[5];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[1], 0);
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx merge_lo = gcn_operand_part (<MODE>mode, operands[4], 0);
    rtx merge_hi = gcn_operand_part (<MODE>mode, operands[4], 1);

    emit_insn (gen_add<vnsi>3_vcc_dup_exec (dest_lo, scalar_lo, wide_lo,
					    carry, merge_lo, exec));
    emit_insn (gen_addc<vnsi>3_exec (dest_hi, wide_hi, const0_rtx,
				     carry, carry, merge_hi, exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1467
; Convenience expander: same operation as add<mode>3_vcc_zext_dup_exec
; with the carry operand fixed to the hard VCC register.  Note the operand
; renumbering: this expander's operands 3 and 4 become operands 4 and 5 of
; the underlying insn.
(define_expand "add<mode>3_zext_dup_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:SI 1 "gcn_alu_operand")
   (match_operand:V_DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx carry_reg = gen_rtx_REG (DImode, VCC_REG);
    rtx pattern
      = gen_add<mode>3_vcc_zext_dup_exec (operands[0], operands[1],
					  operands[2], carry_reg,
					  operands[3], operands[4]);
    emit_insn (pattern);
    DONE;
  })
1482
; Add a zero-extended 32-bit vector (operand 1) to a broadcast 64-bit
; scalar (operand 2), with the carry register given explicitly as
; operand 3 (earlyclobber).  Split into: low-half broadcast add; broadcast
; of the scalar's high half into the destination's high half; then an
; add-with-carry of that high half and constant zero.
(define_insn_and_split "add<mode>3_vcc_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"		   "=    v")
	(plus:V_DI
	  (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand" " DbSv"))))
   (set (match_operand:DI 3 "register_operand"			   "=&SgcV")
	(ltu:DI (plus:V_DI
		  (zero_extend:V_DI (match_dup 1))
		  (vec_duplicate:V_DI (match_dup 2)))
		(match_dup 1)))]
  ""
  "#"
  "gcn_can_split_p (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);

    emit_insn (gen_add<vnsi>3_vcc_dup (dest_lo, scalar_lo, operands[1],
				       carry));
    /* Seed the high word with the scalar, then fold in the carry.  */
    emit_insn (gen_vec_duplicate<vnsi> (dest_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (dest_hi, dest_hi, const0_rtx, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1512
; Convenience expander: same operation as add<mode>3_vcc_zext_dup2 with
; the carry operand fixed to the hard VCC register.
(define_expand "add<mode>3_zext_dup2"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")]
  ""
  {
    rtx carry_reg = gen_rtx_REG (DImode, VCC_REG);
    rtx pattern = gen_add<mode>3_vcc_zext_dup2 (operands[0], operands[1],
						operands[2], carry_reg);
    emit_insn (pattern);
    DONE;
  })
1524
; EXEC-masked form of add<mode>3_vcc_zext_dup2.  Operand 4 supplies the
; value of unselected lanes and operand 5 is the EXEC mask; the carry
; result (operand 3) is and-ed with that mask.  The split mirrors the
; unmasked pattern, using the _exec variant of each step.
(define_insn_and_split "add<mode>3_vcc_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"		    "=    v")
	(vec_merge:V_DI
	  (plus:V_DI
	    (zero_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
	    (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"  "BSv")))
	  (match_operand:V_DI 4 "gcn_register_or_unspec_operand"    "    U0")
	  (match_operand:DI 5 "gcn_exec_reg_operand"		    "     e")))
   (set (match_operand:DI 3 "register_operand"			    "=&SgcV")
	(and:DI
	  (ltu:DI (plus:V_DI
		    (zero_extend:V_DI (match_dup 1))
		    (vec_duplicate:V_DI (match_dup 2)))
		  (match_dup 1))
	  (match_dup 5)))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[4])"
  [(const_int 0)]
  {
    rtx carry = operands[3];
    rtx exec = operands[5];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);

    emit_insn (gen_add<vnsi>3_vcc_dup_exec
		(dest_lo, scalar_lo, operands[1], carry,
		 gcn_operand_part (<MODE>mode, operands[4], 0), exec));
    /* The merge source's high half is requested once per insn, as in the
       plain sequence, rather than shared between the two.  */
    emit_insn (gen_vec_duplicate<vnsi>_exec
		(dest_hi, scalar_hi,
		 gcn_operand_part (<MODE>mode, operands[4], 1), exec));
    emit_insn (gen_addc<vnsi>3_exec
		(dest_hi, dest_hi, const0_rtx, carry, carry,
		 gcn_operand_part (<MODE>mode, operands[4], 1), exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1566
; Convenience expander: same operation as add<mode>3_vcc_zext_dup2_exec
; with the carry operand fixed to the hard VCC register.  This expander's
; operands 3 and 4 become operands 4 and 5 of the underlying insn.
(define_expand "add<mode>3_zext_dup2_exec"
  [(match_operand:V_DI 0 "register_operand")
   (match_operand:<VnSI> 1 "gcn_alu_operand")
   (match_operand:DI 2 "gcn_alu_operand")
   (match_operand:V_DI 3 "gcn_register_or_unspec_operand")
   (match_operand:DI 4 "gcn_exec_reg_operand")]
  ""
  {
    rtx carry_reg = gen_rtx_REG (DImode, VCC_REG);
    rtx pattern
      = gen_add<mode>3_vcc_zext_dup2_exec (operands[0], operands[1],
					   operands[2], carry_reg,
					   operands[3], operands[4]);
    emit_insn (pattern);
    DONE;
  })
1581
; Add a sign-extended 32-bit vector (operand 1) to a broadcast 64-bit
; scalar (operand 2).  Operand 3 is a <VnSI> scratch that receives
; operand 1 shifted right arithmetically by 31, i.e. the high word of the
; sign extension.  The split then does a low-half broadcast add, broadcasts
; the scalar's high half into the destination's high half, and adds the
; scratch to it with carry.
(define_insn_and_split "add<mode>3_sext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"		      "= v")
	(plus:V_DI
	  (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"   "BSv"))))
   (clobber (match_scratch:<VnSI> 3				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx sign_word = operands[3];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);

    emit_insn (gen_ashr<vnsi>3 (sign_word, operands[1], GEN_INT (31)));
    emit_insn (gen_add<vnsi>3_vcc_dup (dest_lo, scalar_lo, operands[1],
				       carry));
    emit_insn (gen_vec_duplicate<vnsi> (dest_hi, scalar_hi));
    emit_insn (gen_addc<vnsi>3 (dest_hi, dest_hi, sign_word, carry, carry));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1609
; EXEC-masked form of add<mode>3_sext_dup2.  Operand 3 supplies the value
; of unselected lanes, operand 4 is the EXEC mask and operand 5 is the
; <VnSI> scratch for the sign word.  The scratch is computed with an
; undefined merge source, since its unselected lanes are never used.
(define_insn_and_split "add<mode>3_sext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"		       "= v")
	(vec_merge:V_DI
	  (plus:V_DI
	    (sign_extend:V_DI (match_operand:<VnSI> 1 "gcn_alu_operand" "vA"))
	    (vec_duplicate:V_DI (match_operand:DI 2 "gcn_alu_operand"  "BSv")))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand"       " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		       "  e")))
   (clobber (match_scratch:<VnSI> 5				       "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (<MODE>mode, operands[0])
   && gcn_can_split_p (<MODE>mode, operands[3])"
  [(const_int 0)]
  {
    rtx carry = gen_rtx_REG (DImode, VCC_REG);
    rtx exec = operands[4];
    rtx sign_word = operands[5];
    rtx dest_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx dest_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx scalar_lo = gcn_operand_part (DImode, operands[2], 0);
    rtx scalar_hi = gcn_operand_part (DImode, operands[2], 1);

    emit_insn (gen_ashr<vnsi>3_exec (sign_word, operands[1], GEN_INT (31),
				     gcn_gen_undef (<VnSI>mode), exec));
    emit_insn (gen_add<vnsi>3_vcc_dup_exec
		(dest_lo, scalar_lo, operands[1], carry,
		 gcn_operand_part (<MODE>mode, operands[3], 0), exec));
    /* The merge source's high half is requested once per insn, as in the
       plain sequence, rather than shared between the two.  */
    emit_insn (gen_vec_duplicate<vnsi>_exec
		(dest_hi, scalar_hi,
		 gcn_operand_part (<MODE>mode, operands[3], 1), exec));
    emit_insn (gen_addc<vnsi>3_exec
		(dest_hi, dest_hi, sign_word, carry, carry,
		 gcn_operand_part (<MODE>mode, operands[3], 1), exec));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1649
1650;; }}}
1651;; {{{ DS memory ALU: add/sub
1652
; Modes over which the DS memory add/sub patterns below are instantiated:
; DS_ARITH_MODE for the vector forms, DS_ARITH_SCALAR_MODE for the
; "_scalar" forms.
(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1655
1656;; FIXME: the vector patterns probably need RD expanded to a vector of
1657;;        addresses.  For now, the only way a vector can get into LDS is
1658;;        if the user puts it there manually.
1659;;
1660;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1661;;        checked to see if anything can ever use them.
1662
; Read-modify-write addition on DS memory (vector modes).  The insn
; condition requires operand 1 to be the same rtx as the destination, so
; the pattern only matches "mem = mem + reg"; the register addend is
; operand 2.
(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(plus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1672
; Scalar-mode counterpart of the DS memory addition: "mem = mem + reg"
; with operand 1 required to equal the destination.
(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(plus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      "%RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1683
; Read-modify-write subtraction on DS memory (vector modes):
; "mem = mem - reg", with operand 1 required to equal the destination.
(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1693
; Scalar-mode counterpart of the DS memory subtraction: "mem = mem - reg"
; with operand 1 required to equal the destination.
(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      " RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1704
; Reverse subtraction on DS memory (vector modes): "mem = reg - mem".
; The register (operand 2) is the minuend and the memory operand 1, which
; must equal the destination, is the subtrahend.
(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1714
; Scalar-mode counterpart of the DS reverse subtraction: "mem = reg - mem"
; with operand 1 required to equal the destination.
(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand" 
								      " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1725
1726;; }}}
1727;; {{{ ALU special case: mult
1728
; High part of a widening V_SI multiply: both sources are extended to
; <VnDI> (any_extend covers the signed and unsigned variants, selected
; through <su> and <sgnsuffix>), multiplied, shifted right by 32 and
; truncated back to V_SI.  Operand 2 is printed as the first source.
(define_insn "<su>mul<mode>3_highpart<exec>"
  [(set (match_operand:V_SI 0 "register_operand"        "=  v")
	(truncate:V_SI
	  (lshiftrt:<VnDI>
	    (mult:<VnDI>
	      (any_extend:<VnDI>
		(match_operand:V_SI 1 "gcn_alu_operand" "  %v"))
	      (any_extend:<VnDI>
		(match_operand:V_SI 2 "gcn_alu_operand" "vSvA")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1743
; Lane-wise multiply of single-register integer vectors.  All modes of
; V_INT_1REG share the one v_mul_lo_u32 instruction.
(define_insn "mul<mode>3<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"  "=   v")
	(mult:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "gcn_alu_operand" "%vSvA")
	  (match_operand:V_INT_1REG 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1753
;; Integer multiply of a vector by a scalar broadcast to every lane.
;; Operand 2 has the element mode (<SCALAR_MODE>) and appears in the RTL
;; wrapped in vec_duplicate; the output template is the same v_mul_lo_u32 as
;; the vector-by-vector form, with the scalar given directly as the operand.
(define_insn "mul<mode>3_dup<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"	     "=   v")
	(mult:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "gcn_alu_operand"	     "%vSvA")
	  (vec_duplicate:V_INT_1REG
	    (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand" "  SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1764
;; 64-bit-per-lane vector multiply.
;; The insn is kept whole (output template "#") until reload has completed
;; and is then split into 32-bit vector operations.  Operand 3 is a scratch
;; vector register used for the cross products; both the destination and the
;; scratch are early-clobber so that they never overlap the inputs, which are
;; read several times.
;;
;; With L = l_hi * 2^32 + l_lo and R = r_hi * 2^32 + r_lo, the product modulo
;; 2^64 is
;;   lo32 (l_lo * r_lo)
;;   + 2^32 * (hi32 (l_lo * r_lo) + lo32 (l_hi * r_lo) + lo32 (l_lo * r_hi)).
;; The l_hi * r_hi term has weight 2^64 and therefore contributes nothing.
(define_insn_and_split "mul<mode>3"
  [(set (match_operand:V_DI 0 "register_operand"  "=&v")
	(mult:V_DI
	  (match_operand:V_DI 1 "gcn_alu_operand" "% v")
	  (match_operand:V_DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:<VnSI> 3		  "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx tmp = operands[3];

    /* Low word, and the carry out of it into the high word.  */
    emit_insn (gen_mul<vnsi>3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umul<vnsi>3_highpart (out_hi, left_lo, right_lo));

    /* The two cross products; only their low 32 bits land in the high
       word.  The hi * hi product is deliberately not computed: all of it
       lies above bit 63.  */
    emit_insn (gen_mul<vnsi>3 (tmp, left_hi, right_lo));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    emit_insn (gen_mul<vnsi>3 (tmp, left_lo, right_hi));
    emit_insn (gen_add<vnsi>3 (out_hi, out_hi, tmp));
    DONE;
  })
1794
;; Masked form of the 64-bit-per-lane vector multiply.
;; Operand 4 is the execution mask; operand 3 supplies the values kept in the
;; lanes that are not selected and is either tied to the destination or an
;; unspec placeholder.  Operand 5 is an early-clobber scratch vector
;; register.  Split after reload into masked 32-bit operations.
;;
;; As in the unmasked pattern, only l_lo * r_lo and the low halves of the two
;; cross products contribute to a 64-bit result; l_hi * r_hi has weight 2^64
;; and is not computed.
(define_insn_and_split "mul<mode>3_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "=&v")
	(vec_merge:V_DI
	  (mult:V_DI
	    (match_operand:V_DI 1 "gcn_alu_operand"		 "% v")
	    (match_operand:V_DI 2 "gcn_alu_operand"		 "vDA"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "  e")))
   (clobber (match_scratch:<VnSI> 5				 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx out_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx left_lo = gcn_operand_part (<MODE>mode, operands[1], 0);
    rtx left_hi = gcn_operand_part (<MODE>mode, operands[1], 1);
    rtx right_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx right_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Values for the unselected lanes of the first write to each half of
       the destination.  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (<VnSI>mode);
      }
    else
      {
	old_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
	old_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }

    /* The scratch register needs no particular value in unselected
       lanes.  */
    rtx undef = gcn_gen_undef (<VnSI>mode);

    /* Low word, and the carry out of it into the high word.  */
    emit_insn (gen_mul<vnsi>3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umul<vnsi>3_highpart_exec (out_hi, left_lo, right_lo,
					      old_hi, exec));

    /* The two cross products.  The additions merge with out_hi itself so
       that unselected lanes keep what the highpart insn left there.  */
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mul<vnsi>3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_add<vnsi>3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1842
;; 64-bit-per-lane multiply whose first input is a zero-extended 32-bit
;; vector.  Because the upper word of that input is zero, only one cross
;; product is required.  Kept as "#" until reload has completed, then split;
;; operand 3 is an early-clobber scratch vector register.
(define_insn_and_split "mul<mode>3_zext"
  [(set (match_operand:V_DI 0 "register_operand"      "=&v")
	(mult:V_DI
	  (zero_extend:V_DI
	    (match_operand:<VnSI> 1 "gcn_alu_operand" "  v"))
	  (match_operand:V_DI 2 "gcn_alu_operand"     "vDA")))
   (clobber (match_scratch:<VnSI> 3		      "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    /* Halves of the destination and of the full-width multiplier.  */
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx scratch = operands[3];

    /* Low word plus its carry, then the single cross product.  */
    emit_insn (gen_mul<vnsi>3 (prod_lo, narrow, wide_lo));
    emit_insn (gen_umul<vnsi>3_highpart (prod_hi, narrow, wide_lo));
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, wide_hi));
    emit_insn (gen_add<vnsi>3 (prod_hi, prod_hi, scratch));
    DONE;
  })
1868
;; Masked form of the zero-extended 64-bit multiply.  Operand 4 is the
;; execution mask, operand 3 gives the values for unselected lanes (tied to
;; the destination or an unspec placeholder) and operand 5 is an
;; early-clobber scratch vector register.
(define_insn_and_split "mul<mode>3_zext_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "=&v")
	(vec_merge:V_DI
	  (mult:V_DI
	    (zero_extend:V_DI
	      (match_operand:<VnSI> 1 "gcn_alu_operand"		 "  v"))
	    (match_operand:V_DI 2 "gcn_alu_operand"		 "vDA"))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "  e")))
   (clobber (match_scratch:<VnSI> 5				 "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Previous contents for the lanes the mask leaves alone.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
	prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
	prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    /* Unselected lanes of the scratch register are left undefined.  */
    rtx dont_care = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (prod_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (prod_hi, narrow, wide_lo,
					      prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, wide_hi, dont_care,
				    mask));
    emit_insn (gen_add<vnsi>3_exec (prod_hi, prod_hi, scratch, prod_hi,
				    mask));
    DONE;
  })
1912
;; 64-bit-per-lane multiply of a zero-extended 32-bit vector by a DImode
;; scalar that is broadcast to every lane (vec_duplicate).  Split after
;; reload into the same three multiplies and one add as the vector-by-vector
;; zero-extended form; operand 3 is an early-clobber scratch register.
(define_insn_and_split "mul<mode>3_zext_dup2"
  [(set (match_operand:V_DI 0 "register_operand"      "= &v")
	(mult:V_DI
	  (zero_extend:V_DI
	    (match_operand:<VnSI> 1 "gcn_alu_operand" "   v"))
	  (vec_duplicate:V_DI
	    (match_operand:DI 2 "gcn_alu_operand"     "SvDA"))))
   (clobber (match_scratch:<VnSI> 3		      "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    /* NOTE(review): operand 2 is a DImode scalar, yet it is split using the
       vector mode; confirm that gcn_operand_part yields the intended
       halves for it.  */
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx scratch = operands[3];

    /* Low word plus its carry, then the single cross product.  */
    emit_insn (gen_mul<vnsi>3 (prod_lo, narrow, wide_lo));
    emit_insn (gen_umul<vnsi>3_highpart (prod_hi, narrow, wide_lo));
    emit_insn (gen_mul<vnsi>3 (scratch, narrow, wide_hi));
    emit_insn (gen_add<vnsi>3 (prod_hi, prod_hi, scratch));
    DONE;
  })
1939
;; Masked form of the zero-extended-vector by broadcast-DImode-scalar
;; multiply.  Operand 4 is the execution mask, operand 3 gives the values for
;; unselected lanes (tied to the destination or an unspec placeholder) and
;; operand 5 is an early-clobber scratch vector register.
(define_insn_and_split "mul<mode>3_zext_dup2_exec"
  [(set (match_operand:V_DI 0 "register_operand"		 "= &v")
	(vec_merge:V_DI
	  (mult:V_DI
	    (zero_extend:V_DI
	      (match_operand:<VnSI> 1 "gcn_alu_operand"		 "   v"))
	    (vec_duplicate:V_DI
	      (match_operand:DI 2 "gcn_alu_operand"		 "SvDA")))
	  (match_operand:V_DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		 "   e")))
   (clobber (match_scratch:<VnSI> 5				 "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx prod_lo = gcn_operand_part (<MODE>mode, operands[0], 0);
    rtx prod_hi = gcn_operand_part (<MODE>mode, operands[0], 1);
    rtx narrow = operands[1];
    /* NOTE(review): operand 2 is a DImode scalar, yet it is split using the
       vector mode; confirm that gcn_operand_part yields the intended
       halves for it.  */
    rtx wide_lo = gcn_operand_part (<MODE>mode, operands[2], 0);
    rtx wide_hi = gcn_operand_part (<MODE>mode, operands[2], 1);
    rtx mask = operands[4];
    rtx scratch = operands[5];

    /* Previous contents for the lanes the mask leaves alone.  */
    rtx prev_lo, prev_hi;
    if (GET_CODE (operands[3]) != UNSPEC)
      {
	prev_lo = gcn_operand_part (<MODE>mode, operands[3], 0);
	prev_hi = gcn_operand_part (<MODE>mode, operands[3], 1);
      }
    else
      prev_lo = prev_hi = gcn_gen_undef (<VnSI>mode);

    /* Unselected lanes of the scratch register are left undefined.  */
    rtx dont_care = gcn_gen_undef (<VnSI>mode);

    emit_insn (gen_mul<vnsi>3_exec (prod_lo, narrow, wide_lo, prev_lo, mask));
    emit_insn (gen_umul<vnsi>3_highpart_exec (prod_hi, narrow, wide_lo,
					      prev_hi, mask));
    emit_insn (gen_mul<vnsi>3_exec (scratch, narrow, wide_hi, dont_care,
				    mask));
    emit_insn (gen_add<vnsi>3_exec (prod_hi, prod_hi, scratch, prod_hi,
				    mask));
    DONE;
  })
1984
1985;; }}}
1986;; {{{ ALU generic case
1987
; Code iterators used by the generic ALU patterns in this section: each
; pattern written with one of these is instantiated once per listed rtx code.
; bitop: two-input bitwise operations.
(define_code_iterator bitop [and ior xor])
; shiftop: left shift, logical right shift, arithmetic right shift.
(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
; minmaxop: signed and unsigned minimum/maximum.
(define_code_iterator minmaxop [smin smax umin umax])
1991
;; One-input bitwise operation on a single-register integer vector.
;; The operation comes from the bitunop code iterator, which is defined
;; outside this section; <expander> and <mnemonic> supply the pattern name
;; and the instruction name for each code.
(define_insn "<expander><mode>2<exec>"
  [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand"    "=  v")
	(bitunop:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2000
;; and/ior/xor on single-register integer vectors.  Two alternatives:
;;  1. register destination, emitted as a VALU instruction (type vop2);
;;  2. DS memory destination ("RD") with operand 1 tied to the destination
;;     ("0"), emitted as a ds_ read-modify-write using register operand 2.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:V_INT_1REG 0 "gcn_valu_dst_operand"	 "=  v,RD")
	(bitop:V_INT_1REG
	  (match_operand:V_INT_1REG 1 "gcn_valu_src0_operand"	 "%  v, 0")
	  (match_operand:V_INT_1REG 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2012
;; and/ior/xor on 64-bit-per-lane vectors.
;; The DS memory alternative is emitted directly as one ds_ instruction.
;; The register alternative is output as "#" and, once reload has completed
;; and the destination is not a DS memory operand, is split into two
;; independent operations of the same code on the low and high halves.
(define_insn_and_split "<expander><mode>3"
  [(set (match_operand:V_DI 0 "gcn_valu_dst_operand"	   "=  v,RD")
	(bitop:V_DI
	  (match_operand:V_DI 1 "gcn_valu_src0_operand"    "%  v,RD")
	  (match_operand:V_DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 3)
	(bitop:<VnSI> (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
	(bitop:<VnSI> (match_dup 6) (match_dup 8)))]
  {
    /* Operands 3 to 8 are the halves of operands 0 to 2, low half first:
       3,4 from the destination, 5,6 from the first input and 7,8 from the
       second input.  */
    for (int src = 0; src != 3; src++)
      for (int half = 0; half != 2; half++)
	operands[3 + 2 * src + half]
	  = gcn_operand_part (<MODE>mode, operands[src], half);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2037
;; Masked and/ior/xor on 64-bit-per-lane vectors.  Operand 4 is the execution
;; mask and operand 3 the value merged into unselected lanes.
;; The insn condition accepts a memory destination only when it is the same
;; rtx as operand 1 and operand 2 is a register.  The register alternative is
;; split after reload into two masked operations on the low and high halves.
(define_insn_and_split "<expander><mode>3_exec"
  [(set (match_operand:V_DI 0 "gcn_valu_dst_operand"		  "=  v,RD")
	(vec_merge:V_DI
	  (bitop:V_DI
	    (match_operand:V_DI 1 "gcn_valu_src0_operand"	  "%  v,RD")
	    (match_operand:V_DI 2 "gcn_valu_src1com_operand"	  "vSvB, v"))
	  (match_operand:V_DI 3 "gcn_register_ds_or_unspec_operand" "U0,U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e, e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1])
       && register_operand (operands[2], VOIDmode))"
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], <MODE>mode))"
  [(set (match_dup 5)
	(vec_merge:<VnSI>
	  (bitop:<VnSI> (match_dup 7) (match_dup 9))
	  (match_dup 11)
	  (match_dup 4)))
   (set (match_dup 6)
	(vec_merge:<VnSI>
	  (bitop:<VnSI> (match_dup 8) (match_dup 10))
	  (match_dup 12)
	  (match_dup 4)))]
  {
    /* Operands 5 to 12 are the halves of operands 0 to 3, low half first:
       5,6 from the destination, 7,8 and 9,10 from the two inputs and 11,12
       from the merge value.  */
    for (int src = 0; src != 4; src++)
      for (int half = 0; half != 2; half++)
	operands[5 + 2 * src + half]
	  = gcn_operand_part (<MODE>mode, operands[src], half);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2075
;; Shift of a QI or HI element vector by a scalar SImode count.
;; There is no insn here for the narrow element sizes, so the input is
;; widened to the SI element vector mode, shifted with the corresponding
;; pattern for that mode, and the result is narrowed again.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand"  "= v")
	(shiftop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_alu_operand" "  v")
	  (vec_duplicate:V_QIHI
	    (match_operand:SI 2 "gcn_alu_operand"   "SvB"))))]
  ""
  {
    /* The code attribute expands to the bare rtx code name; this local
       enum gives those names values so that they can be compared.  */
    enum {ashift, lshiftrt, ashiftrt};
    /* Only the logical right shift is treated as unsigned.  */
    const bool zero_fill_p = (<code> == lshiftrt);
    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx count = gen_reg_rtx (SImode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_fill_p);
    convert_move (count, operands[2], zero_fill_p);
    emit_insn (gen_<expander><vnsi>3 (wide_dst, wide_src, count));
    convert_move (operands[0], wide_dst, zero_fill_p);
    DONE;
  })
2096
;; Shift of an SI element vector by a scalar SImode count, which appears in
;; the RTL as a vec_duplicate.  The instruction comes from <revmnemonic> and
;; takes the count (%2) before the value being shifted (%1).
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:V_SI 0 "register_operand"  "= v")
	(shiftop:V_SI
	  (match_operand:V_SI 1 "gcn_alu_operand" "  v")
	  (vec_duplicate:V_SI
	    (match_operand:SI 2 "gcn_alu_operand"  "SvB"))))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2107
;; Shift of a QI or HI element vector by a per-lane count vector.
;; Both the value and the count are widened to the SI element vector mode,
;; shifted with the corresponding pattern for that mode, and the result is
;; narrowed again.
(define_expand "v<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "register_operand"  "=v")
	(shiftop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_alu_operand" " v")
	  (match_operand:V_QIHI 2 "gcn_alu_operand" "vB")))]
  ""
  {
    /* The code attribute expands to the bare rtx code name; this local
       enum gives those names values so that they can be compared.  */
    enum {ashift, lshiftrt, ashiftrt};
    /* Only the logical right shift is treated as unsigned.  */
    const bool zero_fill_p = (<code> == lshiftrt);
    rtx wide_src = gen_reg_rtx (<VnSI>mode);
    rtx wide_count = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_src, operands[1], zero_fill_p);
    convert_move (wide_count, operands[2], zero_fill_p);
    emit_insn (gen_v<expander><vnsi>3 (wide_dst, wide_src, wide_count));
    convert_move (operands[0], wide_dst, zero_fill_p);
    DONE;
  })
2127
;; Shift of an SI element vector by a per-lane count vector.  As with the
;; scalar-count form, the count (%2) is printed before the value (%1).
(define_insn "v<expander><mode>3<exec>"
  [(set (match_operand:V_SI 0 "register_operand"  "=v")
	(shiftop:V_SI
	  (match_operand:V_SI 1 "gcn_alu_operand" " v")
	  (match_operand:V_SI 2 "gcn_alu_operand" "vB")))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2137
;; Minimum/maximum on QI or HI element vectors.  The inputs are widened to
;; the SI element vector mode (zero-extended for umin/umax, sign-extended
;; for smin/smax), the operation is done in that mode, and the result is
;; narrowed again.
(define_expand "<expander><mode>3"
  [(set (match_operand:V_QIHI 0 "gcn_valu_dst_operand")
	(minmaxop:V_QIHI
	  (match_operand:V_QIHI 1 "gcn_valu_src0_operand")
	  (match_operand:V_QIHI 2 "gcn_valu_src1com_operand")))]
  ""
  {
    /* The code attribute expands to the bare rtx code name; this local
       enum gives those names values so that they can be compared.  */
    enum {smin, umin, smax, umax};
    const bool zero_extend_p = (<code> == umin || <code> == umax);
    rtx wide_a = gen_reg_rtx (<VnSI>mode);
    rtx wide_b = gen_reg_rtx (<VnSI>mode);
    rtx wide_dst = gen_reg_rtx (<VnSI>mode);

    convert_move (wide_a, operands[1], zero_extend_p);
    convert_move (wide_b, operands[2], zero_extend_p);
    emit_insn (gen_<code><vnsi>3 (wide_dst, wide_a, wide_b));
    convert_move (operands[0], wide_dst, zero_extend_p);
    DONE;
  })
2157
;; Minimum/maximum on SI element vectors.  Two alternatives:
;;  1. register destination, emitted as a VALU instruction (type vop2);
;;  2. DS memory destination ("RD") with operand 1 tied to the destination,
;;     emitted as a ds_ read-modify-write using register operand 2.
;; Note that the name is built from <vnsi> while the operands iterate over
;; V_SI.
(define_insn "<expander><vnsi>3<exec>"
  [(set (match_operand:V_SI 0 "gcn_valu_dst_operand"	   "=  v,RD")
	(minmaxop:V_SI
	  (match_operand:V_SI 1 "gcn_valu_src0_operand"    "%  v, 0")
	  (match_operand:V_SI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2169
2170;; }}}
2171;; {{{ FP binops - special cases
2172
2173; GCN does not directly provide a DFmode subtract instruction, so we do it by
2174; adding the negated second operand to the first.
2175
;; Vector DFmode subtract, emitted as v_add_f64 with operand 2 negated.
;; The two alternatives differ only in which input may be something other
;; than a vector register; the non-register input is printed first.
(define_insn "sub<mode>3<exec>"
  [(set (match_operand:V_DF 0 "register_operand"  "=  v,   v")
	(minus:V_DF
	  (match_operand:V_DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V_DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2187
;; Scalar DFmode subtract, emitted as v_add_f64 with operand 2 negated, in
;; the same two alternatives as the vector pattern.
;; NOTE(review): the other subtract patterns in this file are named
;; sub<mode>3; this one is "subdf" without the trailing 3.  If it is meant
;; to provide the standard DFmode subtract it would need to be "subdf3" --
;; confirm before renaming, since the name also determines the generator
;; function.
(define_insn "subdf"
  [(set (match_operand:DF 0 "register_operand"  "=  v,   v")
	(minus:DF
	  (match_operand:DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2199
2200;; }}}
2201;; {{{ FP binops - generic
2202
; Code iterators for the generic floating-point binary patterns.
; comm_fp: the commutative operations.
(define_code_iterator comm_fp [plus mult smin smax])
; nocomm_fp: the non-commutative operation(s).
(define_code_iterator nocomm_fp [minus])
; all_fp: the union of the two lists above.
(define_code_iterator all_fp [plus mult minus smin smax])
2206
;; Commutative floating-point binary operations (plus, mult, smin, smax) on
;; vectors.  Operand 1 must be a vector register; operand 2 may use any of
;; the "vSvB" alternatives and is printed first.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:V_FP 0 "register_operand"  "=  v")
	(comm_fp:V_FP
	  (match_operand:V_FP 1 "gcn_alu_operand" "%  v")
	  (match_operand:V_FP 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2216
;; Commutative floating-point binary operations on scalars held in vector
;; registers.  The first alternative is the ordinary register form.
;; NOTE(review): the second alternative has an "RL" destination with operand
;; 1 tied to it and is typed "ds", yet its template is a v_ mnemonic that
;; prints %1 and never %2, and "length" has one value for two alternatives.
;; Confirm whether this alternative is intended and reachable.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP 0 "gcn_valu_dst_operand"    "=  v,  RL")
	(comm_fp:FP
	  (match_operand:FP 1 "gcn_valu_src0_operand" "%  v,   0")
	  (match_operand:FP 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
  v_<mnemonic>0\t%0, %2, %1
  v_<mnemonic>0\t%0, %1%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])
2228
;; Non-commutative floating-point binary operation (minus) on
;; single-register FP vectors.  When operand 2 is the non-register input,
;; the second alternative uses <revmnemonic> with the operands printed in
;; swapped order.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:V_FP_1REG 0 "register_operand"  "=  v,   v")
	(nocomm_fp:V_FP_1REG
	  (match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V_FP_1REG 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2240
;; Scalar counterpart of the non-commutative FP pattern above, for the
;; FP_1REG modes, with the same two alternatives.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_1REG 0 "register_operand"  "=  v,   v")
	(nocomm_fp:FP_1REG
	  (match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:FP_1REG 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2252
2253;; }}}
2254;; {{{ FP unops
2255
;; Scalar floating-point absolute value, emitted as an add of 0 and the
;; operand with the |...| source modifier applied.
(define_insn "abs<mode>2"
  [(set (match_operand:FP 0 "register_operand"		 "=v")
	(abs:FP (match_operand:FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2263
;; Vector floating-point absolute value, emitted as an add of 0 and the
;; operand with the |...| source modifier applied.
(define_insn "abs<mode>2<exec>"
  [(set (match_operand:V_FP 0 "register_operand"   "=v")
	(abs:V_FP
	  (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2272
;; Vector floating-point negation, emitted as an add of 0 and the negated
;; operand.
(define_insn "neg<mode>2<exec>"
  [(set (match_operand:V_FP 0 "register_operand"   "=v")
	(neg:V_FP
	  (match_operand:V_FP 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2281
;; Vector floating-point square root using v_sqrt.  Only available when
;; flag_unsafe_math_optimizations is set.
(define_insn "sqrt<mode>2<exec>"
  [(set (match_operand:V_FP 0 "register_operand"  "=  v")
	(sqrt:V_FP
	  (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2290
;; Scalar floating-point square root using v_sqrt.  Only available when
;; flag_unsafe_math_optimizations is set.
(define_insn "sqrt<mode>2"
  [(set (match_operand:FP 0 "register_operand"  "=  v")
	(sqrt:FP
	  (match_operand:FP 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2299
2300;; }}}
2301;; {{{ FP fused multiply and add
2302
;; Vector fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; The multiplicands are marked commutative ("%").  In each alternative a
;; different one of the inputs is given the widest constraint ("vSvA").
(define_insn "fma<mode>4<exec>"
  [(set (match_operand:V_FP 0 "register_operand"  "=  v,   v")
	(fma:V_FP
	  (match_operand:V_FP 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:V_FP 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:V_FP 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2313
;; Vector fused multiply-add with the second multiplicand negated:
;; operand 0 = operand 1 * (-operand 2) + operand 3.  The negation is
;; expressed with a "-" source modifier in the output template.  The
;; multiplicands are not marked commutative here, so there are three
;; alternatives, one per input that may take the widest constraint.
(define_insn "fma<mode>4_negop2<exec>"
  [(set (match_operand:V_FP 0 "register_operand"    "=  v,   v,   v")
	(fma:V_FP
	  (match_operand:V_FP 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:V_FP
	    (match_operand:V_FP 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:V_FP 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2325
;; Scalar fused multiply-add: operand 0 = operand 1 * operand 2 + operand 3.
;; Same alternatives as the vector pattern.
(define_insn "fma<mode>4"
  [(set (match_operand:FP 0 "register_operand"  "=  v,   v")
	(fma:FP
	  (match_operand:FP 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:FP 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:FP 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2336
;; Scalar fused multiply-add with the second multiplicand negated:
;; operand 0 = operand 1 * (-operand 2) + operand 3.  Same alternatives as
;; the vector pattern.
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:FP 0 "register_operand"    "=  v,   v,   v")
	(fma:FP
	  (match_operand:FP 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:FP
	    (match_operand:FP 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:FP 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2348
2349;; }}}
2350;; {{{ FP division
2351
;; Vector reciprocal, written in RTL as a division whose numerator is the
;; constant 1 (converted to the element mode and broadcast).  Emitted as
;; v_rcp.
(define_insn "recip<mode>2<exec>"
  [(set (match_operand:V_FP 0 "register_operand"  "=  v")
	(div:V_FP
	  (vec_duplicate:V_FP (float:<SCALAR_MODE> (const_int 1)))
	  (match_operand:V_FP 1 "gcn_alu_operand" "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2361
;; Scalar reciprocal, written in RTL as a division whose numerator is the
;; constant 1 converted to the FP mode.  Emitted as v_rcp.
(define_insn "recip<mode>2"
  [(set (match_operand:FP 0 "register_operand"	 "=  v")
	(div:FP
	  (float:FP (const_int 1))
	  (match_operand:FP 1 "gcn_alu_operand"	 "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2371
2372;; Do division via a = b * 1/c
2373;; The v_rcp_* instructions are not sufficiently accurate on their own,
2374;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2375;; which the ISA manual says is enough to improve the reciprocal accuracy.
2376;;
2377;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2378
;; Vector floating-point division, enabled only with flag_reciprocal_math.
;; Operand 0 = operand 1 / operand 2, computed as operand 1 * (1 / operand 2):
;;   initrcp = rcp (c)                    -- hardware estimate
;;   fma     = 2 - initrcp * c
;;   rcp     = initrcp * fma              -- one Newton-Raphson step
;;   result  = operand 1 * rcp
;; When the numerator is the constant 1.0 the refined reciprocal is itself
;; the answer, so it is written straight to operand 0 and the final multiply
;; is omitted.
(define_expand "div<mode>3"
  [(match_operand:V_FP 0 "gcn_valu_dst_operand")
   (match_operand:V_FP 1 "gcn_valu_src0_operand")
   (match_operand:V_FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* A pure reciprocal has numerator +1.0.  The test must not be against
       -1.0: that would return +1/x for -1.0/x and would miss the real
       reciprocal case.  Only element 0 of the constant vector is
       inspected.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
		   && real_identical
		        (CONST_DOUBLE_REAL_VALUE
			  (CONST_VECTOR_ELT (operands[1], 0)), &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2410
;; Scalar floating-point division, enabled only with flag_reciprocal_math.
;; Operand 0 = operand 1 / operand 2, computed as operand 1 * (1 / operand 2):
;;   initrcp = rcp (c)                    -- hardware estimate
;;   fma     = 2 - initrcp * c
;;   rcp     = initrcp * fma              -- one Newton-Raphson step
;;   result  = operand 1 * rcp
;; When the numerator is the constant 1.0 the refined reciprocal is itself
;; the answer, so it is written straight to operand 0 and the final multiply
;; is omitted.
(define_expand "div<mode>3"
  [(match_operand:FP 0 "gcn_valu_dst_operand")
   (match_operand:FP 1 "gcn_valu_src0_operand")
   (match_operand:FP 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = const_double_from_real_value (dconst2, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* A pure reciprocal has numerator +1.0.  The test must not be against
       -1.0: that would return +1/x for -1.0/x and would miss the real
       reciprocal case.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
				      &dconst1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2440
2441;; }}}
2442;; {{{ Int/FP conversions
2443
; Scalar source and destination modes for the conversion patterns.  The two
; lists are identical; separate iterators are needed so that a pattern can
; range over every (from, to) pair.
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])

; Vector conversion modes: all of them, the floating-point ones, and the
; integer ones.
(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF])
(define_mode_iterator VCVT_IMODE [V64HI V64SI])

; The conversion rtx codes handled by the patterns below.
(define_code_iterator cvt_op [fix unsigned_fix
			      float unsigned_float
			      float_extend float_truncate])
; Prefix of the pattern name for each conversion code.
(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
			    (float "float") (unsigned_float "floatuns")
			    (float_extend "extend") (float_truncate "trunc")])
; Operand-modifier text appended to "v_cvt" in the output template for each
; conversion code; the unsigned codes use %u for their integer side.
(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
				(float "%i0%i1") (unsigned_float "%i0%u1")
				(float_extend "%i0%i1")
				(float_truncate "%i0%i1")])
2461
;; Scalar int/FP and FP/FP conversions.  The pattern is instantiated for
;; every combination of conversion code, source mode and destination mode;
;; the insn condition (gcn_valid_cvt_p) selects the combinations that are
;; enabled.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set (match_operand:CVT_TO_MODE 0 "register_operand"	   "=  v")
	(cvt_op:CVT_TO_MODE
	  (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2471
;; Vector conversions whose destination is a floating-point vector mode
;; (from any of the VCVT_MODE source modes).  The insn condition
;; (gcn_valid_cvt_p) selects the combinations that are enabled.
(define_insn "<cvt_name><VCVT_MODE:mode><VCVT_FMODE:mode>2<exec>"
  [(set (match_operand:VCVT_FMODE 0 "register_operand" "=  v")
	(cvt_op:VCVT_FMODE
	  (match_operand:VCVT_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_MODE:MODE>mode, <VCVT_FMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2481
;; Vector conversions from a floating-point vector mode to an integer vector
;; mode.  The insn condition (gcn_valid_cvt_p) selects the combinations that
;; are enabled.
(define_insn "<cvt_name><VCVT_FMODE:mode><VCVT_IMODE:mode>2<exec>"
  [(set (match_operand:VCVT_IMODE 0 "register_operand"  "=  v")
	(cvt_op:VCVT_IMODE
	  (match_operand:VCVT_FMODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FMODE:MODE>mode, <VCVT_IMODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2491
2492;; }}}
2493;; {{{ Int/int conversions
2494
; The two integer conversions that share one pattern below: truncation and
; zero extension.
(define_code_iterator zero_convert [truncate zero_extend])
; Pattern-name prefix for each integer conversion code.
(define_code_attr convop [
	(sign_extend "extend")
	(zero_extend "zero_extend")
	(truncate "trunc")])
2500
;; Truncation and zero extension between the single-register integer vector
;; modes.  Emitted as one SDWA move: src0_sel is taken from the source
;; mode's sdwa attribute, dst_sel from the destination mode's, and
;; dst_unused is UNUSED_PAD.
(define_insn "<convop><V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"      "=v")
        (zero_convert:V_INT_1REG
	  (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
  ""
  "v_mov_b32_sdwa\t%0, %1 dst_sel:<V_INT_1REG:sdwa> dst_unused:UNUSED_PAD src0_sel:<V_INT_1REG_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
2509
;; Sign extension between the single-register integer vector modes.
;; Emitted as one SDWA move with the sext() source modifier; src0_sel is
;; taken from the source mode's sdwa attribute and no dst_sel is given.
(define_insn "extend<V_INT_1REG_ALT:mode><V_INT_1REG:mode>2<exec>"
  [(set (match_operand:V_INT_1REG 0 "register_operand"	    "=v")
        (sign_extend:V_INT_1REG
	  (match_operand:V_INT_1REG_ALT 1 "gcn_alu_operand" " v")))]
  ""
  "v_mov_b32_sdwa\t%0, sext(%1) src0_sel:<V_INT_1REG_ALT:sdwa>"
  [(set_attr "type" "vop_sdwa")
   (set_attr "length" "8")])
2518
2519;; GCC can already do these for scalar types, but not for vector types.
2520;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2521;; so there must be a few tricks here.
2522
;; Truncate a 64-bit-per-lane vector to a single-register integer vector.
;; Split after reload: only the low half of the input matters.  It is either
;; truncated further with the SI-to-narrower pattern, or simply copied when
;; the destination already has SI elements.
(define_insn_and_split "trunc<vndi><mode>2"
  [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
	(truncate:V_INT_1REG
	  (match_operand:<VnDI> 1 "gcn_alu_operand"     " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx out = operands[0];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2 (out, inlo));
    else
      emit_move_insn (out, inlo);

    /* Everything has been emitted by hand.  Without DONE the code would
       fall through and additionally emit the (const_int 0) placeholder
       template as an insn of its own.  */
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2542
;; Masked truncation of a 64-bit-per-lane vector to a single-register
;; integer vector.  Operand 3 is the execution mask and operand 2 the value
;; merged into unselected lanes.  Split after reload into a masked narrowing
;; of the low half, or a masked move when the destination has SI elements.
(define_insn_and_split "trunc<vndi><mode>2_exec"
  [(set (match_operand:V_INT_1REG 0 "register_operand"		  "=v")
	(vec_merge:V_INT_1REG
	  (truncate:V_INT_1REG
	    (match_operand:<VnDI> 1 "gcn_alu_operand"		  " v"))
	  (match_operand:V_INT_1REG 2 "gcn_alu_or_unspec_operand" "U0")
	  (match_operand:DI 3 "gcn_exec_operand"		  " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out = operands[0];
    rtx inlo = gcn_operand_part (<VnDI>mode, operands[1], 0);
    rtx merge = operands[2];
    rtx exec = operands[3];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_trunc<vnsi><mode>2_exec (out, inlo, merge, exec));
    else
      emit_insn (gen_mov<mode>_exec (out, inlo, merge, exec));

    /* Everything has been emitted by hand.  Without DONE the code would
       fall through and additionally emit the (const_int 0) placeholder
       template as an insn of its own.  */
    DONE;
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "4")])
2567
;; Sign or zero extension of a single-register integer vector to a
;; 64-bit-per-lane vector.  Split after reload:
;;  - the low half is the input extended to SI elements (or a plain copy
;;    when the input already has SI elements);
;;  - the high half is the low half shifted right arithmetically by 31 for
;;    sign extension, or a broadcast zero for zero extension.
(define_insn_and_split "<convop><mode><vndi>2"
  [(set (match_operand:<VnDI> 0 "register_operand"	"=v")
	(any_extend:<VnDI>
	  (match_operand:V_INT_1REG 1 "gcn_alu_operand" " v")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2 (outlo, in));
    else
      emit_move_insn (outlo, in);
    /* The su attribute expands to s or u, giving a character constant.  */
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3 (outhi, outlo, GEN_INT (31)));
    else
      emit_insn (gen_vec_duplicate<vnsi> (outhi, const0_rtx));

    /* Everything has been emitted by hand.  Without DONE the code would
       fall through and additionally emit the (const_int 0) placeholder
       template as an insn of its own.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2592
;; Masked sign or zero extension of a single-register integer vector to a
;; 64-bit-per-lane vector.  Operand 3 is the execution mask and operand 2
;; the value merged into unselected lanes; its two halves are used as the
;; merge values for the two halves of the result.  Split after reload in the
;; same way as the unmasked pattern.
(define_insn_and_split "<convop><mode><vndi>2_exec"
  [(set (match_operand:<VnDI> 0 "register_operand"	     "=v")
	(vec_merge:<VnDI>
	  (any_extend:<VnDI>
	    (match_operand:V_INT_1REG 1 "gcn_alu_operand"    " v"))
	  (match_operand:<VnDI> 2 "gcn_alu_or_unspec_operand" "U0")
	  (match_operand:DI 3 "gcn_exec_operand"	     " e")))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx outlo = gcn_operand_part (<VnDI>mode, operands[0], 0);
    rtx outhi = gcn_operand_part (<VnDI>mode, operands[0], 1);
    rtx in = operands[1];
    rtx mergelo = gcn_operand_part (<VnDI>mode, operands[2], 0);
    rtx mergehi = gcn_operand_part (<VnDI>mode, operands[2], 1);
    rtx exec = operands[3];

    if (<MODE>mode != <VnSI>mode)
      emit_insn (gen_<convop><mode><vnsi>2_exec (outlo, in, mergelo, exec));
    else
      emit_insn (gen_mov<mode>_exec (outlo, in, mergelo, exec));
    /* The su attribute expands to s or u, giving a character constant.  */
    if ('<su>' == 's')
      emit_insn (gen_ashr<vnsi>3_exec (outhi, outlo, GEN_INT (31), mergehi,
				       exec));
    else
      emit_insn (gen_vec_duplicate<vnsi>_exec (outhi, const0_rtx, mergehi,
					       exec));

    /* Everything has been emitted by hand.  Without DONE the code would
       fall through and additionally emit the (const_int 0) placeholder
       template as an insn of its own.  */
    DONE;
  }
  [(set_attr "type" "mult")
   (set_attr "length" "12")])
2625
2626;; }}}
2627;; {{{ Vector comparison/merge
2628
; Vector comparison of two V_noQI operands producing a DImode result in
; operand 0; operand 1 is the comparison operator, printed through %E1.
; Alternatives 0-3 are VOPC encodings that print "vcc" as the destination
; (v_cmp for 0-1, v_cmpx for 2-3, the latter also needing the operand 4
; scratch); alternatives 4-5 are VOP3A encodings that print operand 0.
; Alternatives whose first input uses constraint B have length 8 rather than 4.
2629(define_insn "vec_cmp<mode>di"
2630  [(set (match_operand:DI 0 "register_operand"	      "=cV,cV,  e, e,Sg,Sg")
2631	(match_operator:DI 1 "gcn_fp_compare_operator"
2632	  [(match_operand:V_noQI 2 "gcn_alu_operand"  "vSv, B,vSv, B, v,vA")
2633	   (match_operand:V_noQI 3 "gcn_vop3_operand" "  v, v,  v, v,vA, v")]))
2634   (clobber (match_scratch:DI 4			      "= X, X, cV,cV, X, X"))]
2635  ""
2636  "@
2637   v_cmp%E1\tvcc, %2, %3
2638   v_cmp%E1\tvcc, %2, %3
2639   v_cmpx%E1\tvcc, %2, %3
2640   v_cmpx%E1\tvcc, %2, %3
2641   v_cmp%E1\t%0, %2, %3
2642   v_cmp%E1\t%0, %2, %3"
2643  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2644   (set_attr "length" "4,8,4,8,8,8")])
2645
; Unsigned comparison of V_INT_noQI vectors into a DImode result.
; Operands: 0 = result, 1 = comparison operator, 2 and 3 = inputs.
; Simply forwards all four operands to vec_cmp<mode>di.
2646(define_expand "vec_cmpu<mode>di"
2647  [(match_operand:DI 0 "register_operand")
2648   (match_operator 1 "gcn_compare_operator"
2649     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
2650      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])]
2651  ""
2652  {
2653    /* Unsigned comparisons use the same patterns as signed comparisons,
2654       except that they use unsigned operators (e.g. LTU vs LT).
2655       The '%E1' directive then does the Right Thing.  */
2656    emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
2657				    operands[3]));
2658    DONE;
2659  })
2660
2661; There's no instruction for 8-bit vector comparison, so we need to extend.
; Both V_QI inputs are widened (sign or zero, following any_extend) into new
; <VnSI> pseudos and the comparison is then done by vec_cmp<vnsi>di.
; Only available while new pseudos can still be created.
2662(define_expand "vec_cmp<u><mode>di"
2663  [(match_operand:DI 0 "register_operand")
2664   (match_operator 1 "gcn_compare_operator"
2665     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
2666      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])]
2667  "can_create_pseudo_p ()"
2668  {
    /* Temporaries holding the widened inputs.  */
2669    rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
2670    rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
2671
    /* Widen each input, then compare the widened values with the original
       operator.  */
2672    emit_insn (gen_<expander><mode><vnsi>2 (sitmp1, operands[2]));
2673    emit_insn (gen_<expander><mode><vnsi>2 (sitmp2, operands[3]));
2674    emit_insn (gen_vec_cmp<vnsi>di (operands[0], operands[1], sitmp1, sitmp2));
2675    DONE;
2676  })
2677
; Masked vector comparison: the DImode result in operand 0 is the comparison
; of operands 2 and 3 (operator 1) ANDed with the DImode mask in operand 4.
; The alternatives and output templates mirror vec_cmp<mode>di; the scratch
; is operand 5 here.
2678(define_insn "vec_cmp<mode>di_exec"
2679  [(set (match_operand:DI 0 "register_operand"	       "=cV,cV,  e, e,Sg,Sg")
2680	(and:DI
2681	  (match_operator 1 "gcn_fp_compare_operator"
2682	    [(match_operand:V_noQI 2 "gcn_alu_operand" "vSv, B,vSv, B, v,vA")
2683	     (match_operand:V_noQI 3 "gcn_vop3_operand" " v, v,  v, v,vA, v")])
2684	  (match_operand:DI 4 "gcn_exec_reg_operand"   "  e, e,  e, e, e, e")))
2685   (clobber (match_scratch:DI 5			       "= X, X, cV,cV, X, X"))]
2686  ""
2687  "@
2688   v_cmp%E1\tvcc, %2, %3
2689   v_cmp%E1\tvcc, %2, %3
2690   v_cmpx%E1\tvcc, %2, %3
2691   v_cmpx%E1\tvcc, %2, %3
2692   v_cmp%E1\t%0, %2, %3
2693   v_cmp%E1\t%0, %2, %3"
2694  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2695   (set_attr "length" "4,8,4,8,8,8")])
2696
; Masked unsigned comparison of V_INT_noQI vectors into a DImode result.
; Operands: 0 = result, 1 = comparison operator, 2 and 3 = inputs,
; 4 = DImode mask.  Forwards all five operands to vec_cmp<mode>di_exec.
2697(define_expand "vec_cmpu<mode>di_exec"
2698  [(match_operand:DI 0 "register_operand")
2699   (match_operator 1 "gcn_compare_operator"
2700     [(match_operand:V_INT_noQI 2 "gcn_alu_operand")
2701      (match_operand:V_INT_noQI 3 "gcn_vop3_operand")])
2702   (match_operand:DI 4 "gcn_exec_reg_operand")]
2703  ""
2704  {
2705    /* Unsigned comparisons use the same patterns as signed comparisons,
2706       except that they use unsigned operators (e.g. LTU vs LT).
2707       The '%E1' directive then does the Right Thing.  */
    /* Forward to the masked vec_cmp insn of the same mode, just as the
       unmasked vec_cmpu expander forwards to the unmasked insn.  Calling
       this expander's own generator here would recurse without end.  */
2708    emit_insn (gen_vec_cmp<mode>di_exec (operands[0], operands[1],
2709					 operands[2], operands[3],
2710					 operands[4]));
2711    DONE;
2712  })
2713
; Masked comparison of V_QI vectors.  Both inputs are widened into new
; <VnSI> pseudos under the mask in operand 4, then compared by
; vec_cmp<vnsi>di_exec under the same mask.
; Only available while new pseudos can still be created.
2714(define_expand "vec_cmp<u><mode>di_exec"
2715  [(match_operand:DI 0 "register_operand")
2716   (match_operator 1 "gcn_compare_operator"
2717     [(any_extend:<VnSI> (match_operand:V_QI 2 "gcn_alu_operand"))
2718      (any_extend:<VnSI> (match_operand:V_QI 3 "gcn_vop3_operand"))])
2719   (match_operand:DI 4 "gcn_exec_reg_operand")]
2720  "can_create_pseudo_p ()"
2721  {
    /* Temporaries holding the widened inputs.  */
2722    rtx sitmp1 = gen_reg_rtx (<VnSI>mode);
2723    rtx sitmp2 = gen_reg_rtx (<VnSI>mode);
2724
    /* NOTE(review): the narrow input is also passed as the third argument
       of each masked extend; if that position is the merge value it would
       normally have the wider result mode -- confirm this is intended.  */
2725    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp1, operands[2],
2726						 operands[2], operands[4]));
2727    emit_insn (gen_<expander><mode><vnsi>2_exec (sitmp2, operands[3],
2728						 operands[3], operands[4]));
2729    emit_insn (gen_vec_cmp<vnsi>di_exec (operands[0], operands[1], sitmp1,
2730					 sitmp2, operands[4]));
2731    DONE;
2732  })
2733
; Vector comparison whose first input is a scalar (operand 2, <SCALAR_MODE>)
; duplicated across a V_noQI vector and whose second input is the vector in
; operand 3.  The DImode result goes to operand 0; operand 4 is a scratch.
; Alternatives 0-3 are VOPC encodings printing "vcc" (v_cmp, then v_cmpx);
; alternative 4 is the VOP3A encoding printing operand 0.
2734(define_insn "vec_cmp<mode>di_dup"
2735  [(set (match_operand:DI 0 "register_operand"		   "=cV,cV, e,e,Sg")
2736	(match_operator:DI 1 "gcn_fp_compare_operator"
2737	  [(vec_duplicate:V_noQI
2738	     (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2739							   " Sv, B,Sv,B, A"))
2740	   (match_operand:V_noQI 3 "gcn_vop3_operand"	   "  v, v, v,v, v")]))
2741   (clobber (match_scratch:DI 4				   "= X,X,cV,cV, X"))]
2742  ""
2743  "@
2744   v_cmp%E1\tvcc, %2, %3
2745   v_cmp%E1\tvcc, %2, %3
2746   v_cmpx%E1\tvcc, %2, %3
2747   v_cmpx%E1\tvcc, %2, %3
2748   v_cmp%E1\t%0, %2, %3"
2749  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2750   (set_attr "length" "4,8,4,8,8")])
2751
; Masked form of vec_cmp<mode>di_dup: the comparison of the duplicated scalar
; (operand 2) with the vector in operand 3 is ANDed with the DImode mask in
; operand 4.  Operand 5 is the scratch.
2752(define_insn "vec_cmp<mode>di_dup_exec"
2753  [(set (match_operand:DI 0 "register_operand"		    "=cV,cV, e,e,Sg")
2754	(and:DI
2755	  (match_operator 1 "gcn_fp_compare_operator"
2756	    [(vec_duplicate:V_noQI
2757	       (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2758							    " Sv, B,Sv,B, A"))
2759	     (match_operand:V_noQI 3 "gcn_vop3_operand"	    "  v, v, v,v, v")])
2760	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "  e, e, e,e, e")))
2761   (clobber (match_scratch:DI 5				    "= X,X,cV,cV, X"))]
2762  ""
2763  "@
2764   v_cmp%E1\tvcc, %2, %3
2765   v_cmp%E1\tvcc, %2, %3
2766   v_cmpx%E1\tvcc, %2, %3
2767   v_cmpx%E1\tvcc, %2, %3
2768   v_cmp%E1\t%0, %2, %3"
2769  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2770   (set_attr "length" "4,8,4,8,8")])
2771
; Per-lane select: operand 0 is the vec_merge of operands 1 and 2 under the
; DImode mask in operand 3, in parallel with a clobber of a <VnDI> scratch.
; There is no preparation code; the expander just generates this RTL.
2772(define_expand "vcond_mask_<mode>di"
2773  [(parallel
2774    [(set (match_operand:V_ALL 0   "register_operand" "")
2775	  (vec_merge:V_ALL
2776	    (match_operand:V_ALL 1 "gcn_vop3_operand" "")
2777	    (match_operand:V_ALL 2 "gcn_alu_operand" "")
2778	    (match_operand:DI 3		     "register_operand" "")))
2779     (clobber (scratch:<VnDI>))])]
2780  ""
2781  "")
2782
; Vector conditional select.  Operands 4 and 5 (mode V_ALL_ALT) are compared
; with operator 3; the resulting mask selects between operands 1 and 2
; (mode V_ALL) into operand 0.
2783(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>"
2784  [(match_operand:V_ALL 0 "register_operand")
2785   (match_operand:V_ALL 1 "gcn_vop3_operand")
2786   (match_operand:V_ALL 2 "gcn_alu_operand")
2787   (match_operator 3 "gcn_fp_compare_operator"
2788     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
2789      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])]
2790  ""
2791  {
    /* New DImode pseudo receiving the comparison result.  */
2792    rtx tmp = gen_reg_rtx (DImode);
2793    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di
2794	       (tmp, operands[3], operands[4], operands[5]));
    /* Use that result as the mask of the select.  */
2795    emit_insn (gen_vcond_mask_<V_ALL:mode>di
2796	       (operands[0], operands[1], operands[2], tmp));
2797    DONE;
2798  })
2799
; Vector conditional select with an additional DImode mask (operand 6).
; The mask is applied to the comparison only: it is passed to the masked
; compare, while the following select uses the unmasked vcond_mask pattern.
2800(define_expand "vcond<V_ALL:mode><V_ALL_ALT:mode>_exec"
2801  [(match_operand:V_ALL 0 "register_operand")
2802   (match_operand:V_ALL 1 "gcn_vop3_operand")
2803   (match_operand:V_ALL 2 "gcn_alu_operand")
2804   (match_operator 3 "gcn_fp_compare_operator"
2805     [(match_operand:V_ALL_ALT 4 "gcn_alu_operand")
2806      (match_operand:V_ALL_ALT 5 "gcn_vop3_operand")])
2807   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2808  ""
2809  {
    /* New DImode pseudo receiving the masked comparison result.  */
2810    rtx tmp = gen_reg_rtx (DImode);
2811    emit_insn (gen_vec_cmp<V_ALL_ALT:mode>di_exec
2812	       (tmp, operands[3], operands[4], operands[5], operands[6]));
    /* Use that result as the mask of the select.  */
2813    emit_insn (gen_vcond_mask_<V_ALL:mode>di
2814	       (operands[0], operands[1], operands[2], tmp));
2815    DONE;
2816  })
2817
; Vector conditional select on an unsigned integer comparison.  Operands 4
; and 5 (mode V_INT) are compared through the vec_cmpu pattern; the resulting
; mask selects between operands 1 and 2 (mode V_ALL) into operand 0.
2818(define_expand "vcondu<V_ALL:mode><V_INT:mode>"
2819  [(match_operand:V_ALL 0 "register_operand")
2820   (match_operand:V_ALL 1 "gcn_vop3_operand")
2821   (match_operand:V_ALL 2 "gcn_alu_operand")
2822   (match_operator 3 "gcn_fp_compare_operator"
2823     [(match_operand:V_INT 4 "gcn_alu_operand")
2824      (match_operand:V_INT 5 "gcn_vop3_operand")])]
2825  ""
2826  {
    /* New DImode pseudo receiving the comparison result.  */
2827    rtx tmp = gen_reg_rtx (DImode);
2828    emit_insn (gen_vec_cmpu<V_INT:mode>di
2829	       (tmp, operands[3], operands[4], operands[5]));
    /* Use that result as the mask of the select.  */
2830    emit_insn (gen_vcond_mask_<V_ALL:mode>di
2831	       (operands[0], operands[1], operands[2], tmp));
2832    DONE;
2833  })
2834
; Unsigned vector conditional select with an additional DImode mask
; (operand 6).  The mask is passed to the masked vec_cmpu compare; the
; following select uses the unmasked vcond_mask pattern.
2835(define_expand "vcondu<V_ALL:mode><V_INT:mode>_exec"
2836  [(match_operand:V_ALL 0 "register_operand")
2837   (match_operand:V_ALL 1 "gcn_vop3_operand")
2838   (match_operand:V_ALL 2 "gcn_alu_operand")
2839   (match_operator 3 "gcn_fp_compare_operator"
2840     [(match_operand:V_INT 4 "gcn_alu_operand")
2841      (match_operand:V_INT 5 "gcn_vop3_operand")])
2842   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2843  ""
2844  {
    /* New DImode pseudo receiving the masked comparison result.  */
2845    rtx tmp = gen_reg_rtx (DImode);
2846    emit_insn (gen_vec_cmpu<V_INT:mode>di_exec
2847	       (tmp, operands[3], operands[4], operands[5], operands[6]));
    /* Use that result as the mask of the select.  */
2848    emit_insn (gen_vcond_mask_<V_ALL:mode>di
2849	       (operands[0], operands[1], operands[2], tmp));
2850    DONE;
2851  })
2852
2853;; }}}
2854;; {{{ Fully masked loop support
2855
; Build a DImode mask in operand 0 from the two SImode bounds in operands 1
; and 2.  When both bounds are constants the mask is computed at compile time;
; otherwise it is produced by a vector comparison.
2856(define_expand "while_ultsidi"
2857  [(match_operand:DI 0 "register_operand")
2858   (match_operand:SI 1 "")
2859   (match_operand:SI 2 "")]
2860  ""
2861  {
    /* At least one bound is not a compile-time constant.  */
2862    if (GET_CODE (operands[1]) != CONST_INT
2863	|| GET_CODE (operands[2]) != CONST_INT)
2864      {
	/* Hard VGPR 1 viewed as V64SI; the name suggests it holds the lane
	   numbers 0, 1, 2, 3, ... -- TODO confirm.  */
2865	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2866	rtx tmp = _0_1_2_3;
	/* Unless operand 1 is the constant zero, add it to every lane.  */
2867	if (GET_CODE (operands[1]) != CONST_INT
2868	    || INTVAL (operands[1]) != 0)
2869	  {
2870	    tmp = gen_reg_rtx (V64SImode);
2871	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2872	  }
	/* Mask = (operand 2 duplicated to all lanes) GT tmp.  */
2873	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2874					   gen_rtx_GT (VOIDmode, 0, 0),
2875					   operands[2], tmp));
2876      }
2877    else
2878      {
	/* Both bounds are constants: set the low diff bits of the mask, or
	   all bits once diff reaches 64.
	   NOTE(review): a negative diff would reach the shift below --
	   confirm callers never pass operand 2 below operand 1.  */
2879	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2880	HOST_WIDE_INT mask = (diff >= 64 ? -1
2881			      : ~((unsigned HOST_WIDE_INT)-1 << diff));
2882	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2883      }
2884    DONE;
2885  })
2886
; Masked vector load: operand 0 = destination register, operand 1 = memory,
; operand 2 = mask.  Implemented with the masked gather pattern, after first
; zeroing the whole destination.
2887(define_expand "maskload<mode>di"
2888  [(match_operand:V_ALL 0 "register_operand")
2889   (match_operand:V_ALL 1 "memory_operand")
2890   (match_operand 2 "")]
2891  ""
2892  {
    /* Mask in a DImode register.  */
2893    rtx exec = force_reg (DImode, operands[2]);
    /* Address for the gather, derived from the memory operand.  */
2894    rtx addr = gcn_expand_scalar_to_vector_address
2895		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (<VnDI>mode));
    /* Address space and volatility of the memory operand, passed on as
       constants.  */
2896    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2897    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2898
2899    /* Masked lanes are required to hold zero.  */
2900    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2901
    /* The destination is passed again as the fifth argument, ahead of the
       mask.  */
2902    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v,
2903					   operands[0], exec));
2904    DONE;
2905  })
2906
; Masked vector store: operand 0 = memory, operand 1 = source register,
; operand 2 = mask.  Implemented with the masked scatter pattern.
2907(define_expand "maskstore<mode>di"
2908  [(match_operand:V_ALL 0 "memory_operand")
2909   (match_operand:V_ALL 1 "register_operand")
2910   (match_operand 2 "")]
2911  ""
2912  {
    /* Mask in a DImode register.  */
2913    rtx exec = force_reg (DImode, operands[2]);
    /* Address for the scatter, derived from the memory operand.  */
2914    rtx addr = gcn_expand_scalar_to_vector_address
2915		(<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (<VnDI>mode));
    /* Address space and volatility of the memory operand, passed on as
       constants.  */
2916    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2917    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2918    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2919    DONE;
2920  })
2921
; Masked gather load.  Operands: 0 = destination vector, 1 = DImode base,
; 2 = <VnSI> offsets, 3 = immediate passed to gcn_expand_scaled_offsets via
; INTVAL, 4 = SImode scale operand, 5 = DImode mask.
; The destination is zeroed first and then loaded under the mask.
2922(define_expand "mask_gather_load<mode><vnsi>"
2923  [(match_operand:V_ALL 0 "register_operand")
2924   (match_operand:DI 1 "register_operand")
2925   (match_operand:<VnSI> 2 "register_operand")
2926   (match_operand 3 "immediate_operand")
2927   (match_operand:SI 4 "gcn_alu_operand")
2928   (match_operand:DI 5 "")]
2929  ""
2930  {
    /* Mask in a DImode register.  */
2931    rtx exec = force_reg (DImode, operands[5]);
2932
2933    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
2934					  operands[2], operands[4],
2935					  INTVAL (operands[3]), exec);
2936
2937    /* Masked lanes are required to hold zero.  */
2938    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
2939
    /* The helper returned either a vector in the DImode-element vector
       mode, used alone by the 1offset form, or something in another mode,
       which the 2offsets form uses together with the base in operand 1.  */
2940    if (GET_MODE (addr) == <VnDI>mode)
2941      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
2942						     const0_rtx, const0_rtx,
2943						     const0_rtx, operands[0],
2944						     exec));
2945    else
2946      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
2947						      addr, const0_rtx,
2948						      const0_rtx, const0_rtx,
2949						      operands[0], exec));
2950    DONE;
2951  })
2952
; Masked scatter store.  Operands: 0 = DImode base, 1 = <VnSI> offsets,
; 2 = immediate passed to gcn_expand_scaled_offsets via INTVAL, 3 = SImode
; scale operand, 4 = source vector, 5 = DImode mask.
2953(define_expand "mask_scatter_store<mode><vnsi>"
2954  [(match_operand:DI 0 "register_operand")
2955   (match_operand:<VnSI> 1 "register_operand")
2956   (match_operand 2 "immediate_operand")
2957   (match_operand:SI 3 "gcn_alu_operand")
2958   (match_operand:V_ALL 4 "register_operand")
2959   (match_operand:DI 5 "")]
2960  ""
2961  {
    /* Mask in a DImode register.  */
2962    rtx exec = force_reg (DImode, operands[5]);
2963
2964    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
2965					  operands[1], operands[3],
2966					  INTVAL (operands[2]), exec);
2967
    /* The helper returned either a vector in the DImode-element vector
       mode, used alone by the 1offset form, or something in another mode,
       which the 2offsets form uses together with the base in operand 0.  */
2968    if (GET_MODE (addr) == <VnDI>mode)
2969      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
2970						      operands[4], const0_rtx,
2971						      const0_rtx,
2972						      exec));
2973    else
2974      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
2975						       const0_rtx, operands[4],
2976						       const0_rtx, const0_rtx,
2977						       exec));
2978    DONE;
2979  })
2980
; Arithmetic codes that get a conditional (masked) expander below.
2981(define_code_iterator cond_op [plus minus mult])
2982
; Conditional arithmetic on V_ALL vectors.  Operands: 0 = result,
; 1 = DImode mask, 2 and 3 = inputs of the operation, 4 = vector passed to
; the masked insn in the position following the two inputs.
2983(define_expand "cond_<expander><mode>"
2984  [(match_operand:V_ALL 0 "register_operand")
2985   (match_operand:DI 1 "register_operand")
2986   (cond_op:V_ALL
2987     (match_operand:V_ALL 2 "gcn_alu_operand")
2988     (match_operand:V_ALL 3 "gcn_alu_operand"))
2989   (match_operand:V_ALL 4 "register_operand")]
2990  ""
2991  {
    /* The mask and the first input must be in registers.  */
2992    operands[1] = force_reg (DImode, operands[1]);
2993    operands[2] = force_reg (<MODE>mode, operands[2]);
2994
    /* Note the argument order: the mask (operand 1) goes last.  */
2995    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2996					   operands[3], operands[4],
2997					   operands[1]));
2998    DONE;
2999  })
3000
3001;; TODO smin umin smax umax
; Bitwise codes that get a conditional (masked) expander below.
3002(define_code_iterator cond_bitop [and ior xor])
3003
; Conditional bitwise operation on V_INT vectors.  Operands: 0 = result,
; 1 = DImode mask, 2 and 3 = inputs of the operation, 4 = vector passed to
; the masked insn in the position following the two inputs.
3004(define_expand "cond_<expander><mode>"
3005  [(match_operand:V_INT 0 "register_operand")
3006   (match_operand:DI 1 "register_operand")
3007   (cond_bitop:V_INT
3008     (match_operand:V_INT 2 "gcn_alu_operand")
3009     (match_operand:V_INT 3 "gcn_alu_operand"))
3010   (match_operand:V_INT 4 "register_operand")]
3011  ""
3012  {
    /* The mask and the first input must be in registers.  */
3013    operands[1] = force_reg (DImode, operands[1]);
3014    operands[2] = force_reg (<MODE>mode, operands[2]);
3015
    /* Note the argument order: the mask (operand 1) goes last.  */
3016    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
3017					   operands[3], operands[4],
3018					   operands[1]));
3019    DONE;
3020  })
3021
3022;; }}}
3023;; {{{ Vector reductions
3024
; All reduction unspecs: signed/unsigned min and max, plus, and, ior, xor.
3025(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
3026				   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
3027				   UNSPEC_PLUS_DPP_SHR
3028				   UNSPEC_AND_DPP_SHR
3029				   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3030
; Subset used by the V_DI reduction step pattern: plus, and, ior, xor
; (no min/max).
3031(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
3032					UNSPEC_AND_DPP_SHR
3033					UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])
3034
; Maps each unspec to its own name as text, so that <reduc_unspec> can be
; used as a constant inside C fragments.
3035; FIXME: Isn't there a better way of doing this?
3036(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
3037			       (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
3038			       (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
3039			       (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
3040			       (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
3041			       (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
3042			       (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
3043			       (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])
3044
; Operation name used to build the pattern names below.
3045(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
3046			   (UNSPEC_SMAX_DPP_SHR "smax")
3047			   (UNSPEC_UMIN_DPP_SHR "umin")
3048			   (UNSPEC_UMAX_DPP_SHR "umax")
3049			   (UNSPEC_PLUS_DPP_SHR "plus")
3050			   (UNSPEC_AND_DPP_SHR "and")
3051			   (UNSPEC_IOR_DPP_SHR "ior")
3052			   (UNSPEC_XOR_DPP_SHR "xor")])
3053
; Assembler mnemonic template for each reduction, passed to
; gcn_expand_dpp_shr_insn by the V_1REG pattern below.
3054(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
3055			     (UNSPEC_SMAX_DPP_SHR "v_max%i0")
3056			     (UNSPEC_UMIN_DPP_SHR "v_min%u0")
3057			     (UNSPEC_UMAX_DPP_SHR "v_max%u0")
3058			     (UNSPEC_PLUS_DPP_SHR "v_add%U0")
3059			     (UNSPEC_AND_DPP_SHR  "v_and%B0")
3060			     (UNSPEC_IOR_DPP_SHR  "v_or%B0")
3061			     (UNSPEC_XOR_DPP_SHR  "v_xor%B0")])
3062
; Reduce the vector in operand 1 to a scalar in operand 0, for every
; operation in REDUC_UNSPEC and every mode in V_ALL.  The vector-side work is
; done by gcn_expand_reduc_scalar; the scalar is then read from its result.
3063(define_expand "reduc_<reduc_op>_scal_<mode>"
3064  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
3065	(unspec:<SCALAR_MODE>
3066	  [(match_operand:V_ALL 1 "register_operand")]
3067	  REDUC_UNSPEC))]
3068  ""
3069  {
3070    rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
3071				       <reduc_unspec>);
3072
3073    /* The result of the reduction is in lane 63 of tmp.  */
3074    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
3075
3076    DONE;
3077  })
3078
3079
; One reduction step on single-register vectors: operands 1 and 2 are the
; vector inputs and operand 3 is a constant integer handed to
; gcn_expand_dpp_shr_insn, which returns the assembler text.
; The condition excludes integer-mode plus on GCN3; the plus_carry patterns
; further down carry the VCC clobber.
3080(define_insn "*<reduc_op>_dpp_shr_<mode>"
3081  [(set (match_operand:V_1REG 0 "register_operand"   "=v")
3082	(unspec:V_1REG
3083	  [(match_operand:V_1REG 1 "register_operand" "v")
3084	   (match_operand:V_1REG 2 "register_operand" "v")
3085	   (match_operand:SI 3 "const_int_operand"    "n")]
3086	  REDUC_UNSPEC))]
3087  ; GCN3 requires a carry out, GCN5 not
3088  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
3089     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
3090  {
3091    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
3092				    <reduc_unspec>, INTVAL (operands[3]));
3093  }
3094  [(set_attr "type" "vop_dpp")
3095   (set_attr "length" "8")])
3096
; One reduction step on V_DI vectors for the REDUC_2REG_UNSPEC operations.
; Split after reload into two independent <VnSI> steps of the same unspec:
; one on part 0 and one on part 1 of each operand, both using the constant
; in operand 3.
3097(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
3098  [(set (match_operand:V_DI 0 "register_operand"    "=v")
3099	(unspec:V_DI
3100	  [(match_operand:V_DI 1 "register_operand" "v")
3101	   (match_operand:V_DI 2 "register_operand" "v")
3102	   (match_operand:SI 3 "const_int_operand"  "n")]
3103	  REDUC_2REG_UNSPEC))]
3104  ""
3105  "#"
3106  "reload_completed"
3107  [(set (match_dup 4)
3108	(unspec:<VnSI>
3109	  [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
3110   (set (match_dup 5)
3111	(unspec:<VnSI>
3112	  [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
3113  {
    /* Operands 4/5: parts 0/1 of the output; 6/7: parts of the first
       input; 8/9: parts of the second input.  */
3114    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3115    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3116    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3117    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3118    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3119    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3120  }
3121  [(set_attr "type" "vmult")
3122   (set_attr "length" "16")])
3123
3124; Special cases for addition.
3125
; Addition reduction step on single-register integer vectors that clobbers
; VCC.  The assembler text comes from gcn_expand_dpp_shr_insn, which is
; always called with the SImode-element vector mode and "v_add%^_u32".
3126(define_insn "*plus_carry_dpp_shr_<mode>"
3127  [(set (match_operand:V_INT_1REG 0 "register_operand"   "=v")
3128	(unspec:V_INT_1REG
3129	  [(match_operand:V_INT_1REG 1 "register_operand" "v")
3130	   (match_operand:V_INT_1REG 2 "register_operand" "v")
3131	   (match_operand:SI 3 "const_int_operand"	  "n")]
3132	  UNSPEC_PLUS_CARRY_DPP_SHR))
3133   (clobber (reg:DI VCC_REG))]
3134  ""
3135  {
3136    return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
3137				    UNSPEC_PLUS_CARRY_DPP_SHR,
3138				    INTVAL (operands[3]));
3139  }
3140  [(set_attr "type" "vop_dpp")
3141   (set_attr "length" "8")])
3142
; Addition reduction step on V_SI vectors that also takes a DImode input in
; operand 4 (constraint cV) and clobbers VCC.  Emits "v_addc%^_u32" through
; gcn_expand_dpp_shr_insn.  The V_DI splitter below uses it for part 1 with
; VCC as operand 4.
3143(define_insn "*plus_carry_in_dpp_shr_<mode>"
3144  [(set (match_operand:V_SI 0 "register_operand"    "=v")
3145	(unspec:V_SI
3146	  [(match_operand:V_SI 1 "register_operand" "v")
3147	   (match_operand:V_SI 2 "register_operand" "v")
3148	   (match_operand:SI 3 "const_int_operand"  "n")
3149	   (match_operand:DI 4 "register_operand"   "cV")]
3150	  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3151   (clobber (reg:DI VCC_REG))]
3152  ""
3153  {
3154    return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
3155				    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
3156				    INTVAL (operands[3]));
3157  }
3158  [(set_attr "type" "vop_dpp")
3159   (set_attr "length" "8")])
3160
; Addition reduction step on V_DI vectors, clobbering VCC.  Split after
; reload into two <VnSI> insns: a plus_carry step on part 0 of the operands,
; followed by a plus_carry_in step on part 1 that reads VCC.
3161(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
3162  [(set (match_operand:V_DI 0 "register_operand"    "=v")
3163	(unspec:V_DI
3164	  [(match_operand:V_DI 1 "register_operand" "v")
3165	   (match_operand:V_DI 2 "register_operand" "v")
3166	   (match_operand:SI 3 "const_int_operand"  "n")]
3167	  UNSPEC_PLUS_CARRY_DPP_SHR))
3168   (clobber (reg:DI VCC_REG))]
3169  ""
3170  "#"
3171  "reload_completed"
3172  [(parallel [(set (match_dup 4)
3173		(unspec:<VnSI>
3174		  [(match_dup 6) (match_dup 8) (match_dup 3)]
3175		  UNSPEC_PLUS_CARRY_DPP_SHR))
3176	      (clobber (reg:DI VCC_REG))])
3177   (parallel [(set (match_dup 5)
3178		(unspec:<VnSI>
3179		  [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
3180		  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
3181	      (clobber (reg:DI VCC_REG))])]
3182  {
    /* Operands 4/5: parts 0/1 of the output; 6/7: parts of the first
       input; 8/9: parts of the second input.  */
3183    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
3184    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
3185    operands[6] = gcn_operand_part (<MODE>mode, operands[1], 0);
3186    operands[7] = gcn_operand_part (<MODE>mode, operands[1], 1);
3187    operands[8] = gcn_operand_part (<MODE>mode, operands[2], 0);
3188    operands[9] = gcn_operand_part (<MODE>mode, operands[2], 1);
3189  }
3190  [(set_attr "type" "vmult")
3191   (set_attr "length" "16")])
3192
3193; Instructions to move a scalar value from lane 63 of a vector register.
; Single-register vector modes.  Alternative 0 (destination constraint Sg)
; uses v_readlane_b32 with lane 63 and has the "exec" attribute set to
; "none"; alternative 1 (destination constraint v) uses v_mov_b32 with
; wave_ror:1.
3194(define_insn "mov_from_lane63_<mode>"
3195  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3196	(unspec:<SCALAR_MODE>
3197	  [(match_operand:V_1REG 1 "register_operand"	  "  v,v")]
3198	  UNSPEC_MOV_FROM_LANE63))]
3199  ""
3200  "@
3201   v_readlane_b32\t%0, %1, 63
3202   v_mov_b32\t%0, %1 wave_ror:1"
3203  [(set_attr "type" "vop3a,vop_dpp")
3204   (set_attr "exec" "none,*")
3205   (set_attr "length" "8")])
3206
; Two-register vector modes: the same move done as two 32-bit instructions,
; one for the %L parts and one for the %H parts.  In alternative 1 the order
; of the two moves depends on the register numbers: the %L move comes first
; when the destination register number is not above the source's, otherwise
; the %H move comes first.
; NOTE(review): both alternatives print two instructions, yet "length" is 8,
; the same value as the single-instruction V_1REG pattern -- confirm.
3207(define_insn "mov_from_lane63_<mode>"
3208  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
3209	(unspec:<SCALAR_MODE>
3210	  [(match_operand:V_2REG 1 "register_operand"	  "  v,v")]
3211	  UNSPEC_MOV_FROM_LANE63))]
3212  ""
3213  "@
3214   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3215   * if (REGNO (operands[0]) <= REGNO (operands[1]))	\
3216       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\"	\
3217	      \"v_mov_b32\t%H0, %H1 wave_ror:1\";	\
3218     else						\
3219       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\"	\
3220	      \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3221  [(set_attr "type" "vop3a,vop_dpp")
3222   (set_attr "exec" "none,*")
3223   (set_attr "length" "8")])
3224
3225;; }}}
3226;; {{{ Miscellaneous
3227
; Linear series for V_SI vectors: operand 0 = v1 * operand 2 + operand 1,
; where v1 is hard VGPR 1 and both scalars are applied through the _dup
; (scalar operand) forms of multiply and add.
; NOTE(review): presumably VGPR 1 holds the lane numbers, making operand 1
; the base and operand 2 the step -- confirm.
3228(define_expand "vec_series<mode>"
3229  [(match_operand:V_SI 0 "register_operand")
3230   (match_operand:SI 1 "gcn_alu_operand")
3231   (match_operand:SI 2 "gcn_alu_operand")]
3232  ""
3233  {
3234    rtx tmp = gen_reg_rtx (<MODE>mode);
3235    rtx v1 = gen_rtx_REG (<MODE>mode, VGPR_REGNO (1));
3236
    /* tmp = v1 * operand 2, then result = tmp + operand 1.  */
3237    emit_insn (gen_mul<mode>3_dup (tmp, v1, operands[2]));
3238    emit_insn (gen_add<mode>3_dup (operands[0], tmp, operands[1]));
3239    DONE;
3240  })
3241
; Linear series for V_DI vectors: operand 0 = v1 * operand 2 + operand 1.
; Here v1 is hard VGPR 1 in the SImode-element vector mode, multiplied
; through the mul zext_dup2 pattern, and operand 1 is first duplicated into
; a vector before a plain vector add.
; NOTE(review): presumably VGPR 1 holds the lane numbers, making operand 1
; the base and operand 2 the step -- confirm.
3242(define_expand "vec_series<mode>"
3243  [(match_operand:V_DI 0 "register_operand")
3244   (match_operand:DI 1 "gcn_alu_operand")
3245   (match_operand:DI 2 "gcn_alu_operand")]
3246  ""
3247  {
3248    rtx tmp = gen_reg_rtx (<MODE>mode);
3249    rtx v1 = gen_rtx_REG (<VnSI>mode, VGPR_REGNO (1));
3250    rtx op1vec = gen_reg_rtx (<MODE>mode);
3251
    /* tmp = v1 * operand 2; op1vec = operand 1 in every lane;
       result = tmp + op1vec.  */
3252    emit_insn (gen_mul<mode>3_zext_dup2 (tmp, v1, operands[2]));
3253    emit_insn (gen_vec_duplicate<mode> (op1vec, operands[1]));
3254    emit_insn (gen_add<mode>3 (operands[0], tmp, op1vec));
3255    DONE;
3256  })
3257
3258;; }}}
3259