;; gcn-valu.md -- Machine description patterns for the GCN vector ALU.

;; Copyright (C) 2016-2019 Free Software Foundation, Inc.

;; This file is free software; you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free
;; Software Foundation; either version 3 of the License, or (at your option)
;; any later version.

;; This file is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
;; FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; {{{ Vector iterators

; Vector modes for one vector register
(define_mode_iterator VEC_1REG_MODE
		      [V64QI V64HI V64SI V64HF V64SF])
(define_mode_iterator VEC_1REG_ALT
		      [V64QI V64HI V64SI V64HF V64SF])

(define_mode_iterator VEC_1REG_INT_MODE
		      [V64QI V64HI V64SI])
(define_mode_iterator VEC_1REG_INT_ALT
		      [V64QI V64HI V64SI])

; Vector modes for two vector registers
(define_mode_iterator VEC_2REG_MODE
		      [V64DI V64DF])

; All of above
(define_mode_iterator VEC_REG_MODE
		      [V64QI V64HI V64SI V64HF V64SF    ; Single reg
		       V64DI V64DF])		        ; Double reg

; Map each vector mode to its element mode; lower-case spelling is used in
; pattern names, upper-case in mode positions.
(define_mode_attr scalar_mode
  [(V64QI "qi") (V64HI "hi") (V64SI "si")
   (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])

(define_mode_attr SCALAR_MODE
  [(V64QI "QI") (V64HI "HI") (V64SI "SI")
   (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])

;; }}}
;; {{{ Substitutions

; The define_subst transformations below generate "_exec" variants of the
; patterns they are attached to: the output is wrapped in a vec_merge (or,
; for scatter stores, given an extra unspec operand) controlled by an EXEC
; register operand ("e" constraint), giving a lane-masked form of each insn.

(define_subst_attr "exec" "vec_merge"
		   "" "_exec")
(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
		   "" "_exec")
(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
		   "" "_exec")
(define_subst_attr "exec_scatter" "scatter_store"
		   "" "_exec")

; Plain set -> masked set; operand 3 supplies the values for inactive lanes
; (or an unspec placeholder, "U0" constraint ties it to operand 0).
(define_subst "vec_merge"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])

; As "vec_merge", but for patterns that also clobber a scratch operand.
(define_subst "vec_merge_with_clobber"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (clobber (match_operand 2))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (match_dup 1)
	  (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
   (clobber (match_dup 2))])

; As "vec_merge", but for patterns with a second DImode set (e.g. a carry
; output); that result is additionally masked with the EXEC register.
(define_subst "vec_merge_with_vcc"
  [(set (match_operand:VEC_REG_MODE 0)
	(match_operand:VEC_REG_MODE 1))
   (set (match_operand:DI 2)
	(match_operand:DI 3))]
  ""
  [(parallel
     [(set (match_dup 0)
	   (vec_merge:VEC_REG_MODE
	     (match_dup 1)
	     (match_operand:VEC_REG_MODE 4
					 "gcn_register_or_unspec_operand" "U0")
	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
      (set (match_dup 2)
	   (and:DI (match_dup 3)
		   (reg:DI EXEC_REG)))])])

; Scatter stores gain an explicit EXEC operand inside the UNSPEC_SCATTER.
(define_subst "scatter_store"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand 0)
	   (match_operand 1)
	   (match_operand 2)
	   (match_operand 3)]
	  UNSPEC_SCATTER))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_dup 0)
	   (match_dup 1)
	   (match_dup 2)
	   (match_dup 3)
	   (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
	  UNSPEC_SCATTER))])

;; }}}
;; {{{ Vector moves

; This is the entry point for all vector register moves.  Memory accesses can
; come this way also, but will more usually use the reload_in/out,
; gather/scatter, maskload/store, etc.

(define_expand "mov<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(match_operand:VEC_REG_MODE 1 "general_operand"))]
  ""
  {
    /* Before register allocation, lower a vector store to a scatter and a
       vector load to a gather, carrying the MEM's address space and
       volatility along as constant operands.  */
    if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
      {
	operands[1] = force_reg (<MODE>mode, operands[1]);
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[0],
							scratch);
	emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
	DONE;
      }
    else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
      {
	rtx scratch = gen_rtx_SCRATCH (V64DImode);
	rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
	rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
	rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
							operands[1],
							scratch);
	emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
	DONE;
      }
    else if ((MEM_P (operands[0]) || MEM_P (operands[1])))
      {
	/* During/after RA, use the sgprbase form with a real scratch reg.  */
	gcc_assert (!reload_completed);
	rtx scratch = gen_reg_rtx (V64DImode);
	emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
	DONE;
      }
  })
159132720Skan
; A pseudo instruction that helps LRA use the "U0" constraint.
; It emits nothing (zero length) and exists only so the "unspec" source
; has a home until it can be tied to the real destination.

(define_insn "mov<mode>_unspec"
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
	(match_operand:VEC_REG_MODE 1 "gcn_unspec_operand"   " U"))]
  ""
  ""
  [(set_attr "type" "unknown")
   (set_attr "length" "0")])

; Unmasked single-register vector move.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
	(match_operand:VEC_1REG_MODE 1 "general_operand"      "vA,B"))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop1,vop1")
   (set_attr "length" "4,8")])

; Lane-masked single-register move; register-mask alternatives use
; v_cndmask_b32 to select between source and operand 3 per lane.  The
; memory alternatives are split later (hence "#" and the V64DI scratch).
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
							 "=v, v, v, v, v, m")
	(vec_merge:VEC_1REG_MODE
	  (match_operand:VEC_1REG_MODE 1 "general_operand"
							 "vA, B, v,vA, m, v")
	  (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
							 "U0,U0,vA,vA,U0,U0")
	  (match_operand:DI 2 "register_operand"	 " e, e,cV,Sv, e, e")))
   (clobber (match_scratch:V64DI 4			 "=X, X, X, X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   v_cndmask_b32\t%0, %3, %1, vcc
   v_cndmask_b32\t%0, %3, %1, %2
   #
   #"
  [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
   (set_attr "length" "4,8,4,8,16,16")])
198132720Skan
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
;	(vec_merge:VEC_1REG_MODE
;	  (match_operand:VEC_1REG_MODE 1 "general_operand"    "vA,B, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      " e,e, e, e")))
;   (clobber (match_scratch:V64DI 3			      "=X,X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;  v_mov_b32\t%0, %1
;  v_mov_b32\t%0, %1
;  #
;  #"
;  [(set_attr "type" "vop1,vop1,*,*")
;   (set_attr "length" "4,8,16,16")])
217132720Skan
; Unmasked double-register vector move.  Move the low halves first unless
; the registers overlap in a way that would clobber the source high half.
(define_insn "*mov<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"  "=v")
	(match_operand:VEC_2REG_MODE 1 "general_operand"      "vDB"))]
  ""
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
    else
      return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")])

; Lane-masked double-register move; same overlap-ordering rule as above,
; using v_cndmask_b32 per half for the masked register alternatives.
; The memory alternatives are split later (hence "#" and the scratch).
(define_insn "mov<mode>_exec"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
						       "= v,   v,   v, v, m")
	(vec_merge:VEC_2REG_MODE
	  (match_operand:VEC_2REG_MODE 1 "general_operand"
						       "vDB,  v0,  v0, m, v")
	  (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
						       " U0,vDA0,vDA0,U0,U0")
	  (match_operand:DI 2 "register_operand"       "  e,  cV,  Sv, e, e")))
   (clobber (match_scratch:V64DI 4		       "= X,   X,   X,&v,&v"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
  {
    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
	case 1:
	  return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, vcc";
	case 2:
	  return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
		 "v_cndmask_b32\t%H0, %H3, %H1, %2";
	}
    else
      switch (which_alternative)
	{
	case 0:
	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
	case 1:
	  return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, vcc";
	case 2:
	  return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
		 "v_cndmask_b32\t%L0, %L3, %L1, %2";
	}

    return "#";
  }
  [(set_attr "type" "vmult,vmult,vmult,*,*")
   (set_attr "length" "16,16,16,16,16")])
272132720Skan
; This variant does not accept an unspec, but does permit MEM
; read/modify/write which is necessary for maskstore.

;(define_insn "*mov<mode>_exec_match"
;  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
;	(vec_merge:VEC_2REG_MODE
;	  (match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")
;	  (match_dup 0)
;	  (match_operand:DI 2 "gcn_exec_reg_operand"	      " e, e, e")))
;   (clobber (match_scratch:V64DI 3			      "=X,&v,&v"))]
;  "!MEM_P (operands[0]) || REG_P (operands[1])"
;  "@
;   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
;       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
;     else \
;       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
;   #
;   #"
;  [(set_attr "type" "vmult,*,*")
;   (set_attr "length" "16,16,16")])
293132720Skan
; A SGPR-base load looks like:
;   <load> v, Sv
;
; There's no hardware instruction that corresponds to this, but vector base
; addresses are placed in an SGPR because it is easier to add to a vector.
; We also have a temporary vT, and the vector v1 holding numbered lanes.
;
; Rewrite as:
;   vT = v1 << log2(element-size)
;   vT += Sv
;   flat_load v, vT

; Single-register form; the memory alternatives ("#") are rewritten by the
; define_splits below into gather/scatter, using operand 2 as the address
; scratch.  Only valid once LRA is in progress or register allocation is done.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 1 "general_operand"   " vA,vB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   v_mov_b32\t%0, %1
   v_mov_b32\t%0, %1
   #
   #"
  [(set_attr "type" "vop1,vop1,*,*")
   (set_attr "length" "4,8,12,12")])

; Double-register form; the register alternative orders the half moves to
; avoid clobbering an overlapping source, as in "*mov<mode>" above.
(define_insn "mov<mode>_sgprbase"
  [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 1 "general_operand"   "vDB, m, v")]
	  UNSPEC_SGPRBASE))
   (clobber (match_operand:V64DI 2 "register_operand"	      "=&v,&v,&v"))]
  "lra_in_progress || reload_completed"
  "@
   * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
       return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
     else \
       return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
   #
   #"
  [(set_attr "type" "vmult,*,*")
   (set_attr "length" "8,12,12")])
337
; reload_in was once a standard name, but here it's only referenced by
; gcn_secondary_reload.  It allows a reload with a scratch register.

(define_expand "reload_in<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
	(match_operand:VEC_REG_MODE 1 "memory_operand"   "  m"))
   (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })

; reload_out is similar to reload_in, above.

(define_expand "reload_out<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand"   "= m")
	(match_operand:VEC_REG_MODE 1 "register_operand" "  v"))
   (clobber (match_operand:V64DI 2 "register_operand"    "=&v"))]
  ""
  {
    emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
    DONE;
  })
362
; Expand scalar addresses into gather/scatter patterns

; Unmasked sgprbase store -> scatter.  Operands 6/7 carry the MEM's
; address space and volatility as constants.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "general_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[0],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; Masked (vec_merge) store -> scatter with explicit exec operand 3.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "general_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (mem:BLK (scratch))
	(unspec:BLK [(match_dup 5) (match_dup 1)
		     (match_dup 6) (match_dup 7) (match_dup 3)]
		    UNSPEC_SCATTER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[0],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
  })

; Unmasked sgprbase load -> gather.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand:VEC_REG_MODE 1 "memory_operand")]
	  UNSPEC_SGPRBASE))
   (clobber (match_scratch:V64DI 2))]
  ""
  [(set (match_dup 0)
	(unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
			      (mem:BLK (scratch))]
			     UNSPEC_GATHER))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
						       operands[1],
						       operands[2]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })

; Masked (vec_merge) load -> gather wrapped in the same vec_merge.
(define_split
  [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
	(vec_merge:VEC_REG_MODE
	  (match_operand:VEC_REG_MODE 1 "memory_operand")
	  (match_operand:VEC_REG_MODE 2 "")
	  (match_operand:DI 3 "gcn_exec_reg_operand")))
   (clobber (match_scratch:V64DI 4))]
  ""
  [(set (match_dup 0)
	(vec_merge:VEC_REG_MODE
	  (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
				(mem:BLK (scratch))]
			       UNSPEC_GATHER)
	  (match_dup 2)
	  (match_dup 3)))]
  {
    operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
						       operands[3],
						       operands[1],
						       operands[4]);
    operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
    operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
  })
446
; TODO: Add zero/sign extending variants.

;; }}}
;; {{{ Lane moves

; v_writelane and v_readlane work regardless of exec flags.
; We allow the source to be scratch.
;
; FIXME: these should take A immediates
456
; Write one lane of a single-register vector: the (ashift 1 lane) mask in
; the vec_merge selects exactly the lane to replace.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"            "= v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     " Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								     " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		     "SvB"))))]
  ""
  "v_writelane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; FIXME: 64bit operations really should be splitters, but I am not sure how
; to represent vertical subregs.
(define_insn "*vec_set<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	     "= v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	     " Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								     " U0")
	  (ashift (const_int 1)
		  (match_operand:SI 2 "gcn_alu_operand"		     "SvB"))))]
  ""
  "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Standard-name entry point; read/modify/write via (match_dup 0).
(define_expand "vec_set<mode>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(vec_merge:VEC_REG_MODE
	  (vec_duplicate:VEC_REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
	  (match_dup 0)
	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
  "")
499
; As "*vec_set<mode>" but with the lane mask already a constant power of
; two; the lane number is recovered with exact_log2 at output time.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_1REG_MODE
	  (vec_duplicate:VEC_1REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"	               " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %0, %1, %2";
  }
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Double-register variant of the above; writes both halves of the lane.
(define_insn "*vec_set<mode>_1"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"	       "=v")
	(vec_merge:VEC_2REG_MODE
	  (vec_duplicate:VEC_2REG_MODE
	    (match_operand:<SCALAR_MODE> 1 "register_operand"	       "Sv"))
	  (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
								       "U0")
	  (match_operand:SI 2 "const_int_operand"		       " i")))]
  "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
  {
    operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
    return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])
535
; Broadcast a scalar into every lane of a single-register vector.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"  "=v")
	(vec_duplicate:VEC_1REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvB")))]
  ""
  "v_mov_b32\t%0, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])

; Broadcast a scalar into every lane of a double-register vector.
(define_insn "vec_duplicate<mode><exec>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"  "=  v")
	(vec_duplicate:VEC_2REG_MODE
	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
  ""
  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "16")])
553
; Read one lane of a single-register vector into a scalar register.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_1REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Double-register variant; reads both halves of the lane.
(define_insn "vec_extract<mode><scalar_mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"   "=Sg")
	(vec_select:<SCALAR_MODE>
	  (match_operand:VEC_2REG_MODE 1 "register_operand" "  v")
	  (parallel [(match_operand:SI 2 "gcn_alu_operand"  "SvB")])))]
  ""
  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
  [(set_attr "type" "vmult")
   (set_attr "length" "16")
   (set_attr "exec" "none")
   (set_attr "laneselect" "yes")])

; Initialize a vector from a PARALLEL of element values (operand 1);
; all the work is done in gcn_expand_vector_init.
(define_expand "vec_init<mode><scalar_mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand 1)]
  ""
  {
    gcn_expand_vector_init (operands[0], operands[1]);
    DONE;
  })
586
;; }}}
;; {{{ Scatter / Gather

;; GCN does not have an instruction for loading a vector from contiguous
;; memory so *all* loads and stores are eventually converted to scatter
;; or gather.
;;
;; GCC does not permit MEM to hold vectors of addresses, so we must use an
;; unspec.  The unspec formats are as follows:
;;
;;     (unspec:V64??
;;	 [(<address expression>)
;;	  (<addr_space_t>)
;;	  (<use_glc>)
;;	  (mem:BLK (scratch))]
;;	 UNSPEC_GATHER)
;;
;;     (unspec:BLK
;;	  [(<address expression>)
;;	   (<source register>)
;;	   (<addr_space_t>)
;;	   (<use_glc>)
;;	   (<exec>)]
;;	  UNSPEC_SCATTER)
;;
;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
;; - The mem:BLK does not contain any real information, but indicates that an
;;   unknown memory read is taking place.  Stores are expected to use a similar
;;   mem:BLK outside the unspec.
;; - The address space and glc (volatile) fields are there to replace the
;;   fields normally found in a MEM.
;; - Multiple forms of address expression are supported, below.
619
; Gather entry point: base (operand 1) + offsets (operand 2), scaled by
; operand 4.  Chooses the 1-offset form when the computed addresses come
; back as a full V64DI vector, otherwise the base+offsets 2-offset form.
(define_expand "gather_load<mode>"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")]
  ""
  {
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), NULL);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
						const0_rtx, const0_rtx));
    else
      emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
						 addr, const0_rtx, const0_rtx,
						 const0_rtx));
    DONE;
  })

; As "gather_load<mode>", but lane-masked by exec operand 5; inactive
; lanes of the destination are left undefined (gcn_gen_undef).
(define_expand "gather<mode>_exec"
  [(match_operand:VEC_REG_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:V64SI 2 "register_operand")
   (match_operand 3 "immediate_operand")
   (match_operand:SI 4 "gcn_alu_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    rtx undefmode = gcn_gen_undef (<MODE>mode);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
					  operands[2], operands[4],
					  INTVAL (operands[3]), operands[5]);

    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
						     const0_rtx, const0_rtx,
						     const0_rtx, undefmode,
						     operands[5]));
    else
      emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
						      addr, const0_rtx,
						      const0_rtx, const0_rtx,
						      undefmode, operands[5]));
    DONE;
  })
669
; Allow any address expression
(define_expand "gather<mode>_expr<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand")
	(unspec:VEC_REG_MODE
	  [(match_operand 1 "")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
    ""
    {})
681
; Gather from a vector of 64-bit addresses (operand 1) plus a broadcast
; constant offset (operand 2).  Operand 3 is the address space, operand 4
; the glc (volatile) flag.  The insn condition limits the offset to the
; range encodable by flat (GCN5+) or global addressing.
(define_insn "gather<mode>_insn_1offset<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
		       (vec_duplicate:V64DI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
	|| ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
	&& (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
		   glc);
	else
	  sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
	       "s_waitcnt\tvmcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])

; LDS/GDS variant: 32-bit addresses (operand 1) plus a constant offset,
; for the DS address spaces; offset must fit in 16 bits.
(define_insn "gather<mode>_insn_1offset_ds<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	 "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
		       (vec_duplicate:V64SI
			 (match_operand 2 "immediate_operand"	 " n")))
	   (match_operand 3 "immediate_operand"			 " n")
	   (match_operand 4 "immediate_operand"			 " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
742
; Gather from scalar base (operand 1) + sign-extended 32-bit vector offsets
; (operand 2) + broadcast constant offset (operand 3).  Global address
; space only; operand 5 is the glc (volatile) flag.
(define_insn "gather<mode>_insn_2offsets<exec>"
  [(set (match_operand:VEC_REG_MODE 0 "register_operand"	       "=v")
	(unspec:VEC_REG_MODE
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 1 "register_operand"		       "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 2 "register_operand"	       " v")))
	     (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
	   (match_operand 4 "immediate_operand"			       " n")
	   (match_operand 5 "immediate_operand"			       " n")
	   (mem:BLK (scratch))]
	  UNSPEC_GATHER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	but a 32-bit value would be correct.  */
	int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
	sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
		      "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
779
; Expand the scatter_store optab: store the lanes of vector operand 4 to
; per-lane addresses formed from scalar base operand 0 and the offset
; vector in operand 1 (operands 2/3 describe how the offsets are scaled
; and extended; see the optab documentation and gcn_expand_scaled_offsets).
(define_expand "scatter_store<mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")]
  ""
  {
    /* Compute the per-lane offsets; NULL means no EXEC mask.  */
    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), NULL);

    /* A V64DI result holds complete 64-bit addresses; otherwise ADDR is a
       32-bit offset vector combined with the scalar base by the insn.  */
    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
						 const0_rtx, const0_rtx));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
						  const0_rtx, operands[4],
						  const0_rtx, const0_rtx));
    DONE;
  })
801
; Masked variant of scatter_store: operand 5 is the EXEC register that
; selects which lanes actually store.
(define_expand "scatter<mode>_exec"
  [(match_operand:DI 0 "register_operand")
   (match_operand 1 "register_operand")
   (match_operand 2 "immediate_operand")
   (match_operand:SI 3 "gcn_alu_operand")
   (match_operand:VEC_REG_MODE 4 "register_operand")
   (match_operand:DI 5 "gcn_exec_reg_operand")]
  ""
  {
    /* Ensure the EXEC mask is in a register for the insn predicates.  */
    operands[5] = force_reg (DImode, operands[5]);

    rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
					  operands[1], operands[3],
					  INTVAL (operands[2]), operands[5]);

    /* A V64DI result holds complete 64-bit addresses; otherwise ADDR is a
       32-bit offset vector combined with the scalar base by the insn.  */
    if (GET_MODE (addr) == V64DImode)
      emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
						      operands[4], const0_rtx,
						      const0_rtx,
						      operands[5]));
    else
      emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
						       const0_rtx, operands[4],
						       const0_rtx, const0_rtx,
						       operands[5]));
    DONE;
  })
829
; Allow any address expression in operand 0; presumably combine/later
; passes reduce it to one of the scatter<mode>_insn_* forms below —
; TODO(review): confirm against the callers in gcn.c.
(define_expand "scatter<mode>_expr<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(match_operand:V64DI 0 "")
	   (match_operand:VEC_REG_MODE 1 "register_operand")
	   (match_operand 2 "immediate_operand")
	   (match_operand 3 "immediate_operand")]
	  UNSPEC_SCATTER))]
  ""
  {})
841
; Scatter store using complete 64-bit per-lane addresses (operand 0) plus a
; constant offset (operand 1).  Operand 3 is the address-space constant and
; operand 4 selects the "glc" modifier.  The insn condition restricts the
; offset to the ranges the FLAT/GLOBAL instruction forms accept.
(define_insn "scatter<mode>_insn_1offset<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
		       (vec_duplicate:V64DI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_FLAT_P (INTVAL (operands[3]))
    && (INTVAL(operands[1]) == 0
	|| (TARGET_GCN5_PLUS
	    && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
    || (AS_GLOBAL_P (INTVAL (operands[3]))
	&& (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    const char *glc = INTVAL (operands[4]) ? " glc" : "";

    static char buf[200];
    if (AS_FLAT_P (as))
      {
	/* Only GCN5+ FLAT stores take an offset field.  */
	if (TARGET_GCN5_PLUS)
	  sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;"
		   "s_waitcnt\texpcnt(0)", glc);
	else
	  sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\texpcnt(0)",
		   glc);
      }
    else if (AS_GLOBAL_P (as))
      sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
	       "s_waitcnt\texpcnt(0)", glc);
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
882
; Scatter store to LDS/GDS shared memory: 32-bit per-lane addresses
; (operand 0) plus an unsigned constant offset below 0x10000 (operand 1).
; AS_GDS_P of the address-space constant (operand 3) appends the "gds"
; modifier.
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
		       (vec_duplicate:V64SI
			 (match_operand 1 "immediate_operand"	 "n")))
	   (match_operand:VEC_REG_MODE 2 "register_operand"	 "v")
	   (match_operand 3 "immediate_operand"			 "n")
	   (match_operand 4 "immediate_operand"			 "n")]
	  UNSPEC_SCATTER))]
  "(AS_ANY_DS_P (INTVAL (operands[3]))
    && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
  {
    addr_space_t as = INTVAL (operands[3]);
    static char buf[200];
    sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\texpcnt(0)",
	     (AS_GDS_P (as) ? " gds" : ""));
    return buf;
  }
  [(set_attr "type" "ds")
   (set_attr "length" "12")])
904
; Scatter store addressed by a scalar base (operand 0) plus sign-extended
; 32-bit per-lane offsets (operand 1) plus a constant offset (operand 2);
; GLOBAL address space only.  Operand 5 selects the "glc" modifier.
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
  [(set (mem:BLK (scratch))
	(unspec:BLK
	  [(plus:V64DI
	     (plus:V64DI
	       (vec_duplicate:V64DI
		 (match_operand:DI 0 "register_operand"		    "Sv"))
	       (sign_extend:V64DI
		 (match_operand:V64SI 1 "register_operand"	    " v")))
	     (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
								    " n")))
	   (match_operand:VEC_REG_MODE 3 "register_operand"	    " v")
	   (match_operand 4 "immediate_operand"			    " n")
	   (match_operand 5 "immediate_operand"			    " n")]
	  UNSPEC_SCATTER))]
  "(AS_GLOBAL_P (INTVAL (operands[4]))
    && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
  {
    addr_space_t as = INTVAL (operands[4]);
    const char *glc = INTVAL (operands[5]) ? " glc" : "";

    static char buf[200];
    if (AS_GLOBAL_P (as))
      {
	/* Work around assembler bug in which a 64-bit register is expected,
	but a 32-bit value would be correct.  Print the VGPR pair by number
	rather than via the operand.  */
	int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
	sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
		      "s_waitcnt\texpcnt(0)", reg, reg + 1, glc);
      }
    else
      gcc_unreachable ();

    return buf;
  }
  [(set_attr "type" "flat")
   (set_attr "length" "12")])
942
943;; }}}
944;; {{{ Permutations
945
; Backwards lane-permute: each lane of the result reads the element of
; operand 2 addressed by the corresponding lane of operand 1, under EXEC
; mask operand 3.  The s_waitcnt ensures the permuted data has arrived.
(define_insn "ds_bpermute<mode>"
  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"    "=v")
	(unspec:VEC_1REG_MODE
	  [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
	   (match_operand:V64SI 1 "register_operand"	     " v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     " e")]
	  UNSPEC_BPERMUTE))]
  ""
  "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
  [(set_attr "type" "vop2")
   (set_attr "length" "12")])
957
; Backwards lane-permute for double-register (64-bit element) vectors:
; split after reload into two 32-bit ds_bpermutes on the low and high
; register parts.  The destination is early-clobbered; operand 2 may be
; tied to it ("v0").
(define_insn_and_split "ds_bpermute<mode>"
  [(set (match_operand:VEC_2REG_MODE 0 "register_operand"    "=&v")
	(unspec:VEC_2REG_MODE
	  [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
	   (match_operand:V64SI 1 "register_operand"	     "  v")
	   (match_operand:DI 3 "gcn_exec_reg_operand"	     "  e")]
	  UNSPEC_BPERMUTE))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))
   (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
				    UNSPEC_BPERMUTE))]
  {
    /* Low and high halves of the destination and source.  */
    operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
    operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
    operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
    operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "24")])
980
981;; }}}
982;; {{{ ALU special case: add/sub
983
; 32-bit vector add.  v_add writes its per-lane carry-out to VCC, hence
; the clobber; "%" marks the addends commutative so the "vSvB" constraint
; can serve either source.
(define_insn "addv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%  v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
994
; Add a scalar (SGPR or immediate), duplicated across all lanes, to a
; vector.  VCC is clobbered by v_add's implicit carry-out.
(define_insn "addv64si3_dup<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"   "SvB"))
	  (match_operand:V64SI 1 "register_operand" "  v")))
   (clobber (reg:DI VCC_REG))]
  ""
  "v_add%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
1006
; 32-bit vector add that also captures the per-lane unsigned carry-out in
; operand 3: VCC ("cV", vop2 encoding) or an SGPR pair ("Sg", vop3b).
; The ltu of the sum against one addend is the RTL idiom for carry.
(define_insn "addv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "=  v,   v")
	(plus:V64SI
	  (match_operand:V64SI 1 "register_operand" "%  v,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand"  "vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	    "= cV,  Sg")
	(ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8")])
1019
; The add/sub-with-carry-in patterns below only change the VCC bits when
; the corresponding lane is enabled, so their carry-out set must be
; described as an ior.
1022
; Add a scalar, duplicated across all lanes, to a vector, capturing the
; per-lane unsigned carry-out in operand 3 (VCC or an SGPR pair).  As in
; addv64si3_vcc, carry is modelled as the sum being unsigned-less-than an
; addend.  Operand 1 is the scalar SI, so it is the operand that must be
; wrapped in vec_duplicate; the previous version duplicated the vector
; operand 2 and used the scalar operand 1 directly as a vector addend,
; which is ill-typed RTL and could never be matched consistently.
(define_insn "addv64si3_vcc_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"   "= v,  v")
	(plus:V64SI
	  (vec_duplicate:V64SI
	    (match_operand:SI 1 "gcn_alu_operand"   "SvB,SvB"))
	  (match_operand:V64SI 2 "register_operand" "  v,  v")))
   (set (match_operand:DI 3 "register_operand"	    "=cV, Sg")
	(ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 1))
			    (match_dup 2))
		(vec_duplicate:V64SI (match_dup 1))))]
  ""
  "v_add%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "8,8")])
1037
1038; This pattern does not accept SGPR because VCC read already counts as an
1039; SGPR use and number of SGPR operands is limited to 1.
1040
; Add with carry-in and carry-out (v_addc).  The carry-in is modelled as a
; vec_merge selecting 1 or 0 per lane from the DImode carry register
; (operand 3); the carry-out (operand 4) iors the carries of both stages
; because the insn only updates VCC bits for enabled lanes.
(define_insn "addcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
	  (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,Sg")
	(ior:DI (ltu:DI (plus:V64SI
			  (plus:V64SI
			    (vec_merge:V64SI
			      (vec_duplicate:V64SI (const_int 1))
			      (vec_duplicate:V64SI (const_int 0))
			      (match_dup 3))
			    (match_dup 1))
			  (match_dup 2))
			(match_dup 2))
		(ltu:DI (plus:V64SI
			  (vec_merge:V64SI
			    (vec_duplicate:V64SI (const_int 1))
			    (vec_duplicate:V64SI (const_int 0))
			    (match_dup 3))
			  (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
1072
; As addcv64si3, but the second addend is a scalar (operand 2) duplicated
; across all lanes.
(define_insn "addcv64si3_dup<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand" "=v,v")
	(plus:V64SI
	  (plus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "register_operand" " cV, Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"    "SvB,SvB"))))
   (set (match_operand:DI 4 "register_operand"  "=cV, Sg")
	(ior:DI (ltu:DI (plus:V64SI (plus:V64SI
				      (vec_merge:V64SI
					(vec_duplicate:V64SI (const_int 1))
					(vec_duplicate:V64SI (const_int 0))
					(match_dup 3))
				      (match_dup 1))
				    (vec_duplicate:V64SI
				      (match_dup 2)))
			(vec_duplicate:V64SI
			  (match_dup 2)))
		(ltu:DI (plus:V64SI (vec_merge:V64SI
				      (vec_duplicate:V64SI (const_int 1))
				      (vec_duplicate:V64SI (const_int 0))
				      (match_dup 3))
				    (match_dup 1))
			(match_dup 1))))]
  ""
  "v_addc%^_u32\t%0, %4, %1, %2, %3"
  [(set_attr "type" "vop2,vop3b")
   (set_attr "length" "4,8")])
1105
; 32-bit vector subtract.  The two alternatives place the non-VGPR operand
; on either side, selecting v_sub or v_subrev accordingly; VCC is
; clobbered by the implicit borrow-out.
(define_insn "subv64si3<exec_clobber>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,vSvB")))
   (clobber (reg:DI VCC_REG))]
  ""
  "@
   v_sub%^_u32\t%0, vcc, %1, %2
   v_subrev%^_u32\t%0, vcc, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
1118
; 32-bit vector subtract that captures the per-lane unsigned borrow-out in
; operand 3 (VCC or an SGPR pair).  gtu of the difference against the
; minuend is the RTL idiom for borrow.
(define_insn "subv64si3_vcc<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"  "=  v,   v,   v,   v")
	(minus:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB,   v,   v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "   v,   v,vSvB,vSvB")))
   (set (match_operand:DI 3 "register_operand"	   "= cV,  Sg,  cV,  Sg")
	(gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
		(match_dup 1)))]
  ""
  "@
   v_sub%^_u32\t%0, %3, %1, %2
   v_sub%^_u32\t%0, %3, %1, %2
   v_subrev%^_u32\t%0, %3, %2, %1
   v_subrev%^_u32\t%0, %3, %2, %1"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
1135
1136; This pattern does not accept SGPR because VCC read already counts
1137; as a SGPR use and number of SGPR operands is limited to 1.
1138
; Subtract with borrow-in and borrow-out (v_subb/v_subbrev).  The borrow-in
; is modelled as a vec_merge selecting 1 or 0 per lane from operand 3; the
; borrow-out (operand 4) iors the borrows of both stages because only
; enabled lanes update VCC.
(define_insn "subcv64si3<exec_vcc>"
  [(set (match_operand:V64SI 0 "register_operand"    "= v, v, v, v")
	(minus:V64SI
	  (minus:V64SI
	    (vec_merge:V64SI
	      (vec_duplicate:V64SI (const_int 1))
	      (vec_duplicate:V64SI (const_int 0))
	      (match_operand:DI 3 "gcn_alu_operand"  " cV,Sv,cV,Sv"))
	    (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
	  (match_operand:V64SI 2 "gcn_alu_operand"   " vB,vB,vA,vA")))
   (set (match_operand:DI 4 "register_operand"	     "=cV,Sg,cV,Sg")
	(ior:DI (gtu:DI (minus:V64SI (minus:V64SI
				       (vec_merge:V64SI
					 (vec_duplicate:V64SI (const_int 1))
					 (vec_duplicate:V64SI (const_int 0))
					 (match_dup 3))
				       (match_dup 1))
				     (match_dup 2))
			(match_dup 2))
		(ltu:DI (minus:V64SI (vec_merge:V64SI
				       (vec_duplicate:V64SI (const_int 1))
				       (vec_duplicate:V64SI (const_int 0))
				       (match_dup 3))
				     (match_dup 1))
			(match_dup 1))))]
  ""
  "@
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subb%^_u32\t%0, %4, %1, %2, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3
   v_subbrev%^_u32\t%0, %4, %2, %1, %3"
  [(set_attr "type" "vop2,vop3b,vop2,vop3b")
   (set_attr "length" "8")])
1172
; 64-bit vector add: split after reload into a low-part add with carry-out
; to VCC followed by a high-part add-with-carry, provided each V64DI
; operand can be accessed as two V64SI halves.
(define_insn_and_split "addv64di3"
  [(set (match_operand:V64DI 0 "register_operand"   "=  &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "%  v0")
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1201
; 64-bit vector add under an EXEC mask (operand 4), split after reload
; into a masked low-part add-with-carry-out plus a masked high-part
; add-with-carry.  Operand 3 supplies the values of disabled lanes (or is
; the "undefined" unspec, constraint "U0").
(define_insn_and_split "addv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=  &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		  "%  v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "    e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  ; Split only when every V64DI operand can be accessed as two V64SI
  ; halves.  The vec_merge input is operand 3; the old condition checked
  ; operand 4, which is the DImode EXEC mask and never needs splitting.
  ; This now matches subv64di3_exec and the other *_exec splitters.
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1238
; 64-bit vector subtract: split after reload into a low-part subtract with
; borrow-out to VCC followed by a high-part subtract-with-borrow.
(define_insn_and_split "subv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "=  &v,   &v")
	(minus:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0,   v0")
	  (match_operand:V64DI 2 "gcn_alu_operand" "   v0,vSvB0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_subcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
1267
; Masked 64-bit vector subtract.  The insn condition requires at least one
; source to be a register (the alternatives tie one side to "0").
; Operand 3 supplies the disabled lanes; operand 4 is the EXEC mask.
(define_insn_and_split "subv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"	       "=  &v,   &v")
	(vec_merge:V64DI
	  (minus:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand"	       "vSvB0,   v0")
	    (match_operand:V64DI 2 "gcn_alu_operand"	       "   v0,vSvB0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
							       "   U0,   U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"	       "    e,    e")))
   (clobber (reg:DI VCC_REG))]
  "register_operand (operands[1], VOIDmode)
   || register_operand (operands[2], VOIDmode)"
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_subv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_subcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
1306
; Add a scalar DImode value, duplicated across all lanes, to a 64-bit
; vector.  Note the generated addv64si3_vcc_dup takes the scalar part as
; its second argument, so the argument order differs from this pattern's
; operand order.
(define_insn_and_split "addv64di3_dup"
  [(set (match_operand:V64DI 0 "register_operand"   "= &v")
	(plus:V64DI
	  (match_operand:V64DI 1 "register_operand" "  v0")
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"   "SvDB"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc));
    emit_insn (gen_addcv64si3_dup
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1336
; Masked variant of addv64di3_dup: operand 3 supplies the disabled lanes
; and operand 4 is the EXEC mask.
(define_insn_and_split "addv64di3_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "= &v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (match_operand:V64DI 1 "register_operand"		  "  v0")
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand"		  "SvDB")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[1])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 gcn_operand_part (V64DImode, operands[1], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[1], 1),
		 gcn_operand_part (DImode, operands[2], 1),
		 vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1374
; Add a zero-extended V64SI to a V64DI: the low part adds operand 1 with
; carry-out; the high part adds only the carry (addc with const0_rtx).
(define_insn_and_split "addv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v,&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "0vB,0vA")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
1402
; Masked variant of addv64di3_zext: operand 3 supplies the disabled lanes
; and operand 4 is the EXEC mask.
(define_insn_and_split "addv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v,&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		  "0vA,0vB"))
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "0vB,0vA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e,  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 operands[1],
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8,8")])
1438
; Add a zero-extended, duplicated scalar SI (operand 1) to a V64DI vector:
; the high part of the result receives only the carry.
(define_insn_and_split "addv64di3_zext_dup"
  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
	(plus:V64DI
	  (zero_extend:V64DI
	    (vec_duplicate:V64SI
	      (match_operand:SI 1 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 2 "gcn_alu_operand"  "vA0")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc));
    emit_insn (gen_addcv64si3
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1467
; Masked variant of addv64di3_zext_dup: operand 3 supplies the disabled
; lanes and operand 4 is the EXEC mask.
(define_insn_and_split "addv64di3_zext_dup_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI
	      (vec_duplicate:V64SI
		(match_operand:SI 1 "gcn_alu_operand"		  "BSv")))
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vA0"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[2])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[1], 0),
		 gcn_operand_part (V64DImode, operands[2], 0),
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    emit_insn (gen_addcv64si3_exec
		(gcn_operand_part (V64DImode, operands[0], 1),
		 gcn_operand_part (V64DImode, operands[2], 1),
		 const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1504
; Add a zero-extended V64SI (operand 1) to a duplicated scalar DI
; (operand 2): the low part adds the duplicated low word with carry-out;
; the high part duplicates the high word and then adds the carry in place.
(define_insn_and_split "addv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1530
; Masked variant of addv64di3_zext_dup2: operand 3 supplies the disabled
; lanes and operand 4 is the EXEC mask.
(define_insn_and_split "addv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		       "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								       " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"      " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		       "  e")))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, const0_rtx, vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1567
; As addv64di3_zext_dup2 but sign-extending operand 1: the scratch
; (operand 3) holds the per-lane sign words (arithmetic shift right by 31)
; which are added into the high part together with the carry.
(define_insn_and_split "addv64di3_sext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"		      "= v")
	(plus:V64DI
	  (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
	  (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand"  "BSv"))))
   (clobber (match_scratch:V64SI 3				      "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
    emit_insn (gen_addv64si3_vcc_dup
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si
		(dsthi, gcn_operand_part (DImode, operands[2], 1)));
    emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1595
; Masked variant of addv64di3_sext_dup2: operand 3 supplies the disabled
; lanes, operand 4 is the EXEC mask and operand 5 the sign-word scratch.
(define_insn_and_split "addv64di3_sext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		       "= v")
	(vec_merge:V64DI
	  (plus:V64DI
	    (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
								       " vA"))
	    (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand"      " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		       "  e")))
   (clobber (match_scratch:V64SI 5				       "=&v"))
   (clobber (reg:DI VCC_REG))]
  ""
  "#"
  "gcn_can_split_p  (V64DImode, operands[0])
   && gcn_can_split_p (V64DImode, operands[3])"
  [(const_int 0)]
  {
    rtx vcc = gen_rtx_REG (DImode, VCC_REG);
    emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
				    gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addv64si3_vcc_dup_exec
		(gcn_operand_part (V64DImode, operands[0], 0),
		 gcn_operand_part (DImode, operands[2], 0),
		 operands[1],
		 vcc,
		 gcn_operand_part (V64DImode, operands[3], 0),
		 operands[4]));
    rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
    emit_insn (gen_vec_duplicatev64si_exec
		(dsthi, gcn_operand_part (DImode, operands[2], 1),
		 gcn_gen_undef (V64SImode), operands[4]));
    emit_insn (gen_addcv64si3_exec
		(dsthi, dsthi, operands[5], vcc, vcc,
		 gcn_operand_part (V64DImode, operands[3], 1),
		 operands[4]));
    DONE;
  }
  [(set_attr "type" "vmult")
   (set_attr "length" "8")])
1635
1636;; }}}
1637;; {{{ DS memory ALU: add/sub
1638
; Vector modes supported by the DS (shared memory) arithmetic patterns.
(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
; Scalar counterparts of DS_ARITH_MODE.
(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
1641
1642;; FIXME: the vector patterns probably need RD expanded to a vector of
1643;;        addresses.  For now, the only way a vector can get into LDS is
1644;;        if the user puts it there manually.
1645;;
1646;; FIXME: the scalar patterns are probably fine in themselves, but need to be
1647;;        checked to see if anything can ever use them.
1648
; Add a register into an LDS/GDS memory location in place (ds_add performs
; the read-modify-write).  The insn condition enforces that the
; destination and first source are the same location.
(define_insn "add<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(plus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1658
;; Add a register into a scalar value held in DS memory.
;; Read-modify-write: operands 0 and 1 must be the same location.
(define_insn "add<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(plus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      "%RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_add%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1669
;; Subtract a vector register from a vector value held in DS memory
;; (memory - register).  Operands 0 and 1 must be the same location.
(define_insn "sub<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1679
;; Scalar DS-memory subtract (memory - register).
;; Operands 0 and 1 must be the same location.
(define_insn "sub<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      " RD")
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_sub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1690
;; Reverse subtract: register - memory, result stored back to the same
;; DS memory location (ds_rsub).  Operands 0 and 1 must match.
(define_insn "subr<mode>3_ds<exec>"
  [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand"	 "=RD")
	(minus:DS_ARITH_MODE
	  (match_operand:DS_ARITH_MODE 2 "register_operand"	 "  v")
	  (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1700
;; Scalar reverse subtract: register - memory, stored back to the same DS
;; memory location (ds_rsub).  Operands 0 and 1 must match.
;; (Cleanup: removed stray trailing whitespace after the predicate name.)
(define_insn "subr<mode>3_ds_scalar"
  [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
	(minus:DS_ARITH_SCALAR_MODE
	  (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand"    "  v")
	  (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
								      " RD")))]
  "rtx_equal_p (operands[0], operands[1])"
  "ds_rsub%u0\t%A0, %2%O0"
  [(set_attr "type" "ds")
   (set_attr "length" "8")])
1711
1712;; }}}
1713;; {{{ ALU special case: mult
1714
;; High 32 bits of a widening 32x32->64-bit multiply; any_extend makes this
;; cover both the signed and unsigned variants (v_mul_hi_i32/v_mul_hi_u32
;; via the <sgnsuffix> attribute).
(define_insn "<su>mulv64si3_highpart<exec>"
  [(set (match_operand:V64SI 0 "register_operand"	 "=  v")
	(truncate:V64SI
	  (lshiftrt:V64DI
	    (mult:V64DI
	      (any_extend:V64DI
		(match_operand:V64SI 1 "gcn_alu_operand" "  %v"))
	      (any_extend:V64DI
		(match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
	    (const_int 32))))]
  ""
  "v_mul_hi<sgnsuffix>0\t%0, %2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1729
;; Low 32 bits of a 32x32-bit vector multiply.
(define_insn "mulv64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=   v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1739
;; Low 32 bits of a vector multiply where operand 2 is a scalar
;; broadcast (vec_duplicate) to all lanes.
(define_insn "mulv64si3_dup<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=   v")
	(mult:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"  "  SvA"))))]
  ""
  "v_mul_lo_u32\t%0, %1, %2"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
1750
;; Full 64-bit vector multiply, split after reload into 32-bit operations:
;;   out_lo = lo (a_lo * b_lo)
;;   out_hi = hi (a_lo * b_lo) + a_hi * b_lo + a_lo * b_hi
;; The a_hi * b_hi partial product contributes only at bit 64 and above of
;; the 128-bit product, so it cannot affect the truncated 64-bit result and
;; must not be added in.  (Fix: the previous split also computed
;; left_hi * right_hi and added it to out_hi, corrupting the high word
;; whenever both high parts were nonzero.)
(define_insn_and_split "mulv64di3"
  [(set (match_operand:V64DI 0 "register_operand"  "=&v")
	(mult:V64DI
	  (match_operand:V64DI 1 "gcn_alu_operand" "% v")
	  (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
   (clobber (match_scratch:V64SI 3		   "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
1780
;; EXEC-masked variant of mulv64di3.  Operand 3 supplies the previous value
;; of the destination for masked-out lanes (or an UNSPEC meaning "undefined");
;; operand 4 is the EXEC mask.  The split mirrors mulv64di3:
;;   out_lo = lo (a_lo * b_lo)
;;   out_hi = hi (a_lo * b_lo) + a_hi * b_lo + a_lo * b_hi
;; As in mulv64di3, the a_hi * b_hi product lies entirely above bit 63 and
;; must not be added to the high word.  (Fix: removed the erroneous
;; left_hi * right_hi contribution.)
(define_insn_and_split "mulv64di3_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (match_operand:V64DI 1 "gcn_alu_operand"		  "% v")
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e")))
   (clobber (match_scratch:V64SI 5                                "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
    rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Masked-out lanes keep operand 3's value, unless it is "undefined".  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1828
;; 64-bit multiply where the left operand is a zero-extended 32-bit vector.
;; Since the left high part is known to be zero, only two partial products
;; are needed: out_lo = lo (l * r_lo); out_hi = hi (l * r_lo) + l * r_hi.
(define_insn_and_split "mulv64di3_zext"
  [(set (match_operand:V64DI 0 "register_operand"    "=&v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "  v"))
	  (match_operand:V64DI 2 "gcn_alu_operand"   "vDA")))
   (clobber (match_scratch:V64SI 3		     "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
1854
;; EXEC-masked variant of mulv64di3_zext.  Operand 3 supplies the previous
;; destination value for masked-out lanes (or an UNSPEC for "undefined");
;; operand 4 is the EXEC mask.
(define_insn_and_split "mulv64di3_zext_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "=&v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		  "  v"))
	    (match_operand:V64DI 2 "gcn_alu_operand"		  "vDA"))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "  e")))
   (clobber (match_scratch:V64SI 5                                "=&v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Masked-out lanes keep operand 3's value, unless it is "undefined".  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1898
;; Zero-extended 32-bit vector multiplied by a broadcast scalar DImode value.
;; NOTE(review): operands[2] is a scalar DI, yet its halves are extracted
;; with gcn_operand_part (V64DImode, ...) and passed to the vector
;; gen_mulv64si3 / gen_umulv64si3_highpart generators; this presumably
;; relies on scalar (SGPR) sources being broadcast to all lanes by the
;; hardware -- confirm, or consider the *_dup generators
;; (cf. mulv64si3_dup above).
(define_insn_and_split "mulv64di3_zext_dup2"
  [(set (match_operand:V64DI 0 "register_operand"    "= &v")
	(mult:V64DI
	  (zero_extend:V64DI
	    (match_operand:V64SI 1 "gcn_alu_operand" "   v"))
	  (vec_duplicate:V64DI
	    (match_operand:DI 2 "gcn_alu_operand"    "SvDA"))))
   (clobber (match_scratch:V64SI 3		     "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx tmp = operands[3];

    emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
    emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
    emit_insn (gen_mulv64si3 (tmp, left, right_hi));
    emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
    DONE;
  })
1925
;; EXEC-masked variant of mulv64di3_zext_dup2.  Operand 3 supplies the
;; previous destination value for masked-out lanes; operand 4 is EXEC.
;; NOTE(review): as in mulv64di3_zext_dup2, the scalar DI operand's halves
;; are fed to the vector multiply generators -- presumably relying on
;; scalar-source broadcast; verify.
(define_insn_and_split "mulv64di3_zext_dup2_exec"
  [(set (match_operand:V64DI 0 "register_operand"		  "= &v")
	(vec_merge:V64DI
	  (mult:V64DI
	    (zero_extend:V64DI
	      (match_operand:V64SI 1 "gcn_alu_operand"		  "   v"))
	    (vec_duplicate:V64DI
	      (match_operand:DI 2 "gcn_alu_operand"		  "SvDA")))
	  (match_operand:V64DI 3 "gcn_register_or_unspec_operand" "  U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e")))
   (clobber (match_scratch:V64SI 5                                "= &v"))]
  ""
  "#"
  "reload_completed"
  [(const_int 0)]
  {
    rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
    rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
    rtx left = operands[1];
    rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
    rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
    rtx exec = operands[4];
    rtx tmp = operands[5];

    /* Masked-out lanes keep operand 3's value, unless it is "undefined".  */
    rtx old_lo, old_hi;
    if (GET_CODE (operands[3]) == UNSPEC)
      {
	old_lo = old_hi = gcn_gen_undef (V64SImode);
      }
    else
      {
	old_lo = gcn_operand_part (V64DImode, operands[3], 0);
	old_hi = gcn_operand_part (V64DImode, operands[3], 1);
      }

    rtx undef = gcn_gen_undef (V64SImode);

    emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
    emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
					     old_hi, exec));
    emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
    emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
    DONE;
  })
1970
1971;; }}}
1972;; {{{ ALU generic case
1973
; Modes and operation sets used by the generic integer ALU patterns below.
(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])

(define_code_iterator bitop [and ior xor])
(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
(define_code_iterator minmaxop [smin smax umin umax])
1979
;; Unary bitwise operation on an integer vector (the bitunop code iterator
;; and its <mnemonic> attribute are defined elsewhere in the port).
(define_insn "<expander><mode>2<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand"    "=  v")
	(bitunop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
1988
;; Binary bitwise operation (and/ior/xor): either a VALU register form, or
;; a DS read-modify-write form when the destination is DS memory (second
;; alternative, with operand 1 tied to the destination).
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
	(bitop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
								  "%  v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
								  "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2002
;; 64-bit bitwise operation.  The DS-memory alternative is a single 64-bit
;; ds op; the register alternative is split after reload into two
;; independent V64SI operations on the low and high halves.
(define_insn_and_split "<expander>v64di3"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
	(bitop:V64DI
	  (match_operand:V64DI 1 "gcn_valu_src0_operand"	  "%  v,RD")
	  (match_operand:V64DI 2 "gcn_valu_src1com_operand"	  "vSvB, v")))]
  ""
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 3)
	(bitop:V64SI (match_dup 5) (match_dup 7)))
   (set (match_dup 4)
	(bitop:V64SI (match_dup 6) (match_dup 8)))]
  {
    operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2027
;; EXEC-masked 64-bit bitwise operation.  As for <expander>v64di3, the
;; register form is split after reload into two masked V64SI vec_merge
;; operations; the DS form requires the destination and operand 1 to be
;; the same memory location (see the insn condition).
(define_insn_and_split "<expander>v64di3_exec"
  [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
	(vec_merge:V64DI
	  (bitop:V64DI
	    (match_operand:V64DI 1 "gcn_valu_src0_operand"	  "%  v,RD")
	    (match_operand:V64DI 2 "gcn_valu_src1com_operand"	  "vSvB, v"))
	  (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
								  "  U0,U0")
	  (match_operand:DI 4 "gcn_exec_reg_operand"		  "   e, e")))]
  "!memory_operand (operands[0], VOIDmode)
   || (rtx_equal_p (operands[0], operands[1])
       && register_operand (operands[2], VOIDmode))"
  "@
   #
   ds_<mnemonic>0\t%A0, %2%O0"
  "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
  [(set (match_dup 5)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 7) (match_dup 9))
	  (match_dup 11)
	  (match_dup 4)))
   (set (match_dup 6)
	(vec_merge:V64SI
	  (bitop:V64SI (match_dup 8) (match_dup 10))
	  (match_dup 12)
	  (match_dup 4)))]
  {
    operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
    operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
    operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
    operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
    operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
    operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
    operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
    operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
  }
  [(set_attr "type" "vmult,ds")
   (set_attr "length" "16,8")])
2066
;; Vector shift by a broadcast scalar amount.  The template uses the
;; <revmnemonic> ("...rev") opcode with the operands printed in reverse
;; order (%2 before %1).
(define_insn "<expander>v64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "= v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" "  v")
	  (vec_duplicate:V64SI
	    (match_operand:SI 2 "gcn_alu_operand"  "SvB"))))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2077
;; Vector shift with a per-lane (vector) shift amount; same reversed-operand
;; <revmnemonic> template as the broadcast form above.
(define_insn "v<expander>v64si3<exec>"
  [(set (match_operand:V64SI 0 "register_operand"  "=v")
	(shiftop:V64SI
	  (match_operand:V64SI 1 "gcn_alu_operand" " v")
	  (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
  ""
  "v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2087
;; Integer min/max (signed and unsigned): VALU register form, or DS
;; read-modify-write form when the destination is DS memory (operand 1
;; tied to the destination in the second alternative).
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "=  v,RD")
	(minmaxop:VEC_1REG_INT_MODE
	  (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
								  "%  v, 0")
	  (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
								  "vSvB, v")))]
  ""
  "@
   v_<mnemonic>0\t%0, %2, %1
   ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8,8")])
2101
2102;; }}}
2103;; {{{ FP binops - special cases
2104
2105; GCN does not directly provide a DFmode subtract instruction, so we do it by
2106; adding the negated second operand to the first.
2107
;; DFmode vector subtract, implemented as v_add_f64 with the subtrahend
;; negated via the source modifier (see comment above); the second
;; alternative handles a non-register first operand by commuting.
(define_insn "subv64df3<exec>"
  [(set (match_operand:V64DF 0 "register_operand"  "=  v,   v")
	(minus:V64DF
	  (match_operand:V64DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:V64DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2119
;; Scalar DFmode subtract via v_add_f64 with a negated source modifier.
;; NOTE(review): the pattern is named "subdf" rather than the standard
;; "subdf3" optab name -- confirm whether this is intentional or a typo
;; (as written it will not be found by the middle-end as the subdf3
;; standard pattern).
(define_insn "subdf"
  [(set (match_operand:DF 0 "register_operand"  "=  v,   v")
	(minus:DF
	  (match_operand:DF 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:DF 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_add_f64\t%0, %1, -%2
   v_add_f64\t%0, -%2, %1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8,8")])
2131
2132;; }}}
2133;; {{{ FP binops - generic
2134
; Floating-point modes (vector and scalar, with and without the two-register
; DF variants) and the commutative/non-commutative operation sets used by
; the generic FP binop patterns below.
(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
(define_mode_iterator FP_MODE [HF SF DF])
(define_mode_iterator FP_1REG_MODE [HF SF])

(define_code_iterator comm_fp [plus mult smin smax])
(define_code_iterator nocomm_fp [minus])
(define_code_iterator all_fp [plus mult minus smin smax])
2143
;; Commutative FP binop (add/mul/min/max) on FP vectors.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"  "=  v")
	(comm_fp:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "%  v")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
  ""
  "v_<mnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8")])
2153
;; Commutative scalar FP binop: a VALU register form, or a DS
;; read-modify-write form when the destination is LDS memory ("RL", with
;; operand 1 tied to the destination).  In the DS alternative the value to
;; combine is operand 2, and the template must use the DS mnemonic with the
;; %A0 address and %O0 offset, matching the integer DS patterns in this
;; file.  (Fix: the second alternative previously emitted
;; "v_<mnemonic>0\t%0, %1%O0" -- a VALU opcode for a "ds"-typed
;; alternative, printing the tied memory operand instead of the source.)
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand"    "=  v,  RL")
	(comm_fp:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "%  v,   0")
	  (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
  ""
  "@
  v_<mnemonic>0\t%0, %2, %1
  ds_<mnemonic>0\t%A0, %2%O0"
  [(set_attr "type" "vop2,ds")
   (set_attr "length" "8")])
2165
;; Non-commutative FP binop (subtract) on single-register FP vectors; the
;; second alternative uses the "rev" opcode to allow the scalar/constant
;; operand in either position.
(define_insn "<expander><mode>3<exec>"
  [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:VEC_FP_1REG_MODE
	  (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2177
;; Scalar counterpart of the non-commutative FP binop pattern above.
(define_insn "<expander><mode>3"
  [(set (match_operand:FP_1REG_MODE 0 "register_operand"  "=  v,   v")
	(nocomm_fp:FP_1REG_MODE
	  (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB,   v")
	  (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" "   v,vSvB")))]
  ""
  "@
   v_<mnemonic>0\t%0, %1, %2
   v_<revmnemonic>0\t%0, %2, %1"
  [(set_attr "type" "vop2")
   (set_attr "length" "8,8")])
2189
2190;; }}}
2191;; {{{ FP unops
2192
;; Scalar FP absolute value, computed as 0 + |x| using the VOP3 absolute
;; source modifier.
(define_insn "abs<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"		 "=v")
	(abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2200
;; Vector FP absolute value, as 0 + |x| via the VOP3 source modifier.
(define_insn "abs<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(abs:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, |%1|"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2209
;; Vector FP negation, as 0 + (-x) via the VOP3 negate source modifier.
(define_insn "neg<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	  "=v")
	(neg:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
  ""
  "v_add%i0\t%0, 0, -%1"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2218
;; Vector FP square root via v_sqrt; only enabled under
;; -funsafe-math-optimizations (hardware result is not correctly rounded).
(define_insn "sqrt<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v")
	(sqrt:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2227
;; Scalar FP square root; same unsafe-math gating as the vector form.
(define_insn "sqrt<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v")
	(sqrt:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
  "flag_unsafe_math_optimizations"
  "v_sqrt%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2236
2237;; }}}
2238;; {{{ FP fused multiply and add
2239
;; Vector fused multiply-add: %0 = %1 * %2 + %3 (v_fma).  The alternatives
;; allow the one non-VGPR operand to be either operand 2 or operand 3.
(define_insn "fma<mode>4<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	 "=  v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2250
;; Vector FMA with negated second operand: %0 = %1 * -%2 + %3, using the
;; VOP3 negate source modifier (also used by the reciprocal refinement in
;; the division expander below).
(define_insn "fma<mode>4_negop2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	   "=  v,   v,   v")
	(fma:VEC_FP_MODE
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:VEC_FP_MODE
	    (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:VEC_FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2262
;; Scalar fused multiply-add: %0 = %1 * %2 + %3.
(define_insn "fma<mode>4"
  [(set (match_operand:FP_MODE 0 "register_operand"  "=  v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA,  vA")
	  (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA")
	  (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, %2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2273
;; Scalar FMA with negated second operand: %0 = %1 * -%2 + %3.
(define_insn "fma<mode>4_negop2"
  [(set (match_operand:FP_MODE 0 "register_operand"    "=  v,   v,   v")
	(fma:FP_MODE
	  (match_operand:FP_MODE 1 "gcn_alu_operand"   "  vA,  vA,vSvA")
	  (neg:FP_MODE
	    (match_operand:FP_MODE 2 "gcn_alu_operand" "  vA,vSvA,  vA"))
	  (match_operand:FP_MODE 3 "gcn_alu_operand"   "vSvA,  vA,  vA")))]
  ""
  "v_fma%i0\t%0, %1, -%2, %3"
  [(set_attr "type" "vop3a")
   (set_attr "length" "8")])
2285
2286;; }}}
2287;; {{{ FP division
2288
;; Vector approximate reciprocal (1.0 / x) via v_rcp; refined by the
;; div<mode>3 expander below.
(define_insn "recip<mode>2<exec>"
  [(set (match_operand:VEC_FP_MODE 0 "register_operand"	   "=  v")
	(div:VEC_FP_MODE
	  (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
	  (match_operand:VEC_FP_MODE 1 "gcn_alu_operand"   "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2298
;; Scalar approximate reciprocal (1.0 / x) via v_rcp.
(define_insn "recip<mode>2"
  [(set (match_operand:FP_MODE 0 "register_operand"	 "=  v")
	(div:FP_MODE
	  (float:FP_MODE (const_int 1))
	  (match_operand:FP_MODE 1 "gcn_alu_operand"	 "vSvB")))]
  ""
  "v_rcp%i0\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2308
2309;; Do division via a = b * 1/c
2310;; The v_rcp_* instructions are not sufficiently accurate on their own,
2311;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson
2312;; which the ISA manual says is enough to improve the reciprocal accuracy.
2313;;
2314;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
2315
;; Vector FP division as a = b * (1/c): take the v_rcp approximation and
;; apply one Newton-Raphson step, rcp = initrcp * (2 - initrcp * c),
;; using fma<mode>4_negop2 for the (2 - initrcp * c) term.  When the
;; numerator is the constant -1 the final multiply is skipped and the
;; refined reciprocal is written directly to the destination.
;; Only enabled under -freciprocal-math (see FIXME above re denormals/NaNs).
(define_expand "div<mode>3"
  [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = gcn_vec_constant (<MODE>mode,
		  const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* a / b with a == -1 is just a (negated) reciprocal.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
		   && real_identical
		        (CONST_DOUBLE_REAL_VALUE
			  (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2347
;; Scalar FP division; same Newton-Raphson reciprocal refinement as the
;; vector div<mode>3 expander above.
(define_expand "div<mode>3"
  [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
   (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
   (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
  "flag_reciprocal_math"
  {
    rtx two = const_double_from_real_value (dconst2, <MODE>mode);
    rtx initrcp = gen_reg_rtx (<MODE>mode);
    rtx fma = gen_reg_rtx (<MODE>mode);
    rtx rcp;

    /* a / b with a == -1 is just a (negated) reciprocal.  */
    bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
		   && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
				      &dconstm1));

    if (is_rcp)
      rcp = operands[0];
    else
      rcp = gen_reg_rtx (<MODE>mode);

    emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
    emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
    emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));

    if (!is_rcp)
      emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));

    DONE;
  })
2377
2378;; }}}
2379;; {{{ Int/FP conversions
2380
; Source/destination modes for the int/FP conversion patterns, plus the
; conversion codes and the attributes that build the v_cvt mnemonic suffix
; from the operand modes (%i = signed/FP suffix, %u = unsigned suffix).
(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])

(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])

(define_code_iterator cvt_op [fix unsigned_fix
			      float unsigned_float
			      float_extend float_truncate])
(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
			    (float "float") (unsigned_float "floatuns")
			    (float_extend "extend") (float_truncate "trunc")])
(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
				(float "%i0%i1") (unsigned_float "%i0%u1")
				(float_extend "%i0%i1")
				(float_truncate "%i0%i1")])
2397
;; Scalar conversion; gcn_valid_cvt_p filters the iterator cross-product
;; down to the mode pairs the hardware actually supports.
(define_insn "<cvt_name><CVT_FROM_MODE:mode><CVT_TO_MODE:mode>2"
  [(set (match_operand:CVT_TO_MODE 0 "register_operand"	   "=  v")
	(cvt_op:CVT_TO_MODE
	  (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<CVT_FROM_MODE:MODE>mode, <CVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2407
;; Vector conversion; same gcn_valid_cvt_p filtering as the scalar form.
(define_insn "<cvt_name><VCVT_FROM_MODE:mode><VCVT_TO_MODE:mode>2<exec>"
  [(set (match_operand:VCVT_TO_MODE 0 "register_operand"    "=  v")
	(cvt_op:VCVT_TO_MODE
	  (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
  "gcn_valid_cvt_p (<VCVT_FROM_MODE:MODE>mode, <VCVT_TO_MODE:MODE>mode,
		    <cvt_name>_cvt)"
  "v_cvt<cvt_operands>\t%0, %1"
  [(set_attr "type" "vop1")
   (set_attr "length" "8")])
2417
2418;; }}}
2419;; {{{ Int/int conversions
2420
2421;; GCC can already do these for scalar types, but not for vector types.
2422;; Unfortunately you can't just do SUBREG on a vector to select the low part,
2423;; so there must be a few tricks here.
2424
;; Truncate V64DI to V64SI by copying just the low registers; when the
;; source and destination overlap correctly ("0" alternative) this is a
;; no-op (length 0).
;; NOTE(review): gcn_operand_part is called with V64SImode although
;; operands[1] is V64DImode; for a hard register and part 0 the result is
;; the same either way -- confirm the mode argument is intentional.
(define_insn_and_split "vec_truncatev64div64si"
  [(set (match_operand:V64SI 0 "register_operand"   "=v,&v")
	(truncate:V64SI
	  (match_operand:V64DI 1 "register_operand" " 0, v")))]
  ""
  "#"
  "reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])
2438
;; EXEC-masked truncation of V64DI to V64SI: split into a masked move of the
;; low part (vec_merge with operand 2 as the previous value).
;; NOTE(review): same V64SImode mode argument question as in
;; vec_truncatev64div64si above.
(define_insn_and_split "vec_truncatev64div64si_exec"
  [(set (match_operand:V64SI 0 "register_operand"	     "=v,&v")
	(vec_merge:V64SI
	  (truncate:V64SI
	    (match_operand:V64DI 1 "register_operand"        " 0, v"))
	  (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
	  (match_operand:DI 3 "gcn_exec_operand"	     " e, e")))]
  ""
  "#"
  "reload_completed"
  [(parallel [(set (match_dup 0)
		   (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
	      (clobber (scratch:V64DI))])]
  {
    operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
  }
  [(set_attr "type" "vop2")
   (set_attr "length" "0,4")])
2457
2458;; }}}
2459;; {{{ Vector comparison/merge
2460
;; Vector comparison producing a DImode lane mask.  The six alternatives
;; target VCC (v_cmp), EXEC (v_cmpx, which also clobbers VCC -- hence the
;; scratch), or a general SGPR pair (VOP3 v_cmp with explicit destination).
;; %E1 prints the comparison operator's condition name.
(define_insn "vec_cmp<mode>di"
  [(set (match_operand:DI 0 "register_operand"	      "=cV,cV,  e, e,Sg,Sg")
	(match_operator 1 "comparison_operator"
	  [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
						      "vSv, B,vSv, B, v,vA")
	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
						      "  v, v,  v, v,vA, v")]))
   (clobber (match_scratch:DI 4			      "= X, X, cV,cV, X, X"))]
  ""
  "@
   v_cmp%E1\tvcc, %2, %3
   v_cmp%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmpx%E1\tvcc, %2, %3
   v_cmp%E1\t%0, %2, %3
   v_cmp%E1\t%0, %2, %3"
  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
   (set_attr "length" "4,8,4,8,8,8")])
2479
;; Unsigned vector comparison expander; simply forwards to vec_cmp<mode>di
;; (see the comment in the body).
(define_expand "vec_cmpu<mode>di"
  [(match_operand:DI 0 "register_operand")
   (match_operator 1 "comparison_operator"
     [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
      (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
  ""
  {
    /* Unsigned comparisons use the same patterns as signed comparisons,
       except that they use unsigned operators (e.g. LTU vs LT).
       The '%E1' directive then does the Right Thing.  */
    emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
				    operands[3]));
    DONE;
  })
2494
2495(define_insn "vec_cmp<mode>di_exec"
2496  [(set (match_operand:DI 0 "register_operand"	       "=cV,cV,  e, e,Sg,Sg")
2497	(and:DI
2498	  (match_operator 1 "comparison_operator"
2499	    [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
2500						       "vSv, B,vSv, B, v,vA")
2501	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2502						       "  v, v,  v, v,vA, v")])
2503	  (match_operand:DI 4 "gcn_exec_reg_operand"   "  e, e,  e, e, e, e")))
2504   (clobber (match_scratch:DI 5			       "= X, X, cV,cV, X, X"))]
2505  ""
2506  "@
2507   v_cmp%E1\tvcc, %2, %3
2508   v_cmp%E1\tvcc, %2, %3
2509   v_cmpx%E1\tvcc, %2, %3
2510   v_cmpx%E1\tvcc, %2, %3
2511   v_cmp%E1\t%0, %2, %3
2512   v_cmp%E1\t%0, %2, %3"
2513  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
2514   (set_attr "length" "4,8,4,8,8,8")])
2515
2516(define_insn "vec_cmp<mode>di_dup"
2517  [(set (match_operand:DI 0 "register_operand"		   "=cV,cV, e,e,Sg")
2518	(match_operator 1 "comparison_operator"
2519	  [(vec_duplicate:VEC_1REG_MODE
2520	     (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2521							   " Sv, B,Sv,B, A"))
2522	   (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2523							   "  v, v, v,v, v")]))
2524   (clobber (match_scratch:DI 4				   "= X,X,cV,cV, X"))]
2525  ""
2526  "@
2527   v_cmp%E1\tvcc, %2, %3
2528   v_cmp%E1\tvcc, %2, %3
2529   v_cmpx%E1\tvcc, %2, %3
2530   v_cmpx%E1\tvcc, %2, %3
2531   v_cmp%E1\t%0, %2, %3"
2532  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2533   (set_attr "length" "4,8,4,8,8")])
2534
2535(define_insn "vec_cmp<mode>di_dup_exec"
2536  [(set (match_operand:DI 0 "register_operand"		    "=cV,cV, e,e,Sg")
2537	(and:DI
2538	  (match_operator 1 "comparison_operator"
2539	    [(vec_duplicate:VEC_1REG_MODE
2540	       (match_operand:<SCALAR_MODE> 2 "gcn_alu_operand"
2541							    " Sv, B,Sv,B, A"))
2542	     (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
2543							    "  v, v, v,v, v")])
2544	  (match_operand:DI 4 "gcn_exec_reg_operand"	    "  e, e, e,e, e")))
2545   (clobber (match_scratch:DI 5				    "= X,X,cV,cV, X"))]
2546  ""
2547  "@
2548   v_cmp%E1\tvcc, %2, %3
2549   v_cmp%E1\tvcc, %2, %3
2550   v_cmpx%E1\tvcc, %2, %3
2551   v_cmpx%E1\tvcc, %2, %3
2552   v_cmp%E1\t%0, %2, %3"
2553  [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
2554   (set_attr "length" "4,8,4,8,8")])
2555
2556(define_expand "vcond_mask_<mode>di"
2557  [(parallel
2558    [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
2559	  (vec_merge:VEC_REG_MODE
2560	    (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
2561	    (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
2562	    (match_operand:DI 3 "register_operand" "")))
2563     (clobber (scratch:V64DI))])]
2564  ""
2565  "")
2566
2567(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>"
2568  [(match_operand:VEC_1REG_MODE 0 "register_operand")
2569   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2570   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2571   (match_operator 3 "comparison_operator"
2572     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2573      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
2574  ""
2575  {
2576    rtx tmp = gen_reg_rtx (DImode);
2577    emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2578				    operands[5]));
2579    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2580					tmp));
2581    DONE;
2582  })
2583
2584(define_expand "vcond<VEC_1REG_MODE:mode><VEC_1REG_ALT:mode>_exec"
2585  [(match_operand:VEC_1REG_MODE 0 "register_operand")
2586   (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
2587   (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
2588   (match_operator 3 "comparison_operator"
2589     [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
2590      (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
2591   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2592  ""
2593  {
2594    rtx tmp = gen_reg_rtx (DImode);
2595    emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2596					 operands[5], operands[6]));
2597    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2598					tmp));
2599    DONE;
2600  })
2601
2602(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>"
2603  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2604   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2605   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2606   (match_operator 3 "comparison_operator"
2607     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2608      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
2609  ""
2610  {
2611    rtx tmp = gen_reg_rtx (DImode);
2612    emit_insn (gen_vec_cmp<mode>di (tmp, operands[3], operands[4],
2613				    operands[5]));
2614    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2615				        tmp));
2616    DONE;
2617  })
2618
2619(define_expand "vcondu<VEC_1REG_INT_MODE:mode><VEC_1REG_INT_ALT:mode>_exec"
2620  [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
2621   (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
2622   (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
2623   (match_operator 3 "comparison_operator"
2624     [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
2625      (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
2626   (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
2627  ""
2628  {
2629    rtx tmp = gen_reg_rtx (DImode);
2630    emit_insn (gen_vec_cmp<mode>di_exec (tmp, operands[3], operands[4],
2631					 operands[5], operands[6]));
2632    emit_insn (gen_vcond_mask_<mode>di (operands[0], operands[1], operands[2],
2633				        tmp));
2634    DONE;
2635  })
2636
2637;; }}}
2638;; {{{ Fully masked loop support
2639
2640(define_expand "while_ultsidi"
2641  [(match_operand:DI 0 "register_operand")
2642   (match_operand:SI 1 "")
2643   (match_operand:SI 2 "")]
2644  ""
2645  {
2646    if (GET_CODE (operands[1]) != CONST_INT
2647	|| GET_CODE (operands[2]) != CONST_INT)
2648      {
2649	rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
2650	rtx tmp = _0_1_2_3;
2651	if (GET_CODE (operands[1]) != CONST_INT
2652	    || INTVAL (operands[1]) != 0)
2653	  {
2654	    tmp = gen_reg_rtx (V64SImode);
2655	    emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
2656	  }
2657	emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
2658					   gen_rtx_GT (VOIDmode, 0, 0),
2659					   operands[2], tmp));
2660      }
2661    else
2662      {
2663	HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
2664	HOST_WIDE_INT mask = (diff >= 64 ? -1
2665			      : ~((unsigned HOST_WIDE_INT)-1 << diff));
2666	emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
2667      }
2668    DONE;
2669  })
2670
2671(define_expand "maskload<mode>di"
2672  [(match_operand:VEC_REG_MODE 0 "register_operand")
2673   (match_operand:VEC_REG_MODE 1 "memory_operand")
2674   (match_operand 2 "")]
2675  ""
2676  {
2677    rtx exec = force_reg (DImode, operands[2]);
2678    rtx addr = gcn_expand_scalar_to_vector_address
2679		(<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
2680    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
2681    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
2682    rtx undef = gcn_gen_undef (<MODE>mode);
2683    emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
2684					   exec));
2685    DONE;
2686  })
2687
2688(define_expand "maskstore<mode>di"
2689  [(match_operand:VEC_REG_MODE 0 "memory_operand")
2690   (match_operand:VEC_REG_MODE 1 "register_operand")
2691   (match_operand 2 "")]
2692  ""
2693  {
2694    rtx exec = force_reg (DImode, operands[2]);
2695    rtx addr = gcn_expand_scalar_to_vector_address
2696		(<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
2697    rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
2698    rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
2699    emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
2700    DONE;
2701  })
2702
2703(define_expand "mask_gather_load<mode>"
2704  [(match_operand:VEC_REG_MODE 0 "register_operand")
2705   (match_operand:DI 1 "register_operand")
2706   (match_operand 2 "register_operand")
2707   (match_operand 3 "immediate_operand")
2708   (match_operand:SI 4 "gcn_alu_operand")
2709   (match_operand:DI 5 "")]
2710  ""
2711  {
2712    rtx exec = force_reg (DImode, operands[5]);
2713
2714    /* TODO: more conversions will be needed when more types are vectorized. */
2715    if (GET_MODE (operands[2]) == V64DImode)
2716      {
2717	rtx tmp = gen_reg_rtx (V64SImode);
2718	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
2719						    gcn_gen_undef (V64SImode),
2720						    exec));
2721	operands[2] = tmp;
2722      }
2723
2724    emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
2725				      operands[3], operands[4], exec));
2726    DONE;
2727  })
2728
2729(define_expand "mask_scatter_store<mode>"
2730  [(match_operand:DI 0 "register_operand")
2731   (match_operand 1 "register_operand")
2732   (match_operand 2 "immediate_operand")
2733   (match_operand:SI 3 "gcn_alu_operand")
2734   (match_operand:VEC_REG_MODE 4 "register_operand")
2735   (match_operand:DI 5 "")]
2736  ""
2737  {
2738    rtx exec = force_reg (DImode, operands[5]);
2739
2740    /* TODO: more conversions will be needed when more types are vectorized. */
2741    if (GET_MODE (operands[1]) == V64DImode)
2742      {
2743	rtx tmp = gen_reg_rtx (V64SImode);
2744	emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
2745						    gcn_gen_undef (V64SImode),
2746						    exec));
2747	operands[1] = tmp;
2748      }
2749
2750    emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
2751				       operands[3], operands[4], exec));
2752    DONE;
2753  })
2754
; Modes for which the conditional-operation expanders below exist.
; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
(define_mode_iterator COND_INT_MODE [V64SI V64DI])

; Arithmetic codes that have an "<expander><mode>3_exec" masked pattern.
(define_code_iterator cond_op [plus minus])
2760
2761(define_expand "cond_<expander><mode>"
2762  [(match_operand:COND_MODE 0 "register_operand")
2763   (match_operand:DI 1 "register_operand")
2764   (cond_op:COND_MODE
2765     (match_operand:COND_MODE 2 "gcn_alu_operand")
2766     (match_operand:COND_MODE 3 "gcn_alu_operand"))
2767   (match_operand:COND_MODE 4 "register_operand")]
2768  ""
2769  {
2770    operands[1] = force_reg (DImode, operands[1]);
2771    operands[2] = force_reg (<MODE>mode, operands[2]);
2772
2773    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
2774					   operands[3], operands[4],
2775					   operands[1]));
2776    DONE;
2777  })
2778
; Bitwise codes that have an "<expander><mode>3_exec" masked pattern;
; these are only defined for the integer vector modes.
(define_code_iterator cond_bitop [and ior xor])

; As the arithmetic cond_<op> expansion above, for bitwise operations.
(define_expand "cond_<expander><mode>"
  [(match_operand:COND_INT_MODE 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (cond_bitop:COND_INT_MODE
     (match_operand:COND_INT_MODE 2 "gcn_alu_operand")
     (match_operand:COND_INT_MODE 3 "gcn_alu_operand"))
   (match_operand:COND_INT_MODE 4 "register_operand")]
  ""
  {
    /* The _exec pattern wants registers for the mask and first input.  */
    operands[1] = force_reg (DImode, operands[1]);
    operands[2] = force_reg (<MODE>mode, operands[2]);

    emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
					   operands[3], operands[4],
					   operands[1]));
    DONE;
  })
2798
2799;; }}}
2800;; {{{ Vector reductions
2801
; Unspecs implementing one combining step of a vector reduction, each
; pairing a lane with another lane selected by a DPP shift.
(define_int_iterator REDUC_UNSPEC [UNSPEC_SMIN_DPP_SHR UNSPEC_SMAX_DPP_SHR
				   UNSPEC_UMIN_DPP_SHR UNSPEC_UMAX_DPP_SHR
				   UNSPEC_PLUS_DPP_SHR
				   UNSPEC_AND_DPP_SHR
				   UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; Subset of the above also provided for double-register (V64DI) modes.
(define_int_iterator REDUC_2REG_UNSPEC [UNSPEC_PLUS_DPP_SHR
					UNSPEC_AND_DPP_SHR
					UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR])

; Identity map from each unspec to its own name, so pattern C code can
; refer to the iterator value (e.g. "<reduc_unspec>" in insn conditions).
; FIXME: Isn't there a better way of doing this?
(define_int_attr reduc_unspec [(UNSPEC_SMIN_DPP_SHR "UNSPEC_SMIN_DPP_SHR")
			       (UNSPEC_SMAX_DPP_SHR "UNSPEC_SMAX_DPP_SHR")
			       (UNSPEC_UMIN_DPP_SHR "UNSPEC_UMIN_DPP_SHR")
			       (UNSPEC_UMAX_DPP_SHR "UNSPEC_UMAX_DPP_SHR")
			       (UNSPEC_PLUS_DPP_SHR "UNSPEC_PLUS_DPP_SHR")
			       (UNSPEC_AND_DPP_SHR "UNSPEC_AND_DPP_SHR")
			       (UNSPEC_IOR_DPP_SHR "UNSPEC_IOR_DPP_SHR")
			       (UNSPEC_XOR_DPP_SHR "UNSPEC_XOR_DPP_SHR")])

; Operation name used to build the reduc_<op>_scal_<mode> pattern names.
(define_int_attr reduc_op [(UNSPEC_SMIN_DPP_SHR "smin")
			   (UNSPEC_SMAX_DPP_SHR "smax")
			   (UNSPEC_UMIN_DPP_SHR "umin")
			   (UNSPEC_UMAX_DPP_SHR "umax")
			   (UNSPEC_PLUS_DPP_SHR "plus")
			   (UNSPEC_AND_DPP_SHR "and")
			   (UNSPEC_IOR_DPP_SHR "ior")
			   (UNSPEC_XOR_DPP_SHR "xor")])

; Mnemonic for each combining instruction; %i0/%u0/%b0 presumably print
; the signed/unsigned/bitwise size suffix for the operand's mode --
; TODO(review): confirm against print_operand in gcn.c.
(define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0")
			     (UNSPEC_SMAX_DPP_SHR "v_max%i0")
			     (UNSPEC_UMIN_DPP_SHR "v_min%u0")
			     (UNSPEC_UMAX_DPP_SHR "v_max%u0")
			     (UNSPEC_PLUS_DPP_SHR "v_add%u0")
			     (UNSPEC_AND_DPP_SHR  "v_and%b0")
			     (UNSPEC_IOR_DPP_SHR  "v_or%b0")
			     (UNSPEC_XOR_DPP_SHR  "v_xor%b0")])
2839
2840(define_expand "reduc_<reduc_op>_scal_<mode>"
2841  [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
2842	(unspec:<SCALAR_MODE>
2843	  [(match_operand:VEC_1REG_MODE 1 "register_operand")]
2844	  REDUC_UNSPEC))]
2845  ""
2846  {
2847    rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
2848				       <reduc_unspec>);
2849
2850    /* The result of the reduction is in lane 63 of tmp.  */
2851    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
2852
2853    DONE;
2854  })
2855
2856(define_expand "reduc_<reduc_op>_scal_v64di"
2857  [(set (match_operand:DI 0 "register_operand")
2858	(unspec:DI
2859	  [(match_operand:V64DI 1 "register_operand")]
2860	  REDUC_2REG_UNSPEC))]
2861  ""
2862  {
2863    rtx tmp = gcn_expand_reduc_scalar (V64DImode, operands[1],
2864				       <reduc_unspec>);
2865
2866    /* The result of the reduction is in lane 63 of tmp.  */
2867    emit_insn (gen_mov_from_lane63_v64di (operands[0], tmp));
2868
2869    DONE;
2870  })
2871
2872(define_insn "*<reduc_op>_dpp_shr_<mode>"
2873  [(set (match_operand:VEC_1REG_MODE 0 "register_operand"   "=v")
2874	(unspec:VEC_1REG_MODE
2875	  [(match_operand:VEC_1REG_MODE 1 "register_operand" "v")
2876	   (match_operand:VEC_1REG_MODE 2 "register_operand" "v")
2877	   (match_operand:SI 3 "const_int_operand"	     "n")]
2878	  REDUC_UNSPEC))]
2879  "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
2880     && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)"
2881  {
2882    return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
2883				    <reduc_unspec>, INTVAL (operands[3]));
2884  }
2885  [(set_attr "type" "vop_dpp")
2886   (set_attr "length" "8")])
2887
2888(define_insn_and_split "*<reduc_op>_dpp_shr_v64di"
2889  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
2890	(unspec:V64DI
2891	  [(match_operand:V64DI 1 "register_operand" "v0")
2892	   (match_operand:V64DI 2 "register_operand" "v0")
2893	   (match_operand:SI 3 "const_int_operand"    "n")]
2894	  REDUC_2REG_UNSPEC))]
2895  ""
2896  "#"
2897  "reload_completed"
2898  [(set (match_dup 4)
2899	(unspec:V64SI
2900	  [(match_dup 6) (match_dup 8) (match_dup 3)] REDUC_2REG_UNSPEC))
2901   (set (match_dup 5)
2902	(unspec:V64SI
2903	  [(match_dup 7) (match_dup 9) (match_dup 3)] REDUC_2REG_UNSPEC))]
2904  {
2905    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2906    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2907    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2908    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2909    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2910    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2911  }
2912  [(set_attr "type" "vmult")
2913   (set_attr "length" "16")])
2914
; Special cases for addition.

; Combining step for integer addition that also produces a carry-out in
; VCC; the mnemonic differs between GCN3 (v_add) and later (v_add_co).
(define_insn "*plus_carry_dpp_shr_<mode>"
  [(set (match_operand:VEC_1REG_INT_MODE 0 "register_operand"   "=v")
	(unspec:VEC_1REG_INT_MODE
	  [(match_operand:VEC_1REG_INT_MODE 1 "register_operand" "v")
	   (match_operand:VEC_1REG_INT_MODE 2 "register_operand" "v")
	   (match_operand:SI 3 "const_int_operand"		 "n")]
	  UNSPEC_PLUS_CARRY_DPP_SHR))
   (clobber (reg:DI VCC_REG))]
  ""
  {
    const char *insn = TARGET_GCN3 ? "v_add%u0" : "v_add_co%u0";
    return gcn_expand_dpp_shr_insn (<MODE>mode, insn,
				    UNSPEC_PLUS_CARRY_DPP_SHR,
				    INTVAL (operands[3]));
  }
  [(set_attr "type" "vop_dpp")
   (set_attr "length" "8")])
2934
2935(define_insn "*plus_carry_in_dpp_shr_v64si"
2936  [(set (match_operand:V64SI 0 "register_operand"   "=v")
2937	(unspec:V64SI
2938	  [(match_operand:V64SI 1 "register_operand" "v")
2939	   (match_operand:V64SI 2 "register_operand" "v")
2940	   (match_operand:SI 3 "const_int_operand"   "n")
2941	   (match_operand:DI 4 "register_operand"   "cV")]
2942	  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2943   (clobber (reg:DI VCC_REG))]
2944  ""
2945  {
2946    const char *insn = TARGET_GCN3 ? "v_addc%u0" : "v_addc_co%u0";
2947    return gcn_expand_dpp_shr_insn (V64SImode, insn,
2948				    UNSPEC_PLUS_CARRY_IN_DPP_SHR,
2949				    INTVAL (operands[3]));
2950  }
2951  [(set_attr "type" "vop_dpp")
2952   (set_attr "length" "8")])
2953
2954(define_insn_and_split "*plus_carry_dpp_shr_v64di"
2955  [(set (match_operand:V64DI 0 "register_operand"   "=&v")
2956	(unspec:V64DI
2957	  [(match_operand:V64DI 1 "register_operand" "v0")
2958	   (match_operand:V64DI 2 "register_operand" "v0")
2959	   (match_operand:SI 3 "const_int_operand"    "n")]
2960	  UNSPEC_PLUS_CARRY_DPP_SHR))
2961   (clobber (reg:DI VCC_REG))]
2962  ""
2963  "#"
2964  "reload_completed"
2965  [(parallel [(set (match_dup 4)
2966		(unspec:V64SI
2967		  [(match_dup 6) (match_dup 8) (match_dup 3)]
2968		  UNSPEC_PLUS_CARRY_DPP_SHR))
2969	      (clobber (reg:DI VCC_REG))])
2970   (parallel [(set (match_dup 5)
2971		(unspec:V64SI
2972		  [(match_dup 7) (match_dup 9) (match_dup 3) (reg:DI VCC_REG)]
2973		  UNSPEC_PLUS_CARRY_IN_DPP_SHR))
2974	      (clobber (reg:DI VCC_REG))])]
2975  {
2976    operands[4] = gcn_operand_part (V64DImode, operands[0], 0);
2977    operands[5] = gcn_operand_part (V64DImode, operands[0], 1);
2978    operands[6] = gcn_operand_part (V64DImode, operands[1], 0);
2979    operands[7] = gcn_operand_part (V64DImode, operands[1], 1);
2980    operands[8] = gcn_operand_part (V64DImode, operands[2], 0);
2981    operands[9] = gcn_operand_part (V64DImode, operands[2], 1);
2982  }
2983  [(set_attr "type" "vmult")
2984   (set_attr "length" "16")])
2985
; Instructions to move a scalar value from lane 63 of a vector register.
; Alternative 0 reads lane 63 into a scalar register (v_readlane runs
; regardless of EXEC, hence "exec" attr "none"); alternative 1 uses a
; wave rotate to place lane 63's value into a VGPR -- presumably into
; the lane(s) the caller will read; TODO(review): confirm.
(define_insn "mov_from_lane63_<mode>"
  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg,v")
	(unspec:<SCALAR_MODE>
	  [(match_operand:VEC_1REG_MODE 1 "register_operand" "v,v")]
	  UNSPEC_MOV_FROM_LANE63))]
  ""
  "@
   v_readlane_b32\t%0, %1, 63
   v_mov_b32\t%0, %1 wave_ror:1"
  [(set_attr "type" "vop3a,vop_dpp")
   (set_attr "exec" "none,*")
   (set_attr "length" "8")])
2999
3000(define_insn "mov_from_lane63_v64di"
3001  [(set (match_operand:DI 0 "register_operand"	     "=Sg,v")
3002	(unspec:DI
3003	  [(match_operand:V64DI 1 "register_operand"   "v,v")]
3004	  UNSPEC_MOV_FROM_LANE63))]
3005  ""
3006  "@
3007   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
3008   * if (REGNO (operands[0]) <= REGNO (operands[1]))	\
3009       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\"	\
3010	      \"v_mov_b32\t%H0, %H1 wave_ror:1\";	\
3011     else						\
3012       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\"	\
3013	      \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
3014  [(set_attr "type" "vop3a,vop_dpp")
3015   (set_attr "exec" "none,*")
3016   (set_attr "length" "8")])
3017
3018;; }}}
3019;; {{{ Miscellaneous
3020
3021(define_expand "vec_seriesv64si"
3022  [(match_operand:V64SI 0 "register_operand")
3023   (match_operand:SI 1 "gcn_alu_operand")
3024   (match_operand:SI 2 "gcn_alu_operand")]
3025  ""
3026  {
3027    rtx tmp = gen_reg_rtx (V64SImode);
3028    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3029
3030    emit_insn (gen_mulv64si3_dup (tmp, v1, operands[2]));
3031    emit_insn (gen_addv64si3_dup (operands[0], tmp, operands[1]));
3032    DONE;
3033  })
3034
3035(define_expand "vec_seriesv64di"
3036  [(match_operand:V64DI 0 "register_operand")
3037   (match_operand:DI 1 "gcn_alu_operand")
3038   (match_operand:DI 2 "gcn_alu_operand")]
3039  ""
3040  {
3041    rtx tmp = gen_reg_rtx (V64DImode);
3042    rtx v1 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
3043
3044    emit_insn (gen_mulv64di3_zext_dup2 (tmp, v1, operands[2]));
3045    emit_insn (gen_addv64di3_dup (operands[0], tmp, operands[1]));
3046    DONE;
3047  })
3048
3049;; }}}
3050