1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005-2015 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19
;; UNSPEC_* codes belonging to the "unspec" enumeration.  The entries are
;; grouped by the instruction-set extension named in each group comment.
20(define_c_enum "unspec" [
21  ;; SSE
22  UNSPEC_MOVNT
23  UNSPEC_LOADU
24  UNSPEC_STOREU
25
26  ;; SSE3
27  UNSPEC_LDDQU
28
29  ;; SSSE3
30  UNSPEC_PSHUFB
31  UNSPEC_PSIGN
32  UNSPEC_PALIGNR
33
34  ;; For SSE4A support
35  UNSPEC_EXTRQI
36  UNSPEC_EXTRQ
37  UNSPEC_INSERTQI
38  UNSPEC_INSERTQ
39
40  ;; For SSE4.1 support
41  UNSPEC_BLENDV
42  UNSPEC_INSERTPS
43  UNSPEC_DP
44  UNSPEC_MOVNTDQA
45  UNSPEC_MPSADBW
46  UNSPEC_PHMINPOSUW
47  UNSPEC_PTEST
48
49  ;; For SSE4.2 support
50  UNSPEC_PCMPESTR
51  UNSPEC_PCMPISTR
52
53  ;; For FMA4 and XOP support
54  UNSPEC_FMADDSUB
55  UNSPEC_XOP_UNSIGNED_CMP
56  UNSPEC_XOP_TRUEFALSE
57  UNSPEC_XOP_PERMUTE
58  UNSPEC_FRCZ
59
60  ;; For AES support
61  UNSPEC_AESENC
62  UNSPEC_AESENCLAST
63  UNSPEC_AESDEC
64  UNSPEC_AESDECLAST
65  UNSPEC_AESIMC
66  UNSPEC_AESKEYGENASSIST
67
68  ;; For PCLMUL support
69  UNSPEC_PCLMUL
70
71  ;; For AVX support
72  UNSPEC_PCMP
73  UNSPEC_VPERMIL
74  UNSPEC_VPERMIL2
75  UNSPEC_VPERMIL2F128
76  UNSPEC_CAST
77  UNSPEC_VTESTP
78  UNSPEC_VCVTPH2PS
79  UNSPEC_VCVTPS2PH
80
81  ;; For AVX2 support
82  UNSPEC_VPERMVAR
83  UNSPEC_VPERMTI
84  UNSPEC_GATHER
85  UNSPEC_VSIBADDR
86
87  ;; For AVX512F support
88  UNSPEC_VPERMI2
89  UNSPEC_VPERMT2
90  UNSPEC_VPERMI2_MASK
91  UNSPEC_UNSIGNED_FIX_NOTRUNC
92  UNSPEC_UNSIGNED_PCMP
93  UNSPEC_TESTM
94  UNSPEC_TESTNM
95  UNSPEC_SCATTER
96  UNSPEC_RCP14
97  UNSPEC_RSQRT14
98  UNSPEC_FIXUPIMM
99  UNSPEC_SCALEF
100  UNSPEC_VTERNLOG
101  UNSPEC_GETEXP
102  UNSPEC_GETMANT
103  UNSPEC_ALIGN
104  UNSPEC_CONFLICT
105  UNSPEC_COMPRESS
106  UNSPEC_COMPRESS_STORE
107  UNSPEC_EXPAND
108  UNSPEC_MASKED_EQ
109  UNSPEC_MASKED_GT
110
111  ;; For the embedded rounding feature
112  UNSPEC_EMBEDDED_ROUNDING
113
114  ;; For AVX512PF support
115  UNSPEC_GATHER_PREFETCH
116  UNSPEC_SCATTER_PREFETCH
117
118  ;; For AVX512ER support
119  UNSPEC_EXP2
120  UNSPEC_RCP28
121  UNSPEC_RSQRT28
122
123  ;; For SHA support
124  UNSPEC_SHA1MSG1
125  UNSPEC_SHA1MSG2
126  UNSPEC_SHA1NEXTE
127  UNSPEC_SHA1RNDS4
128  UNSPEC_SHA256MSG1
129  UNSPEC_SHA256MSG2
130  UNSPEC_SHA256RNDS2
131
132  ;; For AVX512BW support
133  UNSPEC_DBPSADBW
134  UNSPEC_PMADDUBSW512
135  UNSPEC_PMADDWD512
136  UNSPEC_PSHUFHW
137  UNSPEC_PSHUFLW
138  UNSPEC_CVTINT2MASK
139
140  ;; For AVX512DQ support
141  UNSPEC_REDUCE
142  UNSPEC_FPCLASS
143  UNSPEC_RANGE
144
145  ;; For AVX512IFMA support
146  UNSPEC_VPMADD52LUQ
147  UNSPEC_VPMADD52HUQ
148
149  ;; For AVX512VBMI support
150  UNSPEC_VPMULTISHIFT
151])
152
;; UNSPECV_* codes belonging to the "unspecv" enumeration.
;; NOTE(review): by GCC convention these are used with unspec_volatile;
;; the uses themselves are outside this part of the file.
153(define_c_enum "unspecv" [
154  UNSPECV_LDMXCSR
155  UNSPECV_STMXCSR
156  UNSPECV_CLFLUSH
157  UNSPECV_MONITOR
158  UNSPECV_MWAIT
159  UNSPECV_VZEROALL
160  UNSPECV_VZEROUPPER
161])
162
163;; All vector modes including V?TImode, used in move patterns.
;; The 128-bit modes are unconditional, the 256-bit modes need TARGET_AVX,
;; and the 512-bit modes need TARGET_AVX512F (V4TI: TARGET_AVX512BW).
164(define_mode_iterator VMOVE
165  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
166   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
167   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
168   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
169   (V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX") V1TI
170   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
171   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])
172
173;; All AVX-512{F,VL} vector modes with 4- and 8-byte elements.  TARGET_AVX512F
;; is assumed as the baseline: the 512-bit modes carry no condition, while the
;; 256-bit and 128-bit modes additionally require TARGET_AVX512VL.
174(define_mode_iterator V48_AVX512VL
175  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
176   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
177   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
178   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
179
180;; 1- and 2-byte element AVX-512{BW,VL} vector modes.  TARGET_AVX512BW is
;; assumed as the baseline: the 512-bit modes carry no condition, while the
;; 256-bit and 128-bit modes additionally require TARGET_AVX512VL.
181(define_mode_iterator VI12_AVX512VL
182  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
183   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
184
;; QImode-element subset of the iterator above: V64QI unconditionally,
;; V16QI and V32QI only with TARGET_AVX512VL.
185(define_mode_iterator VI1_AVX512VL
186  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
187
188;; All vector modes
;; 256-bit modes need TARGET_AVX, the 512-bit SI/DI/SF/DF modes need
;; TARGET_AVX512F, and V2DF needs TARGET_SSE2.  No 512-bit QI/HI modes
;; are listed here.
189(define_mode_iterator V
190  [(V32QI "TARGET_AVX") V16QI
191   (V16HI "TARGET_AVX") V8HI
192   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
193   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
194   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
195   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
196
197;; All 128bit vector modes
;; V2DF is the only conditional entry (TARGET_SSE2).
198(define_mode_iterator V_128
199  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
200
201;; All 256bit vector modes
;; No per-mode conditions; patterns using this iterator supply their own.
202(define_mode_iterator V_256
203  [V32QI V16HI V8SI V4DI V8SF V4DF])
204
205;; All 512bit vector modes
;; No per-mode conditions; patterns using this iterator supply their own.
206(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
207
208;; All 256bit and 512bit vector modes
;; The 256-bit modes are unconditional; every 512-bit mode requires
;; TARGET_AVX512F.
209(define_mode_iterator V_256_512
210  [V32QI V16HI V8SI V4DI V8SF V4DF
211   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
212   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
213
214;; All vector float modes
;; V4SF is unconditional; V2DF needs TARGET_SSE2, 256-bit modes need
;; TARGET_AVX and 512-bit modes need TARGET_AVX512F.
215(define_mode_iterator VF
216  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
217   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
218
219;; 128- and 256-bit float vector modes
;; Same conditions as VF, without the 512-bit entries.
220(define_mode_iterator VF_128_256
221  [(V8SF "TARGET_AVX") V4SF
222   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
223
224;; All SFmode vector float modes
225(define_mode_iterator VF1
226  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
227
228;; 128- and 256-bit SF vector modes
229(define_mode_iterator VF1_128_256
230  [(V8SF "TARGET_AVX") V4SF])
231
;; 128- and 256-bit SF vector modes, AVX512VL flavour: V8SF is
;; unconditional and V4SF requires TARGET_AVX512VL.
232(define_mode_iterator VF1_128_256VL
233  [V8SF (V4SF "TARGET_AVX512VL")])
234
235;; All DFmode vector float modes
;; V2DF carries no condition in this iterator (unlike in VF).
236(define_mode_iterator VF2
237  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
238
239;; 128- and 256-bit DF vector modes
240(define_mode_iterator VF2_128_256
241  [(V4DF "TARGET_AVX") V2DF])
242
;; 256- and 512-bit DF vector modes: V4DF is unconditional, V8DF requires
;; TARGET_AVX512F.
243(define_mode_iterator VF2_512_256
244  [(V8DF "TARGET_AVX512F") V4DF])
245
;; 256- and 512-bit DF vector modes, AVX512VL flavour: V8DF is
;; unconditional, V4DF requires TARGET_AVX512VL.
246(define_mode_iterator VF2_512_256VL
247  [V8DF (V4DF "TARGET_AVX512VL")])
248
249;; All 128bit vector float modes
;; V2DF requires TARGET_SSE2.
250(define_mode_iterator VF_128
251  [V4SF (V2DF "TARGET_SSE2")])
252
253;; All 256bit vector float modes
;; No per-mode conditions.
254(define_mode_iterator VF_256
255  [V8SF V4DF])
256
257;; All 512bit vector float modes
;; No per-mode conditions.
258(define_mode_iterator VF_512
259  [V16SF V8DF])
260
;; SImode- and DImode-element integer vector modes: the 512-bit modes are
;; unconditional, the 256-bit and 128-bit modes require TARGET_AVX512VL.
261(define_mode_iterator VI48_AVX512VL
262  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
263   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
264
;; SFmode- and DFmode-element vector modes with the same conditions:
;; 512-bit unconditional, narrower modes require TARGET_AVX512VL.
265(define_mode_iterator VF_AVX512VL
266  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
267   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
268
;; DFmode-element subset of VF_AVX512VL.
269(define_mode_iterator VF2_AVX512VL
270  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
271
;; SFmode-element subset of VF_AVX512VL.
272(define_mode_iterator VF1_AVX512VL
273  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
274
275;; All vector integer modes
;; 128-bit modes are unconditional and 256-bit modes need TARGET_AVX.
;; Of the 512-bit modes, V16SI/V8DI need TARGET_AVX512F and V64QI/V32HI
;; need TARGET_AVX512BW.
276(define_mode_iterator VI
277  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
278   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
279   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
280   (V8SI "TARGET_AVX") V4SI
281   (V4DI "TARGET_AVX") V2DI])
282
;; Same set of integer modes as VI, except that the 256-bit modes require
;; TARGET_AVX2 instead of TARGET_AVX.
283(define_mode_iterator VI_AVX2
284  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
285   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
286   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
287   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
288
289;; All QImode vector integer modes
;; Only the 128-bit and 256-bit (TARGET_AVX) modes; V64QI is not listed.
290(define_mode_iterator VI1
291  [(V32QI "TARGET_AVX") V16QI])
292
;; Integer modes with an AVX512BW/AVX512VL condition, named for unaligned
;; load/store patterns.  V64QI and V32HI are unconditional, V8HI and V16HI
;; require TARGET_AVX512VL.  Note that no 128/256-bit QImode vectors are
;; listed.
293(define_mode_iterator VI_ULOADSTORE_BW_AVX512VL
294  [V64QI
295   V32HI (V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL")])
296
;; SImode/DImode counterpart: 512-bit modes are unconditional, narrower
;; modes require TARGET_AVX512VL.
297(define_mode_iterator VI_ULOADSTORE_F_AVX512VL
298  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
299   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
300
301;; All DImode vector integer modes
302(define_mode_iterator VI8
303  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
304
;; DImode vector modes with V8DI unconditional and the narrower modes
;; requiring TARGET_AVX512VL.
305(define_mode_iterator VI8_AVX512VL
306  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
307
;; 256- and 512-bit DImode vector modes: V8DI unconditional, V4DI requires
;; TARGET_AVX512VL.
308(define_mode_iterator VI8_256_512
309  [V8DI (V4DI "TARGET_AVX512VL")])
310
;; QImode vectors: V16QI unconditional, V32QI requires TARGET_AVX2.
311(define_mode_iterator VI1_AVX2
312  [(V32QI "TARGET_AVX2") V16QI])
313
;; As VI1_AVX2, plus V64QI under TARGET_AVX512BW.
314(define_mode_iterator VI1_AVX512
315  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
316
;; HImode vectors: V8HI unconditional, V16HI requires TARGET_AVX2 and
;; V32HI requires TARGET_AVX512BW.
317(define_mode_iterator VI2_AVX2
318  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
319
;; As VI2_AVX2, except that V32HI is gated on TARGET_AVX512F rather than
;; TARGET_AVX512BW.
320(define_mode_iterator VI2_AVX512F
321  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
322
;; SImode vectors: V4SI unconditional, V8SI requires TARGET_AVX.
323(define_mode_iterator VI4_AVX
324  [(V8SI "TARGET_AVX") V4SI])
325
;; SImode vectors: V4SI unconditional, V8SI requires TARGET_AVX2.
326(define_mode_iterator VI4_AVX2
327  [(V8SI "TARGET_AVX2") V4SI])
328
;; As VI4_AVX2, plus V16SI under TARGET_AVX512F.
329(define_mode_iterator VI4_AVX512F
330  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
331
;; SImode vectors: V16SI unconditional, narrower modes require
;; TARGET_AVX512VL.
332(define_mode_iterator VI4_AVX512VL
333  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
334
;; SImode and DImode vectors with mixed conditions: V4SI/V8SI are
;; unconditional, V2DI/V4DI require TARGET_AVX512VL, and the 512-bit
;; modes require TARGET_AVX512F.
335(define_mode_iterator VI48_AVX512F_AVX512VL
336  [V4SI V8SI (V16SI "TARGET_AVX512F")
337   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
338
;; HImode vectors: V32HI unconditional, narrower modes require
;; TARGET_AVX512VL.
339(define_mode_iterator VI2_AVX512VL
340  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
341
;; DImode vectors: V2DI unconditional, V4DI requires TARGET_AVX2 and V8DI
;; requires TARGET_AVX512BW.
342(define_mode_iterator VI8_AVX2_AVX512BW
343  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
344
;; DImode vectors: V2DI unconditional, V4DI requires TARGET_AVX2.
345(define_mode_iterator VI8_AVX2
346  [(V4DI "TARGET_AVX2") V2DI])
347
;; As VI8_AVX2, plus V8DI under TARGET_AVX512F.
348(define_mode_iterator VI8_AVX2_AVX512F
349  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
350
;; 128-bit SImode vector (V4SI) together with 256-bit DImode vector (V4DI);
;; both have four elements.
351(define_mode_iterator VI4_128_8_256
352  [V4SI V4DI])
353
354;; All V8D* modes
;; (the 512-bit float and integer modes with eight 64-bit elements).
355(define_mode_iterator V8FI
356  [V8DF V8DI])
357
358;; All V16S* modes
;; (the 512-bit float and integer modes with sixteen 32-bit elements).
359(define_mode_iterator V16FI
360  [V16SF V16SI])
361
362;; ??? We should probably use TImode instead.
;; TImode-element vectors: V1TI unconditional, V2TI requires TARGET_AVX2
;; and V4TI requires TARGET_AVX512BW.
363(define_mode_iterator VIMAX_AVX2
364  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
365
366;; ??? This should probably be dropped in favor of VIMAX_AVX2.
;; Identical to VIMAX_AVX2 except that the 128-bit entry is scalar TI
;; rather than V1TI.
367(define_mode_iterator SSESCALARMODE
368  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
369
;; QImode and HImode vectors: 128-bit unconditional, 256-bit under
;; TARGET_AVX2, 512-bit under TARGET_AVX512BW.
370(define_mode_iterator VI12_AVX2
371  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
372   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
373
;; HImode and SImode vectors: 128-bit unconditional, 256-bit under
;; TARGET_AVX2.
374(define_mode_iterator VI24_AVX2
375  [(V16HI "TARGET_AVX2") V8HI
376   (V8SI "TARGET_AVX2") V4SI])
377
;; QImode, HImode and SImode vectors.  As the name spells out, the 256-bit
;; modes need TARGET_AVX2, the 512-bit HI/SI modes need TARGET_AVX512F and
;; the 512-bit QI mode needs TARGET_AVX512BW.
378(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
379  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
380   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
381   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
382
;; QImode, HImode and SImode vectors, 128-bit and 256-bit (TARGET_AVX2)
;; only.
383(define_mode_iterator VI124_AVX2
384  [(V32QI "TARGET_AVX2") V16QI
385   (V16HI "TARGET_AVX2") V8HI
386   (V8SI "TARGET_AVX2") V4SI])
387
;; HImode vectors: V8HI unconditional, V16HI under TARGET_AVX2, V32HI
;; under TARGET_AVX512BW.  The entries are the same as in VI2_AVX2.
388(define_mode_iterator VI2_AVX2_AVX512BW
389  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
390
;; SImode and DImode vectors: 128-bit unconditional, 256-bit under
;; TARGET_AVX2.
391(define_mode_iterator VI48_AVX2
392  [(V8SI "TARGET_AVX2") V4SI
393   (V4DI "TARGET_AVX2") V2DI])
394
;; HImode, SImode and DImode vectors.  As the name spells out, the 256-bit
;; modes need TARGET_AVX2, the 512-bit DI mode needs TARGET_AVX512F and the
;; 512-bit HI/SI modes need TARGET_AVX512BW.
395(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
396  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
397   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
398   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
399
;; V16SI and V8DI unconditionally, V32HI under TARGET_AVX512BW and V4DI
;; under TARGET_AVX512VL.
400(define_mode_iterator VI248_AVX512BW_AVX512VL
401  [(V32HI "TARGET_AVX512BW") 
402   (V4DI "TARGET_AVX512VL") V16SI V8DI])
403
404;; Assumes TARGET_AVX512VL as the baseline.
;; 128- and 256-bit HImode and SImode vectors; the HImode ones additionally
;; require TARGET_AVX512BW.
405(define_mode_iterator VI24_AVX512BW_1
406 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
407  V8SI V4SI])
408   
;; SImode and DImode vectors: 128- and 256-bit modes unconditional,
;; 512-bit modes under TARGET_AVX512F.
409(define_mode_iterator VI48_AVX512F
410  [(V16SI "TARGET_AVX512F") V8SI V4SI
411   (V8DI "TARGET_AVX512F") V4DI V2DI])
412
;; As VI48_AVX512F, but the 256-bit modes require TARGET_AVX.
413(define_mode_iterator VI48_AVX_AVX512F
414  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
415   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
416
;; QImode and HImode vectors: 128-bit unconditional, 256-bit under
;; TARGET_AVX, 512-bit under TARGET_AVX512F.
417(define_mode_iterator VI12_AVX_AVX512F
418  [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
419    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
420
;; 128- and 256-bit vectors with 4- and 8-byte elements: the float modes
;; are unconditional, the integer modes require TARGET_AVX2.
421(define_mode_iterator V48_AVX2
422  [V4SF V2DF
423   V8SF V4DF
424   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
425   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
426
;; ISA-name string for a vector mode, used as a pattern-name prefix (e.g.
;; "<avx512>_load<mode>_mask").  128- and 256-bit modes map to "avx512vl";
;; 512-bit modes map to "avx512bw" (QI/HI elements) or "avx512f" (other
;; element sizes).
427(define_mode_attr avx512
428  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
429   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
430   (V4SI  "avx512vl") (V8SI  "avx512vl") (V16SI "avx512f")
431   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
432   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
433   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
434
;; ISA-name string per vector mode.  The mapping is not uniform by width:
;; the QI/SI rows use "sse2"/"avx"/"avx512f", the HI/DI rows use
;; "avx512vl" for the 128- and 256-bit modes, and every 128-bit float mode
;; maps to "avx".
435(define_mode_attr sse2_avx_avx512f
436  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
437   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
438   (V4SI  "sse2") (V8SI  "avx") (V16SI "avx512f")
439   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
440   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
441   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
442
;; ISA-name string for integer vector modes: "sse2" for 128-bit, "avx2" for
;; 256-bit, and "avx512bw" (QI/HI/TI elements) or "avx512f" (SI/DI
;; elements) for 512-bit.
443(define_mode_attr sse2_avx2
444  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
445   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
446   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
447   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
448   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
449
;; ISA-name string: "ssse3" for 64- and 128-bit modes (including V4HI and
;; scalar TI), "avx2" for 256-bit modes, "avx512bw" for the listed 512-bit
;; modes.  There are no V16SI/V8DI entries.
450(define_mode_attr ssse3_avx2
451   [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
452    (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
453    (V4SI "ssse3") (V8SI "avx2")
454    (V2DI "ssse3") (V4DI "avx2")
455    (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
456
;; ISA-name string: "sse4_1" for 128-bit modes, "avx2" for 256-bit modes;
;; the 512-bit modes map to "avx512bw" (QI/HI), "avx512f" (SI) and
;; "avx512dq" (DI).
457(define_mode_attr sse4_1_avx2
458   [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
459    (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
460    (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
461    (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
462
;; ISA-name string: "avx" for the float modes, "avx2" for the integer
;; modes.  Covers the same modes as the V48_AVX2 iterator.
463(define_mode_attr avx_avx2
464  [(V4SF "avx") (V2DF "avx")
465   (V8SF "avx") (V4DF "avx")
466   (V4SI "avx2") (V2DI "avx2")
467   (V8SI "avx2") (V4DI "avx2")])
468
;; Name prefix for integer modes: "vec" for 128-bit modes, "avx2" for
;; 256-bit modes.
469(define_mode_attr vec_avx2
470  [(V16QI "vec") (V32QI "avx2")
471   (V8HI "vec") (V16HI "avx2")
472   (V4SI "vec") (V8SI "avx2")
473   (V2DI "vec") (V4DI "avx2")])
474
;; ISA-name string: for SI/DI/SF/DF elements, "avx2" for 128- and 256-bit
;; modes and "avx512f" for 512-bit modes; for HI elements, "avx512vl" for
;; 128- and 256-bit modes and "avx512bw" for V32HI.
475(define_mode_attr avx2_avx512
476  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
477   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
478   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
479   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
480   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
481
;; "f" for float vector modes, "i" for integer vector modes.  Spliced into
;; mnemonics such as "vextract<shuffletype>64x4" in the move pattern below.
482(define_mode_attr shuffletype
483  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
484  (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
485  (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
486  (V32HI "i") (V16HI "i") (V8HI "i")
487  (V64QI "i") (V32QI "i") (V16QI "i")
488  (V4TI "i") (V2TI "i") (V1TI "i")])
489
;; Mapping of 512-bit vector modes to the mode with a quarter of the
;; elements (same element type).
490(define_mode_attr ssequartermode
491  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
492
;; Lower-case name of the mode with the same element count and an element
;; of twice the width (e.g. V16QI -> v16hi).
493(define_mode_attr ssedoublemodelower
494  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
495   (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
496   (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
497
;; Mapping to a mode of twice the total size.  The rows are not uniform:
;; the first two rows double the element count (e.g. V8SF -> V16SF),
;; whereas the last two rows keep the element count and double the element
;; width (e.g. V8HI -> V8SI).
498(define_mode_attr ssedoublemode
499  [(V16SF "V32SF") (V16SI "V32SI") (V8DI "V16DI") (V8DF "V16DF")
500   (V8SF "V16SF") (V8SI "V16SI") (V4DI "V8DI") (V4DF "V8DF")
501   (V16HI "V16SI") (V8HI "V8SI") (V4HI "V4SI") (V4SI "V4DI")
502   (V32HI "V32SI") (V32QI "V32HI") (V16QI "V16HI") (V64QI "V64HI")])
503
;; Mapping of DImode vector modes to the QImode vector mode of the same
;; total size.
504(define_mode_attr ssebytemode
505  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")])
506
507;; All 128bit vector integer modes
;; (no per-mode conditions)
508(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
509
510;; All 256bit vector integer modes
;; (no per-mode conditions)
511(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
512
513;; All 512bit vector integer modes
;; (no per-mode conditions)
514(define_mode_iterator VI_512 [V64QI V32HI V16SI V8DI])
515
516;; Various 128bit vector integer mode combinations
;; The digits in each name give the element sizes in bytes that are
;; included (1 = QI, 2 = HI, 4 = SI, 8 = DI).
517(define_mode_iterator VI12_128 [V16QI V8HI])
518(define_mode_iterator VI14_128 [V16QI V4SI])
519(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
520(define_mode_iterator VI24_128 [V8HI V4SI])
521(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
522(define_mode_iterator VI48_128 [V4SI V2DI])
523
524;; Various 256bit and 512bit vector integer mode combinations
525(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
;; VI124_256 plus the matching 512-bit modes: V64QI/V32HI under
;; TARGET_AVX512BW and V16SI under TARGET_AVX512F.
526(define_mode_iterator VI124_256_AVX512F_AVX512BW
527  [V32QI V16HI V8SI
528   (V64QI "TARGET_AVX512BW")
529   (V32HI "TARGET_AVX512BW")
530   (V16SI "TARGET_AVX512F")])
531(define_mode_iterator VI48_256 [V8SI V4DI])
532(define_mode_iterator VI48_512 [V16SI V8DI])
;; 256-bit SImode vector together with 512-bit DImode vector; both have
;; eight elements.
533(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
;; 512-bit integer modes: V16SI/V8DI unconditional, V32HI/V64QI under
;; TARGET_AVX512BW.
534(define_mode_iterator VI_AVX512BW
535  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
536
537;; Int-float size matches
;; Each iterator pairs integer and float vector modes that have the same
;; element size and the same total size.
538(define_mode_iterator VI4F_128 [V4SI V4SF])
539(define_mode_iterator VI8F_128 [V2DI V2DF])
540(define_mode_iterator VI4F_256 [V8SI V8SF])
541(define_mode_iterator VI8F_256 [V4DI V4DF])
;; 256-bit modes unconditional, 512-bit modes under TARGET_AVX512F.
542(define_mode_iterator VI8F_256_512
543  [V4DI V4DF (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
;; V8SI/V8SF unconditional; 512-bit modes under TARGET_AVX512F; the
;; 256-bit DI/DF modes under TARGET_AVX512VL.
544(define_mode_iterator VI48F_256_512
545  [V8SI V8SF
546  (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
547  (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")
548  (V4DI  "TARGET_AVX512VL") (V4DF  "TARGET_AVX512VL")])
;; All listed modes are 512-bit and unconditional.
549(define_mode_iterator VF48_I1248
550  [V16SI V16SF V8DI V8DF V32HI V64QI])
;; 512-bit modes unconditional; 256- and 128-bit modes under
;; TARGET_AVX512VL.
551(define_mode_iterator VI48F
552  [V16SI V16SF V8DI V8DF
553   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
554   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
555   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
556   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
557(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
558
559;; Mapping from float mode to required SSE level
;; Covers scalar SF/DF as well as the vector float modes.
560(define_mode_attr sse
561  [(SF "sse") (DF "sse2")
562   (V4SF "sse") (V2DF "sse2")
563   (V16SF "avx512f") (V8SF "avx")
564   (V8DF "avx512f") (V4DF "avx")])
565
;; ISA-name string for QImode and DImode vectors: "sse2" (128-bit),
;; "avx" (256-bit), "avx512f" (512-bit).
566(define_mode_attr sse2
567  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
568   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
569
;; ISA-name string for QImode vectors: "sse3" (128-bit), "avx" (256-bit).
570(define_mode_attr sse3
571  [(V16QI "sse3") (V32QI "avx")])
572
;; ISA-name string for float vectors: "sse4_1" (128-bit), "avx" (256-bit),
;; "avx512f" for V8DF.  There is no V16SF entry.
573(define_mode_attr sse4_1
574  [(V4SF "sse4_1") (V2DF "sse4_1")
575   (V8SF "avx") (V4DF "avx")
576   (V8DF "avx512f")])
577
;; Vector width suffix: "512" for 512-bit modes, "256" for 256-bit modes
;; and the empty string for 128-bit modes.
578(define_mode_attr avxsizesuffix
579  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
580   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
581   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
582   (V16SF "512") (V8DF "512")
583   (V8SF "256") (V4DF "256")
584   (V4SF "") (V2DF "")])
585
586;; SSE instruction mode
;; Integer vector modes map to the integer mode of the same total width
;; (TI = 128-bit, OI = 256-bit, XI = 512-bit); float vector modes map to
;; themselves.  This is the value used for the "mode" insn attribute in the
;; patterns below.
587(define_mode_attr sseinsnmode
588  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
589   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
590   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
591   (V16SF "V16SF") (V8DF "V8DF")
592   (V8SF "V8SF") (V4DF "V4DF")
593   (V4SF "V4SF") (V2DF "V2DF")
594   (TI "TI")])
595
596;; Mapping of vector modes to corresponding mask size
;; The mask mode has at least one bit per vector element: 64 elements ->
;; DI, 32 -> SI, 16 -> HI, and 8 or fewer -> QI.
597(define_mode_attr avx512fmaskmode
598  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
599   (V32HI "SI") (V16HI "HI") (V8HI  "QI") (V4HI "QI")
600   (V16SI "HI") (V8SI  "QI") (V4SI  "QI")
601   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
602   (V16SF "HI") (V8SF  "QI") (V4SF  "QI")
603   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
604
605;; Mapping of vector float modes to an integer mode of the same size
;; Integer vector modes are also listed and map to themselves.
606(define_mode_attr sseintvecmode
607  [(V16SF "V16SI") (V8DF  "V8DI")
608   (V8SF  "V8SI")  (V4DF  "V4DI")
609   (V4SF  "V4SI")  (V2DF  "V2DI")
610   (V16SI "V16SI") (V8DI  "V8DI")
611   (V8SI  "V8SI")  (V4DI  "V4DI")
612   (V4SI  "V4SI")  (V2DI  "V2DI")
613   (V16HI "V16HI") (V8HI  "V8HI")
614   (V32HI "V32HI") (V64QI "V64QI")
615   (V32QI "V32QI") (V16QI "V16QI")])
616
;; Mapping of vector float modes to the scalar integer mode of the same
;; total size (TI/OI/XI).  There is no V16SF entry.
617(define_mode_attr sseintvecmode2
618  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
619   (V8SF "OI") (V4SF "TI")])
620
;; Lower-case variant of sseintvecmode.  It covers fewer modes: no V16SI,
;; V8DI, V32HI or V64QI entries.
621(define_mode_attr sseintvecmodelower
622  [(V16SF "v16si") (V8DF "v8di")
623   (V8SF "v8si") (V4DF "v4di")
624   (V4SF "v4si") (V2DF "v2di")
625   (V8SI "v8si") (V4DI "v4di")
626   (V4SI "v4si") (V2DI "v2di")
627   (V16HI "v16hi") (V8HI "v8hi")
628   (V32QI "v32qi") (V16QI "v16qi")])
629
630;; Mapping of vector modes to a vector mode of double size
;; The element type is kept and the element count is doubled.  Only 128-
;; and 256-bit source modes are listed.
631(define_mode_attr ssedoublevecmode
632  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
633   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
634   (V8SF "V16SF") (V4DF "V8DF")
635   (V4SF "V8SF") (V2DF "V4DF")])
636
637;; Mapping of vector modes to a vector mode of half size
;; The element type is kept and the element count is halved.  There are no
;; V2DI or V2DF entries.
638(define_mode_attr ssehalfvecmode
639  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI")
640   (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
641   (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
642   (V16SF "V8SF") (V8DF "V4DF")
643   (V8SF  "V4SF") (V4DF "V2DF")
644   (V4SF  "V2SF")])
645
646;; Mapping of vector modes to the packed single mode of the same size
;; (V4SF for 128-bit, V8SF for 256-bit, V16SF for 512-bit modes).
647(define_mode_attr ssePSmode
648  [(V16SI "V16SF") (V8DF "V16SF")
649   (V16SF "V16SF") (V8DI "V16SF")
650   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
651   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
652   (V8SI "V8SF") (V4SI "V4SF")
653   (V4DI "V8SF") (V2DI "V4SF")
654   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
655   (V8SF "V8SF") (V4SF "V4SF")
656   (V4DF "V8SF") (V2DF "V4SF")])
657
;; Mapping of DImode vector modes to the packed single mode with the same
;; number of elements (half the total size).
658(define_mode_attr ssePSmode2
659  [(V8DI "V8SF") (V4DI "V4SF")])
660
661;; Mapping of vector modes back to the scalar modes
;; (the mode of a single element).
662(define_mode_attr ssescalarmode
663  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
664   (V32HI "HI") (V16HI "HI") (V8HI "HI")
665   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
666   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
667   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
668   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")])
669
670;; Mapping of vector modes to the 128bit modes
;; (same element type; 128-bit modes map to themselves).
671(define_mode_attr ssexmmmode
672  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
673   (V32HI "V8HI")  (V16HI "V8HI") (V8HI "V8HI")
674   (V16SI "V4SI")  (V8SI "V4SI")  (V4SI "V4SI")
675   (V8DI "V2DI")   (V4DI "V2DI")  (V2DI "V2DI")
676   (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
677   (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])
678
679;; Pointer size override for scalar modes (Intel asm dialect)
;; The letter is chosen by element size: "b" for 1-byte, "w" for 2-byte,
;; "k" for 4-byte and "q" for 8-byte elements.  There are no V16SF or V8DF
;; entries.
680(define_mode_attr iptr
681  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
682   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
683   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
684   (V8SF "k") (V4DF "q")
685   (V4SF "k") (V2DF "q")
686   (SF "k") (DF "q")])
687
688;; Number of scalar elements in each vector type
;; There is no V32HI entry.
689(define_mode_attr ssescalarnum
690  [(V64QI "64") (V16SI "16") (V8DI "8")
691   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
692   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
693   (V16SF "16") (V8DF "8")
694   (V8SF "8") (V4DF "4")
695   (V4SF "4") (V2DF "2")])
696
697;; Mask of scalar elements in each vector type
;; (element count minus one).  Only 128- and 256-bit modes are listed.
698(define_mode_attr ssescalarnummask
699  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
700   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
701   (V8SF "7") (V4DF "3")
702   (V4SF "3") (V2DF "1")])
703
;; Size in bits of one scalar element of each vector mode.  It is also used
;; as a mnemonic suffix, e.g. "vmovdqu<ssescalarsize>" below.
704(define_mode_attr ssescalarsize
705  [(V8DI  "64") (V4DI  "64") (V2DI  "64")
706   (V64QI "8") (V32QI "8") (V16QI "8")
707   (V32HI "16") (V16HI "16") (V8HI "16")
708   (V16SI "32") (V8SI "32") (V4SI "32")
709   (V16SF "32") (V8SF "32") (V4SF "32")
710   (V8DF "64") (V4DF "64") (V2DF "64")])
711
712;; SSE prefix for integer vector modes
;; "p" for integer vector modes, the empty string for float vector modes.
713(define_mode_attr sseintprefix
714  [(V2DI  "p") (V2DF  "")
715   (V4DI  "p") (V4DF  "")
716   (V8DI  "p") (V8DF  "")
717   (V4SI  "p") (V4SF  "")
718   (V8SI  "p") (V8SF  "")
719   (V16SI "p") (V16SF "")
720   (V16QI "p") (V8HI "p")
721   (V32QI "p") (V16HI "p")
722   (V64QI "p") (V32HI "p")])
723
724;; SSE scalar suffix for vector modes
;; "ss" for SF and "sd" for DF element types; V8SI and V4DI also map to
;; "ss" and "sd" respectively, and V4SI maps to "d".
725(define_mode_attr ssescalarmodesuffix
726  [(SF "ss") (DF "sd")
727   (V8SF "ss") (V4DF "sd")
728   (V4SF "ss") (V2DF "sd")
729   (V8SI "ss") (V4DI "sd")
730   (V4SI "d")])
731
732;; Pack/unpack vector modes
;; sseunpackmode: the mode of the same total size with elements twice as
;; wide and half as many of them (e.g. V16QI -> V8HI).
733(define_mode_attr sseunpackmode
734  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
735   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
736   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
737
;; ssepackmode: the inverse direction -- the mode of the same total size
;; with elements half as wide and twice as many of them (e.g. V8HI ->
;; V16QI).
738(define_mode_attr ssepackmode
739  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
740   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
741   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
742
743;; Mapping of the max integer size for xop rotate immediate constraint
;; (element width in bits minus one, for 128-bit integer modes).
744(define_mode_attr sserotatemax
745  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
746
747;; Mapping of mode to cast intrinsic name
;; "si" for integer, "ps" for single-float and "pd" for double-float
;; vectors; only 256- and 512-bit modes are listed.
748(define_mode_attr castmode
749 [(V8SI "si") (V8SF "ps") (V4DF "pd")
750  (V16SI "si") (V16SF "ps") (V8DF "pd")])
751
752;; Instruction suffix for sign and zero extensions.
;; A code attribute: "sx" for sign_extend, "zx" for zero_extend.
753(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
754
755;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
756;; i64x4 or f64x4 for 512bit modes.
;; NOTE(review): the 256-bit integer entries are written "%~128"; "%~" is
;; presumably the output-template escape that performs the i/f selection
;; described above -- confirm against the i386 operand printer.
757(define_mode_attr i128
758  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
759   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
760   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
761
762;; Mix-n-match
;; SImode, SFmode and DFmode element vectors of 256 bits (AVX256MODE2P)
;; and of 512 bits (AVX512MODE2P).
763(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
764(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
765
766;; Mapping for dbpsadbw modes
;; HImode vector -> QImode vector of the same total size.
767(define_mode_attr dbpsadbwmode
768  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
769
770;; Mapping suffixes for broadcast
;; "b"/"w"/"d"/"q" for QI/HI/SI/DI elements, "ss"/"sd" for SF/DF elements.
771(define_mode_attr bcstscalarsuff
772  [(V64QI "b")  (V32QI "b") (V16QI "b")
773   (V32HI "w")  (V16HI "w") (V8HI "w")
774   (V16SI "d")  (V8SI "d")  (V4SI "d")
775   (V8DI "q")   (V4DI "q")  (V2DI "q")
776   (V16SF "ss") (V8SF "ss") (V4SF "ss")
777   (V8DF "sd")  (V4DF "sd") (V2DF "sd")])
778
779;; Tie mode of assembler operand to mode iterator
;; "t" for 256-bit modes, "g" for 512-bit modes.
780(define_mode_attr concat_tg_mode
781  [(V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
782   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
783
784
785;; Include define_subst patterns for instructions with mask
786(include "subst.md")
787
788;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
789
790;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
791;;
792;; Move patterns
793;;
794;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
795
796;; All of these patterns are enabled for SSE1 as well as SSE2.
797;; This is essential for maintaining stable calling conventions.
798
;; Expander for a vector move in every VMOVE mode, enabled by TARGET_SSE.
;; All of the work is delegated to ix86_expand_vector_move; the expander
;; then finishes with DONE, so the RTL template itself is not emitted.
799(define_expand "mov<mode>"
800  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
801	(match_operand:VMOVE 1 "nonimmediate_operand"))]
802  "TARGET_SSE"
803{
804  ix86_expand_vector_move (<MODE>mode, operands);
805  DONE;
806})
807
;; Vector move instruction for every VMOVE mode.  Requires TARGET_SSE and
;; that at least one of the two operands is a register.
;;
;; Alternatives:
;;   0: constant (constraint "BC") -> register; the mnemonic comes from
;;      standard_sse_constant_opcode.
;;   1: register or memory -> register.
;;   2: register -> memory.
;;
;; For alternatives 1 and 2 the output code first handles the case
;; TARGET_AVX512F without TARGET_AVX512VL, mode size below 64 bytes, and
;; either operand being a register for which EXT_REX_SSE_REGNO_P holds:
;;   - store to memory: vextract<shuffletype>{64x4,32x4} with immediate 0
;;     from the %g-form of operand 1;
;;   - load from memory: vbroadcast<shuffletype>{64x4,32x4} into the %g-form
;;     of operand 0;
;;   - register to register: a move on the %g-forms of both operands.
;; Otherwise the mnemonic is selected from the insn's "mode" attribute:
;; the unaligned form is used when either operand is a misaligned_operand
;; (for non-XI modes only if TARGET_AVX), else the aligned form.
;;
;; The "mode" attribute is computed as: <ssePSmode> for 16-byte modes when
;; TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL (or alternative 2 with
;; TARGET_SSE_TYPELESS_STORES); else <sseinsnmode> under TARGET_AVX; else
;; V4SF without TARGET_SSE2 or when optimizing for size; else TI for
;; alternative 0 with TARGET_SSE_LOAD0_BY_PXOR; else <sseinsnmode>.
808(define_insn "*mov<mode>_internal"
809  [(set (match_operand:VMOVE 0 "nonimmediate_operand"               "=v,v ,m")
810	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"  "BC,vm,v"))]
811  "TARGET_SSE
812   && (register_operand (operands[0], <MODE>mode)
813       || register_operand (operands[1], <MODE>mode))"
814{
815  int mode = get_attr_mode (insn);
816  switch (which_alternative)
817    {
818    case 0:
819      return standard_sse_constant_opcode (insn, operands[1]);
820    case 1:
821    case 2:
822      /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
823	 in avx512f, so we need to use workarounds, to access sse registers
824	 16-31, which are evex-only. In avx512vl we don't need workarounds.  */
825      if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
826	  && ((REG_P (operands[0]) && EXT_REX_SSE_REGNO_P (REGNO (operands[0])))
827	      || (REG_P (operands[1]) && EXT_REX_SSE_REGNO_P (REGNO (operands[1])))))
828	{
829	  if (memory_operand (operands[0], <MODE>mode))
830	    {
831	      if (<MODE_SIZE> == 32)
832		return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
833	      else if (<MODE_SIZE> == 16)
834		return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
835	      else
836		gcc_unreachable ();
837	    }
838	  else if (memory_operand (operands[1], <MODE>mode))
839	    {
840	      if (<MODE_SIZE> == 32)
841		return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
842	      else if (<MODE_SIZE> == 16)
843		return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
844	      else
845		gcc_unreachable ();
846	    }
847	  else
848	    /* Reg -> reg move is always aligned.  Just use wider move.  */
849	    switch (mode)
850	      {
851	      case MODE_V8SF:
852	      case MODE_V4SF:
853		return "vmovaps\t{%g1, %g0|%g0, %g1}";
854	      case MODE_V4DF:
855	      case MODE_V2DF:
856		return "vmovapd\t{%g1, %g0|%g0, %g1}";
857	      case MODE_OI:
858	      case MODE_TI:
859		return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
860	      default:
861		gcc_unreachable ();
862	      }
863	}
864      switch (mode)
865	{
866	case MODE_V16SF:
867	case MODE_V8SF:
868	case MODE_V4SF:
869	  if (TARGET_AVX
870	      && (misaligned_operand (operands[0], <MODE>mode)
871		  || misaligned_operand (operands[1], <MODE>mode)))
872	    return "vmovups\t{%1, %0|%0, %1}";
873	  else
874	    return "%vmovaps\t{%1, %0|%0, %1}";
875
876	case MODE_V8DF:
877	case MODE_V4DF:
878	case MODE_V2DF:
879	  if (TARGET_AVX
880	      && (misaligned_operand (operands[0], <MODE>mode)
881		  || misaligned_operand (operands[1], <MODE>mode)))
882	    return "vmovupd\t{%1, %0|%0, %1}";
883	  else
884	    return "%vmovapd\t{%1, %0|%0, %1}";
885
886	case MODE_OI:
887	case MODE_TI:
888	  if (TARGET_AVX
889	      && (misaligned_operand (operands[0], <MODE>mode)
890		  || misaligned_operand (operands[1], <MODE>mode)))
891	    return TARGET_AVX512VL ? "vmovdqu64\t{%1, %0|%0, %1}"
892				   : "vmovdqu\t{%1, %0|%0, %1}";
893	  else
894	    return TARGET_AVX512VL ? "vmovdqa64\t{%1, %0|%0, %1}"
895				   : "%vmovdqa\t{%1, %0|%0, %1}";
896	case MODE_XI:
897	  if (misaligned_operand (operands[0], <MODE>mode)
898	      || misaligned_operand (operands[1], <MODE>mode))
899	    return "vmovdqu64\t{%1, %0|%0, %1}";
900	  else
901	    return "vmovdqa64\t{%1, %0|%0, %1}";
902
903	default:
904	  gcc_unreachable ();
905	}
906    default:
907      gcc_unreachable ();
908    }
909}
910  [(set_attr "type" "sselog1,ssemov,ssemov")
911   (set_attr "prefix" "maybe_vex")
912   (set (attr "mode")
913	(cond [(and (match_test "<MODE_SIZE> == 16")
914		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
915			 (and (eq_attr "alternative" "2")
916			      (match_test "TARGET_SSE_TYPELESS_STORES"))))
917		 (const_string "<ssePSmode>")
918	       (match_test "TARGET_AVX")
919		 (const_string "<sseinsnmode>")
920	       (ior (not (match_test "TARGET_SSE2"))
921		    (match_test "optimize_function_for_size_p (cfun)"))
922		 (const_string "V4SF")
923	       (and (eq_attr "alternative" "0")
924		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
925		 (const_string "TI")
926	      ]
927	      (const_string "<sseinsnmode>")))])
928
;; Masked move into a register for V48_AVX512VL modes (4- and 8-byte
;; elements), enabled by TARGET_AVX512F.
;;
;; RTL: operand 0 = vec_merge of operand 1 (register or memory source) and
;; operand 2 (constraint "0C": tied to operand 0 or a constant), selected
;; by mask register operand 3 (constraint "Yk").
;;
;; The mnemonic is assembled at output time into a function-static buffer:
;;   "vmov"   + a/u + <ssemodesuffix>   for float element modes,
;;   "vmovdq" + a/u + <ssescalarsize>   for integer element modes,
;; where "u" is used when operand 1 is a misaligned_operand and "a"
;; otherwise.  The operand part of the template prints the mask as {%3}
;; followed by %N2.  <ssemodesuffix> is defined outside this part of the
;; file.
929(define_insn "<avx512>_load<mode>_mask"
930  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
931	(vec_merge:V48_AVX512VL
932	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
933	  (match_operand:V48_AVX512VL 2 "vector_move_operand" "0C,0C")
934	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
935  "TARGET_AVX512F"
936{
937  static char buf [64];
938
939  const char *insn_op;
940  const char *sse_suffix;
941  const char *align;
942  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
943    {
944      insn_op = "vmov";
945      sse_suffix = "<ssemodesuffix>";
946    }
947  else
948    {
949      insn_op = "vmovdq";
950      sse_suffix = "<ssescalarsize>";
951    }
952
953  if (misaligned_operand (operands[1], <MODE>mode))
954    align = "u";
955  else
956    align = "a";
957
958  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%3%%}%%N2|%%0%%{%%3%%}%%N2, %%1}",
959	    insn_op, align, sse_suffix);
960  return buf;
961}
962  [(set_attr "type" "ssemov")
963   (set_attr "prefix" "evex")
964   (set_attr "memory" "none,load")
965   (set_attr "mode" "<sseinsnmode>")])
966
;; Masked load for the VI12_AVX512VL modes, enabled under TARGET_AVX512BW.
;; Operand 1 is merged into the destination under mask operand 3, with
;; operand 2 supplying the other elements.  Unlike the V48 variant, the
;; unaligned vmovdqu form is emitted unconditionally.
(define_insn "<avx512>_load<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C,0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
979
;; Mask-controlled blend for the V48_AVX512VL modes: a vec_merge of operand 2
;; and operand 1 under mask operand 3.  The floating-point instructions are
;; spelled vblendmps/vblendmpd, while the integer ones carry a "p" prefix
;; (vpblendmd/vpblendmq).  The mnemonic is therefore chosen from the element
;; mode, the same way the masked load and store patterns choose between vmov
;; and vmovdq.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* Integer element modes need the vp prefix; vblendm followed by an
     integer element suffix does not name an existing instruction.  */
  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
    return "vblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
  else
    return "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
991
;; Mask-controlled blend for the VI12_AVX512VL modes, enabled under
;; TARGET_AVX512BW: a vec_merge of operand 2 and operand 1 under mask
;; operand 3, emitted as vpblendm with the mode suffix.
(define_insn "<avx512>_blendm<mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1003
;; Masked store for the V48_AVX512VL modes.  Register operand 1 is merged
;; into memory operand 0 under mask operand 2; the match_dup makes the
;; memory destination its own merge source.  The mnemonic is assembled at
;; output time exactly as in the masked load pattern.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:V48_AVX512VL
	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  /* The template is returned by pointer, hence the static storage.  */
  static char buf [64];

  /* Floating-point element modes take vmov plus the mode suffix; all other
     element modes take vmovdq plus the scalar-size suffix.  */
  const bool fp_elt_p = FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode));
  const char *mnemonic = fp_elt_p ? "vmov" : "vmovdq";
  const char *elt_suffix = fp_elt_p ? "<ssemodesuffix>" : "<ssescalarsize>";

  /* Here it is the destination whose alignment decides the form.  */
  const char *align_letter
    = misaligned_operand (operands[0], <MODE>mode) ? "u" : "a";

  snprintf (buf, sizeof (buf), "%s%s%s\t{%%1, %%0%%{%%2%%}|%%0%%{%%2%%}, %%1}",
	    mnemonic, align_letter, elt_suffix);
  return buf;
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1041
;; Masked store for the VI12_AVX512VL modes, enabled under TARGET_AVX512BW.
;; Register operand 1 is merged into memory operand 0 under mask operand 2.
;; The unaligned vmovdqu form is emitted unconditionally.
(define_insn "<avx512>_store<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:VI12_AVX512VL
	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
1054
;; Build a V2DI value whose low element is element 0 of operand 1 and whose
;; high element is zero, using a single movq.
(define_insn "sse2_movq128"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(vec_concat:V2DI
	  (vec_select:DI
	    (match_operand:V2DI 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)]))
	  (const_int 0)))]
  "TARGET_SSE2"
  "%vmovq\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
1067
1068;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1069;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1070;; from memory, we'd prefer to load the memory directly into the %xmm
1071;; register.  To facilitate this happy circumstance, this pattern won't
1072;; split until after register allocation.  If the 64-bit value didn't
1073;; come from memory, this is the best we can do.  This is much better
1074;; than storing %edx:%eax into a stack temporary and loading an %xmm
1075;; from there.
1076
;; The insn itself is never output ("#"); after reload it is split by the C
;; code below, which emits the whole replacement sequence itself.  The
;; (const_int 0) replacement template is only a placeholder, so the code must
;; finish with DONE.
(define_insn_and_split "movdi_to_sse"
  [(parallel
    [(set (match_operand:V4SI 0 "register_operand" "=?x,x")
	  (subreg:V4SI (match_operand:DI 1 "nonimmediate_operand" "r,m") 0))
     (clobber (match_scratch:V4SI 2 "=&x,X"))])]
  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
 if (register_operand (operands[1], DImode))
   {
      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
	 Assemble the 64-bit DImode value in an xmm register.  */
      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
				  gen_lowpart (SImode, operands[1])));
      emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
				  gen_highpart (SImode, operands[1])));
      emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
					     operands[2]));
   }
 else if (memory_operand (operands[1], DImode))
   {
     /* This split only runs once reload has completed, so no new pseudo
	register may be created here.  Load the 64-bit value straight into
	the destination, viewed as V2DImode, with a zero upper half.  */
     emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
				    operands[1], const0_rtx));
   }
 else
   gcc_unreachable ();

 /* Everything has been emitted above; do not also emit the placeholder
    replacement template.  */
 DONE;
})
1107
;; After reload, rewrite a V4SF register set whose source satisfies
;; zero_extended_scalar_load_operand.  The source is narrowed to an SFmode
;; subreg at offset 0, duplicated, and merged with a zero vector so that only
;; element 0 (mask const_int 1) comes from the scalar.
(define_split
  [(set (match_operand:V4SF 0 "register_operand")
	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0)
	(vec_merge:V4SF
	  (vec_duplicate:V4SF (match_dup 1))
	  (match_dup 2)
	  (const_int 1)))]
{
  operands[1] = simplify_gen_subreg (SFmode, operands[1], V4SFmode, 0);
  operands[2] = CONST0_RTX (V4SFmode);
})
1121
;; After reload, rewrite a V2DF register set whose source satisfies
;; zero_extended_scalar_load_operand as a vec_concat of the DFmode subreg at
;; offset 0 of the source with a DFmode zero.
(define_split
  [(set (match_operand:V2DF 0 "register_operand")
	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
{
  operands[1] = simplify_gen_subreg (DFmode, operands[1], V2DFmode, 0);
  operands[2] = CONST0_RTX (DFmode);
})
1131
;; Misaligned vector move for the VMOVE modes.  All of the work is delegated
;; to ix86_expand_vector_move_misalign; the RTL template is never emitted
;; because the C code ends with DONE.
(define_expand "movmisalign<mode>"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
	(match_operand:VMOVE 1 "nonimmediate_operand"))]
  "TARGET_SSE"
{
  ix86_expand_vector_move_misalign (<MODE>mode, operands);
  DONE;
})
1140
;; Expander for explicitly unaligned VF loads.  Under AVX, a source already
;; known to be misaligned is loaded with an ordinary move so that it can be
;; combined with arithmetic; otherwise the UNSPEC_LOADU template is emitted.
(define_expand "<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand")
	(unspec:VF [(match_operand:VF 1 "nonimmediate_operand")]
	  UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  /* With AVX the plain *mov<mode>_internal pattern copes with unaligned
     loads whenever misaligned_operand holds, and dropping the UNSPEC lets
     the load be combined with arithmetic instructions.  When the operand
     is not known to be misaligned, fall through and keep the UNSPEC_LOADU
     insn so the user's request for a misaligned load is honored.  */
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      /* For the masked variant the plain source is wrapped in a merge with
	 the pass-through operand and the mask.  */
      rtx src = (<mask_applied>
		 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
				      operands[2 * <mask_applied>],
				      operands[3 * <mask_applied>])
		 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
      DONE;
    }
})
1164
;; Insn for explicitly unaligned VF loads.  The mnemonic follows the insn's
;; computed "mode" attribute: single-precision modes give vmovups, anything
;; else gives vmovu with the mode suffix.
(define_insn "*<sse>_loadu<ssemodesuffix><avxsizesuffix><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
	(unspec:VF
	  [(match_operand:VF 1 "nonimmediate_operand" "vm")]
	  UNSPEC_LOADU))]
  "TARGET_SSE && <mask_mode512bit_condition>"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  /* The attribute below may force a packed-single mode even for a
     double-precision vector.  */
  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX")
		 (const_string "<MODE>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	      ]
	      (const_string "<MODE>")))])
1196
;; Insn for explicitly unaligned VF stores.  The mnemonic follows the insn's
;; computed "mode" attribute: single-precision modes give vmovups, anything
;; else gives vmovu with the mode suffix.
(define_insn "<sse>_storeu<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "v")]
	  UNSPEC_STOREU))]
  "TARGET_SSE"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  /* The attribute below may force a packed-single mode even for a
     double-precision vector.  */
  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  return "%vmovu<ssemodesuffix>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
                    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
                         (match_test "TARGET_SSE_TYPELESS_STORES")))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX")
		 (const_string "<MODE>")
	       (match_test "optimize_function_for_size_p (cfun)")
		 (const_string "V4SF")
	      ]
	      (const_string "<MODE>")))])
1229
;; Masked, explicitly unaligned store for the VF_AVX512VL modes.  The
;; UNSPEC_STOREU of register operand 1 is merged into memory operand 0 under
;; mask operand 2.
(define_insn "<avx512>_storeu<ssemodesuffix><avxsizesuffix>_mask"
  [(set (match_operand:VF_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")]
	    UNSPEC_STOREU)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  /* Single-precision modes use vmovups; the rest use the mode suffix.  */
  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "vmovups\t{%1, %0%{%2%}|%0%{%2%}, %1}";

  return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1255
1256/* For AVX, normal *mov<mode>_internal pattern will handle unaligned loads
1257   just fine if misaligned_operand is true, and without the UNSPEC it can
1258   be combined with arithmetic instructions.  If misaligned_operand is
1259   false, still emit UNSPEC_LOADU insn to honor user's request for
1260   misaligned load.  */
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand")
	(unspec:VI1
	  [(match_operand:VI1 1 "nonimmediate_operand")]
	  UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  /* Under AVX, a source already known to be misaligned is loaded with an
     ordinary set; otherwise fall through and emit the UNSPEC_LOADU
     template.  */
  if (TARGET_AVX
      && misaligned_operand (operands[1], <MODE>mode))
    {
      /* For the masked variant the plain source is wrapped in a merge with
	 the pass-through operand and the mask.  */
      rtx src = (<mask_applied>
		 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
				      operands[2 * <mask_applied>],
				      operands[3 * <mask_applied>])
		 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
      DONE;
    }
})
1280
;; Expander for explicitly unaligned loads of the
;; VI_ULOADSTORE_BW_AVX512VL modes.  A source already known to be misaligned
;; is loaded with an ordinary set; otherwise the UNSPEC_LOADU template is
;; emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand")
	(unspec:VI_ULOADSTORE_BW_AVX512VL
	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand")]
	  UNSPEC_LOADU))]
  "TARGET_AVX512BW"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* For the masked variant the plain source is wrapped in a merge with
	 the pass-through operand and the mask.  */
      rtx src = (<mask_applied>
		 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
				      operands[2 * <mask_applied>],
				      operands[3 * <mask_applied>])
		 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
      DONE;
    }
})
1299
;; Expander for explicitly unaligned loads of the
;; VI_ULOADSTORE_F_AVX512VL modes.  A source already known to be misaligned
;; is loaded with an ordinary set; otherwise the UNSPEC_LOADU template is
;; emitted.
(define_expand "<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand")
	(unspec:VI_ULOADSTORE_F_AVX512VL
	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand")]
	  UNSPEC_LOADU))]
  "TARGET_AVX512F"
{
  if (misaligned_operand (operands[1], <MODE>mode))
    {
      /* For the masked variant the plain source is wrapped in a merge with
	 the pass-through operand and the mask.  */
      rtx src = (<mask_applied>
		 ? gen_rtx_VEC_MERGE (<MODE>mode, operands[1],
				      operands[2 * <mask_applied>],
				      operands[3 * <mask_applied>])
		 : operands[1]);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], src));
      DONE;
    }
})
1318
;; Insn for explicitly unaligned loads of the VI1 modes.  The mnemonic
;; follows the insn's computed "mode" attribute: packed-single modes give
;; vmovups; otherwise plain vmovdqu is used unless both AVX512VL and
;; AVX512BW are enabled, in which case the sized vmovdqu form is used.
;; NOTE(review): only the sized vmovdqu template carries <mask_operand2>;
;; confirm the other two templates cannot be selected for the masked variant.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI1 0 "register_operand" "=v")
	(unspec:VI1
	  [(match_operand:VI1 1 "nonimmediate_operand" "vm")]
	  UNSPEC_LOADU))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
{
  switch (get_attr_mode (insn))
    {
    case MODE_V8SF:
    case MODE_V4SF:
      return "%vmovups\t{%1, %0|%0, %1}";
    default:
      if (!(TARGET_AVX512VL && TARGET_AVX512BW))
	return "%vmovdqu\t{%1, %0|%0, %1}";
      else
	return "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}";
    }
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (match_test "optimize_function_for_size_p (cfun)")
	         (const_string "V4SF")
	      ]
	      (const_string "<sseinsnmode>")))])
1357
;; Insn for explicitly unaligned loads of the VI_ULOADSTORE_BW_AVX512VL
;; modes, enabled under TARGET_AVX512BW.  Always emits the sized vmovdqu
;; form, with the mask operand appended for the masked variant.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "register_operand" "=v")
	(unspec:VI_ULOADSTORE_BW_AVX512VL
	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_LOADU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1369
;; Insn for explicitly unaligned loads of the VI_ULOADSTORE_F_AVX512VL
;; modes, enabled under TARGET_AVX512F.  Always emits the sized vmovdqu
;; form, with the mask operand appended for the masked variant.
(define_insn "*<sse2_avx_avx512f>_loaddqu<mode><mask_name>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "register_operand" "=v")
	(unspec:VI_ULOADSTORE_F_AVX512VL
	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_LOADU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1381
;; Insn for explicitly unaligned stores of the VI1 modes.  The mnemonic
;; follows the insn's computed "mode" attribute and, for the byte vector
;; modes, whether both AVX512VL and AVX512BW are enabled.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI1 0 "memory_operand" "=m")
	(unspec:VI1
	  [(match_operand:VI1 1 "register_operand" "v")]
	  UNSPEC_STOREU))]
  "TARGET_SSE2"
{
  const enum attr_mode insn_mode = get_attr_mode (insn);

  /* Packed-single modes selected by the attribute below use vmovups.  */
  if (insn_mode == MODE_V16SF
      || insn_mode == MODE_V8SF
      || insn_mode == MODE_V4SF)
    return "%vmovups\t{%1, %0|%0, %1}";

  /* 32- and 16-byte QI vectors use plain vmovdqu unless both AVX512VL and
     AVX512BW are available; every other case uses the sized form.  */
  if ((<MODE>mode == V32QImode || <MODE>mode == V16QImode)
      && !(TARGET_AVX512VL && TARGET_AVX512BW))
    return "%vmovdqu\t{%1, %0|%0, %1}";

  return "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}";
}
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set (attr "mode")
	(cond [(and (match_test "<MODE_SIZE> == 16")
		    (ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
			 (match_test "TARGET_SSE_TYPELESS_STORES")))
		 (const_string "<ssePSmode>")
	       (match_test "TARGET_AVX")
		 (const_string "<sseinsnmode>")
	       (match_test "optimize_function_for_size_p (cfun)")
	         (const_string "V4SF")
	      ]
	      (const_string "<sseinsnmode>")))])
1427
;; Insn for explicitly unaligned stores of the VI_ULOADSTORE_BW_AVX512VL
;; modes, enabled under TARGET_AVX512BW.  Always emits the sized vmovdqu
;; form.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_BW_AVX512VL 0 "memory_operand" "=m")
	(unspec:VI_ULOADSTORE_BW_AVX512VL
	  [(match_operand:VI_ULOADSTORE_BW_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_STOREU))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1439
;; Insn for explicitly unaligned stores of the VI_ULOADSTORE_F_AVX512VL
;; modes, enabled under TARGET_AVX512F.  Always emits the sized vmovdqu
;; form.  The prefix attribute matches the BW store pattern and the
;; corresponding load patterns, which all use maybe_evex.
(define_insn "<sse2_avx_avx512f>_storedqu<mode>"
  [(set (match_operand:VI_ULOADSTORE_F_AVX512VL 0 "memory_operand" "=m")
	(unspec:VI_ULOADSTORE_F_AVX512VL
	  [(match_operand:VI_ULOADSTORE_F_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_STOREU))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set_attr "prefix" "maybe_evex")])
1451
;; Masked, explicitly unaligned store for the VI48_AVX512VL modes.  The
;; UNSPEC_STOREU of register operand 1 is merged into memory operand 0 under
;; mask operand 2.
(define_insn "<avx512>_storedqu<mode>_mask"
  [(set (match_operand:VI48_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:VI48_AVX512VL
	  (unspec:VI48_AVX512VL
	    [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
	    UNSPEC_STOREU)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1467
;; Masked, explicitly unaligned store for the VI12_AVX512VL modes, enabled
;; under TARGET_AVX512BW.  The UNSPEC_STOREU of register operand 1 is merged
;; into memory operand 0 under mask operand 2.
(define_insn "<avx512>_storedqu<mode>_mask"
  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
	(vec_merge:VI12_AVX512VL
	  (unspec:VI12_AVX512VL
	    [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
	    UNSPEC_STOREU)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
1483
;; lddqu load of a VI1 vector from memory (UNSPEC_LDDQU), enabled under
;; TARGET_SSE3.  The prefix_data16 and prefix_rep attributes are only given
;; explicit values when AVX is off; with AVX they are left at "*".
(define_insn "<sse3>_lddqu<avxsizesuffix>"
  [(set (match_operand:VI1 0 "register_operand" "=x")
	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
		    UNSPEC_LDDQU))]
  "TARGET_SSE3"
  "%vlddqu\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "movu" "1")
   (set_attr "ssememalign" "8")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
1505
;; Non-temporal store (UNSPEC_MOVNT) of a general register to memory for the
;; SWI48 modes, emitted as movnti.
(define_insn "sse2_movnti<mode>"
  [(set (match_operand:SWI48 0 "memory_operand" "=m")
	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
		      UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "movnti\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "0")
   (set_attr "mode" "<MODE>")])
1515
;; Non-temporal store (UNSPEC_MOVNT) of a VF vector register to memory,
;; emitted as movnt with the mode suffix.
(define_insn "<sse>_movnt<mode>"
  [(set (match_operand:VF 0 "memory_operand" "=m")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "v")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE"
  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
1526
;; Non-temporal store (UNSPEC_MOVNT) of a VI8 vector register to memory,
;; emitted as movntdq.  prefix_data16 is set to 1 only when AVX is off.
(define_insn "<sse2>_movnt<mode>"
  [(set (match_operand:VI8 0 "memory_operand" "=m")
	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
		    UNSPEC_MOVNT))]
  "TARGET_SSE2"
  "%vmovntdq\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
1541
1542; Expand patterns for non-temporal stores.  At the moment, only those
1543; that directly map to insns are defined; it would be possible to
1544; define patterns for other modes that would expand to several insns.
1545
;; Modes handled by storent patterns.  Each entry pairs a mode with the
;; target condition under which it is enabled; V4SF carries no extra
;; condition of its own.
(define_mode_iterator STORENT_MODE
  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1553
;; Named non-temporal store expander for every STORENT_MODE mode.  It has no
;; preparation code and simply emits the UNSPEC_MOVNT set given here.
(define_expand "storent<mode>"
  [(set (match_operand:STORENT_MODE 0 "memory_operand")
	(unspec:STORENT_MODE
	  [(match_operand:STORENT_MODE 1 "register_operand")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE")
1560
1561;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1562;;
1563;; Parallel floating point arithmetic
1564;;
1565;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1566
;; Vector abs/neg expander for the VF modes.  All of the work is delegated
;; to ix86_expand_fp_absneg_operator; the template is never emitted because
;; the C code ends with DONE.
(define_expand "<code><mode>2"
  [(set (match_operand:VF 0 "register_operand")
	(absneg:VF
	  (match_operand:VF 1 "register_operand")))]
  "TARGET_SSE"
  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1573
;; Vector abs/neg placeholder insn for the VF modes.  Operand 2, carried in
;; a USE, is the second input of the logical operation that the split emits
;; after reload: XOR for NEG, AND for everything else matched by
;; absneg_operator.
(define_insn_and_split "*absneg<mode>2"
  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
	(match_operator:VF 3 "absneg_operator"
	  [(match_operand:VF 1 "nonimmediate_operand" "0, xm, v, m")]))
   (use (match_operand:VF 2 "nonimmediate_operand"    "xm, 0, vm,v"))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  const enum rtx_code logic_code
    = GET_CODE (operands[3]) == NEG ? XOR : AND;
  rtx lhs, rhs;

  if (!TARGET_AVX)
    {
      /* Two-operand form: the destination is the first input, and the
	 second input is whichever of operands 1 and 2 is not tied to
	 the destination.  */
      lhs = operands[0];
      rhs = rtx_equal_p (operands[0], operands[1]) ? operands[2] : operands[1];
    }
  else if (MEM_P (operands[1]))
    {
      /* Three-operand form: keep a memory input in the second slot.  */
      lhs = operands[2];
      rhs = operands[1];
    }
  else
    {
      lhs = operands[1];
      rhs = operands[2];
    }

  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
			  gen_rtx_fmt_ee (logic_code, <MODE>mode, lhs, rhs)));
  DONE;
}
  [(set_attr "isa" "noavx,noavx,avx,avx")])
1611
;; Vector add/subtract expander for the VF modes, including the masked and
;; rounding variants.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand")
	(plusminus:VF
	  (match_operand:VF 1 "<round_nimm_predicate>")
	  (match_operand:VF 2 "<round_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1619
;; Vector add/subtract insn for the VF modes.  Alternative 0 (isa noavx) is
;; the two-operand form with operand 1 tied to the destination; alternative
;; 1 (isa avx) is the three-operand form and carries the mask and rounding
;; operands.
(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
	(plusminus:VF
	  (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
	  (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
1633
;; Scalar add/subtract on the VF_128 modes: element 0 (mask const_int 1) of
;; the result is the sum or difference, and the other elements come from
;; operand 1.  Alternative 0 is the non-AVX form, alternative 1 the AVX
;; form.
(define_insn "<sse>_vm<plusminus_insn><mode>3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
	(vec_merge:VF_128
	  (plusminus:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,v")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseadd")
   (set_attr "prefix" "<round_prefix>")
   (set_attr "mode" "<ssescalarmode>")])
1650
;; Vector multiply expander for the VF modes, including the masked and
;; rounding variants.  The operands are first passed through
;; ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "mul<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand")
	(mult:VF
	  (match_operand:VF 1 "<round_nimm_predicate>")
	  (match_operand:VF 2 "<round_nimm_predicate>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1658
;; Vector multiply insn for the VF modes.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (isa avx) is the three-operand form and carries the mask and rounding
;; operands.
(define_insn "*mul<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
	(mult:VF
	  (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
	  (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   mul<ssemodesuffix>\t{%2, %0|%0, %2}
   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "btver2_decode" "direct,double")
   (set_attr "mode" "<MODE>")])
1673
;; Scalar multiply/divide on the VF_128 modes: element 0 (mask const_int 1)
;; of the result is the product or quotient, and the other elements come
;; from operand 1.  Alternative 0 is the non-AVX form, alternative 1 the AVX
;; form.
(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
	(vec_merge:VF_128
	  (multdiv:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,v")
	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %<iptr>2<round_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse<multdiv_mnemonic>")
   (set_attr "prefix" "<round_prefix>")
   (set_attr "btver2_decode" "direct,double")
   (set_attr "mode" "<ssescalarmode>")])
1691
;; Vector divide expander for the VF2 modes.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy, then the template is emitted.
(define_expand "div<mode>3"
  [(set (match_operand:VF2 0 "register_operand")
	(div:VF2 (match_operand:VF2 1 "register_operand")
		 (match_operand:VF2 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1698
;; Vector divide expander for the VF1 modes.  When the reciprocal-based
;; sequence is permitted by the flags tested below, ix86_emit_swdivsf emits
;; the whole division; otherwise the plain div template is emitted.
(define_expand "div<mode>3"
  [(set (match_operand:VF1 0 "register_operand")
	(div:VF1 (match_operand:VF1 1 "register_operand")
		 (match_operand:VF1 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);

  /* The software sequence is used only when SSE math and vector reciprocal
     division are enabled, the insn is not optimized for size, and the
     finite-only, non-trapping and unsafe-math flags are all in effect.  */
  const bool use_recip_sequence_p
    = (TARGET_SSE_MATH
       && TARGET_RECIP_VEC_DIV
       && !optimize_insn_for_size_p ()
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations);

  if (use_recip_sequence_p)
    {
      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
      DONE;
    }
})
1717
;; Vector divide insn for the VF modes.  Alternative 0 (isa noavx) is the
;; two-operand form with operand 1 tied to the destination; alternative 1
;; (isa avx) is the three-operand form and carries the mask and rounding
;; operands.
(define_insn "<sse>_div<mode>3<mask_name><round_name>"
  [(set (match_operand:VF 0 "register_operand" "=x,v")
	(div:VF
	  (match_operand:VF 1 "register_operand" "0,v")
	  (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   div<ssemodesuffix>\t{%2, %0|%0, %2}
   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssediv")
   (set_attr "prefix" "<mask_prefix3>")
   (set_attr "mode" "<MODE>")])
1731
;; Packed reciprocal (UNSPEC_RCP) for the VF1_128_256 modes, emitted as
;; rcpps.
(define_insn "<sse>_rcp<mode>2"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
	(unspec:VF1_128_256
	  [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
  "TARGET_SSE"
  "%vrcpps\t{%1, %0|%0, %1}"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
1743
;; Scalar reciprocal on V4SF: element 0 (mask const_int 1) of the result is
;; the UNSPEC_RCP of operand 1, and the other elements come from operand 2.
;; Alternative 0 is rcpss with operand 2 tied to the destination;
;; alternative 1 is the three-operand vrcpss.
(define_insn "sse_vmrcpv4sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_merge:V4SF
	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
		       UNSPEC_RCP)
	  (match_operand:V4SF 2 "register_operand" "0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   rcpss\t{%1, %0|%0, %k1}
   vrcpss\t{%1, %2, %0|%0, %2, %k1}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sse")
   (set_attr "ssememalign" "32")
   (set_attr "atom_sse_attr" "rcp")
   (set_attr "btver2_sse_attr" "rcp")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
1762
;; Packed UNSPEC_RCP14 for the VF_AVX512VL modes, emitted as vrcp14 with the
;; mode suffix and, for the masked variant, the mask operand.
(define_insn "<mask_codefor>rcp14<mode><mask_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_RCP14))]
  "TARGET_AVX512F"
  "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1773
;; Scalar UNSPEC_RCP14 on the VF_128 modes: element 0 (mask const_int 1) of
;; the result is the UNSPEC_RCP14 of operand 1, and the other elements come
;; from operand 2.
(define_insn "srcp14<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
	    UNSPEC_RCP14)
	  (match_operand:VF_128 2 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
1787
;; Vector square-root expander for the VF2 modes.  It has no preparation
;; code and simply emits the sqrt template.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF2 0 "register_operand")
	(sqrt:VF2 (match_operand:VF2 1 "nonimmediate_operand")))]
  "TARGET_SSE2")
1792
;; Vector square-root expander for the VF1 modes.  When the reciprocal-based
;; sequence is permitted by the flags tested below, ix86_emit_swsqrtsf emits
;; the whole computation; otherwise the plain sqrt template is emitted.
(define_expand "sqrt<mode>2"
  [(set (match_operand:VF1 0 "register_operand")
	(sqrt:VF1 (match_operand:VF1 1 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  /* The software sequence is used only when SSE math and vector reciprocal
     square root are enabled, the insn is not optimized for size, and the
     finite-only, non-trapping and unsafe-math flags are all in effect.  */
  const bool use_recip_sequence_p
    = (TARGET_SSE_MATH
       && TARGET_RECIP_VEC_SQRT
       && !optimize_insn_for_size_p ()
       && flag_finite_math_only && !flag_trapping_math
       && flag_unsafe_math_optimizations);

  if (use_recip_sequence_p)
    {
      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
      DONE;
    }
})
1808
;; Packed square root over all VF modes, emitted as
;; %vsqrt<ssemodesuffix>.  The name, operand 1's predicate/constraint and
;; the template are parameterized by the <mask_name> and <round_name>
;; substitutions; the insn condition additionally requires the
;; substitutions' 512-bit mode conditions to hold.
1809(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
1810  [(set (match_operand:VF 0 "register_operand" "=v")
1811	(sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "<round_constraint>")))]
1812  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1813  "%vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
1814  [(set_attr "type" "sse")
1815   (set_attr "atom_sse_attr" "sqrt")
1816   (set_attr "btver2_sse_attr" "sqrt")
1817   (set_attr "prefix" "maybe_vex")
1818   (set_attr "mode" "<MODE>")])
1819
;; Scalar square root on a 128-bit vector: vec_merge mask 1 takes
;; element 0 from sqrt (operand 1) and the remaining elements from
;; operand 2.  Alternative 0 is the two-operand non-AVX form (operand 2
;; tied to the destination); alternative 1 is the three-operand AVX form,
;; which also carries the <round_name> substitution's rounding operand.
;; The Intel-syntax side prints operand 1 with %<iptr>.
1820(define_insn "<sse>_vmsqrt<mode>2<round_name>"
1821  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1822	(vec_merge:VF_128
1823	  (sqrt:VF_128
1824	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_constraint>"))
1825	  (match_operand:VF_128 2 "register_operand" "0,v")
1826	  (const_int 1)))]
1827  "TARGET_SSE"
1828  "@
1829   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
1830   vsqrt<ssescalarmodesuffix>\t{<round_op3>%1, %2, %0|%0, %2, %<iptr>1<round_op3>}"
1831  [(set_attr "isa" "noavx,avx")
1832   (set_attr "type" "sse")
1833   (set_attr "atom_sse_attr" "sqrt")
1834   (set_attr "prefix" "<round_prefix>")
1835   (set_attr "btver2_sse_attr" "sqrt")
1836   (set_attr "mode" "<ssescalarmode>")])
1837
;; Named rsqrt expander for the VF1_128_256 modes.  The RTL template is
;; never emitted: the preparation code always hands the operands to
;; ix86_emit_swsqrtsf (last argument true) and finishes with DONE.
;; Needs TARGET_SSE_MATH.
1838(define_expand "rsqrt<mode>2"
1839  [(set (match_operand:VF1_128_256 0 "register_operand")
1840	(unspec:VF1_128_256
1841	  [(match_operand:VF1_128_256 1 "nonimmediate_operand")] UNSPEC_RSQRT))]
1842  "TARGET_SSE_MATH"
1843{
1844  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
1845  DONE;
1846})
1847
;; Packed UNSPEC_RSQRT of operand 1 for the VF1_128_256 modes, emitted as
;; %vrsqrtps.  Destination is an "x" register; the source may be a
;; register or memory ("xm").
1848(define_insn "<sse>_rsqrt<mode>2"
1849  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1850	(unspec:VF1_128_256
1851	  [(match_operand:VF1_128_256 1 "nonimmediate_operand" "xm")] UNSPEC_RSQRT))]
1852  "TARGET_SSE"
1853  "%vrsqrtps\t{%1, %0|%0, %1}"
1854  [(set_attr "type" "sse")
1855   (set_attr "prefix" "maybe_vex")
1856   (set_attr "mode" "<MODE>")])
1857
;; Packed UNSPEC_RSQRT14 of operand 1, emitted as
;; vrsqrt14<ssemodesuffix>.  Name and destination go through the
;; <mask_name> / <mask_operand2> substitutions.  Needs TARGET_AVX512F.
1858(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
1859  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1860	(unspec:VF_AVX512VL
1861	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1862	  UNSPEC_RSQRT14))]
1863  "TARGET_AVX512F"
1864  "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1865  [(set_attr "type" "sse")
1866   (set_attr "prefix" "evex")
1867   (set_attr "mode" "<MODE>")])
1868
;; Scalar form of the rsqrt14 pattern, emitted as
;; vrsqrt14<ssescalarmodesuffix>.  The vec_merge with mask 1 takes
;; element 0 from UNSPEC_RSQRT14 of operand 1 and every other element
;; from operand 2.
;; Operand 1 may be memory.  The instruction takes a scalar-sized memory
;; source, so the Intel-syntax half of the template prints it through
;; %<iptr> (the same modifier sse_vmrsqrtv4sf2 spells as %k1) rather than
;; with the full vector width implied by the operand's mode.
1869(define_insn "rsqrt14<mode>"
1870  [(set (match_operand:VF_128 0 "register_operand" "=v")
1871	(vec_merge:VF_128
1872	  (unspec:VF_128
1873	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1874	    UNSPEC_RSQRT14)
1875	  (match_operand:VF_128 2 "register_operand" "v")
1876	  (const_int 1)))]
1877  "TARGET_AVX512F"
1878  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1879  [(set_attr "type" "sse")
1880   (set_attr "prefix" "evex")
1881   (set_attr "mode" "<MODE>")])
1882
;; Scalar single-precision rsqrt on a V4SF: vec_merge mask 1 takes
;; element 0 from UNSPEC_RSQRT of operand 1 and elements 1-3 from
;; operand 2.  Alternative 0 is the non-AVX rsqrtss (operand 2 tied to
;; the destination), alternative 1 the three-operand vrsqrtss.  The
;; Intel-syntax side prints operand 1 with the %k modifier.
1883(define_insn "sse_vmrsqrtv4sf2"
1884  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1885	(vec_merge:V4SF
1886	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1887		       UNSPEC_RSQRT)
1888	  (match_operand:V4SF 2 "register_operand" "0,x")
1889	  (const_int 1)))]
1890  "TARGET_SSE"
1891  "@
1892   rsqrtss\t{%1, %0|%0, %k1}
1893   vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
1894  [(set_attr "isa" "noavx,avx")
1895   (set_attr "type" "sse")
1896   (set_attr "ssememalign" "32")
1897   (set_attr "prefix" "orig,vex")
1898   (set_attr "mode" "SF")])
1899
1900;; ??? For !flag_finite_math_only, the representation with SMIN/SMAX
1901;; isn't really correct, as those rtl operators aren't defined when
1902;; applied to NaNs.  Hopefully the optimizers won't get too smart on us.
1903
;; Expander for the smaxmin codes over all VF modes, parameterized by
;; the <mask_name> and <round_saeonly_name> substitutions.  Unless
;; flag_finite_math_only is set, operand 1 is first forced into a
;; register; then ix86_fixup_binary_operands_no_copy adjusts the operands
;; before the (smaxmin ...) set is emitted.
1904(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
1905  [(set (match_operand:VF 0 "register_operand")
1906	(smaxmin:VF
1907	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
1908	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
1909  "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1910{
1911  if (!flag_finite_math_only)
1912    operands[1] = force_reg (<MODE>mode, operands[1]);
1913  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
1914})
1915
;; Packed min/max insn used only when flag_finite_math_only is set.
;; Operand 1 is marked commutative ("%") and may use the substitution's
;; non-immediate predicate; ix86_binary_operator_ok must accept the
;; operands.  Alternative 0 is the two-operand non-AVX form, alternative 1
;; the three-operand AVX form carrying the mask / SAE substitutions.
1916(define_insn "*<code><mode>3_finite<mask_name><round_saeonly_name>"
1917  [(set (match_operand:VF 0 "register_operand" "=x,v")
1918	(smaxmin:VF
1919	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
1920	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1921  "TARGET_SSE && flag_finite_math_only
1922   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1923   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1924  "@
1925   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1926   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1927  [(set_attr "isa" "noavx,avx")
1928   (set_attr "type" "sseadd")
1929   (set_attr "btver2_sse_attr" "maxmin")
1930   (set_attr "prefix" "<mask_prefix3>")
1931   (set_attr "mode" "<MODE>")])
1932
;; Packed min/max insn used when flag_finite_math_only is NOT set.
;; Unlike the _finite variant, operand 1 must be a register and carries
;; no commutative marker, so the operand order is preserved.  Same two
;; alternatives and output templates as the _finite variant.
1933(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
1934  [(set (match_operand:VF 0 "register_operand" "=x,v")
1935	(smaxmin:VF
1936	  (match_operand:VF 1 "register_operand" "0,v")
1937	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xm,<round_saeonly_constraint>")))]
1938  "TARGET_SSE && !flag_finite_math_only
1939   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
1940  "@
1941   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
1942   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
1943  [(set_attr "isa" "noavx,avx")
1944   (set_attr "type" "sseadd")
1945   (set_attr "btver2_sse_attr" "maxmin")
1946   (set_attr "prefix" "<mask_prefix3>")
1947   (set_attr "mode" "<MODE>")])
1948
;; Scalar min/max on a 128-bit vector: vec_merge mask 1 takes element 0
;; from smaxmin (operand 1, operand 2) and the remaining elements from
;; operand 1 (match_dup).  Alternative 0 is the non-AVX two-operand form,
;; alternative 1 the AVX three-operand form with the SAE substitution.
;; The Intel-syntax side prints operand 2 with %<iptr>.
1949(define_insn "<sse>_vm<code><mode>3<round_saeonly_name>"
1950  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1951	(vec_merge:VF_128
1952	  (smaxmin:VF_128
1953	    (match_operand:VF_128 1 "register_operand" "0,v")
1954	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_constraint>"))
1955	 (match_dup 1)
1956	 (const_int 1)))]
1957  "TARGET_SSE"
1958  "@
1959   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1960   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op3>}"
1961  [(set_attr "isa" "noavx,avx")
1962   (set_attr "type" "sse")
1963   (set_attr "btver2_sse_attr" "maxmin")
1964   (set_attr "prefix" "<round_saeonly_prefix>")
1965   (set_attr "mode" "<ssescalarmode>")])
1966
1967;; These versions of the min/max patterns implement exactly the operations
1968;;   min = (op1 < op2 ? op1 : op2)
1969;;   max = (!(op1 < op2) ? op1 : op2)
1970;; Their operands are not commutative, and thus they may be used in the
1971;; presence of -0.0 and NaN.
1972
;; Packed minimum represented as UNSPEC_IEEE_MIN of (operand 1,
;; operand 2) rather than as an smin rtx.  Operand 1 must be a register
;; and has no commutative marker.  Alternative 0 emits min<suffix> with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vmin<suffix>.
1973(define_insn "*ieee_smin<mode>3"
1974  [(set (match_operand:VF 0 "register_operand" "=v,v")
1975	(unspec:VF
1976	  [(match_operand:VF 1 "register_operand" "0,v")
1977	   (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1978	 UNSPEC_IEEE_MIN))]
1979  "TARGET_SSE"
1980  "@
1981   min<ssemodesuffix>\t{%2, %0|%0, %2}
1982   vmin<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1983  [(set_attr "isa" "noavx,avx")
1984   (set_attr "type" "sseadd")
1985   (set_attr "prefix" "orig,vex")
1986   (set_attr "mode" "<MODE>")])
1987
;; Packed maximum represented as UNSPEC_IEEE_MAX of (operand 1,
;; operand 2) rather than as an smax rtx.  Operand 1 must be a register
;; and has no commutative marker.  Alternative 0 emits max<suffix> with
;; operand 1 tied to the destination; alternative 1 emits the
;; three-operand vmax<suffix>.
1988(define_insn "*ieee_smax<mode>3"
1989  [(set (match_operand:VF 0 "register_operand" "=v,v")
1990	(unspec:VF
1991	  [(match_operand:VF 1 "register_operand" "0,v")
1992	   (match_operand:VF 2 "nonimmediate_operand" "vm,vm")]
1993	 UNSPEC_IEEE_MAX))]
1994  "TARGET_SSE"
1995  "@
1996   max<ssemodesuffix>\t{%2, %0|%0, %2}
1997   vmax<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1998  [(set_attr "isa" "noavx,avx")
1999   (set_attr "type" "sseadd")
2000   (set_attr "prefix" "orig,vex")
2001   (set_attr "mode" "<MODE>")])
2002
;; V4DF add/subtract, emitted as vaddsubpd.  The vec_merge mask 10
;; (binary 1010) selects elements 1 and 3 from the plus and elements 0
;; and 2 from the minus of the same two operands.
2003(define_insn "avx_addsubv4df3"
2004  [(set (match_operand:V4DF 0 "register_operand" "=x")
2005	(vec_merge:V4DF
2006	  (plus:V4DF
2007	    (match_operand:V4DF 1 "register_operand" "x")
2008	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2009	  (minus:V4DF (match_dup 1) (match_dup 2))
2010	  (const_int 10)))]
2011  "TARGET_AVX"
2012  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2013  [(set_attr "type" "sseadd")
2014   (set_attr "prefix" "vex")
2015   (set_attr "mode" "V4DF")])
2016
;; V2DF add/subtract.  The vec_merge mask 2 selects element 1 from the
;; plus and element 0 from the minus of the same two operands.
;; Alternative 0 is the non-AVX addsubpd (operand 1 tied to the
;; destination), alternative 1 the three-operand vaddsubpd.
2017(define_insn "sse3_addsubv2df3"
2018  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2019	(vec_merge:V2DF
2020	  (plus:V2DF
2021	    (match_operand:V2DF 1 "register_operand" "0,x")
2022	    (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm"))
2023	  (minus:V2DF (match_dup 1) (match_dup 2))
2024	  (const_int 2)))]
2025  "TARGET_SSE3"
2026  "@
2027   addsubpd\t{%2, %0|%0, %2}
2028   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2029  [(set_attr "isa" "noavx,avx")
2030   (set_attr "type" "sseadd")
2031   (set_attr "atom_unit" "complex")
2032   (set_attr "prefix" "orig,vex")
2033   (set_attr "mode" "V2DF")])
2034
;; V8SF add/subtract, emitted as vaddsubps.  The vec_merge mask 170
;; (binary 10101010) selects the odd-numbered elements from the plus and
;; the even-numbered elements from the minus of the same two operands.
2035(define_insn "avx_addsubv8sf3"
2036  [(set (match_operand:V8SF 0 "register_operand" "=x")
2037	(vec_merge:V8SF
2038	  (plus:V8SF
2039	    (match_operand:V8SF 1 "register_operand" "x")
2040	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2041	  (minus:V8SF (match_dup 1) (match_dup 2))
2042	  (const_int 170)))]
2043  "TARGET_AVX"
2044  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2045  [(set_attr "type" "sseadd")
2046   (set_attr "prefix" "vex")
2047   (set_attr "mode" "V8SF")])
2048
;; V4SF add/subtract.  The vec_merge mask 10 (binary 1010) selects
;; elements 1 and 3 from the plus and elements 0 and 2 from the minus of
;; the same two operands.  Alternative 0 is the non-AVX addsubps
;; (operand 1 tied to the destination, prefix_rep set to 1), alternative 1
;; the three-operand vaddsubps.
2049(define_insn "sse3_addsubv4sf3"
2050  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2051	(vec_merge:V4SF
2052	  (plus:V4SF
2053	    (match_operand:V4SF 1 "register_operand" "0,x")
2054	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
2055	  (minus:V4SF (match_dup 1) (match_dup 2))
2056	  (const_int 10)))]
2057  "TARGET_SSE3"
2058  "@
2059   addsubps\t{%2, %0|%0, %2}
2060   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2061  [(set_attr "isa" "noavx,avx")
2062   (set_attr "type" "sseadd")
2063   (set_attr "prefix" "orig,vex")
2064   (set_attr "prefix_rep" "1,*")
2065   (set_attr "mode" "V4SF")])
2066
;; V4DF horizontal add/subtract (plusminus iterator), emitted as
;; vh<plusminus_mnemonic>pd.  Result elements, low to high:
;;   op1[0] OP op1[1], op2[0] OP op2[1], op1[2] OP op1[3], op2[2] OP op2[3]
2067(define_insn "avx_h<plusminus_insn>v4df3"
2068  [(set (match_operand:V4DF 0 "register_operand" "=x")
2069	(vec_concat:V4DF
2070	  (vec_concat:V2DF
2071	    (plusminus:DF
2072	      (vec_select:DF
2073		(match_operand:V4DF 1 "register_operand" "x")
2074		(parallel [(const_int 0)]))
2075	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2076	    (plusminus:DF
2077	      (vec_select:DF
2078		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
2079		(parallel [(const_int 0)]))
2080	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2081	  (vec_concat:V2DF
2082	    (plusminus:DF
2083	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2084	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2085	    (plusminus:DF
2086	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2087	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2088  "TARGET_AVX"
2089  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2090  [(set_attr "type" "sseadd")
2091   (set_attr "prefix" "vex")
2092   (set_attr "mode" "V4DF")])
2093
;; Expander for V2DF horizontal add.  It has no preparation code and
;; emits the canonical form with fixed selectors: element 0 of the result
;; is op1[0] + op1[1], element 1 is op2[0] + op2[1].
2094(define_expand "sse3_haddv2df3"
2095  [(set (match_operand:V2DF 0 "register_operand")
2096	(vec_concat:V2DF
2097	  (plus:DF
2098	    (vec_select:DF
2099	      (match_operand:V2DF 1 "register_operand")
2100	      (parallel [(const_int 0)]))
2101	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2102	  (plus:DF
2103	    (vec_select:DF
2104	      (match_operand:V2DF 2 "nonimmediate_operand")
2105	      (parallel [(const_int 0)]))
2106	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2107  "TARGET_SSE3")
2108
;; Matching insn for V2DF horizontal add.  The element selectors are
;; operands 3-6 (each 0 or 1) instead of fixed constants; the insn
;; condition only requires that the two selectors within each sum differ,
;; so either ordering of the addends is accepted.  Alternative 0 emits
;; haddpd (operand 1 tied to the destination), alternative 1 vhaddpd.
2109(define_insn "*sse3_haddv2df3"
2110  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2111	(vec_concat:V2DF
2112	  (plus:DF
2113	    (vec_select:DF
2114	      (match_operand:V2DF 1 "register_operand" "0,x")
2115	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2116	    (vec_select:DF
2117	      (match_dup 1)
2118	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2119	  (plus:DF
2120	    (vec_select:DF
2121	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2122	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2123	    (vec_select:DF
2124	      (match_dup 2)
2125	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2126  "TARGET_SSE3
2127   && INTVAL (operands[3]) != INTVAL (operands[4])
2128   && INTVAL (operands[5]) != INTVAL (operands[6])"
2129  "@
2130   haddpd\t{%2, %0|%0, %2}
2131   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2132  [(set_attr "isa" "noavx,avx")
2133   (set_attr "type" "sseadd")
2134   (set_attr "prefix" "orig,vex")
2135   (set_attr "mode" "V2DF")])
2136
;; V2DF horizontal subtract with fixed selectors: element 0 of the result
;; is op1[0] - op1[1], element 1 is op2[0] - op2[1].  Alternative 0 emits
;; hsubpd (operand 1 tied to the destination), alternative 1 vhsubpd.
2137(define_insn "sse3_hsubv2df3"
2138  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2139	(vec_concat:V2DF
2140	  (minus:DF
2141	    (vec_select:DF
2142	      (match_operand:V2DF 1 "register_operand" "0,x")
2143	      (parallel [(const_int 0)]))
2144	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2145	  (minus:DF
2146	    (vec_select:DF
2147	      (match_operand:V2DF 2 "nonimmediate_operand" "xm,xm")
2148	      (parallel [(const_int 0)]))
2149	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2150  "TARGET_SSE3"
2151  "@
2152   hsubpd\t{%2, %0|%0, %2}
2153   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2154  [(set_attr "isa" "noavx,avx")
2155   (set_attr "type" "sseadd")
2156   (set_attr "prefix" "orig,vex")
2157   (set_attr "mode" "V2DF")])
2158
;; DF result that is the sum of the two elements of V2DF operand 1.  The
;; selectors are operands 2 and 3 (each 0 or 1) and must differ, so both
;; addend orders match.  The same register is passed as both sources of
;; the instruction: haddpd %0,%0 for non-AVX (operand 1 tied to the
;; destination), vhaddpd %1,%1,%0 for AVX.
2159(define_insn "*sse3_haddv2df3_low"
2160  [(set (match_operand:DF 0 "register_operand" "=x,x")
2161	(plus:DF
2162	  (vec_select:DF
2163	    (match_operand:V2DF 1 "register_operand" "0,x")
2164	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2165	  (vec_select:DF
2166	    (match_dup 1)
2167	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2168  "TARGET_SSE3
2169   && INTVAL (operands[2]) != INTVAL (operands[3])"
2170  "@
2171   haddpd\t{%0, %0|%0, %0}
2172   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2173  [(set_attr "isa" "noavx,avx")
2174   (set_attr "type" "sseadd1")
2175   (set_attr "prefix" "orig,vex")
2176   (set_attr "mode" "V2DF")])
2177
;; DF result op1[0] - op1[1] of V2DF operand 1.  Subtraction is not
;; commutative, so the selectors are fixed constants here.  The same
;; register is passed as both sources of the instruction: hsubpd %0,%0
;; for non-AVX (operand 1 tied to the destination), vhsubpd %1,%1,%0 for
;; AVX.
2178(define_insn "*sse3_hsubv2df3_low"
2179  [(set (match_operand:DF 0 "register_operand" "=x,x")
2180	(minus:DF
2181	  (vec_select:DF
2182	    (match_operand:V2DF 1 "register_operand" "0,x")
2183	    (parallel [(const_int 0)]))
2184	  (vec_select:DF
2185	    (match_dup 1)
2186	    (parallel [(const_int 1)]))))]
2187  "TARGET_SSE3"
2188  "@
2189   hsubpd\t{%0, %0|%0, %0}
2190   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2191  [(set_attr "isa" "noavx,avx")
2192   (set_attr "type" "sseadd1")
2193   (set_attr "prefix" "orig,vex")
2194   (set_attr "mode" "V2DF")])
2195
;; V8SF horizontal add/subtract (plusminus iterator), emitted as
;; vh<plusminus_mnemonic>ps.  Result elements, low to high:
;;   op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3],
;;   op1[4] OP op1[5], op1[6] OP op1[7], op2[4] OP op2[5], op2[6] OP op2[7]
2196(define_insn "avx_h<plusminus_insn>v8sf3"
2197  [(set (match_operand:V8SF 0 "register_operand" "=x")
2198	(vec_concat:V8SF
2199	  (vec_concat:V4SF
2200	    (vec_concat:V2SF
2201	      (plusminus:SF
2202		(vec_select:SF
2203		  (match_operand:V8SF 1 "register_operand" "x")
2204		  (parallel [(const_int 0)]))
2205		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2206	      (plusminus:SF
2207		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2208		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2209	    (vec_concat:V2SF
2210	      (plusminus:SF
2211		(vec_select:SF
2212		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2213		  (parallel [(const_int 0)]))
2214		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2215	      (plusminus:SF
2216		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2217		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2218	  (vec_concat:V4SF
2219	    (vec_concat:V2SF
2220	      (plusminus:SF
2221		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2222		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2223	      (plusminus:SF
2224		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2225		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2226	    (vec_concat:V2SF
2227	      (plusminus:SF
2228		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2229		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2230	      (plusminus:SF
2231		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2232		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2233  "TARGET_AVX"
2234  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2235  [(set_attr "type" "sseadd")
2236   (set_attr "prefix" "vex")
2237   (set_attr "mode" "V8SF")])
2238
;; V4SF horizontal add/subtract (plusminus iterator).  Result elements,
;; low to high:
;;   op1[0] OP op1[1], op1[2] OP op1[3], op2[0] OP op2[1], op2[2] OP op2[3]
;; Alternative 0 is the non-AVX two-operand form (operand 1 tied to the
;; destination, prefix_rep set to 1), alternative 1 the AVX form.
2239(define_insn "sse3_h<plusminus_insn>v4sf3"
2240  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2241	(vec_concat:V4SF
2242	  (vec_concat:V2SF
2243	    (plusminus:SF
2244	      (vec_select:SF
2245		(match_operand:V4SF 1 "register_operand" "0,x")
2246		(parallel [(const_int 0)]))
2247	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2248	    (plusminus:SF
2249	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2250	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2251	  (vec_concat:V2SF
2252	    (plusminus:SF
2253	      (vec_select:SF
2254		(match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
2255		(parallel [(const_int 0)]))
2256	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2257	    (plusminus:SF
2258	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2259	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2260  "TARGET_SSE3"
2261  "@
2262   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2263   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2264  [(set_attr "isa" "noavx,avx")
2265   (set_attr "type" "sseadd")
2266   (set_attr "atom_unit" "complex")
2267   (set_attr "prefix" "orig,vex")
2268   (set_attr "prefix_rep" "1,*")
2269   (set_attr "mode" "V4SF")])
2270
;; V8DF sum-reduction expander.  Delegates entirely to ix86_expand_reduc,
;; passing gen_addv8df3 as the combining generator, then finishes with
;; DONE.  Needs TARGET_AVX512F.
2271(define_expand "reduc_splus_v8df"
2272  [(match_operand:V8DF 0 "register_operand")
2273   (match_operand:V8DF 1 "register_operand")]
2274  "TARGET_AVX512F"
2275{
2276  ix86_expand_reduc (gen_addv8df3, operands[0], operands[1]);
2277  DONE;
2278})
2279
;; V4DF sum-reduction expander.  Emits three insns using two fresh
;; pseudos: a horizontal add of operand 1 with itself into tmp, a
;; vperm2f128 of tmp with itself and immediate 1 into tmp2 (presumably
;; exchanging the 128-bit halves -- confirm against the vperm2f128
;; pattern), and a vector add of tmp and tmp2 into operand 0.
2280(define_expand "reduc_splus_v4df"
2281  [(match_operand:V4DF 0 "register_operand")
2282   (match_operand:V4DF 1 "register_operand")]
2283  "TARGET_AVX"
2284{
2285  rtx tmp = gen_reg_rtx (V4DFmode);
2286  rtx tmp2 = gen_reg_rtx (V4DFmode);
2287  emit_insn (gen_avx_haddv4df3 (tmp, operands[1], operands[1]));
2288  emit_insn (gen_avx_vperm2f128v4df3 (tmp2, tmp, tmp, GEN_INT (1)));
2289  emit_insn (gen_addv4df3 (operands[0], tmp, tmp2));
2290  DONE;
2291})
2292
;; V2DF sum-reduction expander.  Emits a single sse3_haddv2df3 with
;; operand 1 used as both sources and operand 0 as the destination.
;; Needs TARGET_SSE3.
2293(define_expand "reduc_splus_v2df"
2294  [(match_operand:V2DF 0 "register_operand")
2295   (match_operand:V2DF 1 "register_operand")]
2296  "TARGET_SSE3"
2297{
2298  emit_insn (gen_sse3_haddv2df3 (operands[0], operands[1], operands[1]));
2299  DONE;
2300})
2301
;; V16SF sum-reduction expander.  Delegates entirely to
;; ix86_expand_reduc, passing gen_addv16sf3 as the combining generator,
;; then finishes with DONE.  Needs TARGET_AVX512F.
2302(define_expand "reduc_splus_v16sf"
2303  [(match_operand:V16SF 0 "register_operand")
2304   (match_operand:V16SF 1 "register_operand")]
2305  "TARGET_AVX512F"
2306{
2307  ix86_expand_reduc (gen_addv16sf3, operands[0], operands[1]);
2308  DONE;
2309})
2310
;; V8SF sum-reduction expander.  Emits four insns using two pseudos:
;; hadd of operand 1 with itself into tmp, hadd of tmp with itself into
;; tmp2, a vperm2f128 of tmp2 with itself and immediate 1 back into tmp
;; (tmp is reused here; presumably this exchanges the 128-bit halves --
;; confirm against the vperm2f128 pattern), and finally a vector add of
;; tmp and tmp2 into operand 0.
2311(define_expand "reduc_splus_v8sf"
2312  [(match_operand:V8SF 0 "register_operand")
2313   (match_operand:V8SF 1 "register_operand")]
2314  "TARGET_AVX"
2315{
2316  rtx tmp = gen_reg_rtx (V8SFmode);
2317  rtx tmp2 = gen_reg_rtx (V8SFmode);
2318  emit_insn (gen_avx_haddv8sf3 (tmp, operands[1], operands[1]));
2319  emit_insn (gen_avx_haddv8sf3 (tmp2, tmp, tmp));
2320  emit_insn (gen_avx_vperm2f128v8sf3 (tmp, tmp2, tmp2, GEN_INT (1)));
2321  emit_insn (gen_addv8sf3 (operands[0], tmp, tmp2));
2322  DONE;
2323})
2324
;; V4SF sum-reduction expander.  With TARGET_SSE3 it emits two horizontal
;; adds (operand 1 with itself into a fresh pseudo, then that pseudo with
;; itself into operand 0).  Without SSE3 it falls back to
;; ix86_expand_reduc with gen_addv4sf3 as the combining generator.
2325(define_expand "reduc_splus_v4sf"
2326  [(match_operand:V4SF 0 "register_operand")
2327   (match_operand:V4SF 1 "register_operand")]
2328  "TARGET_SSE"
2329{
2330  if (TARGET_SSE3)
2331    {
2332      rtx tmp = gen_reg_rtx (V4SFmode);
2333      emit_insn (gen_sse3_haddv4sf3 (tmp, operands[1], operands[1]));
2334      emit_insn (gen_sse3_haddv4sf3 (operands[0], tmp, tmp));
2335    }
2336  else
2337    ix86_expand_reduc (gen_addv4sf3, operands[0], operands[1]);
2338  DONE;
2339})
2340
2341;; Modes handled by reduc_sm{in,ax}* patterns.
;; Each entry pairs a vector mode with the target condition under which
;; the iterator includes it: the 256-bit integer modes need AVX2, the
;; 256-bit float modes AVX, V4SF plain SSE, the 512-bit byte/word modes
;; AVX512BW and the remaining 512-bit modes AVX512F.
2342(define_mode_iterator REDUC_SMINMAX_MODE
2343  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2344   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2345   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2346   (V4SF "TARGET_SSE") (V64QI "TARGET_AVX512BW")
2347   (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2348   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2349   (V8DF "TARGET_AVX512F")])
2350
;; Signed min/max reduction expander over REDUC_SMINMAX_MODE.  The insn
;; condition is empty, so availability comes only from the per-mode
;; conditions in the iterator.  Delegates to ix86_expand_reduc with the
;; matching gen_<code><mode>3 generator and finishes with DONE.
2351(define_expand "reduc_<code>_<mode>"
2352  [(smaxmin:REDUC_SMINMAX_MODE
2353     (match_operand:REDUC_SMINMAX_MODE 0 "register_operand")
2354     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2355  ""
2356{
2357  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2358  DONE;
2359})
2360
;; Unsigned min/max reduction expander over the VI_AVX512BW modes.
;; Delegates to ix86_expand_reduc with the matching gen_<code><mode>3
;; generator and finishes with DONE.  Needs TARGET_AVX512F.
2361(define_expand "reduc_<code>_<mode>"
2362  [(umaxmin:VI_AVX512BW
2363     (match_operand:VI_AVX512BW 0 "register_operand")
2364     (match_operand:VI_AVX512BW 1 "register_operand"))]
2365  "TARGET_AVX512F"
2366{
2367  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2368  DONE;
2369})
2370
;; Unsigned min/max reduction expander over the VI_256 modes.  Delegates
;; to ix86_expand_reduc with the matching gen_<code><mode>3 generator and
;; finishes with DONE.  Needs TARGET_AVX2.
2371(define_expand "reduc_<code>_<mode>"
2372  [(umaxmin:VI_256
2373     (match_operand:VI_256 0 "register_operand")
2374     (match_operand:VI_256 1 "register_operand"))]
2375  "TARGET_AVX2"
2376{
2377  ix86_expand_reduc (gen_<code><mode>3, operands[0], operands[1]);
2378  DONE;
2379})
2380
;; Unsigned-minimum reduction expander for V8HI only.  Delegates to
;; ix86_expand_reduc with gen_uminv8hi3 and finishes with DONE.  Needs
;; TARGET_SSE4_1.
2381(define_expand "reduc_umin_v8hi"
2382  [(umin:V8HI
2383     (match_operand:V8HI 0 "register_operand")
2384     (match_operand:V8HI 1 "register_operand"))]
2385  "TARGET_SSE4_1"
2386{
2387  ix86_expand_reduc (gen_uminv8hi3, operands[0], operands[1]);
2388  DONE;
2389})
2390
;; Packed UNSPEC_REDUCE of operand 1 with an 8-bit immediate (operand 2,
;; range 0-255), emitted as vreduce<ssemodesuffix>.  Name and destination
;; go through the <mask_name> / <mask_operand3> substitutions.
;; Needs TARGET_AVX512DQ.
2391(define_insn "<mask_codefor>reducep<mode><mask_name>"
2392  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2393	(unspec:VF_AVX512VL
2394	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2395	   (match_operand:SI 2 "const_0_to_255_operand")]
2396	  UNSPEC_REDUCE))]
2397  "TARGET_AVX512DQ"
2398  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2399  [(set_attr "type" "sse")
2400   (set_attr "prefix" "evex")
2401   (set_attr "mode" "<MODE>")])
2402
;; Scalar form of the reduce pattern, emitted as
;; vreduce<ssescalarmodesuffix>.  The vec_merge with mask 1 takes
;; element 0 from UNSPEC_REDUCE of (operand 1, operand 2, immediate
;; operand 3) and every other element from operand 1 (match_dup).
;; Operand 2 may be memory.  The instruction takes a scalar-sized memory
;; source, so the Intel-syntax half of the template prints it through
;; %<iptr> (as avx_vmcmp<mode>3 does for its operand 2) rather than with
;; the full vector width implied by the operand's mode.
2403(define_insn "reduces<mode>"
2404  [(set (match_operand:VF_128 0 "register_operand" "=v")
2405	(vec_merge:VF_128
2406	  (unspec:VF_128
2407	    [(match_operand:VF_128 1 "register_operand" "v")
2408	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2409	     (match_operand:SI 3 "const_0_to_255_operand")]
2410	    UNSPEC_REDUCE)
2411	  (match_dup 1)
2412	  (const_int 1)))]
2413  "TARGET_AVX512DQ"
2414  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2415  [(set_attr "type" "sse")
2416   (set_attr "prefix" "evex")
2417   (set_attr "mode" "<MODE>")])
2418
2419;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2420;;
2421;; Parallel floating point comparisons
2422;;
2423;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2424
;; Packed floating-point compare with an immediate predicate (operand 3,
;; range 0-31), represented as UNSPEC_PCMP and emitted as
;; vcmp<ssemodesuffix>.  The result has the same vector mode as the
;; inputs.  Needs TARGET_AVX.
2425(define_insn "avx_cmp<mode>3"
2426  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2427	(unspec:VF_128_256
2428	  [(match_operand:VF_128_256 1 "register_operand" "x")
2429	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2430	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
2431	  UNSPEC_PCMP))]
2432  "TARGET_AVX"
2433  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2434  [(set_attr "type" "ssecmp")
2435   (set_attr "length_immediate" "1")
2436   (set_attr "prefix" "vex")
2437   (set_attr "mode" "<MODE>")])
2438
;; Scalar variant of avx_cmp<mode>3: vec_merge mask 1 takes element 0
;; from the UNSPEC_PCMP result and the remaining elements from operand 1
;; (match_dup).  Emitted as vcmp<ssescalarmodesuffix>; the Intel-syntax
;; side prints operand 2 with %<iptr>.
2439(define_insn "avx_vmcmp<mode>3"
2440  [(set (match_operand:VF_128 0 "register_operand" "=x")
2441	(vec_merge:VF_128
2442	  (unspec:VF_128
2443	    [(match_operand:VF_128 1 "register_operand" "x")
2444	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2445	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2446	    UNSPEC_PCMP)
2447	 (match_dup 1)
2448	 (const_int 1)))]
2449  "TARGET_AVX"
2450  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2451  [(set_attr "type" "ssecmp")
2452   (set_attr "length_immediate" "1")
2453   (set_attr "prefix" "vex")
2454   (set_attr "mode" "<ssescalarmode>")])
2455
;; Packed compare expressed as a real comparison rtx (operand 3, any
;; sse_comparison_operator).  This variant only matches when the
;; comparison code's rtx class is RTX_COMM_COMPARE, which is why
;; operand 1 may carry the commutative "%" marker.  The %D3 modifier
;; places the predicate text inside the cmp/vcmp mnemonic.
2456(define_insn "*<sse>_maskcmp<mode>3_comm"
2457  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2458	(match_operator:VF_128_256 3 "sse_comparison_operator"
2459	  [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2460	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2461  "TARGET_SSE
2462   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2463  "@
2464   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2465   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2466  [(set_attr "isa" "noavx,avx")
2467   (set_attr "type" "ssecmp")
2468   (set_attr "length_immediate" "1")
2469   (set_attr "prefix" "orig,vex")
2470   (set_attr "mode" "<MODE>")])
2471
;; General packed compare expressed as a comparison rtx (operand 3, any
;; sse_comparison_operator).  No commutative marker: operand order is
;; kept as written.  Alternative 0 is the non-AVX form (operand 1 tied to
;; the destination), alternative 1 the AVX form; %D3 places the predicate
;; text inside the mnemonic.
2472(define_insn "<sse>_maskcmp<mode>3"
2473  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2474	(match_operator:VF_128_256 3 "sse_comparison_operator"
2475	  [(match_operand:VF_128_256 1 "register_operand" "0,x")
2476	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,xm")]))]
2477  "TARGET_SSE"
2478  "@
2479   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2480   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2481  [(set_attr "isa" "noavx,avx")
2482   (set_attr "type" "ssecmp")
2483   (set_attr "length_immediate" "1")
2484   (set_attr "prefix" "orig,vex")
2485   (set_attr "mode" "<MODE>")])
2486
;; Scalar compare expressed as a comparison rtx: vec_merge mask 1 takes
;; element 0 from the comparison (operand 3) of operands 1 and 2 and the
;; remaining elements from operand 1 (match_dup).  The Intel-syntax side
;; prints operand 2 with %<iptr>; %D3 places the predicate text inside
;; the mnemonic.
2487(define_insn "<sse>_vmmaskcmp<mode>3"
2488  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2489	(vec_merge:VF_128
2490	 (match_operator:VF_128 3 "sse_comparison_operator"
2491	   [(match_operand:VF_128 1 "register_operand" "0,x")
2492	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2493	 (match_dup 1)
2494	 (const_int 1)))]
2495  "TARGET_SSE"
2496  "@
2497   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2498   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2499  [(set_attr "isa" "noavx,avx")
2500   (set_attr "type" "ssecmp")
2501   (set_attr "length_immediate" "1,*")
2502   (set_attr "prefix" "orig,vex")
2503   (set_attr "mode" "<ssescalarmode>")])
2504
;; Maps a vector mode to the predicate used for the compare immediate:
;; every floating-point mode listed accepts 0-31, every integer mode
;; listed accepts 0-7.
2505(define_mode_attr cmp_imm_predicate
2506  [(V16SF "const_0_to_31_operand")  (V8DF "const_0_to_31_operand")
2507   (V16SI "const_0_to_7_operand")   (V8DI "const_0_to_7_operand")
2508   (V8SF "const_0_to_31_operand")   (V4DF "const_0_to_31_operand")
2509   (V8SI "const_0_to_7_operand")    (V4DI "const_0_to_7_operand")
2510   (V4SF "const_0_to_31_operand")   (V2DF "const_0_to_31_operand")
2511   (V4SI "const_0_to_7_operand")    (V2DI "const_0_to_7_operand")
2512   (V32HI "const_0_to_7_operand")   (V64QI "const_0_to_7_operand")
2513   (V16HI "const_0_to_7_operand")   (V32QI "const_0_to_7_operand")
2514   (V8HI "const_0_to_7_operand")    (V16QI "const_0_to_7_operand")])
2515
;; Compare over the V48_AVX512VL modes producing a mask-register result
;; ("Yk", mode <avx512fmaskmode>).  The immediate predicate (operand 3)
;; is range-checked per mode via <cmp_imm_predicate>.  The name and
;; template carry the <mask_scalar_merge_name> and <round_saeonly_name>
;; substitutions; <sseintprefix> is spliced into the mnemonic.
2516(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2517  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2518	(unspec:<avx512fmaskmode>
2519	  [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2520	   (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2521	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2522	  UNSPEC_PCMP))]
2523  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2524  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2525  [(set_attr "type" "ssecmp")
2526   (set_attr "length_immediate" "1")
2527   (set_attr "prefix" "evex")
2528   (set_attr "mode" "<sseinsnmode>")])
2529
;; Integer compare (UNSPEC_PCMP) over the VI12_AVX512VL modes producing
;; a mask-register result, emitted as vpcmp<ssemodesuffix>.  The
;; immediate predicate (operand 3) is range-checked via
;; <cmp_imm_predicate>.  Needs TARGET_AVX512BW.
2530(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2531  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2532	(unspec:<avx512fmaskmode>
2533	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2534	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2535	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2536	  UNSPEC_PCMP))]
2537  "TARGET_AVX512BW"
2538  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2539  [(set_attr "type" "ssecmp")
2540   (set_attr "length_immediate" "1")
2541   (set_attr "prefix" "evex")
2542   (set_attr "mode" "<sseinsnmode>")])
2543
;; Unsigned integer compare (UNSPEC_UNSIGNED_PCMP) over the
;; VI12_AVX512VL modes producing a mask-register result, emitted as
;; vpcmpu<ssemodesuffix>.  The immediate predicate (operand 3) must be in
;; the range 0-7.  Needs TARGET_AVX512BW.
2544(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2545  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2546	(unspec:<avx512fmaskmode>
2547	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2548	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2549	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2550	  UNSPEC_UNSIGNED_PCMP))]
2551  "TARGET_AVX512BW"
2552  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2553  [(set_attr "type" "ssecmp")
2554   (set_attr "length_immediate" "1")
2555   (set_attr "prefix" "evex")
2556   (set_attr "mode" "<sseinsnmode>")])
2557
;; Unsigned integer compare (UNSPEC_UNSIGNED_PCMP) over the
;; VI48_AVX512VL modes producing a mask-register result, emitted as
;; vpcmpu<ssemodesuffix>.  The immediate predicate (operand 3) must be in
;; the range 0-7.  Needs TARGET_AVX512F.
2558(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2559  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2560	(unspec:<avx512fmaskmode>
2561	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2562	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2563	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2564	  UNSPEC_UNSIGNED_PCMP))]
2565  "TARGET_AVX512F"
2566  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2567  [(set_attr "type" "ssecmp")
2568   (set_attr "length_immediate" "1")
2569   (set_attr "prefix" "evex")
2570   (set_attr "mode" "<sseinsnmode>")])
2571
;; Scalar compare into a mask register: the UNSPEC_PCMP result of
;; operands 1 and 2 under immediate predicate operand 3 (range 0-31) is
;; ANDed with constant 1, so only bit 0 of the mask can be set.  Emitted
;; as vcmp<ssescalarmodesuffix>, with the <round_saeonly_name>
;; substitution supplying the optional SAE operand.
;; Operand 2 may be memory.  The instruction takes a scalar-sized memory
;; source, so the Intel-syntax half of the template prints it through
;; %<iptr> (as avx_vmcmp<mode>3 does) rather than with the full vector
;; width implied by the operand's mode.
2572(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
2573  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2574	(and:<avx512fmaskmode>
2575	  (unspec:<avx512fmaskmode>
2576	    [(match_operand:VF_128 1 "register_operand" "v")
2577	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2578	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2579	    UNSPEC_PCMP)
2580	  (const_int 1)))]
2581  "TARGET_AVX512F"
2582  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
2583  [(set_attr "type" "ssecmp")
2584   (set_attr "length_immediate" "1")
2585   (set_attr "prefix" "evex")
2586   (set_attr "mode" "<ssescalarmode>")])
2587
;; Write-masked scalar compare into a mask register: the UNSPEC_PCMP
;; result of operands 1 and 2 under immediate predicate operand 3 (range
;; 0-31) is ANDed with (mask operand 4 AND 1), so only bit 0 can be set
;; and only when bit 0 of operand 4 is set.  The template prints the
;; mask as {%4} after the destination.
;; Operand 2 may be memory.  The instruction takes a scalar-sized memory
;; source, so the Intel-syntax half of the template prints it through
;; %<iptr> (as avx_vmcmp<mode>3 does) rather than with the full vector
;; width implied by the operand's mode.
2588(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
2589  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2590	(and:<avx512fmaskmode>
2591	  (unspec:<avx512fmaskmode>
2592	    [(match_operand:VF_128 1 "register_operand" "v")
2593	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2594	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2595	    UNSPEC_PCMP)
2596	  (and:<avx512fmaskmode>
2597	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
2598	    (const_int 1))))]
2599  "TARGET_AVX512F"
2600  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
2601  [(set_attr "type" "ssecmp")
2602   (set_attr "length_immediate" "1")
2603   (set_attr "prefix" "evex")
2604   (set_attr "mode" "<ssescalarmode>")])
2605
;; Packed compare expressed as a comparison rtx (operand 3, any
;; sse_comparison_operator) whose result is a mask register ("Yk", mode
;; <avx512fmaskmode>) instead of a vector.  Emitted as the three-operand
;; vcmp with the predicate text supplied by %D3.  Needs TARGET_AVX512F.
2606(define_insn "avx512f_maskcmp<mode>3"
2607  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
2608	(match_operator:<avx512fmaskmode> 3 "sse_comparison_operator"
2609	  [(match_operand:VF 1 "register_operand" "v")
2610	   (match_operand:VF 2 "nonimmediate_operand" "vm")]))]
2611  "TARGET_AVX512F"
2612  "vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2613  [(set_attr "type" "ssecmp")
2614   (set_attr "length_immediate" "1")
2615   (set_attr "prefix" "evex")
2616   (set_attr "mode" "<MODE>")])
2617
;; Compare element 0 of two vector operands and set FLAGS_REG in CCFP mode.
;; Emits [v]comi<ssemodesuffix>.  The prefix_data16 attribute is "1" only
;; when the insn mode is DF, and "0" otherwise.
2618(define_insn "<sse>_comi<round_saeonly_name>"
2619  [(set (reg:CCFP FLAGS_REG)
2620	(compare:CCFP
2621	  (vec_select:MODEF
2622	    (match_operand:<ssevecmode> 0 "register_operand" "v")
2623	    (parallel [(const_int 0)]))
2624	  (vec_select:MODEF
2625	    (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2626	    (parallel [(const_int 0)]))))]
2627  "SSE_FLOAT_MODE_P (<MODE>mode)"
2628  "%vcomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2629  [(set_attr "type" "ssecomi")
2630   (set_attr "prefix" "maybe_vex")
2631   (set_attr "prefix_rep" "0")
2632   (set (attr "prefix_data16")
2633	(if_then_else (eq_attr "mode" "DF")
2634		      (const_string "1")
2635		      (const_string "0")))
2636   (set_attr "mode" "<MODE>")])
2637
;; Same shape as the comi pattern, but the flags are set in CCFPU mode and
;; the emitted mnemonic is [v]ucomi<ssemodesuffix>.  Element 0 of each
;; vector operand is compared.
2638(define_insn "<sse>_ucomi<round_saeonly_name>"
2639  [(set (reg:CCFPU FLAGS_REG)
2640	(compare:CCFPU
2641	  (vec_select:MODEF
2642	    (match_operand:<ssevecmode> 0 "register_operand" "v")
2643	    (parallel [(const_int 0)]))
2644	  (vec_select:MODEF
2645	    (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
2646	    (parallel [(const_int 0)]))))]
2647  "SSE_FLOAT_MODE_P (<MODE>mode)"
2648  "%vucomi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
2649  [(set_attr "type" "ssecomi")
2650   (set_attr "prefix" "maybe_vex")
2651   (set_attr "prefix_rep" "0")
2652   (set (attr "prefix_data16")
2653	(if_then_else (eq_attr "mode" "DF")
2654		      (const_string "1")
2655		      (const_string "0")))
2656   (set_attr "mode" "<MODE>")])
2657
;; Vector conditional select: operand 0 = (operand 4 <op3> operand 5)
;; ? operand 1 : operand 2, with data in a V_512 mode and the comparison
;; in a VF_512 mode.  Only enabled when both modes have the same number of
;; elements.  The expansion is delegated to ix86_expand_fp_vcond, which is
;; asserted to succeed.
2658(define_expand "vcond<V_512:mode><VF_512:mode>"
2659  [(set (match_operand:V_512 0 "register_operand")
2660	(if_then_else:V_512
2661	  (match_operator 3 ""
2662	    [(match_operand:VF_512 4 "nonimmediate_operand")
2663	     (match_operand:VF_512 5 "nonimmediate_operand")])
2664	  (match_operand:V_512 1 "general_operand")
2665	  (match_operand:V_512 2 "general_operand")))]
2666  "TARGET_AVX512F
2667   && (GET_MODE_NUNITS (<V_512:MODE>mode)
2668       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
2669{
2670  bool ok = ix86_expand_fp_vcond (operands);
2671  gcc_assert (ok);
2672  DONE;
2673})
2674
;; Vector conditional select for V_256 data with a VF_256 comparison.
;; Requires TARGET_AVX and equal element counts in the two modes; the
;; expansion is delegated to ix86_expand_fp_vcond (asserted to succeed).
2675(define_expand "vcond<V_256:mode><VF_256:mode>"
2676  [(set (match_operand:V_256 0 "register_operand")
2677	(if_then_else:V_256
2678	  (match_operator 3 ""
2679	    [(match_operand:VF_256 4 "nonimmediate_operand")
2680	     (match_operand:VF_256 5 "nonimmediate_operand")])
2681	  (match_operand:V_256 1 "general_operand")
2682	  (match_operand:V_256 2 "general_operand")))]
2683  "TARGET_AVX
2684   && (GET_MODE_NUNITS (<V_256:MODE>mode)
2685       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
2686{
2687  bool ok = ix86_expand_fp_vcond (operands);
2688  gcc_assert (ok);
2689  DONE;
2690})
2691
;; Vector conditional select for V_128 data with a VF_128 comparison.
;; Requires TARGET_SSE and equal element counts in the two modes; the
;; expansion is delegated to ix86_expand_fp_vcond (asserted to succeed).
2692(define_expand "vcond<V_128:mode><VF_128:mode>"
2693  [(set (match_operand:V_128 0 "register_operand")
2694	(if_then_else:V_128
2695	  (match_operator 3 ""
2696	    [(match_operand:VF_128 4 "nonimmediate_operand")
2697	     (match_operand:VF_128 5 "nonimmediate_operand")])
2698	  (match_operand:V_128 1 "general_operand")
2699	  (match_operand:V_128 2 "general_operand")))]
2700  "TARGET_SSE
2701   && (GET_MODE_NUNITS (<V_128:MODE>mode)
2702       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
2703{
2704  bool ok = ix86_expand_fp_vcond (operands);
2705  gcc_assert (ok);
2706  DONE;
2707})
2708
2709;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2710;;
2711;; Parallel floating point logical operations
2712;;
2713;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2714
;; operand 0 = (~operand 1) & operand 2 for the VF_128_256 modes.
;; Alternative 0 (isa noavx) is the two-operand andn form with operand 1
;; tied to operand 0; alternative 1 (isa avx) is the three-operand vandn
;; form.  The mnemonic suffix is forced to "ps" when the insn's "mode"
;; attribute is V8SF or V4SF.  When <mask_applied> is true and
;; TARGET_AVX512DQ is not available, the integer form vpandn[qd] is
;; emitted instead, regardless of the alternative.  The assembled template
;; is returned in a static buffer.
2715(define_insn "<sse>_andnot<mode>3<mask_name>"
2716  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2717	(and:VF_128_256
2718	  (not:VF_128_256
2719	    (match_operand:VF_128_256 1 "register_operand" "0,v"))
2720	  (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2721  "TARGET_SSE && <mask_avx512vl_condition>"
2722{
2723  static char buf[128];
2724  const char *ops;
2725  const char *suffix;
2726
2727  switch (get_attr_mode (insn))
2728    {
2729    case MODE_V8SF:
2730    case MODE_V4SF:
2731      suffix = "ps";
2732      break;
2733    default:
2734      suffix = "<ssemodesuffix>";
2735    }
2736
2737  switch (which_alternative)
2738    {
2739    case 0:
2740      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2741      break;
2742    case 1:
2743      ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2744      break;
2745    default:
2746      gcc_unreachable ();
2747    }
2748
2749  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
2750  if (<mask_applied> && !TARGET_AVX512DQ)
2751    {
2752      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2753      ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2754    }
2755
2756  snprintf (buf, sizeof (buf), ops, suffix);
2757  return buf;
2758}
2759  [(set_attr "isa" "noavx,avx")
2760   (set_attr "type" "sselog")
2761   (set_attr "prefix" "orig,maybe_evex")
2762   (set (attr "mode")
2763	(cond [(and (match_test "<MODE_SIZE> == 16")
2764		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2765		 (const_string "<ssePSmode>")
2766	       (match_test "TARGET_AVX")
2767		 (const_string "<MODE>")
2768	       (match_test "optimize_function_for_size_p (cfun)")
2769		 (const_string "V4SF")
2770	       ]
2771	       (const_string "<MODE>")))])
2772
2773
;; operand 0 = (~operand 1) & operand 2 for the VF_512 modes (three-operand
;; form only).  With TARGET_AVX512DQ the template is vandn<ssemodesuffix>;
;; without it the "p" infix and a "q"/"d" suffix (chosen by whether the
;; element mode is DFmode) turn it into vpandnq / vpandnd.
2774(define_insn "<sse>_andnot<mode>3<mask_name>"
2775  [(set (match_operand:VF_512 0 "register_operand" "=v")
2776	(and:VF_512
2777	  (not:VF_512
2778	    (match_operand:VF_512 1 "register_operand" "v"))
2779	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2780  "TARGET_AVX512F"
2781{
2782  static char buf[128];
2783  const char *ops;
2784  const char *suffix;
2785
2786  suffix = "<ssemodesuffix>";
2787  ops = "";
2788
2789  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
2790  if (!TARGET_AVX512DQ)
2791    {
2792      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2793      ops = "p";
2794    }
2795
2796  snprintf (buf, sizeof (buf),
2797	    "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2798	    ops, suffix);
2799  return buf;
2800}
2801  [(set_attr "type" "sselog")
2802   (set_attr "prefix" "evex")
2803   (set_attr "mode" "<sseinsnmode>")])
2804
;; Expander for the any_logic operations on the VF_128_256 modes.  Both
;; inputs may be in memory at expand time; the operands are passed to
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2805(define_expand "<code><mode>3<mask_name>"
2806  [(set (match_operand:VF_128_256 0 "register_operand")
2807       (any_logic:VF_128_256
2808         (match_operand:VF_128_256 1 "nonimmediate_operand")
2809         (match_operand:VF_128_256 2 "nonimmediate_operand")))]
2810  "TARGET_SSE && <mask_avx512vl_condition>"
2811  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2812
;; Expander for the any_logic operations on the VF_512 modes; requires
;; TARGET_AVX512F.  The operands are passed to
;; ix86_fixup_binary_operands_no_copy before the insn is generated.
2813(define_expand "<code><mode>3<mask_name>"
2814  [(set (match_operand:VF_512 0 "register_operand")
2815       (any_logic:VF_512
2816         (match_operand:VF_512 1 "nonimmediate_operand")
2817         (match_operand:VF_512 2 "nonimmediate_operand")))]
2818  "TARGET_AVX512F"
2819  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
2820
;; Insn for the any_logic operations on the VF_128_256 modes; operands 1
;; and 2 are commutative ("%").  Alternative 0 (isa noavx) is the
;; two-operand <logic> form, alternative 1 (isa avx) the three-operand
;; v<logic> form.  The suffix is forced to "ps" when the insn's "mode"
;; attribute is V8SF or V4SF.  When <mask_applied> is true and
;; TARGET_AVX512DQ is not available, the integer form vp<logic>[dq] is
;; emitted instead.  The template is returned in a static buffer.
2821(define_insn "*<code><mode>3<mask_name>"
2822  [(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
2823	(any_logic:VF_128_256
2824	  (match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
2825	  (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
2826  "TARGET_SSE && <mask_avx512vl_condition>
2827   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2828{
2829  static char buf[128];
2830  const char *ops;
2831  const char *suffix;
2832
2833  switch (get_attr_mode (insn))
2834    {
2835    case MODE_V8SF:
2836    case MODE_V4SF:
2837      suffix = "ps";
2838      break;
2839    default:
2840      suffix = "<ssemodesuffix>";
2841    }
2842
2843  switch (which_alternative)
2844    {
2845    case 0:
2846      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
2847      break;
2848    case 1:
2849      ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2850      break;
2851    default:
2852      gcc_unreachable ();
2853    }
2854
2855  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
2856  if (<mask_applied> && !TARGET_AVX512DQ)
2857    {
2858      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2859      ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
2860    }
2861
2862  snprintf (buf, sizeof (buf), ops, suffix);
2863  return buf;
2864}
2865  [(set_attr "isa" "noavx,avx")
2866   (set_attr "type" "sselog")
2867   (set_attr "prefix" "orig,maybe_evex")
2868   (set (attr "mode")
2869	(cond [(and (match_test "<MODE_SIZE> == 16")
2870		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2871		 (const_string "<ssePSmode>")
2872	       (match_test "TARGET_AVX")
2873		 (const_string "<MODE>")
2874	       (match_test "optimize_function_for_size_p (cfun)")
2875		 (const_string "V4SF")
2876	       ]
2877	       (const_string "<MODE>")))])
2878
;; Insn for the any_logic operations on the VF_512 modes (three-operand
;; form only, operands 1 and 2 commutative).  Without TARGET_AVX512DQ, and
;; when the mode is 64 bytes wide or a mask is applied, the "p" infix and a
;; "q"/"d" suffix select the integer form vp<logic>[qd]; otherwise
;; v<logic><ssemodesuffix> is emitted.
2879(define_insn "*<code><mode>3<mask_name>"
2880  [(set (match_operand:VF_512 0 "register_operand" "=v")
2881	(any_logic:VF_512
2882	  (match_operand:VF_512 1 "nonimmediate_operand" "%v")
2883	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
2884  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
2885{
2886  static char buf[128];
2887  const char *ops;
2888  const char *suffix;
2889
2890  suffix = "<ssemodesuffix>";
2891  ops = "";
2892
2893  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
2894  if ((<MODE_SIZE> == 64 || <mask_applied>) && !TARGET_AVX512DQ)
2895    {
2896      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
2897      ops = "p";
2898    }
2899
2900  snprintf (buf, sizeof (buf),
2901	   "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
2902	   ops, suffix);
2903  return buf;
2904}
2905  [(set_attr "type" "sselog")
2906   (set_attr "prefix" "evex")
2907   (set_attr "mode" "<sseinsnmode>")])
2908
;; copysign for the VF modes, expanded as three logical operations:
;;   tmp4 = ~mask & operand 1
;;   tmp5 =  mask & operand 2
;;   operand 0 = tmp4 | tmp5
;; where mask (operand 3) is built by ix86_build_signbit_mask and tmp4/tmp5
;; are fresh pseudo registers.
2909(define_expand "copysign<mode>3"
2910  [(set (match_dup 4)
2911	(and:VF
2912	  (not:VF (match_dup 3))
2913	  (match_operand:VF 1 "nonimmediate_operand")))
2914   (set (match_dup 5)
2915	(and:VF (match_dup 3)
2916		(match_operand:VF 2 "nonimmediate_operand")))
2917   (set (match_operand:VF 0 "register_operand")
2918	(ior:VF (match_dup 4) (match_dup 5)))]
2919  "TARGET_SSE"
2920{
2921  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
2922
2923  operands[4] = gen_reg_rtx (<MODE>mode);
2924  operands[5] = gen_reg_rtx (<MODE>mode);
2925})
2926
2927;; Also define scalar versions.  These are used for abs, neg, and
2928;; conditional move.  Using subregs into vector modes causes register
2929;; allocation lossage.  These patterns do not allow memory operands
2930;; because the native instructions read the full 128-bits.
2931
;; Scalar (MODEF) and-not: operand 0 = (~operand 1) & operand 2, with
;; register operands only.  Alternative 0 (isa noavx) is the two-operand
;; andn form, alternative 1 (isa avx) the three-operand vandn form.  The
;; suffix is "ps" when the insn's "mode" attribute is V4SF, otherwise
;; <ssevecmodesuffix>.
2932(define_insn "*andnot<mode>3"
2933  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
2934	(and:MODEF
2935	  (not:MODEF
2936	    (match_operand:MODEF 1 "register_operand" "0,x"))
2937	    (match_operand:MODEF 2 "register_operand" "x,x")))]
2938  "SSE_FLOAT_MODE_P (<MODE>mode)"
2939{
2940  static char buf[32];
2941  const char *ops;
2942  const char *suffix
2943    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
2944
2945  switch (which_alternative)
2946    {
2947    case 0:
2948      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
2949      break;
2950    case 1:
2951      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2952      break;
2953    default:
2954      gcc_unreachable ();
2955    }
2956
2957  snprintf (buf, sizeof (buf), ops, suffix);
2958  return buf;
2959}
2960  [(set_attr "isa" "noavx,avx")
2961   (set_attr "type" "sselog")
2962   (set_attr "prefix" "orig,vex")
2963   (set (attr "mode")
2964	(cond [(and (match_test "<MODE_SIZE> == 16")
2965		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
2966		 (const_string "V4SF")
2967	       (match_test "TARGET_AVX")
2968		 (const_string "<ssevecmode>")
2969	       (match_test "optimize_function_for_size_p (cfun)")
2970		 (const_string "V4SF")
2971	       ]
2972	       (const_string "<ssevecmode>")))])
2973
;; TFmode and-not: operand 0 = (~operand 1) & operand 2.  The base mnemonic
;; is "andnps" when the insn's "mode" attribute is V4SF and "pandn"
;; otherwise; alternative 1 (isa avx) prepends "v" and uses three operands.
;; The "mode" attribute is V4SF when TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL,
;; else TI under TARGET_AVX, else V4SF without SSE2 or when optimizing for
;; size, else TI.
2974(define_insn "*andnottf3"
2975  [(set (match_operand:TF 0 "register_operand" "=x,x")
2976	(and:TF
2977	  (not:TF (match_operand:TF 1 "register_operand" "0,x"))
2978	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
2979  "TARGET_SSE"
2980{
2981  static char buf[32];
2982  const char *ops;
2983  const char *tmp
2984    = (get_attr_mode (insn) == MODE_V4SF) ? "andnps" : "pandn";
2985
2986  switch (which_alternative)
2987    {
2988    case 0:
2989      ops = "%s\t{%%2, %%0|%%0, %%2}";
2990      break;
2991    case 1:
2992      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
2993      break;
2994    default:
2995      gcc_unreachable ();
2996    }
2997
2998  snprintf (buf, sizeof (buf), ops, tmp);
2999  return buf;
3000}
3001  [(set_attr "isa" "noavx,avx")
3002   (set_attr "type" "sselog")
3003   (set (attr "prefix_data16")
3004     (if_then_else
3005       (and (eq_attr "alternative" "0")
3006	    (eq_attr "mode" "TI"))
3007       (const_string "1")
3008       (const_string "*")))
3009   (set_attr "prefix" "orig,vex")
3010   (set (attr "mode")
3011	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3012		 (const_string "V4SF")
3013	       (match_test "TARGET_AVX")
3014		 (const_string "TI")
3015	       (ior (not (match_test "TARGET_SSE2"))
3016		    (match_test "optimize_function_for_size_p (cfun)"))
3017		 (const_string "V4SF")
3018	       ]
3019	       (const_string "TI")))])
3020
;; Scalar (MODEF) any_logic operation on register operands; operands 1 and
;; 2 are commutative.  Alternative 0 (isa noavx) is the two-operand <logic>
;; form, alternative 1 (isa avx) the three-operand v<logic> form.  The
;; suffix is "ps" when the insn's "mode" attribute is V4SF, otherwise
;; <ssevecmodesuffix>.
3021(define_insn "*<code><mode>3"
3022  [(set (match_operand:MODEF 0 "register_operand" "=x,x")
3023	(any_logic:MODEF
3024	  (match_operand:MODEF 1 "register_operand" "%0,x")
3025	  (match_operand:MODEF 2 "register_operand" "x,x")))]
3026  "SSE_FLOAT_MODE_P (<MODE>mode)"
3027{
3028  static char buf[32];
3029  const char *ops;
3030  const char *suffix
3031    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3032
3033  switch (which_alternative)
3034    {
3035    case 0:
3036      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3037      break;
3038    case 1:
3039      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3040      break;
3041    default:
3042      gcc_unreachable ();
3043    }
3044
3045  snprintf (buf, sizeof (buf), ops, suffix);
3046  return buf;
3047}
3048  [(set_attr "isa" "noavx,avx")
3049   (set_attr "type" "sselog")
3050   (set_attr "prefix" "orig,vex")
3051   (set (attr "mode")
3052	(cond [(and (match_test "<MODE_SIZE> == 16")
3053		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
3054		 (const_string "V4SF")
3055	       (match_test "TARGET_AVX")
3056		 (const_string "<ssevecmode>")
3057	       (match_test "optimize_function_for_size_p (cfun)")
3058		 (const_string "V4SF")
3059	       ]
3060	       (const_string "<ssevecmode>")))])
3061
;; Expander for the any_logic operations in TFmode; requires TARGET_SSE.
;; The operands are passed to ix86_fixup_binary_operands_no_copy before the
;; insn is generated.
3062(define_expand "<code>tf3"
3063  [(set (match_operand:TF 0 "register_operand")
3064	(any_logic:TF
3065	  (match_operand:TF 1 "nonimmediate_operand")
3066	  (match_operand:TF 2 "nonimmediate_operand")))]
3067  "TARGET_SSE"
3068  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3069
;; TFmode any_logic insn; operands 1 and 2 are commutative.  The base
;; mnemonic is "<logic>ps" when the insn's "mode" attribute is V4SF and
;; "p<logic>" otherwise; alternative 1 (isa avx) prepends "v" and uses
;; three operands.  The "mode" attribute is chosen with the same cond as in
;; *andnottf3.
3070(define_insn "*<code>tf3"
3071  [(set (match_operand:TF 0 "register_operand" "=x,x")
3072	(any_logic:TF
3073	  (match_operand:TF 1 "nonimmediate_operand" "%0,x")
3074	  (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
3075  "TARGET_SSE
3076   && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
3077{
3078  static char buf[32];
3079  const char *ops;
3080  const char *tmp
3081    = (get_attr_mode (insn) == MODE_V4SF) ? "<logic>ps" : "p<logic>";
3082
3083  switch (which_alternative)
3084    {
3085    case 0:
3086      ops = "%s\t{%%2, %%0|%%0, %%2}";
3087      break;
3088    case 1:
3089      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3090      break;
3091    default:
3092      gcc_unreachable ();
3093    }
3094
3095  snprintf (buf, sizeof (buf), ops, tmp);
3096  return buf;
3097}
3098  [(set_attr "isa" "noavx,avx")
3099   (set_attr "type" "sselog")
3100   (set (attr "prefix_data16")
3101     (if_then_else
3102       (and (eq_attr "alternative" "0")
3103	    (eq_attr "mode" "TI"))
3104       (const_string "1")
3105       (const_string "*")))
3106   (set_attr "prefix" "orig,vex")
3107   (set (attr "mode")
3108	(cond [(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3109		 (const_string "V4SF")
3110	       (match_test "TARGET_AVX")
3111		 (const_string "TI")
3112	       (ior (not (match_test "TARGET_SSE2"))
3113		    (match_test "optimize_function_for_size_p (cfun)"))
3114		 (const_string "V4SF")
3115	       ]
3116	       (const_string "TI")))])
3117
3118;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3119;;
3120;; FMA floating point multiply/accumulate instructions.  These include
3121;; scalar versions of the instructions as well as vector versions.
3122;;
3123;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3124
3125;; The standard names for scalar FMA are only available with SSE math enabled.
3126;; CPUID bit AVX512F enables evex encoded scalar and 512-bit fma.  It doesn't
3127;; care about FMA bit, so we enable fma for TARGET_AVX512F even when TARGET_FMA
3128;; and TARGET_FMA4 are both false.
3129;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3130;; one must force the EVEX encoding of the fma insns.  Ideally we'd improve
3131;; GAS to allow proper prefix selection.  However, for the moment all hardware
3132;; that supports AVX512F also supports FMA so we can ignore this for now.
;; Modes for the standard-named fma/fms/fnma/fnms expanders.  Scalar SF/DF
;; additionally require TARGET_SSE_MATH; the 128/256-bit vector modes
;; accept FMA, FMA4 or AVX512VL; the V16SF/V8DF modes require AVX512F.
3133(define_mode_iterator FMAMODEM
3134  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3135   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3136   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3137   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3138   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3139   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3140   (V16SF "TARGET_AVX512F")
3141   (V8DF "TARGET_AVX512F")])
3142
;; operand 0 = operand 1 * operand 2 + operand 3.  No preparation code and
;; no extra condition beyond the per-mode conditions of FMAMODEM.
3143(define_expand "fma<mode>4"
3144  [(set (match_operand:FMAMODEM 0 "register_operand")
3145	(fma:FMAMODEM
3146	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3147	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3148	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3149
;; operand 0 = operand 1 * operand 2 - operand 3, expressed as an fma with
;; the addend (operand 3) negated.
3150(define_expand "fms<mode>4"
3151  [(set (match_operand:FMAMODEM 0 "register_operand")
3152	(fma:FMAMODEM
3153	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3154	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3155	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3156
;; operand 0 = -(operand 1) * operand 2 + operand 3, expressed as an fma
;; with the first multiplicand (operand 1) negated.
3157(define_expand "fnma<mode>4"
3158  [(set (match_operand:FMAMODEM 0 "register_operand")
3159	(fma:FMAMODEM
3160	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3161	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3162	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3163
;; operand 0 = -(operand 1) * operand 2 - operand 3, expressed as an fma
;; with both the first multiplicand and the addend negated.
3164(define_expand "fnms<mode>4"
3165  [(set (match_operand:FMAMODEM 0 "register_operand")
3166	(fma:FMAMODEM
3167	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3168	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3169	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3170
3171;; The builtins for intrinsics are not constrained by SSE math enabled.
;; Same mode list as FMAMODEM, but the scalar SF/DF entries do not require
;; TARGET_SSE_MATH.
3172(define_mode_iterator FMAMODE_AVX512
3173 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3174  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3175  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3176  (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3177  (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3178  (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3179  (V16SF "TARGET_AVX512F")
3180  (V8DF "TARGET_AVX512F")])
3181
;; Unconditional scalar and 128/256-bit floating-point modes; used by the
;; *fma_* insns whose own condition is TARGET_FMA || TARGET_FMA4.
3182(define_mode_iterator FMAMODE
3183  [SF DF V4SF V2DF V8SF V4DF])
3184
;; operand 0 = operand 1 * operand 2 + operand 3 over FMAMODE_AVX512, i.e.
;; without the TARGET_SSE_MATH requirement on the scalar modes.
3185(define_expand "fma4i_fmadd_<mode>"
3186  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3187	(fma:FMAMODE_AVX512
3188	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3189	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3190	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3191
;; Zero-masking fmadd expander.  Forwards operands 0..3 to
;; gen_fma_fmadd_<mode>_maskz_1, inserting CONST0_RTX (<MODE>mode) ahead of
;; the mask (operand 4), plus any <round_expand_operand>, and emits the
;; resulting insn.
3192(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3193  [(match_operand:VF_AVX512VL 0 "register_operand")
3194   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3195   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3196   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3197   (match_operand:<avx512fmaskmode> 4 "register_operand")]
3198  "TARGET_AVX512F && <round_mode512bit_condition>"
3199{
3200  emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3201    operands[0], operands[1], operands[2], operands[3],
3202    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3203  DONE;
3204})
3205
;; operand 0 = operand 1 * operand 2 + operand 3 for FMAMODE.
;; Alternatives 0-2 (isa fma) are the three-operand 132/213/231 forms, in
;; which the destination is tied to operand 1 (alts 0, 1) or operand 3
;; (alt 2).  Alternatives 3-4 (isa fma4) are the four-operand vfmadd form.
3206(define_insn "*fma_fmadd_<mode>"
3207  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3208	(fma:FMAMODE
3209	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3210	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3211	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3212  "TARGET_FMA || TARGET_FMA4"
3213  "@
3214   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3215   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3216   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3217   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3218   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3219  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3220   (set_attr "type" "ssemuladd")
3221   (set_attr "mode" "<MODE>")])
3222
3223;; Assume AVX-512F as the baseline; the 128/256-bit vector modes additionally need AVX-512VL.
;; Scalar SF/DF and V16SF/V8DF are unconditional here; the V8SF, V4SF,
;; V4DF and V2DF entries are enabled only with TARGET_AVX512VL.
3224(define_mode_iterator VF_SF_AVX512VL
3225  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
3226   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
3227
;; AVX-512 fmadd over VF_SF_AVX512VL: operand 0 = op1 * op2 + op3.  The
;; three alternatives are the 132/213/231 forms, in which the destination
;; is tied to operand 1 (alts 0, 1) or operand 3 (alt 2).  The
;; <sd_mask_op4> and <round_sd_mask_op4> placeholders are filled in by the
;; subst attributes carried in the pattern name.
3228(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
3229  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3230	(fma:VF_SF_AVX512VL
3231	  (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3232	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3233	  (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3234  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3235  "@
3236   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3237   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3238   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3239  [(set_attr "type" "ssemuladd")
3240   (set_attr "mode" "<MODE>")])
3241
;; Merge-masked fmadd: elements selected by mask operand 4 receive
;; op1 * op2 + op3, the others keep operand 1 (match_dup 1), which is tied
;; to the destination.  Only the 132 and 213 forms are provided.
3242(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
3243  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3244	(vec_merge:VF_AVX512VL
3245	  (fma:VF_AVX512VL
3246	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3247	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3248	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3249	  (match_dup 1)
3250	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3251  "TARGET_AVX512F && <round_mode512bit_condition>"
3252  "@
3253   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3254   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3255  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3256   (set_attr "type" "ssemuladd")
3257   (set_attr "mode" "<MODE>")])
3258
;; Merge-masked fmadd, "mask3" flavour: elements selected by mask operand 4
;; receive op1 * op2 + op3, the others keep operand 3 (match_dup 3), which
;; is tied to the destination; hence only the 231 form is used.
;; The register constraints are "v" (not "x") and the condition includes
;; <round_mode512bit_condition>, matching the fmsub/fnmadd _mask3 patterns:
;; this insn is always EVEX-encoded (isa fma_avx512f, {%4} write-mask).
3259(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
3260  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3261	(vec_merge:VF_AVX512VL
3262	  (fma:VF_AVX512VL
3263	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3264	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3265	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3266	  (match_dup 3)
3267	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3268  "TARGET_AVX512F && <round_mode512bit_condition>"
3269  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3270  [(set_attr "isa" "fma_avx512f")
3271   (set_attr "type" "ssemuladd")
3272   (set_attr "mode" "<MODE>")])
3273
;; operand 0 = operand 1 * operand 2 - operand 3 for FMAMODE (fma with the
;; addend negated).  Alternatives 0-2 (isa fma) are the 132/213/231 forms;
;; alternatives 3-4 (isa fma4) are the four-operand vfmsub form.
3274(define_insn "*fma_fmsub_<mode>"
3275  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3276	(fma:FMAMODE
3277	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0,0,v,x,x")
3278	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3279	  (neg:FMAMODE
3280	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3281  "TARGET_FMA || TARGET_FMA4"
3282  "@
3283   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3284   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3285   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3286   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3287   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3288  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3289   (set_attr "type" "ssemuladd")
3290   (set_attr "mode" "<MODE>")])
3291
;; AVX-512 fmsub over VF_SF_AVX512VL: operand 0 = op1 * op2 - op3.  The
;; three alternatives are the 132/213/231 forms; the mask and rounding
;; placeholders are filled in by the subst attributes in the pattern name.
3292(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
3293  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3294	(fma:VF_SF_AVX512VL
3295	  (match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
3296	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3297	  (neg:VF_SF_AVX512VL
3298	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3299  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3300  "@
3301   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3302   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3303   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3304  [(set_attr "type" "ssemuladd")
3305   (set_attr "mode" "<MODE>")])
3306
;; Merge-masked fmsub: elements selected by mask operand 4 receive
;; op1 * op2 - op3, the others keep operand 1 (match_dup 1), which is tied
;; to the destination.  Only the 132 and 213 forms are provided.
;; The condition includes <round_mode512bit_condition>, as in the fmadd and
;; fnmadd _mask patterns, so the <round_name> variant is restricted in the
;; same way as its siblings.
3307(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
3308  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3309	(vec_merge:VF_AVX512VL
3310	  (fma:VF_AVX512VL
3311	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3312	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3313	    (neg:VF_AVX512VL
3314	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3315	  (match_dup 1)
3316	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3317  "TARGET_AVX512F && <round_mode512bit_condition>"
3318  "@
3319   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3320   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3321  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3322   (set_attr "type" "ssemuladd")
3323   (set_attr "mode" "<MODE>")])
3324
;; Merge-masked fmsub, "mask3" flavour: elements selected by mask operand 4
;; receive op1 * op2 - op3, the others keep operand 3 (match_dup 3), which
;; is tied to the destination; hence only the 231 form is used.
3325(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
3326  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3327	(vec_merge:VF_AVX512VL
3328	  (fma:VF_AVX512VL
3329	    (match_operand:VF_AVX512VL 1 "register_operand" "v")
3330	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3331	    (neg:VF_AVX512VL
3332	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3333	  (match_dup 3)
3334	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3335  "TARGET_AVX512F && <round_mode512bit_condition>"
3336  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3337  [(set_attr "isa" "fma_avx512f")
3338   (set_attr "type" "ssemuladd")
3339   (set_attr "mode" "<MODE>")])
3340
;; operand 0 = -(operand 1) * operand 2 + operand 3 for FMAMODE (fma with
;; the first multiplicand negated).  Alternatives 0-2 (isa fma) are the
;; 132/213/231 forms; alternatives 3-4 (isa fma4) are the four-operand
;; vfnmadd form.
3341(define_insn "*fma_fnmadd_<mode>"
3342  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3343	(fma:FMAMODE
3344	  (neg:FMAMODE
3345	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3346	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3347	  (match_operand:FMAMODE   3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3348  "TARGET_FMA || TARGET_FMA4"
3349  "@
3350   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3351   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3352   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3353   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3354   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3355  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3356   (set_attr "type" "ssemuladd")
3357   (set_attr "mode" "<MODE>")])
3358
;; AVX-512 fnmadd over VF_SF_AVX512VL: operand 0 = -(op1) * op2 + op3.  The
;; three alternatives are the 132/213/231 forms; the mask and rounding
;; placeholders are filled in by the subst attributes in the pattern name.
3359(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
3360  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3361	(fma:VF_SF_AVX512VL
3362	  (neg:VF_SF_AVX512VL
3363	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3364	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3365	  (match_operand:VF_SF_AVX512VL   3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
3366  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3367  "@
3368   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3369   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3370   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3371  [(set_attr "type" "ssemuladd")
3372   (set_attr "mode" "<MODE>")])
3373
;; Merge-masked fnmadd: elements selected by mask operand 4 receive
;; -(op1) * op2 + op3, the others keep operand 1 (match_dup 1), which is
;; tied to the destination.  Only the 132 and 213 forms are provided.
3374(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
3375  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3376	(vec_merge:VF_AVX512VL
3377	  (fma:VF_AVX512VL
3378	    (neg:VF_AVX512VL
3379	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3380	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3381	    (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))
3382	  (match_dup 1)
3383	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3384  "TARGET_AVX512F && <round_mode512bit_condition>"
3385  "@
3386   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3387   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3388  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3389   (set_attr "type" "ssemuladd")
3390   (set_attr "mode" "<MODE>")])
3391
;; Merge-masked fnmadd, "mask3" flavour: elements selected by mask operand
;; 4 receive -(op1) * op2 + op3, the others keep operand 3 (match_dup 3),
;; which is tied to the destination; hence only the 231 form is used.
3392(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
3393  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3394	(vec_merge:VF_AVX512VL
3395	  (fma:VF_AVX512VL
3396	    (neg:VF_AVX512VL
3397	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3398	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3399	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
3400	  (match_dup 3)
3401	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3402  "TARGET_AVX512F && <round_mode512bit_condition>"
3403  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3404  [(set_attr "isa" "fma_avx512f")
3405   (set_attr "type" "ssemuladd")
3406   (set_attr "mode" "<MODE>")])
3407
;; Unmasked negated multiply-subtract, fma (-op1, op2, -op3), over FMAMODE.
;; Alternatives 0-2 are the three-operand FMA forms (132/213/231, isa "fma");
;; alternatives 3-4 are the four-operand FMA4 form (isa "fma4").
;; NOTE(review): the pattern name carries no subst-name attribute, yet the
;; first three templates reference <round_sd_mask_op4>/<sd_mask_op4>;
;; presumably these expand to nothing here -- confirm against subst.md.
3408(define_insn "*fma_fnmsub_<mode>"
3409  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3410	(fma:FMAMODE
3411	  (neg:FMAMODE
3412	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
3413	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
3414	  (neg:FMAMODE
3415	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
3416  "TARGET_FMA || TARGET_FMA4"
3417  "@
3418   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3419   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3420   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}
3421   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3422   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3423  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3424   (set_attr "type" "ssemuladd")
3425   (set_attr "mode" "<MODE>")])
3426
;; AVX512F negated multiply-subtract, fma (-op1, op2, -op3), over
;; VF_SF_AVX512VL.  The name carries <sd_mask_codefor>, <sd_maskz_name> and
;; <round_name>; the templates splice <sd_mask_op4> after the destination
;; and <round_sd_mask_op4> around the source list.  One alternative per
;; 132/213/231 form, chosen by which input is tied to the destination.
3427(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
3428  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3429	(fma:VF_SF_AVX512VL
3430	  (neg:VF_SF_AVX512VL
3431	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
3432	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3433	  (neg:VF_SF_AVX512VL
3434	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
3435  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3436  "@
3437   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3438   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3439   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3440  [(set_attr "type" "ssemuladd")
3441   (set_attr "mode" "<MODE>")])
3442
;; Merge-masked negated multiply-subtract: fma (-op1, op2, -op3) selected
;; by mask operand 4, falling back to operand 1 (match_dup 1), which is tied
;; to the destination.  Emits the 132 or 213 form depending on which of
;; operands 2/3 takes <round_constraint>.
3443(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
3444  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3445	(vec_merge:VF_AVX512VL
3446	  (fma:VF_AVX512VL
3447	    (neg:VF_AVX512VL
3448	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
3449	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3450	    (neg:VF_AVX512VL
3451	      (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")))
3452	  (match_dup 1)
3453	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3454  "TARGET_AVX512F && <round_mode512bit_condition>"
3455  "@
3456   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3457   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3458  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3459   (set_attr "type" "ssemuladd")
3460   (set_attr "mode" "<MODE>")])
3461
;; "mask3" flavour of the masked negated multiply-subtract,
;; fma (-op1, op2, -op3): the vec_merge fallback is operand 3 (match_dup 3),
;; tied to the destination ("0"), so only the 231 form is emitted.
;; The condition includes <round_mode512bit_condition>, as the _mask variant
;; of this operation and the fnmadd _mask3 pattern do, so the <round_name>
;; variant is gated the same way for all of them.
3462(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
3463  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3464	(vec_merge:VF_AVX512VL
3465	  (fma:VF_AVX512VL
3466	    (neg:VF_AVX512VL
3467	      (match_operand:VF_AVX512VL 1 "register_operand" "v"))
3468	    (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3469	    (neg:VF_AVX512VL
3470	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
3471	  (match_dup 3)
3472	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3473  "TARGET_AVX512F && <round_mode512bit_condition>"
3474  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3475  [(set_attr "isa" "fma_avx512f")
3476   (set_attr "type" "ssemuladd")
3477   (set_attr "mode" "<MODE>")])
3478
3479;; FMA parallel floating point multiply addsub and subadd operations.
3480
3481;; It would be possible to represent these without the UNSPEC as
3482;;
3483;; (vec_merge
3484;;   (fma op1 op2 op3)
3485;;   (fma op1 op2 (neg op3))
3486;;   (merge-const))
3487;;
3488;; But this doesn't seem useful in practice.
3489
;; Named expander for the three-input UNSPEC_FMADDSUB operation over VF.
;; It has no preparation statements, so it emits the pattern as written;
;; enabled when any of FMA, FMA4 or AVX512F is available.
3490(define_expand "fmaddsub_<mode>"
3491  [(set (match_operand:VF 0 "register_operand")
3492	(unspec:VF
3493	  [(match_operand:VF 1 "nonimmediate_operand")
3494	   (match_operand:VF 2 "nonimmediate_operand")
3495	   (match_operand:VF 3 "nonimmediate_operand")]
3496	  UNSPEC_FMADDSUB))]
3497  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3498
;; Zero-masking expander for fmaddsub.  It forwards operands 0-3 to
;; gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name>, inserting
;; CONST0_RTX (<MODE>mode) ahead of the mask (operand 4) and appending
;; <round_expand_operand>, then finishes with DONE.
3499(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
3500  [(match_operand:VF_AVX512VL 0 "register_operand")
3501   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3502   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3503   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3504   (match_operand:<avx512fmaskmode> 4 "register_operand")]
3505  "TARGET_AVX512F"
3506{
3507  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
3508    operands[0], operands[1], operands[2], operands[3],
3509    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3510  DONE;
3511})
3512
;; Unmasked UNSPEC_FMADDSUB over the 128/256-bit float vector modes.
;; Alternatives 0-2 are the three-operand FMA forms (132/213/231, isa "fma");
;; alternatives 3-4 are the four-operand FMA4 form (isa "fma4").
3513(define_insn "*fma_fmaddsub_<mode>"
3514  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3515	(unspec:VF_128_256
3516	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
3517	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
3518	   (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
3519	  UNSPEC_FMADDSUB))]
3520  "TARGET_FMA || TARGET_FMA4"
3521  "@
3522   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3523   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3524   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3525   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3526   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3527  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3528   (set_attr "type" "ssemuladd")
3529   (set_attr "mode" "<MODE>")])
3530
;; AVX512F UNSPEC_FMADDSUB over VF_SF_AVX512VL.  The name carries
;; <sd_mask_codefor>, <sd_maskz_name> and <round_name>; the templates splice
;; <sd_mask_op4> after the destination and <round_sd_mask_op4> around the
;; sources.  One alternative per 132/213/231 form.
3531(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
3532  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3533	(unspec:VF_SF_AVX512VL
3534	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
3535	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3536	   (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
3537	  UNSPEC_FMADDSUB))]
3538  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3539  "@
3540   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3541   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3542   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3543  [(set_attr "type" "ssemuladd")
3544   (set_attr "mode" "<MODE>")])
3545
;; Merge-masked UNSPEC_FMADDSUB: result selected by mask operand 4, falling
;; back to operand 1 (match_dup 1), which is tied to the destination.
;; Emits the 132 or 213 form.
;; NOTE(review): unlike the fnmadd/fnmsub _mask patterns, the condition has
;; no <round_mode512bit_condition> term although the name includes
;; <round_name> -- confirm this is intended.
3546(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
3547  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3548	(vec_merge:VF_AVX512VL
3549	  (unspec:VF_AVX512VL
3550	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3551	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3552	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>")]
3553	    UNSPEC_FMADDSUB)
3554	  (match_dup 1)
3555	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3556  "TARGET_AVX512F"
3557  "@
3558   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3559   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3560  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3561   (set_attr "type" "ssemuladd")
3562   (set_attr "mode" "<MODE>")])
3563
;; "mask3" flavour of masked UNSPEC_FMADDSUB: the vec_merge fallback is
;; operand 3 (match_dup 3), tied to the destination, so only the 231 form
;; is emitted.
3564(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
3565  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3566	(vec_merge:VF_AVX512VL
3567	  (unspec:VF_AVX512VL
3568	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3569	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3570	     (match_operand:VF_AVX512VL 3 "register_operand" "0")]
3571	    UNSPEC_FMADDSUB)
3572	  (match_dup 3)
3573	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3574  "TARGET_AVX512F"
3575  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3576  [(set_attr "isa" "fma_avx512f")
3577   (set_attr "type" "ssemuladd")
3578   (set_attr "mode" "<MODE>")])
3579
;; fmsubadd is represented as UNSPEC_FMADDSUB with the third input negated.
;; Unmasked, 128/256-bit modes: alternatives 0-2 are the FMA 132/213/231
;; forms, alternatives 3-4 the four-operand FMA4 form.
3580(define_insn "*fma_fmsubadd_<mode>"
3581  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
3582	(unspec:VF_128_256
3583	  [(match_operand:VF_128_256   1 "nonimmediate_operand" "%0,0,v,x,x")
3584	   (match_operand:VF_128_256   2 "nonimmediate_operand" "vm,v,vm,x,m")
3585	   (neg:VF_128_256
3586	     (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
3587	  UNSPEC_FMADDSUB))]
3588  "TARGET_FMA || TARGET_FMA4"
3589  "@
3590   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3591   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
3592   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
3593   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
3594   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
3595  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
3596   (set_attr "type" "ssemuladd")
3597   (set_attr "mode" "<MODE>")])
3598
;; AVX512F fmsubadd (UNSPEC_FMADDSUB with operand 3 negated) over
;; VF_SF_AVX512VL.  The name carries <sd_mask_codefor>, <sd_maskz_name> and
;; <round_name>; the templates use <sd_mask_op4> and <round_sd_mask_op4>.
;; One alternative per 132/213/231 form.
3599(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
3600  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
3601	(unspec:VF_SF_AVX512VL
3602	  [(match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
3603	   (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
3604	   (neg:VF_SF_AVX512VL
3605	     (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
3606	  UNSPEC_FMADDSUB))]
3607  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
3608  "@
3609   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
3610   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
3611   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
3612  [(set_attr "type" "ssemuladd")
3613   (set_attr "mode" "<MODE>")])
3614
;; Merge-masked fmsubadd (UNSPEC_FMADDSUB with operand 3 negated): result
;; selected by mask operand 4, falling back to operand 1 (match_dup 1),
;; which is tied to the destination.  Emits the 132 or 213 form.
3615(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
3616  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
3617	(vec_merge:VF_AVX512VL
3618	  (unspec:VF_AVX512VL
3619	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
3620	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>,v")
3621	     (neg:VF_AVX512VL
3622	       (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "v,<round_constraint>"))]
3623	    UNSPEC_FMADDSUB)
3624	  (match_dup 1)
3625	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
3626  "TARGET_AVX512F"
3627  "@
3628   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
3629   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
3630  [(set_attr "isa" "fma_avx512f,fma_avx512f")
3631   (set_attr "type" "ssemuladd")
3632   (set_attr "mode" "<MODE>")])
3633
;; "mask3" flavour of masked fmsubadd: the vec_merge fallback is operand 3
;; (match_dup 3), tied to the destination, so only the 231 form is emitted.
3634(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
3635  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
3636	(vec_merge:VF_AVX512VL
3637	  (unspec:VF_AVX512VL
3638	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
3639	     (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")
3640	     (neg:VF_AVX512VL
3641	       (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
3642	    UNSPEC_FMADDSUB)
3643	  (match_dup 3)
3644	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
3645  "TARGET_AVX512F"
3646  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
3647  [(set_attr "isa" "fma_avx512f")
3648   (set_attr "type" "ssemuladd")
3649   (set_attr "mode" "<MODE>")])
3650
3651;; FMA3 floating point scalar intrinsics. These merge result with
3652;; high-order elements from the destination register.
3653
;; Named expander for the scalar FMA3 intrinsic form: element 0 comes from
;; fma (op1, op2, op3) and, via vec_merge with (const_int 1), the remaining
;; elements come from operand 1.  No preparation statements.
3654(define_expand "fmai_vmfmadd_<mode><round_name>"
3655  [(set (match_operand:VF_128 0 "register_operand")
3656	(vec_merge:VF_128
3657	  (fma:VF_128
3658	    (match_operand:VF_128 1 "<round_nimm_predicate>")
3659	    (match_operand:VF_128 2 "<round_nimm_predicate>")
3660	    (match_operand:VF_128 3 "<round_nimm_predicate>"))
3661	  (match_dup 1)
3662	  (const_int 1)))]
3663  "TARGET_FMA")
3664
;; Scalar FMA3 multiply-add: element 0 is fma (op1, op2, op3), the other
;; elements are kept from operand 1, which is tied to the destination.
;; Two alternatives (132 and 213) differ in which of operands 2/3 takes
;; <round_constraint>.
;; NOTE(review): the templates use <round_op4> but the name has no
;; <round_name>, unlike the fnmadd/fnmsub siblings -- confirm intended.
3665(define_insn "*fmai_fmadd_<mode>"
3666  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3667        (vec_merge:VF_128
3668	  (fma:VF_128
3669	    (match_operand:VF_128 1 "<round_nimm_predicate>" " 0, 0")
3670	    (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v")
3671	    (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>"))
3672	  (match_dup 1)
3673	  (const_int 1)))]
3674  "TARGET_FMA || TARGET_AVX512F"
3675  "@
3676   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3677   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3678  [(set_attr "type" "ssemuladd")
3679   (set_attr "mode" "<MODE>")])
3680
;; Scalar FMA3 multiply-subtract: element 0 is fma (op1, op2, -op3), the
;; other elements are kept from operand 1, which is tied to the destination.
;; Alternatives select the 132 or 213 form.
3681(define_insn "*fmai_fmsub_<mode>"
3682  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3683        (vec_merge:VF_128
3684	  (fma:VF_128
3685	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
3686	    (match_operand:VF_128   2 "<round_nimm_predicate>" "<round_constraint>,v")
3687	    (neg:VF_128
3688	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3689	  (match_dup 1)
3690	  (const_int 1)))]
3691  "TARGET_FMA || TARGET_AVX512F"
3692  "@
3693   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3694   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3695  [(set_attr "type" "ssemuladd")
3696   (set_attr "mode" "<MODE>")])
3697
;; Scalar FMA3 negated multiply-add: element 0 is fma (-op2, op1, op3).
;; Note the operand numbering: the negated multiplicand is operand 2, while
;; operand 1 (tied to the destination) is the second multiplicand and the
;; source of the untouched upper elements.
3698(define_insn "*fmai_fnmadd_<mode><round_name>"
3699  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3700        (vec_merge:VF_128
3701	  (fma:VF_128
3702	    (neg:VF_128
3703	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>,v"))
3704	    (match_operand:VF_128   1 "<round_nimm_predicate>" "0,0")
3705	    (match_operand:VF_128   3 "<round_nimm_predicate>" "v,<round_constraint>"))
3706	  (match_dup 1)
3707	  (const_int 1)))]
3708  "TARGET_FMA || TARGET_AVX512F"
3709  "@
3710   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3711   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3712  [(set_attr "type" "ssemuladd")
3713   (set_attr "mode" "<MODE>")])
3714
;; Scalar FMA3 negated multiply-subtract: element 0 is
;; fma (-op2, op1, -op3).  As in the fnmadd pattern, the negated multiplicand
;; is operand 2 and operand 1 (tied to the destination) supplies both the
;; second multiplicand and the untouched upper elements.
3715(define_insn "*fmai_fnmsub_<mode><round_name>"
3716  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
3717        (vec_merge:VF_128
3718	  (fma:VF_128
3719	    (neg:VF_128
3720	      (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>, v"))
3721	    (match_operand:VF_128   1 "<round_nimm_predicate>" " 0, 0")
3722	    (neg:VF_128
3723	      (match_operand:VF_128 3 "<round_nimm_predicate>" " v,<round_constraint>")))
3724	  (match_dup 1)
3725	  (const_int 1)))]
3726  "TARGET_FMA || TARGET_AVX512F"
3727  "@
3728   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
3729   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
3730  [(set_attr "type" "ssemuladd")
3731   (set_attr "mode" "<MODE>")])
3732
3733;; FMA4 floating point scalar intrinsics.  These write the
3734;; entire destination register, with the high-order elements zeroed.
3735
;; Named expander for the scalar FMA4 intrinsic form.  The preparation
;; statement sets operands[4] to CONST0_RTX (<MODE>mode), so the vec_merge
;; takes element 0 from the fma and every other element from zero.
3736(define_expand "fma4i_vmfmadd_<mode>"
3737  [(set (match_operand:VF_128 0 "register_operand")
3738	(vec_merge:VF_128
3739	  (fma:VF_128
3740	    (match_operand:VF_128 1 "nonimmediate_operand")
3741	    (match_operand:VF_128 2 "nonimmediate_operand")
3742	    (match_operand:VF_128 3 "nonimmediate_operand"))
3743	  (match_dup 4)
3744	  (const_int 1)))]
3745  "TARGET_FMA4"
3746  "operands[4] = CONST0_RTX (<MODE>mode);")
3747
;; Scalar FMA4 multiply-add: element 0 is fma (op1, op2, op3), the other
;; elements come from operand 4, which must satisfy const0_operand.
;; The two alternatives allow a memory operand in position 3 or position 2.
3748(define_insn "*fma4i_vmfmadd_<mode>"
3749  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3750	(vec_merge:VF_128
3751	  (fma:VF_128
3752	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3753	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3754	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
3755	  (match_operand:VF_128 4 "const0_operand")
3756	  (const_int 1)))]
3757  "TARGET_FMA4"
3758  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3759  [(set_attr "type" "ssemuladd")
3760   (set_attr "mode" "<MODE>")])
3761
;; Scalar FMA4 multiply-subtract: element 0 is fma (op1, op2, -op3), the
;; other elements come from operand 4 (const0_operand).
3762(define_insn "*fma4i_vmfmsub_<mode>"
3763  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3764	(vec_merge:VF_128
3765	  (fma:VF_128
3766	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
3767	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
3768	    (neg:VF_128
3769	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
3770	  (match_operand:VF_128 4 "const0_operand")
3771	  (const_int 1)))]
3772  "TARGET_FMA4"
3773  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3774  [(set_attr "type" "ssemuladd")
3775   (set_attr "mode" "<MODE>")])
3776
;; Scalar FMA4 negated multiply-add: element 0 is fma (-op1, op2, op3), the
;; other elements come from operand 4 (const0_operand).
3777(define_insn "*fma4i_vmfnmadd_<mode>"
3778  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3779	(vec_merge:VF_128
3780	  (fma:VF_128
3781	    (neg:VF_128
3782	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3783	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
3784	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
3785	  (match_operand:VF_128 4 "const0_operand")
3786	  (const_int 1)))]
3787  "TARGET_FMA4"
3788  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3789  [(set_attr "type" "ssemuladd")
3790   (set_attr "mode" "<MODE>")])
3791
;; Scalar FMA4 negated multiply-subtract: element 0 is
;; fma (-op1, op2, -op3), the other elements come from operand 4
;; (const0_operand).
3792(define_insn "*fma4i_vmfnmsub_<mode>"
3793  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
3794	(vec_merge:VF_128
3795	  (fma:VF_128
3796	    (neg:VF_128
3797	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
3798	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
3799	    (neg:VF_128
3800	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
3801	  (match_operand:VF_128 4 "const0_operand")
3802	  (const_int 1)))]
3803  "TARGET_FMA4"
3804  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
3805  [(set_attr "type" "ssemuladd")
3806   (set_attr "mode" "<MODE>")])
3807
3808;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3809;;
3810;; Parallel single-precision floating point conversion operations
3811;;
3812;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3813
;; cvtpi2ps: converts the V2SI operand 2 ("ym") to V2SF and merges it into
;; operand 1, which is tied to the destination.  The merge mask (const_int 3)
;; selects the two low elements from the converted value.
3814(define_insn "sse_cvtpi2ps"
3815  [(set (match_operand:V4SF 0 "register_operand" "=x")
3816	(vec_merge:V4SF
3817	  (vec_duplicate:V4SF
3818	    (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
3819	  (match_operand:V4SF 1 "register_operand" "0")
3820	  (const_int 3)))]
3821  "TARGET_SSE"
3822  "cvtpi2ps\t{%2, %0|%0, %2}"
3823  [(set_attr "type" "ssecvt")
3824   (set_attr "mode" "V4SF")])
3825
;; cvtps2pi: UNSPEC_FIX_NOTRUNC conversion of a V4SF operand, of which only
;; elements 0 and 1 are selected into the V2SI destination ("=y").
3826(define_insn "sse_cvtps2pi"
3827  [(set (match_operand:V2SI 0 "register_operand" "=y")
3828	(vec_select:V2SI
3829	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
3830		       UNSPEC_FIX_NOTRUNC)
3831	  (parallel [(const_int 0) (const_int 1)])))]
3832  "TARGET_SSE"
3833  "cvtps2pi\t{%1, %0|%0, %q1}"
3834  [(set_attr "type" "ssecvt")
3835   (set_attr "unit" "mmx")
3836   (set_attr "mode" "DI")])
3837
;; cvttps2pi: truncating (fix) conversion of a V4SF operand, of which only
;; elements 0 and 1 are selected into the V2SI destination ("=y").
3838(define_insn "sse_cvttps2pi"
3839  [(set (match_operand:V2SI 0 "register_operand" "=y")
3840	(vec_select:V2SI
3841	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
3842	  (parallel [(const_int 0) (const_int 1)])))]
3843  "TARGET_SSE"
3844  "cvttps2pi\t{%1, %0|%0, %q1}"
3845  [(set_attr "type" "ssecvt")
3846   (set_attr "unit" "mmx")
3847   (set_attr "prefix_rep" "0")
3848   (set_attr "mode" "SF")])
3849
;; cvtsi2ss: converts the SImode operand 2 to SF and places it in element 0
;; (merge mask 1); the remaining elements come from operand 1.
;; Alternatives 0-1 are the two-operand non-AVX forms (register / memory
;; source, operand 1 tied to the destination); alternative 2 is the
;; three-operand AVX form, which also carries <round_op3>.
3850(define_insn "sse_cvtsi2ss<round_name>"
3851  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3852	(vec_merge:V4SF
3853	  (vec_duplicate:V4SF
3854	    (float:SF (match_operand:SI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3855	  (match_operand:V4SF 1 "register_operand" "0,0,v")
3856	  (const_int 1)))]
3857  "TARGET_SSE"
3858  "@
3859   cvtsi2ss\t{%2, %0|%0, %2}
3860   cvtsi2ss\t{%2, %0|%0, %2}
3861   vcvtsi2ss\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3862  [(set_attr "isa" "noavx,noavx,avx")
3863   (set_attr "type" "sseicvt")
3864   (set_attr "athlon_decode" "vector,double,*")
3865   (set_attr "amdfam10_decode" "vector,double,*")
3866   (set_attr "bdver1_decode" "double,direct,*")
3867   (set_attr "btver2_decode" "double,double,double")
3868   (set_attr "prefix" "orig,orig,maybe_evex")
3869   (set_attr "mode" "SF")])
3870
;; 64-bit-source variant of cvtsi2ss: converts the DImode operand 2 to SF in
;; element 0 (merge mask 1), other elements from operand 1.  Requires
;; TARGET_64BIT.  Alternatives mirror the SImode pattern: two non-AVX
;; two-operand forms and one AVX three-operand form with <round_op3>.
3871(define_insn "sse_cvtsi2ssq<round_name>"
3872  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
3873	(vec_merge:V4SF
3874	  (vec_duplicate:V4SF
3875	    (float:SF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
3876	  (match_operand:V4SF 1 "register_operand" "0,0,v")
3877	  (const_int 1)))]
3878  "TARGET_SSE && TARGET_64BIT"
3879  "@
3880   cvtsi2ssq\t{%2, %0|%0, %2}
3881   cvtsi2ssq\t{%2, %0|%0, %2}
3882   vcvtsi2ssq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3883  [(set_attr "isa" "noavx,noavx,avx")
3884   (set_attr "type" "sseicvt")
3885   (set_attr "athlon_decode" "vector,double,*")
3886   (set_attr "amdfam10_decode" "vector,double,*")
3887   (set_attr "bdver1_decode" "double,direct,*")
3888   (set_attr "btver2_decode" "double,double,double")
3889   (set_attr "length_vex" "*,*,4")
3890   (set_attr "prefix_rex" "1,1,*")
3891   (set_attr "prefix" "orig,orig,maybe_evex")
3892   (set_attr "mode" "SF")])
3893
;; cvtss2si: UNSPEC_FIX_NOTRUNC conversion of element 0 of a V4SF operand to
;; an SImode general register.  Alternative 0 takes a vector register,
;; alternative 1 takes <round_constraint2>.
3894(define_insn "sse_cvtss2si<round_name>"
3895  [(set (match_operand:SI 0 "register_operand" "=r,r")
3896	(unspec:SI
3897	  [(vec_select:SF
3898	     (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3899	     (parallel [(const_int 0)]))]
3900	  UNSPEC_FIX_NOTRUNC))]
3901  "TARGET_SSE"
3902  "%vcvtss2si\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3903  [(set_attr "type" "sseicvt")
3904   (set_attr "athlon_decode" "double,vector")
3905   (set_attr "bdver1_decode" "double,double")
3906   (set_attr "prefix_rep" "1")
3907   (set_attr "prefix" "maybe_vex")
3908   (set_attr "mode" "SI")])
3909
;; cvtss2si on a plain SFmode operand (register or memory) rather than on
;; element 0 of a V4SF; same UNSPEC_FIX_NOTRUNC conversion to SImode.
3910(define_insn "sse_cvtss2si_2"
3911  [(set (match_operand:SI 0 "register_operand" "=r,r")
3912	(unspec:SI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3913		   UNSPEC_FIX_NOTRUNC))]
3914  "TARGET_SSE"
3915  "%vcvtss2si\t{%1, %0|%0, %k1}"
3916  [(set_attr "type" "sseicvt")
3917   (set_attr "athlon_decode" "double,vector")
3918   (set_attr "amdfam10_decode" "double,double")
3919   (set_attr "bdver1_decode" "double,double")
3920   (set_attr "prefix_rep" "1")
3921   (set_attr "prefix" "maybe_vex")
3922   (set_attr "mode" "SI")])
3923
;; 64-bit-destination variant of cvtss2si: UNSPEC_FIX_NOTRUNC conversion of
;; element 0 of a V4SF operand to a DImode register.  Requires TARGET_64BIT;
;; the AT&T mnemonic carries the {q} suffix.
3924(define_insn "sse_cvtss2siq<round_name>"
3925  [(set (match_operand:DI 0 "register_operand" "=r,r")
3926	(unspec:DI
3927	  [(vec_select:SF
3928	     (match_operand:V4SF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
3929	     (parallel [(const_int 0)]))]
3930	  UNSPEC_FIX_NOTRUNC))]
3931  "TARGET_SSE && TARGET_64BIT"
3932  "%vcvtss2si{q}\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
3933  [(set_attr "type" "sseicvt")
3934   (set_attr "athlon_decode" "double,vector")
3935   (set_attr "bdver1_decode" "double,double")
3936   (set_attr "prefix_rep" "1")
3937   (set_attr "prefix" "maybe_vex")
3938   (set_attr "mode" "DI")])
3939
;; cvtss2si{q} on a plain SFmode operand (register or memory); same
;; UNSPEC_FIX_NOTRUNC conversion, DImode destination, TARGET_64BIT only.
3940(define_insn "sse_cvtss2siq_2"
3941  [(set (match_operand:DI 0 "register_operand" "=r,r")
3942	(unspec:DI [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
3943		   UNSPEC_FIX_NOTRUNC))]
3944  "TARGET_SSE && TARGET_64BIT"
3945  "%vcvtss2si{q}\t{%1, %0|%0, %k1}"
3946  [(set_attr "type" "sseicvt")
3947   (set_attr "athlon_decode" "double,vector")
3948   (set_attr "amdfam10_decode" "double,double")
3949   (set_attr "bdver1_decode" "double,double")
3950   (set_attr "prefix_rep" "1")
3951   (set_attr "prefix" "maybe_vex")
3952   (set_attr "mode" "DI")])
3953
;; cvttss2si: truncating (fix) conversion of element 0 of a V4SF operand to
;; an SImode general register.  Alternative 0 takes a vector register,
;; alternative 1 takes <round_saeonly_constraint2>.
3954(define_insn "sse_cvttss2si<round_saeonly_name>"
3955  [(set (match_operand:SI 0 "register_operand" "=r,r")
3956	(fix:SI
3957	  (vec_select:SF
3958	    (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3959	    (parallel [(const_int 0)]))))]
3960  "TARGET_SSE"
3961  "%vcvttss2si\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3962  [(set_attr "type" "sseicvt")
3963   (set_attr "athlon_decode" "double,vector")
3964   (set_attr "amdfam10_decode" "double,double")
3965   (set_attr "bdver1_decode" "double,double")
3966   (set_attr "prefix_rep" "1")
3967   (set_attr "prefix" "maybe_vex")
3968   (set_attr "mode" "SI")])
3969
;; 64-bit-destination variant of cvttss2si: truncating (fix) conversion of
;; element 0 of a V4SF operand to a DImode register.  Requires TARGET_64BIT;
;; the AT&T mnemonic carries the {q} suffix.
;; The second alternative uses <round_saeonly_constraint2>, matching the
;; SImode pattern, so that the per-alternative decode attributes line up
;; with the same register/non-register split in both patterns.
3970(define_insn "sse_cvttss2siq<round_saeonly_name>"
3971  [(set (match_operand:DI 0 "register_operand" "=r,r")
3972	(fix:DI
3973	  (vec_select:SF
3974	    (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
3975	    (parallel [(const_int 0)]))))]
3976  "TARGET_SSE && TARGET_64BIT"
3977  "%vcvttss2si{q}\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
3978  [(set_attr "type" "sseicvt")
3979   (set_attr "athlon_decode" "double,vector")
3980   (set_attr "amdfam10_decode" "double,double")
3981   (set_attr "bdver1_decode" "double,double")
3982   (set_attr "prefix_rep" "1")
3983   (set_attr "prefix" "maybe_vex")
3984   (set_attr "mode" "DI")])
3985
;; vcvtusi2ss/sd with a 32-bit source: unsigned_float of the SImode
;; operand 2 goes to element 0 (merge mask 1), the other elements come from
;; operand 1.  The condition adds <round_modev4sf_condition> on top of
;; TARGET_AVX512F.
3986(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
3987  [(set (match_operand:VF_128 0 "register_operand" "=v")
3988	(vec_merge:VF_128
3989	  (vec_duplicate:VF_128
3990	    (unsigned_float:<ssescalarmode>
3991	      (match_operand:SI 2 "<round_nimm_predicate>" "<round_constraint3>")))
3992	  (match_operand:VF_128 1 "register_operand" "v")
3993	  (const_int 1)))]
3994  "TARGET_AVX512F && <round_modev4sf_condition>"
3995  "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
3996  [(set_attr "type" "sseicvt")
3997   (set_attr "prefix" "evex")
3998   (set_attr "mode" "<ssescalarmode>")])
3999
;; vcvtusi2ss/sd with a 64-bit source: unsigned_float of the DImode
;; operand 2 goes to element 0 (merge mask 1), the other elements come from
;; operand 1.  Requires TARGET_64BIT.
4000(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
4001  [(set (match_operand:VF_128 0 "register_operand" "=v")
4002	(vec_merge:VF_128
4003	  (vec_duplicate:VF_128
4004	    (unsigned_float:<ssescalarmode>
4005	      (match_operand:DI 2 "<round_nimm_predicate>" "<round_constraint3>")))
4006	  (match_operand:VF_128 1 "register_operand" "v")
4007	  (const_int 1)))]
4008  "TARGET_AVX512F && TARGET_64BIT"
4009  "vcvtusi2<ssescalarmodesuffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
4010  [(set_attr "type" "sseicvt")
4011   (set_attr "prefix" "evex")
4012   (set_attr "mode" "<ssescalarmode>")])
4013
;; Signed integer vector to single-precision float vector (cvtdq2ps) over
;; VF1, with optional masking (<mask_name>) and rounding (<round_name>)
;; variants gated by the two *_mode512bit_condition terms.
4014(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
4015  [(set (match_operand:VF1 0 "register_operand" "=v")
4016	(float:VF1
4017	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "<round_constraint>")))]
4018  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
4019  "%vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4020  [(set_attr "type" "ssecvt")
4021   (set_attr "prefix" "maybe_vex")
4022   (set_attr "mode" "<sseinsnmode>")])
4023
;; Unsigned integer vector to single-precision float vector (vcvtudq2ps)
;; over VF1_AVX512VL, with <mask_name> and <round_name> variants.
4024(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
4025  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
4026	(unsigned_float:VF1_AVX512VL
4027	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
4028  "TARGET_AVX512F"
4029  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4030  [(set_attr "type" "ssecvt")
4031   (set_attr "prefix" "evex")
4032   (set_attr "mode" "<MODE>")])
4033
;; Expander for unsigned integer vector -> float vector conversion.
;; Dispatch performed by the C body:
;;   - V16SFmode:             gen_ufloatv16siv16sf2
;;   - else, TARGET_AVX512VL: gen_ufloatv4siv4sf2 for V4SFmode,
;;                            gen_ufloatv8siv8sf2 otherwise
;;   - else:                  ix86_expand_vector_convert_uns_vsivsf
;; Note the layout: the final "else" is indented like the outer "if", but
;; because the inner if/else is wrapped in braces it pairs with
;; "if (TARGET_AVX512VL)".
4034(define_expand "floatuns<sseintvecmodelower><mode>2"
4035  [(match_operand:VF1 0 "register_operand")
4036   (match_operand:<sseintvecmode> 1 "register_operand")]
4037  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
4038{
4039  if (<MODE>mode == V16SFmode)
4040    emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
4041  else
4042    if (TARGET_AVX512VL)
4043      {
4044	if (<MODE>mode == V4SFmode)
4045	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
4046	else
4047	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
4048      }
4049  else
4050    ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);
4051
4052  DONE;
4053})
4054
4055
4056;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
;; Maps each SImode vector mode to the lower-case name of the float vector
;; mode with the same element count.
4057(define_mode_attr sf2simodelower
4058  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])
4059
;; cvtps2dq: UNSPEC_FIX_NOTRUNC conversion of a packed-single vector to the
;; matching VI4_AVX integer vector, with an optional <mask_name> variant.
;; prefix_data16 is "1" unless TARGET_AVX, in which case it is "*".
4060(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
4061  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
4062	(unspec:VI4_AVX
4063	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "vm")]
4064	  UNSPEC_FIX_NOTRUNC))]
4065  "TARGET_SSE2 && <mask_mode512bit_condition>"
4066  "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4067  [(set_attr "type" "ssecvt")
4068   (set (attr "prefix_data16")
4069     (if_then_else
4070       (match_test "TARGET_AVX")
4071     (const_string "*")
4072     (const_string "1")))
4073   (set_attr "prefix" "maybe_vex")
4074   (set_attr "mode" "<sseinsnmode>")])
4075
;; 512-bit vcvtps2dq: UNSPEC_FIX_NOTRUNC conversion V16SF -> V16SI, with
;; <mask_name> and <round_name> variants.
4076(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
4077  [(set (match_operand:V16SI 0 "register_operand" "=v")
4078	(unspec:V16SI
4079	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
4080	  UNSPEC_FIX_NOTRUNC))]
4081  "TARGET_AVX512F"
4082  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4083  [(set_attr "type" "ssecvt")
4084   (set_attr "prefix" "evex")
4085   (set_attr "mode" "XI")])
4086
;; vcvtps2udq: UNSPEC_UNSIGNED_FIX_NOTRUNC conversion of a packed-single
;; vector to the matching VI4_AVX512VL integer vector, with <mask_name> and
;; <round_name> variants.
4087(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
4088  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
4089	(unspec:VI4_AVX512VL
4090	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
4091	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
4092  "TARGET_AVX512F"
4093  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4094  [(set_attr "type" "ssecvt")
4095   (set_attr "prefix" "evex")
4096   (set_attr "mode" "<sseinsnmode>")])
4097
;; vcvtps2qq for the 256/512-bit DImode vector destinations:
;; UNSPEC_FIX_NOTRUNC conversion from the <ssePSmode2> source, with
;; <mask_name> and <round_name> variants.  Requires AVX512DQ.
4098(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
4099  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
4100	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
4101		     UNSPEC_FIX_NOTRUNC))]
4102  "TARGET_AVX512DQ && <round_mode512bit_condition>"
4103  "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
4104  [(set_attr "type" "ssecvt")
4105   (set_attr "prefix" "evex")
4106   (set_attr "mode" "<sseinsnmode>")])
4107
;; 128-bit vcvtps2qq: UNSPEC_FIX_NOTRUNC conversion of the two low elements
;; of a V4SF operand (vec_select 0 and 1) to V2DI.  Requires both AVX512DQ
;; and AVX512VL; has a <mask_name> variant but no rounding variant.
4108(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
4109  [(set (match_operand:V2DI 0 "register_operand" "=v")
4110	(unspec:V2DI
4111	  [(vec_select:V2SF
4112	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
4113	     (parallel [(const_int 0) (const_int 1)]))]
4114	  UNSPEC_FIX_NOTRUNC))]
4115  "TARGET_AVX512DQ && TARGET_AVX512VL"
4116  "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
4117  [(set_attr "type" "ssecvt")
4118   (set_attr "prefix" "evex")
4119   (set_attr "mode" "TI")])
4120
;; vcvtps2uqq: packed SF -> packed unsigned DI for the 256- and 512-bit
;; destination modes, kept as UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires
;; AVX512DQ; <round_mode512bit_condition> is part of the insn condition for
;; the embedded-rounding variant.
(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
	(unspec:VI8_256_512
	  [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
4130
;; 128-bit vcvtps2uqq: converts the two low SF elements of the V4SF source
;; (picked by the vec_select) to unsigned DI, giving a V2DI result.
;; Requires both AVX512DQ and AVX512VL.
(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec:V2DI
	  [(vec_select:V2SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	     (parallel [(const_int 0) (const_int 1)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4143
;; 512-bit truncating SF -> SI conversion.  The any_fix code iterator
;; provides both the signed and the unsigned pattern; <fixsuffix> is
;; spliced into the mnemonic (vcvttps2<fixsuffix>dq) and the pattern name.
;; Only the <round_saeonly_name> flavour of rounding control is offered.
(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_fix:V16SI
	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512F"
  "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
4153
;; 256-bit vcvttps2dq: truncating V8SF -> signed V8SI conversion.
;; The condition combines TARGET_AVX with <mask_avx512vl_condition>, which
;; lets the masked variant carry an extra requirement.
(define_insn "fix_truncv8sfv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
4162
;; 128-bit cvttps2dq / vcvttps2dq: truncating V4SF -> signed V4SI
;; conversion.  The `%v' at the start of the template selects the
;; v-prefixed mnemonic when the AVX encoding is used.
(define_insn "fix_truncv4sfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "vm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   ;; prefix_rep is "1" for the non-AVX encoding and left at the default
   ;; ("*") when TARGET_AVX.
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   ;; NOTE(review): prefix_data16 is specified twice for this insn -- once
   ;; conditionally here and once unconditionally as "0" right below.
   ;; Confirm which of the two is intended and drop the other.
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "TI")])
4182
;; Expander for the unsigned truncating conversion of a packed SF vector
;; (any VF1 mode) to the integer vector of the same shape.  V16SF goes
;; straight to the ufix_truncv16sfv16si2 insn; every other mode is lowered
;; to a signed fix_trunc of an adjusted input followed by an XOR with the
;; value handed back by ix86_expand_adjust_ufix_to_sfix_si.
(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1 1 "register_operand")]
  "TARGET_SSE2"
{
  if (<MODE>mode != V16SFmode)
    {
      /* NOTE(review): the helper is defined outside this file; it
	 presumably rewrites the input so that a signed conversion is
	 usable and stores the matching XOR correction through its
	 second argument -- confirm against its definition.  */
      rtx xor_fixup;
      rtx adjusted_src
	= ix86_expand_adjust_ufix_to_sfix_si (operands[1], &xor_fixup);
      rtx signed_result = gen_reg_rtx (<sseintvecmode>mode);

      emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (signed_result,
							   adjusted_src));
      emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], signed_result,
					       xor_fixup));
    }
  else
    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0], operands[1]));

  DONE;
})
4201
4202;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4203;;
4204;; Parallel double-precision floating point conversion operations
4205;;
4206;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4207
;; cvtpi2pd: signed V2SI -> V2DF.  Alternative 0 reads the source from an
;; MMX register ("y"), alternative 1 from memory; only alternative 0 is
;; attributed to the "mmx" unit and given a data16 prefix.
(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
  "TARGET_SSE2"
  "cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_data16" "1,*")
   (set_attr "mode" "V2DF")])
4217
;; cvtpd2pi: V2DF -> V2SI with the result in an MMX register ("y"),
;; expressed as UNSPEC_FIX_NOTRUNC.
(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "btver2_decode" "direct")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "DI")])
4230
;; cvttpd2pi: truncating (`fix') V2DF -> V2SI conversion with the result in
;; an MMX register ("y").
(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
  "TARGET_SSE2"
  "cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "unit" "mmx")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
4241
;; cvtsi2sd / vcvtsi2sd: convert the SImode integer in operand 2 to DF and
;; merge it into element 0 of the result (vec_merge mask 1); the other
;; element is taken from operand 1.  Alternatives 0 and 1 are the
;; two-operand non-AVX forms (operand 1 tied to the destination, integer in
;; a register resp. memory); alternative 2 is the three-operand AVX form.
(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,x")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V2DF 1 "register_operand" "0,0,x")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd\t{%2, %0|%0, %2}
   cvtsi2sd\t{%2, %0|%0, %2}
   vcvtsi2sd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "DF")])
4262
;; cvtsi2sdq / vcvtsi2sdq: 64-bit counterpart of sse2_cvtsi2sd.  The DImode
;; integer in operand 2 is converted to DF and merged into element 0 of
;; operand 1.  Only available for 64-bit targets.  Alternative 2 (AVX) is
;; the one that can carry the <round_op3> rounding operand.
(define_insn "sse2_cvtsi2sdq<round_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:DI 2 "<round_nimm_predicate>" "r,m,<round_constraint3>")))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sdq\t{%2, %0|%0, %2}
   cvtsi2sdq\t{%2, %0|%0, %2}
   vcvtsi2sdq\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "DF")])
4284
;; vcvtss2usi: element 0 of a V4SF -> unsigned 32-bit integer in a general
;; register, expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC.
(define_insn "avx512f_vcvtss2usi<round_name>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
4297
;; vcvtss2usi with a DImode destination: element 0 of a V4SF -> unsigned
;; 64-bit integer in a general register.  64-bit targets only.  The
;; template uses the same mnemonic as the SImode pattern.
(define_insn "avx512f_vcvtss2usiq<round_name>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvtss2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])
4310
;; vcvttss2usi: truncating (unsigned_fix) conversion of element 0 of a V4SF
;; to an unsigned 32-bit integer in a general register.
(define_insn "avx512f_vcvttss2usi<round_saeonly_name>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unsigned_fix:SI
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
4322
;; vcvttss2usi with a DImode destination: truncating conversion of element
;; 0 of a V4SF to an unsigned 64-bit integer.  64-bit targets only.
(define_insn "avx512f_vcvttss2usiq<round_saeonly_name>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unsigned_fix:DI
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])
4334
;; vcvtsd2usi: element 0 of a V2DF -> unsigned 32-bit integer in a general
;; register, expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC.
(define_insn "avx512f_vcvtsd2usi<round_name>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
4347
;; vcvtsd2usi with a DImode destination: element 0 of a V2DF -> unsigned
;; 64-bit integer in a general register.  64-bit targets only.
(define_insn "avx512f_vcvtsd2usiq<round_name>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unspec:DI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])
4360
;; vcvttsd2usi: truncating (unsigned_fix) conversion of element 0 of a V2DF
;; to an unsigned 32-bit integer in a general register.
(define_insn "avx512f_vcvttsd2usi<round_saeonly_name>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unsigned_fix:SI
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "SI")])
4372
;; vcvttsd2usi with a DImode destination: truncating conversion of element
;; 0 of a V2DF to an unsigned 64-bit integer.  64-bit targets only.
(define_insn "avx512f_vcvttsd2usiq<round_saeonly_name>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(unsigned_fix:DI
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "DI")])
4384
;; cvtsd2si / vcvtsd2si: element 0 of a V2DF -> signed 32-bit integer in a
;; general register, expressed as UNSPEC_FIX_NOTRUNC.  Alternative 0 takes
;; the source in a vector register; alternative 1 accepts whatever
;; <round_constraint2> expands to.
(define_insn "sse2_cvtsd2si<round_name>"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
4401
;; Variant of sse2_cvtsd2si whose source is a plain DFmode operand (vector
;; register or memory) instead of element 0 of a V2DF; same cvtsd2si
;; template and UNSPEC_FIX_NOTRUNC representation.
(define_insn "sse2_cvtsd2si_2"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(unspec:SI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si\t{%1, %0|%0, %q1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
4415
;; cvtsd2si{q} / vcvtsd2si{q}: element 0 of a V2DF -> signed 64-bit integer
;; in a general register, expressed as UNSPEC_FIX_NOTRUNC.  64-bit targets
;; only.  The braced `q' suffix appears only in the AT&T-syntax output.
(define_insn "sse2_cvtsd2siq<round_name>"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
4431
;; Variant of sse2_cvtsd2siq whose source is a plain DFmode operand (vector
;; register or memory) instead of element 0 of a V2DF.  64-bit targets
;; only.
(define_insn "sse2_cvtsd2siq_2"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(unspec:DI [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
		   UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvtsd2si{q}\t{%1, %0|%0, %q1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
4445
;; cvttsd2si / vcvttsd2si: truncating (`fix') conversion of element 0 of a
;; V2DF to a signed 32-bit integer in a general register.
(define_insn "sse2_cvttsd2si<round_saeonly_name>"
  [(set (match_operand:SI 0 "register_operand" "=r,r")
	(fix:SI
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
4462
;; cvttsd2si{q} / vcvttsd2si{q}: truncating (`fix') conversion of element 0
;; of a V2DF to a signed 64-bit integer.  64-bit targets only.
(define_insn "sse2_cvttsd2siq<round_saeonly_name>"
  [(set (match_operand:DI 0 "register_operand" "=r,r")
	(fix:DI
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_predicate>" "v,<round_saeonly_constraint2>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2 && TARGET_64BIT"
  "%vcvttsd2si{q}\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "DI")])
4478
;; For float<si2dfmodelower><mode>2 insn pattern
;; SI vector mode with the same number of elements as the given DF vector
;; mode.
(define_mode_attr si2dfmode
  [(V8DF "V8SI") (V4DF "V4SI")])
;; Lower-case spelling of the same mapping, for use inside pattern names.
(define_mode_attr si2dfmodelower
  [(V8DF "v8si") (V4DF "v4si")])
4484
;; vcvtdq2pd for the modes in VF2_512_256: signed SI vector (<si2dfmode>)
;; -> DF vector with the same number of elements.
(define_insn "float<si2dfmodelower><mode>2<mask_name>"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
  "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
4493
;; vcvtqq2pd / vcvtuqq2pd: packed 64-bit integers (<sseintvecmode>) ->
;; packed DF.  The any_float code iterator covers the signed and unsigned
;; conversions; <floatsuffix> is spliced into the mnemonic and the name.
;; Operand 1 uses <round_constraint> rather than a fixed "vm" so that the
;; embedded-rounding variant created through <round_name> gets the same
;; operand restriction as every other <round_name> pattern in this file.
(define_insn "<floatsuffix>float<sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
	(any_float:VF2_AVX512VL
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512DQ"
  "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
4503
;; For <floatsuffix>float<sselongvecmodelower><mode>2 insn patterns
;; Braced suffix appended to the vcvt[u]qq2ps mnemonic; empty for V8SF,
;; "{y}" for V4SF.
(define_mode_attr qq2pssuff
  [(V8SF "") (V4SF "{y}")])

;; DI vector mode with the same number of elements as the given SF vector
;; mode.
(define_mode_attr sselongvecmode
  [(V8SF "V8DI") (V4SF  "V4DI")])

;; Lower-case spelling of <sselongvecmode>, for use inside pattern names.
(define_mode_attr sselongvecmodelower
  [(V8SF "v8di") (V4SF  "v4di")])

;; Integer mode name per float vector mode; used as the value of the "mode"
;; insn attribute by <fixsuffix>fix_trunc<mode><sselongvecmodelower>2.
(define_mode_attr sseintvecmode3
  [(V8SF "XI") (V4SF "OI")
   (V8DF "OI") (V4DF "TI")])
4517
;; vcvtqq2ps / vcvtuqq2ps: packed 64-bit integers (<sselongvecmode>) ->
;; packed SF for the modes in VF1_128_256VL.  <qq2pssuff> supplies the
;; per-mode mnemonic suffix.  Requires AVX512DQ; the rounding variant is
;; additionally subject to <round_modev8sf_condition>.
(define_insn "<floatsuffix>float<sselongvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
	 (any_float:VF1_128_256VL
	   (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512DQ && <round_modev8sf_condition>"
  "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
4527
;; 128-bit vcvtqq2ps{x} / vcvtuqq2ps{x}: V2DI -> V2SF placed in the low half
;; of a V4SF whose upper half is the zero vector (vec_concat with a
;; constant zero V2SF).  Requires AVX512DQ and AVX512VL.
(define_insn "*<floatsuffix>floatv2div2sf2"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (any_float:V2SF
	    (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	  (const_vector:V2SF [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
4538
;; Explicitly written masked form of the 128-bit vcvt[u]qq2ps{x} pattern.
;; The low V2SF half is a vec_merge, under mask register operand 3, of the
;; converted V2DI and the two low elements of operand 2; operand 2 is
;; either tied to the destination or a constant (constraint "0C").  The
;; upper V2SF half is the zero vector.
(define_insn "<floatsuffix>floatv2div2sf2_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_concat:V4SF
	  (vec_merge:V2SF
	    (any_float:V2SF
	      (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "vector_move_operand" "0C")
	      (parallel [(const_int 0) (const_int 1)]))
	    (match_operand:QI 3 "register_operand" "Yk"))
	  (const_vector:V2SF [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
4554
;; vcvtudq2pd for the modes in VF2_512_256VL: unsigned SI vector
;; (<si2dfmode>) -> DF vector with the same number of elements.
(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
  [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
	(unsigned_float:VF2_512_256VL
	  (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
4564
;; 128-bit vcvtudq2pd: the two low elements of a V4SI (vec_select 0, 1) are
;; converted as unsigned integers to V2DF.  Requires AVX512VL.
(define_insn "ufloatv2siv2df2<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(unsigned_float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512VL"
  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V2DF")])
4576
;; vcvtdq2pd taking the low eight elements (vec_select 0..7) of a V16SI and
;; converting them to V8DF.  The source is printed with the `%t' operand
;; modifier rather than as a plain %1.
(define_insn "avx512f_cvtdq2pd512_2"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
	(float:V8DF
	  (vec_select:V8SI
	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
4591
;; vcvtdq2pd taking the low four elements (vec_select 0..3) of a V8SI and
;; converting them to V4DF.  The source is printed with the `%x' operand
;; modifier rather than as a plain %1.
(define_insn "avx_cvtdq2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
	(float:V4DF
	  (vec_select:V4SI
	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4DF")])
4604
;; cvtdq2pd / vcvtdq2pd (128-bit): the two low elements of a V4SI
;; (vec_select 0, 1) are converted as signed integers to V2DF.  The Intel
;; syntax side of the template prints the source with the `%q' modifier.
(define_insn "sse2_cvtdq2pd<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(float:V2DF
	  (vec_select:V2SI
	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "ssememalign" "64")
   (set_attr "mode" "V2DF")])
4617
;; 512-bit vcvtpd2dq: V8DF -> signed V8SI, expressed as UNSPEC_FIX_NOTRUNC.
;; Maskable and available with embedded rounding through the subst
;; attributes in the name.
(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(unspec:V8SI
	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "OI")])
4628
;; 256-bit vcvtpd2dq{y}: V4DF -> signed V4SI, expressed as
;; UNSPEC_FIX_NOTRUNC.  The masked variant is additionally subject to
;; <mask_avx512vl_condition>.
(define_insn "avx_cvtpd2dq256<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
4638
;; Expander producing a V8SI whose low half is the UNSPEC_FIX_NOTRUNC
;; conversion of a V4DF and whose high half is operand 2, which the
;; preparation code sets to the V4SI zero vector.
(define_expand "avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_dup 2)))]
  "TARGET_AVX"
{
  /* The match_dup above refers to this all-zero upper half.  */
  operands[2] = CONST0_RTX (V4SImode);
})
4647
;; Matcher for the RTL shape built by the avx_cvtpd2dq256_2 expander:
;; vcvtpd2dq{y} of a V4DF concatenated with a zero V4SI (const0_operand).
;; The V8SI destination is printed with the `%x' modifier.
(define_insn "*avx_cvtpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_concat:V8SI
	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "xm")]
		       UNSPEC_FIX_NOTRUNC)
	  (match_operand:V4SI 2 "const0_operand")))]
  "TARGET_AVX"
  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
4660
;; 128-bit cvtpd2dq / vcvtpd2dq{x}: V2DF -> signed V2SI (as
;; UNSPEC_FIX_NOTRUNC) in the low half of a V4SI whose upper half is zero.
;; The output code picks the v-prefixed template, which is the only one
;; carrying the mask operand, when TARGET_AVX.
(define_insn "sse2_cvtpd2dq<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_concat:V4SI
	  (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
		       UNSPEC_FIX_NOTRUNC)
	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  return (TARGET_AVX
	  ? "vcvtpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
	  : "cvtpd2dq\t{%1, %0|%0, %1}");
}
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
4682
4683;; For ufix_notrunc* insn patterns
;; Braced suffix appended to the vcvtpd2udq mnemonic; empty for V8DF,
;; "{y}" for V4DF.
(define_mode_attr pd2udqsuff
  [(V8DF "") (V4DF "{y}")])
4686
;; vcvtpd2udq for the modes in VF2_512_256VL: packed DF -> packed unsigned
;; SI (<si2dfmode>), expressed as UNSPEC_UNSIGNED_FIX_NOTRUNC.
;; <pd2udqsuff> supplies the per-mode mnemonic suffix.
(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
  [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
	(unspec:<si2dfmode>
	  [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
4697
;; 128-bit vcvtpd2udq{x}: V2DF -> unsigned V2SI (as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC) in the low half of a V4SI whose upper half
;; is zero.  Requires AVX512VL.
(define_insn "ufix_notruncv2dfv2si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_concat:V4SI
	  (unspec:V2SI
	    [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
	    UNSPEC_UNSIGNED_FIX_NOTRUNC)
	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vcvtpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4710
;; 512-bit truncating V8DF -> V8SI conversion.  any_fix provides the signed
;; and unsigned patterns; <fixsuffix> is spliced into the mnemonic
;; (vcvttpd2<fixsuffix>dq) and the pattern name.
(define_insn "<fixsuffix>fix_truncv8dfv8si2<mask_name><round_saeonly_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_fix:V8SI
	  (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512F"
  "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "OI")])
4720
;; 128-bit vcvttpd2udq{x}: truncating V2DF -> unsigned V2SI in the low half
;; of a V4SI whose upper half is zero.  Requires AVX512VL.
(define_insn "ufix_truncv2dfv2si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_concat:V4SI
	  (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vcvttpd2udq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4731
;; 256-bit vcvttpd2dq{y}: truncating V4DF -> signed V4SI conversion.
;; The unmasked form only needs AVX.  The masked form generated through
;; <mask_name> is additionally gated on <mask_avx512vl_condition>, in line
;; with the other 256-bit maskable conversions in this file (for example
;; fix_truncv8sfv8si2 and avx_cvtpd2dq256).
(define_insn "fix_truncv4dfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
4740
;; 256-bit vcvttpd2udq{y}: truncating V4DF -> unsigned V4SI conversion.
;; Requires AVX512VL together with AVX512F.
(define_insn "ufix_truncv4dfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VL && TARGET_AVX512F"
  "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
4749
;; vcvttpd2qq / vcvttpd2uqq: truncating packed DF -> packed 64-bit integer
;; (<sseintvecmode>) for the modes in VF2_AVX512VL.  any_fix provides the
;; signed and unsigned patterns.  Requires AVX512DQ; the SAE variant is
;; additionally subject to <round_saeonly_mode512bit_condition>.
(define_insn "<fixsuffix>fix_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
	(any_fix:<sseintvecmode>
	  (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4759
;; vcvtpd2qq: packed DF -> packed signed 64-bit integer (<sseintvecmode>)
;; for the modes in VF2_AVX512VL, expressed as UNSPEC_FIX_NOTRUNC.
;; Requires AVX512DQ; the rounding variant is additionally subject to
;; <round_mode512bit_condition>.
(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
	(unspec:<sseintvecmode>
	  [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4770
;; vcvtpd2uqq: packed DF -> packed unsigned 64-bit integer
;; (<sseintvecmode>) for the modes in VF2_AVX512VL, expressed as
;; UNSPEC_UNSIGNED_FIX_NOTRUNC.  Requires AVX512DQ; the rounding variant is
;; additionally subject to <round_mode512bit_condition>.
(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
	(unspec:<sseintvecmode>
	  [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4781
;; vcvttps2qq / vcvttps2uqq: truncating packed SF -> packed 64-bit integer
;; (<sselongvecmode>) for the modes in VF1_128_256VL.  any_fix provides the
;; signed and unsigned patterns.  Requires AVX512DQ; the SAE variant is
;; additionally subject to <round_saeonly_modev8sf_condition>.
(define_insn "<fixsuffix>fix_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
  [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
	(any_fix:<sselongvecmode>
	  (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
  "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode3>")])
4791
;; 128-bit vcvttps2qq / vcvttps2uqq: the two low SF elements of a V4SF
;; (vec_select 0, 1) are converted with truncation to V2DI.  Requires
;; AVX512DQ and AVX512VL.
(define_insn "<fixsuffix>fix_truncv2sfv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(any_fix:V2DI
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
4803
;; vcvttps2udq for the modes in VF1_128_256VL: truncating packed SF ->
;; packed unsigned SI (<sseintvecmode>).  Requires AVX512VL.
(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
	(unsigned_fix:<sseintvecmode>
	  (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512VL"
  "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseintvecmode2>")])
4813
;; Expander producing a V8SI whose low half is the truncating (`fix')
;; conversion of a V4DF and whose high half is operand 2, which the
;; preparation code sets to the V4SI zero vector.
(define_expand "avx_cvttpd2dq256_2"
  [(set (match_operand:V8SI 0 "register_operand")
	(vec_concat:V8SI
	  (fix:V4SI
	    (match_operand:V4DF 1 "nonimmediate_operand"))
	  (match_dup 2)))]
  "TARGET_AVX"
{
  /* The match_dup above refers to this all-zero upper half.  */
  operands[2] = CONST0_RTX (V4SImode);
})
4821
;; 128-bit cvttpd2dq / vcvttpd2dq{x}: truncating V2DF -> signed V2SI in the
;; low half of a V4SI whose upper half is zero.  The output code picks the
;; v-prefixed template, which is the only one carrying the mask operand,
;; when TARGET_AVX.
(define_insn "sse2_cvttpd2dq<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_concat:V4SI
	  (fix:V2SI
	    (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  return (TARGET_AVX
	  ? "vcvttpd2dq{x}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
	  : "cvttpd2dq\t{%1, %0|%0, %1}");
}
  [(set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector")
   (set_attr "amdfam10_decode" "double")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
4840
;; cvtsd2ss / vcvtsd2ss: the float_truncate of operand 2 is merged into
;; element 0 of the V4SF result (vec_merge mask 1); the remaining elements
;; come from operand 1.  Alternatives 0 and 1 are the two-operand non-AVX
;; forms (register resp. memory source, operand 1 tied to the destination);
;; alternative 2 is the AVX form and the only one that can carry the
;; <round_op3> rounding operand.
(define_insn "sse2_cvtsd2ss<round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float_truncate:V2SF
	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
	  (match_operand:V4SF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsd2ss\t{%2, %0|%0, %2}
   cvtsd2ss\t{%2, %0|%0, %q2}
   vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,<round_prefix>")
   (set_attr "mode" "SF")])
4862
;; cvtss2sd / vcvtss2sd: the float_extend of the two low SF elements of
;; operand 2 is merged into element 0 of the V2DF result (vec_merge mask
;; 1); the other element comes from operand 1.  Alternatives 0 and 1 are
;; the two-operand non-AVX forms; alternative 2 is the AVX form and the
;; only one that can carry the <round_saeonly_op3> operand.
(define_insn "sse2_cvtss2sd<round_saeonly_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (float_extend:V2DF
	    (vec_select:V2SF
	      (match_operand:V4SF 2 "<round_saeonly_nimm_predicate>" "x,m,<round_saeonly_constraint>")
	      (parallel [(const_int 0) (const_int 1)])))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtss2sd\t{%2, %0|%0, %2}
   cvtss2sd\t{%2, %0|%0, %k2}
   vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "athlon_decode" "direct,direct,*")
   (set_attr "bdver1_decode" "direct,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
   (set_attr "mode" "DF")])
4885
;; 512-bit packed conversion: each of the eight DF elements of
;; operand 1 is truncated to SF.
(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (float_truncate:V8SF
          (match_operand:V8DF 1 "<round_nimm_predicate>"
                              "<round_constraint>")))]
  "TARGET_AVX512F"
  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type"   "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode"   "V8SF")])
4895
;; 256-bit packed conversion: the four DF elements of operand 1 are
;; truncated to a V4SF result.  The {y} suffix is emitted only in the
;; AT&T-dialect half of the template.
(define_insn "avx_cvtpd2ps256<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (float_truncate:V4SF
          (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type"          "ssecvt")
   (set_attr "prefix"        "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode"          "V4SF")])
4906
;; Packed V2DF -> V2SF conversion delivered as a V4SF whose second half
;; is operand 2.  Callers pass only operands 0 and 1; operand 2 is
;; filled in below with the V2SF zero constant.
(define_expand "sse2_cvtpd2ps"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand"))
          (match_dup 2)))]
  "TARGET_SSE2"
{
  operands[2] = CONST0_RTX (V2SFmode);
})
4915
;; Masked variant of sse2_cvtpd2ps: the converted-and-zero-padded V4SF
;; value is merged with operand 2 under the QImode mask in operand 3.
;; Operand 4 is internal and is set to the V2SF zero constant below.
(define_expand "sse2_cvtpd2ps_mask"
  [(set (match_operand:V4SF 0 "register_operand")
        (vec_merge:V4SF
          (vec_concat:V4SF
            (float_truncate:V2SF
              (match_operand:V2DF 1 "nonimmediate_operand"))
            (match_dup 4))
          (match_operand:V4SF 2 "register_operand")
          (match_operand:QI 3 "register_operand")))]
  "TARGET_SSE2"
{
  operands[4] = CONST0_RTX (V2SFmode);
})
4927
;; Matcher for the RTL produced by the sse2_cvtpd2ps expanders: V2DF
;; truncated to V2SF and concatenated with a zero V2SF (operand 2 must
;; satisfy const0_operand).  The output routine picks the VEX mnemonic
;; when TARGET_AVX is set and the legacy one otherwise.
(define_insn "*sse2_cvtpd2ps<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
        (vec_concat:V4SF
          (float_truncate:V2SF
            (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
          (match_operand:V2SF 2 "const0_operand")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  return (TARGET_AVX
          ? "vcvtpd2ps{x}\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
          : "cvtpd2ps\t{%1, %0|%0, %1}");
}
  [(set_attr "type"            "ssecvt")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode"   "vector")
   (set_attr "bdver1_decode"   "double")
   (set_attr "prefix_data16"   "1")
   (set_attr "prefix"          "maybe_vex")
   (set_attr "mode"            "V4SF")])
4948
;; Maps a double-precision vector mode to the single-precision vector
;; mode with the same element count.  Used by the
;; <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern.
(define_mode_attr sf2dfmode
  [(V8DF "V8SF")
   (V4DF "V4SF")])
4952
;; Packed SF -> DF extension for the VF2_512_256 modes; the source mode
;; is obtained from the destination mode through <sf2dfmode>.
(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
        (float_extend:VF2_512_256
          (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>"
                                     "<round_saeonly_constraint>")))]
  "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
  "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type"   "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode"   "<MODE>")])
4962
;; Extend elements 0..3 of a V8SF operand to V4DF.  The operand is
;; printed with the %x modifier in both dialects.
(define_insn "*avx_cvtps2pd256_2"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand" "xm")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX"
  "vcvtps2pd\t{%x1, %0|%0, %x1}"
  [(set_attr "type"   "ssecvt")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V4DF")])
4975
;; Extend elements 0..7 of a V16SF operand to V8DF.  The operand is
;; printed with the %t modifier in both dialects.
(define_insn "vec_unpacks_lo_v16sf"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
        (float_extend:V8DF
          (vec_select:V8SF
            (match_operand:V16SF 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vcvtps2pd\t{%t1, %0|%0, %t1}"
  [(set_attr "type"   "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode"   "V8DF")])
4990
;; Vector -> mask register conversion (UNSPEC_CVTINT2MASK) for the
;; modes of the VI12_AVX512VL iterator; enabled under AVX512BW.
(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
          UNSPEC_CVTINT2MASK))]
  "TARGET_AVX512BW"
  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"   "<sseinsnmode>")])
5000
;; Vector -> mask register conversion (UNSPEC_CVTINT2MASK) for the
;; modes of the VI48_AVX512VL iterator; enabled under AVX512DQ.
(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
        (unspec:<avx512fmaskmode>
          [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
          UNSPEC_CVTINT2MASK))]
  "TARGET_AVX512DQ"
  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"   "<sseinsnmode>")])
5010
;; Mask register -> vector conversion for the VI12_AVX512VL modes,
;; expressed as a vec_merge of two constants under the mask in
;; operand 1.  Operands 2 and 3 are internal: all-ones and zero.
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
        (vec_merge:VI12_AVX512VL
          (match_dup 2)
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 1 "register_operand")))]
  "TARGET_AVX512BW"
{
  /* First vec_merge arm: the all-ones vector.  */
  operands[2] = CONSTM1_RTX (<MODE>mode);
  /* Second vec_merge arm: the zero vector.  */
  operands[3] = CONST0_RTX (<MODE>mode);
})
5022
;; Matcher for the VI12_AVX512VL cvtmask2 expansion: operand 2 must be
;; the all-ones constant and operand 3 the zero constant, merged under
;; the mask register in operand 1.
(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI12_AVX512VL
          (match_operand:VI12_AVX512VL 2 "constm1_operand")
          (match_operand:VI12_AVX512VL 3 "const0_operand")
          (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"   "<sseinsnmode>")])
5033
;; Mask register -> vector conversion for the VI48_AVX512VL modes,
;; expressed as a vec_merge of two constants under the mask in
;; operand 1.  Operands 2 and 3 are internal: all-ones and zero.
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
        (vec_merge:VI48_AVX512VL
          (match_dup 2)
          (match_dup 3)
          (match_operand:<avx512fmaskmode> 1 "register_operand")))]
  "TARGET_AVX512DQ"
{
  /* First vec_merge arm: the all-ones vector.  */
  operands[2] = CONSTM1_RTX (<MODE>mode);
  /* Second vec_merge arm: the zero vector.  */
  operands[3] = CONST0_RTX (<MODE>mode);
})
5045
;; Matcher for the VI48_AVX512VL cvtmask2 expansion: operand 2 must be
;; the all-ones constant and operand 3 the zero constant, merged under
;; the mask register in operand 1.
(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
        (vec_merge:VI48_AVX512VL
          (match_operand:VI48_AVX512VL 2 "constm1_operand")
          (match_operand:VI48_AVX512VL 3 "const0_operand")
          (match_operand:<avx512fmaskmode> 1 "register_operand" "Yk")))]
  "TARGET_AVX512DQ"
  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "prefix" "evex")
   (set_attr "mode"   "<sseinsnmode>")])
5056
;; Extend elements 0 and 1 of a V4SF operand to V2DF.  The leading %v
;; in the template selects between the legacy and the v-prefixed
;; mnemonic; the Intel-dialect operand is printed with %q.
(define_insn "sse2_cvtps2pd<mask_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand" "vm")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type"            "ssecvt")
   (set_attr "amdfam10_decode" "direct")
   (set_attr "athlon_decode"   "double")
   (set_attr "bdver1_decode"   "double")
   (set_attr "prefix_data16"   "0")
   (set_attr "prefix"          "maybe_vex")
   (set_attr "mode"            "V2DF")])
5072
;; Extend the high two SF elements of operand 1 to V2DF in two steps:
;;   1. select elements 6,7,2,3 of (scratch ++ operand 1), which puts
;;      elements 2 and 3 of operand 1 into positions 0 and 1 of the
;;      scratch register (operand 2);
;;   2. float_extend elements 0 and 1 of the scratch to V2DF.
(define_expand "vec_unpacks_hi_v4sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_dup 2)
            (match_operand:V4SF 1 "nonimmediate_operand"))
          (parallel [(const_int 6)
                     (const_int 7)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  /* Fresh V4SF scratch shared by both sets.  */
  operands[2] = gen_reg_rtx (V4SFmode);
})
5088
;; Extend the high four SF elements of operand 1 to V4DF: first copy
;; elements 4..7 into a V4SF scratch (operand 2), then float_extend
;; the scratch.
(define_expand "vec_unpacks_hi_v8sf"
  [(set (match_dup 2)
        (vec_select:V4SF
          (match_operand:V8SF 1 "register_operand")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (float_extend:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SFmode);
})
5100
;; Extend the high eight SF elements of operand 1 to V8DF: first copy
;; elements 8..15 into a V8SF scratch (operand 2), then float_extend
;; the scratch.
(define_expand "vec_unpacks_hi_v16sf"
  [(set (match_dup 2)
        (vec_select:V8SF
          (match_operand:V16SF 1 "register_operand")
          (parallel [(const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))
   (set (match_operand:V8DF 0 "register_operand")
        (float_extend:V8DF
          (match_dup 2)))]
  "TARGET_AVX512F"
{
  operands[2] = gen_reg_rtx (V8SFmode);
})
5114
;; Extend elements 0 and 1 of a V4SF operand to V2DF.  No preparation
;; code: the template is emitted as written.
(define_expand "vec_unpacks_lo_v4sf"
  [(set (match_operand:V2DF 0 "register_operand")
        (float_extend:V2DF
          (vec_select:V2SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
5122
;; Extend elements 0..3 of a V8SF operand to V4DF.  No preparation
;; code: the template is emitted as written.
(define_expand "vec_unpacks_lo_v8sf"
  [(set (match_operand:V4DF 0 "register_operand")
        (float_extend:V4DF
          (vec_select:V4SF
            (match_operand:V8SF 1 "nonimmediate_operand")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
5131
;; For an integer vector mode, the floating-point vector mode produced
;; by the vec_unpack*_float_{hi,lo} expanders: half as many elements,
;; each twice as wide.
(define_mode_attr sseunpackfltmode
  [(V8HI  "V4SF")
   (V4SI  "V2DF")
   (V16HI "V8SF")
   (V8SI  "V4DF")
   (V32HI "V16SF")
   (V16SI "V8DF")])
5135
;; Signed unpack-high followed by int -> float conversion for the
;; VI2_AVX512F modes.  Operand 0 is the <sseunpackfltmode> result,
;; operand 1 the integer source vector.
(define_expand "vec_unpacks_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the high half with the signed unpack expander.  */
  emit_insn (gen_vec_unpacks_hi_<mode> (widened, operands[1]));

  /* Convert the widened integers to floating point.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
5148
;; Signed unpack-low followed by int -> float conversion for the
;; VI2_AVX512F modes.  Operand 0 is the <sseunpackfltmode> result,
;; operand 1 the integer source vector.
(define_expand "vec_unpacks_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the low half with the signed unpack expander.  */
  emit_insn (gen_vec_unpacks_lo_<mode> (widened, operands[1]));

  /* Convert the widened integers to floating point.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
5161
;; Unsigned unpack-high followed by int -> float conversion for the
;; VI2_AVX512F modes.  Operand 0 is the <sseunpackfltmode> result,
;; operand 1 the integer source vector.
(define_expand "vec_unpacku_float_hi_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the high half with the unsigned unpack expander.  */
  emit_insn (gen_vec_unpacku_hi_<mode> (widened, operands[1]));

  /* Convert the widened integers with a plain FLOAT rtx.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
5174
;; Unsigned unpack-low followed by int -> float conversion for the
;; VI2_AVX512F modes.  Operand 0 is the <sseunpackfltmode> result,
;; operand 1 the integer source vector.
(define_expand "vec_unpacku_float_lo_<mode>"
  [(match_operand:<sseunpackfltmode> 0 "register_operand")
   (match_operand:VI2_AVX512F 1 "register_operand")]
  "TARGET_SSE2"
{
  rtx widened = gen_reg_rtx (<sseunpackmode>mode);
  rtx as_float;

  /* Widen the low half with the unsigned unpack expander.  */
  emit_insn (gen_vec_unpacku_lo_<mode> (widened, operands[1]));

  /* Convert the widened integers with a plain FLOAT rtx.  */
  as_float = gen_rtx_FLOAT (<sseunpackfltmode>mode, widened);
  emit_insn (gen_rtx_SET (VOIDmode, operands[0], as_float));
  DONE;
})
5187
;; Convert the high two SI elements of operand 1 to V2DF:
;;   1. select elements 2,3,2,3 into a V4SI scratch (operand 2), so the
;;      high pair lands in positions 0 and 1;
;;   2. convert elements 0 and 1 of the scratch to V2DF.
(define_expand "vec_unpacks_float_hi_v4si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_operand:V2DF 0 "register_operand")
        (float:V2DF
          (vec_select:V2SI
            (match_dup 2)
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
5201
;; Convert elements 0 and 1 of a V4SI operand to V2DF.  No preparation
;; code: the template is emitted as written.
(define_expand "vec_unpacks_float_lo_v4si"
  [(set (match_operand:V2DF 0 "register_operand")
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand")
            (parallel [(const_int 0)
                       (const_int 1)]))))]
  "TARGET_SSE2")
5209
;; Convert the high four SI elements of operand 1 to V4DF: copy
;; elements 4..7 into a V4SI scratch (operand 2), then convert it.
(define_expand "vec_unpacks_float_hi_v8si"
  [(set (match_dup 2)
        (vec_select:V4SI
          (match_operand:V8SI 1 "nonimmediate_operand")
          (parallel [(const_int 4)
                     (const_int 5)
                     (const_int 6)
                     (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
        (float:V4DF
          (match_dup 2)))]
  "TARGET_AVX"
{
  operands[2] = gen_reg_rtx (V4SImode);
})
5221
;; Convert elements 0..3 of a V8SI operand to V4DF.  No preparation
;; code: the template is emitted as written.
(define_expand "vec_unpacks_float_lo_v8si"
  [(set (match_operand:V4DF 0 "register_operand")
        (float:V4DF
          (vec_select:V4SI
            (match_operand:V8SI 1 "nonimmediate_operand")
            (parallel [(const_int 0)
                       (const_int 1)
                       (const_int 2)
                       (const_int 3)]))))]
  "TARGET_AVX")
5230
;; Convert the high eight SI elements of operand 1 to V8DF: copy
;; elements 8..15 into a V8SI scratch (operand 2), then convert it.
(define_expand "vec_unpacks_float_hi_v16si"
  [(set (match_dup 2)
        (vec_select:V8SI
          (match_operand:V16SI 1 "nonimmediate_operand")
          (parallel [(const_int 8)  (const_int 9)  (const_int 10) (const_int 11)
                     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))
   (set (match_operand:V8DF 0 "register_operand")
        (float:V8DF
          (match_dup 2)))]
  "TARGET_AVX512F"
{
  operands[2] = gen_reg_rtx (V8SImode);
})
5244
;; Convert elements 0..7 of a V16SI operand to V8DF.  No preparation
;; code: the template is emitted as written.
(define_expand "vec_unpacks_float_lo_v16si"
  [(set (match_operand:V8DF 0 "register_operand")
        (float:V8DF
          (vec_select:V8SI
            (match_operand:V16SI 1 "nonimmediate_operand")
            (parallel [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
                       (const_int 4) (const_int 5) (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F")
5255
;; Unsigned conversion of the high two SI elements of operand 1 to
;; V2DF, built from the signed conversion plus a fix-up:
;;   op5 = elements 2,3,2,3 of operand 1
;;   op6 = signed float of elements 0,1 of op5
;;   op7 = (op6 < 0)              -- operand 3 is the zero vector
;;   op8 = op7 & 2**32            -- operand 4 is the 2**32 vector
;;   op0 = op6 + op8
;; Operands 3..8 are all created by the preparation code.
(define_expand "vec_unpacku_float_hi_v4si"
  [(set (match_dup 5)
        (vec_select:V4SI
          (match_operand:V4SI 1 "nonimmediate_operand")
          (parallel [(const_int 2)
                     (const_int 3)
                     (const_int 2)
                     (const_int 3)])))
   (set (match_dup 6)
        (float:V2DF
          (vec_select:V2SI
            (match_dup 5)
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 7)
        (lt:V2DF (match_dup 6) (match_dup 3)))
   (set (match_dup 8)
        (and:V2DF (match_dup 7) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand")
        (plus:V2DF (match_dup 6) (match_dup 8)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  /* Constant vectors used by the compare and the AND.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, two32_elt));

  /* Scratch registers, allocated in operand order.  */
  operands[5] = gen_reg_rtx (V4SImode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
  operands[8] = gen_reg_rtx (V2DFmode);
})
5291
;; Unsigned conversion of the low two SI elements of operand 1 to
;; V2DF, built from the signed conversion plus a fix-up:
;;   op5 = signed float of elements 0,1 of operand 1
;;   op6 = (op5 < 0)              -- operand 3 is the zero vector
;;   op7 = op6 & 2**32            -- operand 4 is the 2**32 vector
;;   op0 = op5 + op7
;; Operands 3..7 are all created by the preparation code.
(define_expand "vec_unpacku_float_lo_v4si"
  [(set (match_dup 5)
        (float:V2DF
          (vec_select:V2SI
            (match_operand:V4SI 1 "nonimmediate_operand")
            (parallel [(const_int 0)
                       (const_int 1)]))))
   (set (match_dup 6)
        (lt:V2DF (match_dup 5) (match_dup 3)))
   (set (match_dup 7)
        (and:V2DF (match_dup 6) (match_dup 4)))
   (set (match_operand:V2DF 0 "register_operand")
        (plus:V2DF (match_dup 5) (match_dup 7)))]
  "TARGET_SSE2"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  /* Constant vectors used by the compare and the AND.  */
  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
  operands[4] = force_reg (V2DFmode,
                           ix86_build_const_vector (V2DFmode, 1, two32_elt));

  /* Scratch registers, allocated in operand order.  */
  operands[5] = gen_reg_rtx (V2DFmode);
  operands[6] = gen_reg_rtx (V2DFmode);
  operands[7] = gen_reg_rtx (V2DFmode);
})
5320
;; Unsigned conversion of the high four SI elements of operand 1 to
;; V4DF: extract the high half, convert it as signed, then add 2**32
;; to the elements whose signed conversion compared less than zero.
(define_expand "vec_unpacku_float_hi_v8si"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V8SI 1 "register_operand")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt, zero, bias, hi_si, cvt, is_neg, fixup;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  /* Constants first, then scratch registers, in the same order the
     registers are needed below.  */
  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, two32_elt));
  hi_si = gen_reg_rtx (V4SImode);
  cvt = gen_reg_rtx (V4DFmode);
  is_neg = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_vec_extract_hi_v8si (hi_si, operands[1]));
  emit_insn (gen_floatv4siv4df2 (cvt, hi_si));
  /* is_neg = (cvt < 0), as a V4DF compare result.  */
  emit_insn (gen_rtx_SET (VOIDmode, is_neg,
                          gen_rtx_LT (V4DFmode, cvt, zero)));
  /* fixup = is_neg & 2**32; result = cvt + fixup.  */
  emit_insn (gen_andv4df3 (fixup, is_neg, bias));
  emit_insn (gen_addv4df3 (operands[0], cvt, fixup));
  DONE;
})
5347
;; Unsigned conversion of the high eight SI elements of operand 1 to
;; V8DF: extract the high half, convert it as signed, compute a QImode
;; mask of the elements that compared less than zero, and apply a
;; masked add of the 2**32 vector under that mask.
(define_expand "vec_unpacku_float_hi_v16si"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V16SI 1 "register_operand")]
  "TARGET_AVX512F"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt, zero, bias, cvt, hi_si, neg_mask;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  zero = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
  bias = force_reg (V8DFmode,
                    ix86_build_const_vector (V8DFmode, 1, two32_elt));
  cvt = gen_reg_rtx (V8DFmode);
  hi_si = gen_reg_rtx (V8SImode);
  neg_mask = gen_reg_rtx (QImode);

  emit_insn (gen_vec_extract_hi_v16si (hi_si, operands[1]));
  emit_insn (gen_floatv8siv8df2 (cvt, hi_si));
  /* neg_mask = (cvt < 0), one bit per element.  */
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
                          gen_rtx_LT (QImode, cvt, zero)));
  /* Masked cvt += 2**32; cvt is also passed as the fourth argument.  */
  emit_insn (gen_addv8df3_mask (cvt, cvt, bias, cvt, neg_mask));
  emit_move_insn (operands[0], cvt);
  DONE;
})
5373
;; Unsigned conversion of the low four SI elements of operand 1 to
;; V4DF: convert the low half as signed via avx_cvtdq2pd256_2, then
;; add 2**32 to the elements that compared less than zero.
(define_expand "vec_unpacku_float_lo_v8si"
  [(match_operand:V4DF 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")]
  "TARGET_AVX"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt, zero, bias, cvt, is_neg, fixup;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  zero = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
  bias = force_reg (V4DFmode,
                    ix86_build_const_vector (V4DFmode, 1, two32_elt));
  cvt = gen_reg_rtx (V4DFmode);
  is_neg = gen_reg_rtx (V4DFmode);
  fixup = gen_reg_rtx (V4DFmode);

  emit_insn (gen_avx_cvtdq2pd256_2 (cvt, operands[1]));
  /* is_neg = (cvt < 0), as a V4DF compare result.  */
  emit_insn (gen_rtx_SET (VOIDmode, is_neg,
                          gen_rtx_LT (V4DFmode, cvt, zero)));
  /* fixup = is_neg & 2**32; result = cvt + fixup.  */
  emit_insn (gen_andv4df3 (fixup, is_neg, bias));
  emit_insn (gen_addv4df3 (operands[0], cvt, fixup));
  DONE;
})
5398
;; Unsigned conversion of the low eight SI elements of operand 1 to
;; V8DF: convert the low half as signed via avx512f_cvtdq2pd512_2,
;; compute a QImode mask of the elements that compared less than zero,
;; and apply a masked add of the 2**32 vector under that mask.
(define_expand "vec_unpacku_float_lo_v16si"
  [(match_operand:V8DF 0 "register_operand")
   (match_operand:V16SI 1 "nonimmediate_operand")]
  "TARGET_AVX512F"
{
  REAL_VALUE_TYPE two32;
  rtx two32_elt, zero, bias, cvt, neg_mask;

  /* Scalar 2**32 as a DFmode constant.  */
  real_ldexp (&two32, &dconst1, 32);
  two32_elt = const_double_from_real_value (two32, DFmode);

  zero = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
  bias = force_reg (V8DFmode,
                    ix86_build_const_vector (V8DFmode, 1, two32_elt));
  cvt = gen_reg_rtx (V8DFmode);
  neg_mask = gen_reg_rtx (QImode);

  emit_insn (gen_avx512f_cvtdq2pd512_2 (cvt, operands[1]));
  /* neg_mask = (cvt < 0), one bit per element.  */
  emit_insn (gen_rtx_SET (VOIDmode, neg_mask,
                          gen_rtx_LT (QImode, cvt, zero)));
  /* Masked cvt += 2**32; cvt is also passed as the fourth argument.  */
  emit_insn (gen_addv8df3_mask (cvt, cvt, bias, cvt, neg_mask));
  emit_move_insn (operands[0], cvt);
  DONE;
})
5422
;; Pack two double-precision vectors (VF2_512_256 modes) into one
;; single-precision vector: truncate operand 1 and operand 2 into two
;; <sf2dfmode> scratch registers (operands 3 and 4), then concatenate
;; the scratches into the <ssePSmode> destination.
(define_expand "vec_pack_trunc_<mode>"
  [(set (match_dup 3)
        (float_truncate:<sf2dfmode>
          (match_operand:VF2_512_256 1 "nonimmediate_operand")))
   (set (match_dup 4)
        (float_truncate:<sf2dfmode>
          (match_operand:VF2_512_256 2 "nonimmediate_operand")))
   (set (match_operand:<ssePSmode> 0 "register_operand")
        (vec_concat:<ssePSmode>
          (match_dup 3)
          (match_dup 4)))]
  "TARGET_AVX"
{
  /* One scratch per truncated half, allocated in operand order.  */
  operands[3] = gen_reg_rtx (<sf2dfmode>mode);
  operands[4] = gen_reg_rtx (<sf2dfmode>mode);
})
5439
;; Pack two V2DF vectors into one V4SF.  Two strategies:
;;   - with AVX, when 128-bit AVX is not preferred and the insn is
;;     optimized for speed: concatenate to V4DF and convert once;
;;   - otherwise: convert each input separately and combine the two
;;     results with sse_movlhps.
(define_expand "vec_pack_trunc_v2df"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V2DF 1 "nonimmediate_operand")
   (match_operand:V2DF 2 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2ps256 (operands[0], wide));
    }
  else
    {
      rtx cvt_a = gen_reg_rtx (V4SFmode);
      rtx cvt_b = gen_reg_rtx (V4SFmode);

      emit_insn (gen_sse2_cvtpd2ps (cvt_a, operands[1]));
      emit_insn (gen_sse2_cvtpd2ps (cvt_b, operands[2]));
      emit_insn (gen_sse_movlhps (operands[0], cvt_a, cvt_b));
    }
  DONE;
})
5467
;; Pack two V8DF vectors into one V16SI using the truncating signed
;; conversion on each input, then concatenating the V8SI halves.
(define_expand "vec_pack_sfix_trunc_v8df"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V8DF 1 "nonimmediate_operand")
   (match_operand:V8DF 2 "nonimmediate_operand")]
  "TARGET_AVX512F"
{
  rtx lo_half = gen_reg_rtx (V8SImode);
  rtx hi_half = gen_reg_rtx (V8SImode);

  emit_insn (gen_fix_truncv8dfv8si2 (lo_half, operands[1]));
  emit_insn (gen_fix_truncv8dfv8si2 (hi_half, operands[2]));
  emit_insn (gen_avx_vec_concatv16si (operands[0], lo_half, hi_half));
  DONE;
})
5484
;; Pack two V4DF vectors into one V8SI using the truncating signed
;; conversion on each input, then concatenating the V4SI halves.
(define_expand "vec_pack_sfix_trunc_v4df"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V4DF 1 "nonimmediate_operand")
   (match_operand:V4DF 2 "nonimmediate_operand")]
  "TARGET_AVX"
{
  rtx lo_half = gen_reg_rtx (V4SImode);
  rtx hi_half = gen_reg_rtx (V4SImode);

  emit_insn (gen_fix_truncv4dfv4si2 (lo_half, operands[1]));
  emit_insn (gen_fix_truncv4dfv4si2 (hi_half, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo_half, hi_half));
  DONE;
})
5501
;; Pack two V2DF vectors into one V4SI with truncating signed
;; conversion.  Two strategies:
;;   - with AVX, when 128-bit AVX is not preferred and the insn is
;;     optimized for speed: concatenate to V4DF and convert once;
;;   - otherwise: convert each input with sse2_cvttpd2dq and interleave
;;     the low V2DI elements of the two results.
(define_expand "vec_pack_sfix_trunc_v2df"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V2DF 1 "nonimmediate_operand")
   (match_operand:V2DF 2 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], wide));
    }
  else
    {
      rtx cvt_a = gen_reg_rtx (V4SImode);
      rtx cvt_b = gen_reg_rtx (V4SImode);
      rtx merged = gen_reg_rtx (V2DImode);

      emit_insn (gen_sse2_cvttpd2dq (cvt_a, operands[1]));
      emit_insn (gen_sse2_cvttpd2dq (cvt_b, operands[2]));
      /* Both results are viewed as V2DI for the interleave.  */
      emit_insn (gen_vec_interleave_lowv2di (merged,
                                             gen_lowpart (V2DImode, cvt_a),
                                             gen_lowpart (V2DImode, cvt_b)));
      emit_move_insn (operands[0], gen_lowpart (V4SImode, merged));
    }
  DONE;
})
5533
;; For a double-precision vector mode, the SImode vector mode that
;; holds two such vectors packed: twice the element count.
(define_mode_attr ssepackfltmode
  [(V8DF "V16SI")
   (V4DF "V8SI")
   (V2DF "V4SI")])
5536
;; Pack two VF2 vectors into one <ssepackfltmode> vector with
;; truncating unsigned conversion.
;;   - V8DF: use ufix_truncv8dfv8si2 on each input and concatenate.
;;   - otherwise: adjust each input with
;;     ix86_expand_adjust_ufix_to_sfix_si (which also hands back a
;;     second value per input), pack the adjusted inputs with the
;;     signed expander, gather the even elements of the two returned
;;     values, and XOR them into the packed result.
(define_expand "vec_pack_ufix_trunc_<mode>"
  [(match_operand:<ssepackfltmode> 0 "register_operand")
   (match_operand:VF2 1 "register_operand")
   (match_operand:VF2 2 "register_operand")]
  "TARGET_SSE2"
{
  if (<MODE>mode == V8DFmode)
    {
      rtx lo_half = gen_reg_rtx (V8SImode);
      rtx hi_half = gen_reg_rtx (V8SImode);

      emit_insn (gen_ufix_truncv8dfv8si2 (lo_half, operands[1]));
      emit_insn (gen_ufix_truncv8dfv8si2 (hi_half, operands[2]));
      emit_insn (gen_avx_vec_concatv16si (operands[0], lo_half, hi_half));
    }
  else
    {
      rtx adj_a, adj_b, xor_a, xor_b, packed, xor_packed, result;

      /* Adjusted inputs plus the per-input values to XOR back in.  */
      adj_a = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &xor_a);
      adj_b = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &xor_b);

      packed = gen_reg_rtx (<ssepackfltmode>mode);
      emit_insn (gen_vec_pack_sfix_trunc_<mode> (packed, adj_a, adj_b));

      if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
        {
          xor_packed = gen_reg_rtx (<ssepackfltmode>mode);
          ix86_expand_vec_extract_even_odd (xor_packed, xor_a, xor_b, 0);
        }
      else
        {
          /* Do the even-element extraction in V8SF and view the
             result as V8SI afterwards.  */
          xor_packed = gen_reg_rtx (V8SFmode);
          ix86_expand_vec_extract_even_odd (xor_packed,
                                            gen_lowpart (V8SFmode, xor_a),
                                            gen_lowpart (V8SFmode, xor_b), 0);
          xor_packed = gen_lowpart (V8SImode, xor_packed);
        }

      result = expand_simple_binop (<ssepackfltmode>mode, XOR,
                                    packed, xor_packed,
                                    operands[0], 0, OPTAB_DIRECT);
      /* The binop may have chosen a different target register.  */
      if (result != operands[0])
        emit_move_insn (operands[0], result);
    }

  DONE;
})
5581
;; Pack two V4DF vectors into one V8SI by converting each input with
;; avx_cvtpd2dq256 and concatenating the V4SI halves.
(define_expand "vec_pack_sfix_v4df"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V4DF 1 "nonimmediate_operand")
   (match_operand:V4DF 2 "nonimmediate_operand")]
  "TARGET_AVX"
{
  rtx lo_half = gen_reg_rtx (V4SImode);
  rtx hi_half = gen_reg_rtx (V4SImode);

  emit_insn (gen_avx_cvtpd2dq256 (lo_half, operands[1]));
  emit_insn (gen_avx_cvtpd2dq256 (hi_half, operands[2]));
  emit_insn (gen_avx_vec_concatv8si (operands[0], lo_half, hi_half));
  DONE;
})
5598
;; Pack two V2DF vectors into one V4SI.  Two strategies:
;;   - with AVX, when 128-bit AVX is not preferred and the insn is
;;     optimized for speed: concatenate to V4DF and convert once with
;;     avx_cvtpd2dq256;
;;   - otherwise: convert each input with sse2_cvtpd2dq and interleave
;;     the low V2DI elements of the two results.
(define_expand "vec_pack_sfix_v2df"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V2DF 1 "nonimmediate_operand")
   (match_operand:V2DF 2 "nonimmediate_operand")]
  "TARGET_SSE2"
{
  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
    {
      rtx wide = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concat.  */
      rtx first = force_reg (V2DFmode, operands[1]);

      emit_insn (gen_avx_vec_concatv4df (wide, first, operands[2]));
      emit_insn (gen_avx_cvtpd2dq256 (operands[0], wide));
    }
  else
    {
      rtx cvt_a = gen_reg_rtx (V4SImode);
      rtx cvt_b = gen_reg_rtx (V4SImode);
      rtx merged = gen_reg_rtx (V2DImode);

      emit_insn (gen_sse2_cvtpd2dq (cvt_a, operands[1]));
      emit_insn (gen_sse2_cvtpd2dq (cvt_b, operands[2]));
      /* Both results are viewed as V2DI for the interleave.  */
      emit_insn (gen_vec_interleave_lowv2di (merged,
                                             gen_lowpart (V2DImode, cvt_a),
                                             gen_lowpart (V2DImode, cvt_b)));
      emit_move_insn (operands[0], gen_lowpart (V4SImode, merged));
    }
  DONE;
})
5630
5631;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5632;;
5633;; Parallel single-precision floating point element swizzling
5634;;
5635;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
5636
;; Expander front end for sse_movhlps.  It accepts any nonimmediate
;; operands, lets ix86_fixup_binary_operands legitimize them, emits
;; the insn into the destination that helper returns, and copies to
;; operand 0 afterwards if the two differ.
(define_expand "sse_movhlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (match_operand:V4SF 2 "nonimmediate_operand"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movhlps (target, operands[1], operands[2]));

  /* The helper may have substituted another destination; if so,
     move the result into the requested one.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
5659
;; Result = { op2[2], op2[3], op1[2], op1[3] } (indices 6,7,2,3 of the
;; concatenation op1 ++ op2).  Alternatives:
;;   0/1  register-register movhlps, legacy and VEX;
;;   2/3  operand 2 in offsettable memory: movlps from its high half
;;        (%H2), legacy and VEX;
;;   4    destination in memory, operand 1 tied to it: movhps store.
;; The condition rejects operands 1 and 2 both being memory.
(define_insn "sse_movhlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,o,o,x"))
          (parallel [(const_int 6) (const_int 7)
                     (const_int 2) (const_int 3)])))]
  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhlps\t{%2, %0|%0, %2}
   vmovhlps\t{%2, %1, %0|%0, %1, %2}
   movlps\t{%H2, %0|%0, %H2}
   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
   %vmovhps\t{%2, %0|%q0, %2}"
  [(set_attr "isa"         "noavx,avx,noavx,avx,*")
   (set_attr "type"        "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix"      "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode"        "V4SF,V4SF,V2SF,V2SF,V2SF")])
5682
;; Expander front end for sse_movlhps.  It accepts any nonimmediate
;; operands, lets ix86_fixup_binary_operands legitimize them, emits
;; the insn into the destination that helper returns, and copies to
;; operand 0 afterwards if the two differ.
(define_expand "sse_movlhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand")
            (match_operand:V4SF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_SSE"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_movlhps (target, operands[1], operands[2]));

  /* The helper may have substituted another destination; if so,
     move the result into the requested one.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
5705
;; Result = { op1[0], op1[1], op2[0], op2[1] } (indices 0,1,4,5 of the
;; concatenation op1 ++ op2).  Alternatives:
;;   0/1  register-register movlhps, legacy and VEX;
;;   2/3  operand 2 in memory: movhps load, legacy and VEX;
;;   4    destination in offsettable memory, operand 1 tied to it:
;;        movlps store to the high half of the destination (%H0).
;; Operand validity is delegated to ix86_binary_operator_ok.
(define_insn "sse_movlhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
        (vec_select:V4SF
          (vec_concat:V8SF
            (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
            (match_operand:V4SF 2 "nonimmediate_operand" " x,x,m,m,x"))
          (parallel [(const_int 0) (const_int 1)
                     (const_int 4) (const_int 5)])))]
  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa"         "noavx,avx,noavx,avx,*")
   (set_attr "type"        "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix"      "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode"        "V4SF,V4SF,V2SF,V2SF,V2SF")])
5728
;; 512-bit vunpckhps.  Within each group of four elements, elements 2
;; and 3 of operand 1 are interleaved with elements 2 and 3 of
;; operand 2 (operand 2's elements are indices 16..31 of the concat).
(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
        (vec_select:V16SF
          (vec_concat:V32SF
            (match_operand:V16SF 1 "register_operand" "v")
            (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 2)  (const_int 18) (const_int 3)  (const_int 19)
                     (const_int 6)  (const_int 22) (const_int 7)  (const_int 23)
                     (const_int 10) (const_int 26) (const_int 11) (const_int 27)
                     (const_int 14) (const_int 30) (const_int 15) (const_int 31)])))]
  "TARGET_AVX512F"
  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode"   "V16SF")])
5748
;; Recall that the 256-bit unpck insns only shuffle within their lanes:
;; the selector below interleaves elements 2,3 and, separately,
;; elements 6,7 of the two operands.
(define_insn "avx_unpckhps256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand" "v")
            (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
          (parallel [(const_int 2) (const_int 10) (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14) (const_int 7) (const_int 15)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode"   "V8SF")])
5765
;; Interleave the high halves (elements 4..7) of two V8SF vectors.
;; Done in three steps:
;;   op3 = interleave of elements 0,1 and 4,5 of the inputs
;;         (selector 0,8,1,9,4,12,5,13);
;;   op4 = interleave of elements 2,3 and 6,7 of the inputs
;;         (selector 2,10,3,11,6,14,7,15 -- the same selector as
;;         avx_unpckhps256);
;;   op0 = elements 4..7 of op3 followed by elements 4..7 of op4.
;; Operands 3 and 4 are V8SF scratch registers created below.
;; The input operands carry predicates only, like every other
;; define_expand in this file; the constraint strings that used to be
;; attached to them have been dropped.
(define_expand "vec_interleave_highv8sf"
  [(set (match_dup 3)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_operand:V8SF 1 "register_operand")
            (match_operand:V8SF 2 "nonimmediate_operand"))
          (parallel [(const_int 0) (const_int 8)
                     (const_int 1) (const_int 9)
                     (const_int 4) (const_int 12)
                     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 1)
            (match_dup 2))
          (parallel [(const_int 2) (const_int 10)
                     (const_int 3) (const_int 11)
                     (const_int 6) (const_int 14)
                     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand")
        (vec_select:V8SF
          (vec_concat:V16SF
            (match_dup 3)
            (match_dup 4))
          (parallel [(const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)
                     (const_int 12) (const_int 13)
                     (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
{
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
5799
;; 128-bit unpckhps: interleave elements 2 and 3 of operand 1 with elements
;; 2 and 3 of operand 2 (indices 6 and 7 of the V8SF concatenation).
;; Alternative 0 is the two-operand SSE form (operand 1 tied to operand 0);
;; alternative 1 is the three-operand AVX form, which also carries the
;; optional <mask_operand3> suffix.
(define_insn "vec_interleave_highv4sf<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0,v")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 2) (const_int 6)
		     (const_int 3) (const_int 7)])))]
  "TARGET_SSE && <mask_avx512vl_condition>"
  "@
   unpckhps\t{%2, %0|%0, %2}
   vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
5816
;; 512-bit vunpcklps: within each group of four elements, interleave
;; elements 0 and 1 of operand 1 with the same-numbered elements of
;; operand 2 (operand 2 elements appear at index + 16 in the V32SF
;; concatenation).  Requires AVX512F and is always EVEX encoded.
(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_select:V16SF
	  (vec_concat:V32SF
	    (match_operand:V16SF 1 "register_operand" "v")
	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 8) (const_int 24)
		     (const_int 9) (const_int 25)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)])))]
  "TARGET_AVX512F"
  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
5836
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; vunpcklps on V8SF: the result interleaves elements 0, 1, 4 and 5 of
;; operand 1 with the same-numbered elements of operand 2 (indices 8, 9,
;; 12 and 13 of the V16SF concatenation).
(define_insn "avx_unpcklps256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "v")
	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
5853
;; Explicitly masked 128-bit vunpcklps (AVX512VL).  The low interleave of
;; operands 1 and 2 (indices 0,4,1,5) is merged with operand 3 under the
;; control of mask register operand 4.  Operand 3 is either tied to the
;; destination or matches the "C" constraint; the %N3 output modifier is
;; applied to it in the template.
(define_insn "unpcklps128_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (vec_select:V4SF
	    (vec_concat:V8SF
	      (match_operand:V4SF 1 "register_operand" "v")
	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
	    (parallel [(const_int 0) (const_int 4)
		      (const_int 1) (const_int 5)]))
	  (match_operand:V4SF 3 "vector_move_operand" "0C")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
5870
;; Full-width interleave of the low halves of two V8SF vectors, built
;; from three steps because the 256-bit unpck patterns work per lane:
;;   operand 3 = in-lane low interleave of operands 1 and 2
;;		 (indices 0,8,1,9,4,12,5,13);
;;   operand 4 = in-lane high interleave of operands 1 and 2
;;		 (indices 2,10,3,11,6,14,7,15);
;;   operand 0 = elements 0-3 of operand 3 followed by elements 0-3 of
;;		 operand 4 (indices 8-11 of their concatenation).
(define_expand "vec_interleave_lowv8sf"
  [(set (match_dup 3)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
	  (parallel [(const_int 0) (const_int 8)
		     (const_int 1) (const_int 9)
		     (const_int 4) (const_int 12)
		     (const_int 5) (const_int 13)])))
   (set (match_dup 4)
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel [(const_int 2) (const_int 10)
		     (const_int 3) (const_int 11)
		     (const_int 6) (const_int 14)
		     (const_int 7) (const_int 15)])))
   (set (match_operand:V8SF 0 "register_operand")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)
		     (const_int 8) (const_int 9)
		     (const_int 10) (const_int 11)])))]
 "TARGET_AVX"
{
  /* Fresh pseudos for the two intermediate in-lane interleaves.  */
  operands[3] = gen_reg_rtx (V8SFmode);
  operands[4] = gen_reg_rtx (V8SFmode);
})
5904
;; 128-bit unpcklps: interleave elements 0 and 1 of operand 1 with elements
;; 0 and 1 of operand 2 (indices 4 and 5 of the V8SF concatenation).
;; Alternative 0 is the two-operand SSE form (operand 1 tied to operand 0);
;; alternative 1 is the three-operand AVX form.
(define_insn "vec_interleave_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "register_operand" "0,x")
	    (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm"))
	  (parallel [(const_int 0) (const_int 4)
		     (const_int 1) (const_int 5)])))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
5921
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions
;; 256-bit vmovshdup: operand 1 is concatenated with itself and every
;; odd-numbered element (1, 3, 5, 7) is written twice, to the pair of
;; result positions it belongs to.
(define_insn "avx_movshdup256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 1) (const_int 1)
		     (const_int 3) (const_int 3)
		     (const_int 5) (const_int 5)
		     (const_int 7) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
5939
;; 128-bit movshdup: duplicate the odd elements of operand 1.  The source
;; is concatenated with itself, so index 7 names element 3 of operand 1;
;; the result is therefore { op1[1], op1[1], op1[3], op1[3] }.
;; The %v prefix in the template lets the same pattern emit either the
;; legacy or the VEX mnemonic (prefix attribute is maybe_vex).
(define_insn "sse3_movshdup<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 1)
		     (const_int 1)
		     (const_int 7)
		     (const_int 7)])))]
  "TARGET_SSE3 && <mask_avx512vl_condition>"
  "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
5956
;; 512-bit vmovshdup: every odd-numbered element of operand 1 (1, 3, ...,
;; 15) is written twice.  Operand 1 is concatenated with itself only to
;; keep the same vec_select/vec_concat shape as the narrower patterns; all
;; selected indices fall in the first copy.
(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_select:V16SF
	  (vec_concat:V32SF
	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 1) (const_int 1)
		     (const_int 3) (const_int 3)
		     (const_int 5) (const_int 5)
		     (const_int 7) (const_int 7)
		     (const_int 9) (const_int 9)
		     (const_int 11) (const_int 11)
		     (const_int 13) (const_int 13)
		     (const_int 15) (const_int 15)])))]
  "TARGET_AVX512F"
  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
5976
;; 256-bit vmovsldup: every even-numbered element of operand 1 (0, 2, 4,
;; 6) is written twice.  Operand 1 is concatenated with itself to keep the
;; same RTL shape as the other shuffle patterns.
(define_insn "avx_movsldup256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 0) (const_int 0)
		     (const_int 2) (const_int 2)
		     (const_int 4) (const_int 4)
		     (const_int 6) (const_int 6)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V8SF")])
5992
;; 128-bit movsldup: duplicate the even elements of operand 1.  The source
;; is concatenated with itself, so index 6 names element 2 of operand 1;
;; the result is therefore { op1[0], op1[0], op1[2], op1[2] }.
;; The %v prefix in the template lets the same pattern emit either the
;; legacy or the VEX mnemonic (prefix attribute is maybe_vex).
(define_insn "sse3_movsldup<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_select:V4SF
	  (vec_concat:V8SF
	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 0)
		     (const_int 0)
		     (const_int 6)
		     (const_int 6)])))]
  "TARGET_SSE3 && <mask_avx512vl_condition>"
  "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V4SF")])
6009
;; 512-bit vmovsldup: every even-numbered element of operand 1 (0, 2, ...,
;; 14) is written twice.  Operand 1 is concatenated with itself only to
;; keep the same vec_select/vec_concat shape as the narrower patterns; all
;; selected indices fall in the first copy.
(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(vec_select:V16SF
	  (vec_concat:V32SF
	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
	    (match_dup 1))
	  (parallel [(const_int 0) (const_int 0)
		     (const_int 2) (const_int 2)
		     (const_int 4) (const_int 4)
		     (const_int 6) (const_int 6)
		     (const_int 8) (const_int 8)
		     (const_int 10) (const_int 10)
		     (const_int 12) (const_int 12)
		     (const_int 14) (const_int 14)])))]
  "TARGET_AVX512F"
  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
6029
;; Expand a V8SF shufps whose selector is the integer immediate in
;; operand 3.  The immediate is decoded into the eight explicit element
;; indices expected by avx_shufps256_1: fields 0 and 1 pick from operand 1,
;; fields 2 and 3 pick from operand 2 (bias 8), and the same four picks
;; are repeated for the upper lane with a further bias of 4.
(define_expand "avx_shufps256<mask_expand4_name>"
  [(match_operand:V8SF 0 "register_operand")
   (match_operand:V8SF 1 "register_operand")
   (match_operand:V8SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_AVX"
{
  int imm = INTVAL (operands[3]);

  /* The four 2-bit selector fields of the immediate.  */
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
						     operands[1],
						     operands[2],
						     /* Lower lane.  */
						     GEN_INT (sel0),
						     GEN_INT (sel1),
						     GEN_INT (sel2 + 8),
						     GEN_INT (sel3 + 8),
						     /* Upper lane.  */
						     GEN_INT (sel0 + 4),
						     GEN_INT (sel1 + 4),
						     GEN_INT (sel2 + 12),
						     GEN_INT (sel3 + 12)
						     <mask_expand4_args>));
  DONE;
})
6052
;; Each 2-bit field of the vshufps immediate selects two elements, one per
;; 128-bit lane.  The pattern therefore only matches when the upper-lane
;; indices (operands 7-10) equal the lower-lane indices (operands 3-6)
;; plus 4; the immediate is rebuilt from the lower-lane indices alone.
(define_insn "avx_shufps256_1<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(vec_select:V8SF
	  (vec_concat:V16SF
	    (match_operand:V8SF 1 "register_operand" "v")
	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_8_to_11_operand")
		     (match_operand 6 "const_8_to_11_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_12_to_15_operand")
		     (match_operand 10 "const_12_to_15_operand")])))]
  "TARGET_AVX
   && <mask_avx512vl_condition>
   && INTVAL (operands[7]) == INTVAL (operands[3]) + 4
   && INTVAL (operands[8]) == INTVAL (operands[4]) + 4
   && INTVAL (operands[9]) == INTVAL (operands[5]) + 4
   && INTVAL (operands[10]) == INTVAL (operands[6]) + 4"
{
  /* Pack the lower-lane selectors back into the 8-bit immediate.
     Operands 5 and 6 index the second source, so drop their bias
     of 8 first.  */
  int imm = (INTVAL (operands[3])
	     | (INTVAL (operands[4]) << 2)
	     | ((INTVAL (operands[5]) - 8) << 4)
	     | ((INTVAL (operands[6]) - 8) << 6));

  /* Operand 3 is reused to carry the immediate into the template.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "V8SF")])
6088
;; Expand a V4SF shufps whose selector is the integer immediate in
;; operand 3.  The immediate is decoded into four explicit element
;; indices for sse_shufps_v4sf: fields 0 and 1 pick from operand 1,
;; fields 2 and 3 pick from operand 2 (bias 4 in the concatenation).
(define_expand "sse_shufps<mask_expand4_name>"
  [(match_operand:V4SF 0 "register_operand")
   (match_operand:V4SF 1 "register_operand")
   (match_operand:V4SF 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_int_operand")]
  "TARGET_SSE"
{
  int imm = INTVAL (operands[3]);

  /* The four 2-bit selector fields of the immediate.  */
  int sel0 = (imm >> 0) & 3;
  int sel1 = (imm >> 2) & 3;
  int sel2 = (imm >> 4) & 3;
  int sel3 = (imm >> 6) & 3;

  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
						     operands[1],
						     operands[2],
						     GEN_INT (sel0),
						     GEN_INT (sel1),
						     GEN_INT (sel2 + 4),
						     GEN_INT (sel3 + 4)
						     <mask_expand4_args>));
  DONE;
})
6107
;; Explicitly masked 128-bit vshufps (AVX512VL).  The shuffle of operands
;; 1 and 2 described by index operands 3-6 is merged with operand 7 under
;; the control of mask register operand 8.
(define_insn "sse_shufps_v4sf_mask"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_merge:V4SF
	  (vec_select:V4SF
	    (vec_concat:V8SF
	      (match_operand:V4SF 1 "register_operand" "v")
	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
	    (parallel [(match_operand 3 "const_0_to_3_operand")
		       (match_operand 4 "const_0_to_3_operand")
		       (match_operand 5 "const_4_to_7_operand")
		       (match_operand 6 "const_4_to_7_operand")]))
	  (match_operand:V4SF 7 "vector_move_operand" "0C")
	  (match_operand:QI 8 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
{
  /* Pack the four selectors into the 8-bit immediate.  Operands 5 and 6
     index the second source, so drop their bias of 4 first.  */
  int imm = ((INTVAL (operands[3]) << 0)
	     | (INTVAL (operands[4]) << 2)
	     | ((INTVAL (operands[5]) - 4) << 4)
	     | ((INTVAL (operands[6]) - 4) << 6));

  /* Operand 3 is reused to carry the immediate into the template.  */
  operands[3] = GEN_INT (imm);

  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
}
  [(set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V4SF")])
6136
;; Unmasked 128-bit shufps for every mode in VI4F_128.  Index operands 3
;; and 4 pick from operand 1, operands 5 and 6 pick from operand 2.
;; Alternative 0 is the two-operand SSE form, alternative 1 the
;; three-operand AVX form.
(define_insn "sse_shufps_<mode>"
  [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
	(vec_select:VI4F_128
	  (vec_concat:<ssedoublevecmode>
	    (match_operand:VI4F_128 1 "register_operand" "0,x")
	    (match_operand:VI4F_128 2 "nonimmediate_operand" "xm,xm"))
	  (parallel [(match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_4_to_7_operand")
		     (match_operand 6 "const_4_to_7_operand")])))]
  "TARGET_SSE"
{
  /* Pack the four selectors into the 8-bit immediate.  Operands 5 and 6
     index the second source, so drop their bias of 4 first.  */
  int imm = ((INTVAL (operands[3]) << 0)
	     | (INTVAL (operands[4]) << 2)
	     | ((INTVAL (operands[5]) - 4) << 4)
	     | ((INTVAL (operands[6]) - 4) << 6));

  /* Operand 3 is reused to carry the immediate into the template.  */
  operands[3] = GEN_INT (imm);

  if (which_alternative == 0)
    return "shufps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V4SF")])
6171
;; Extract the high two floats (elements 2 and 3) of V4SF operand 1 into
;; V2SF operand 0.  Alternatives:
;;   0: register -> memory, movhps;
;;   1: register -> register, movhlps;
;;   2: offsettable memory -> register, movlps from %H1
;;      (NOTE(review): %H presumably addresses the upper 8 bytes of the
;;      memory operand -- confirm against the operand printer).
(define_insn "sse_storehps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
	  (parallel [(const_int 2) (const_int 3)])))]
  "TARGET_SSE"
  "@
   %vmovhps\t{%1, %0|%q0, %1}
   %vmovhlps\t{%1, %d0|%d0, %1}
   %vmovlps\t{%H1, %d0|%d0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
6186
;; Expander front end for sse_loadhps: build a V4SF from the low two
;; elements of operand 1 and V2SF operand 2 as the high half.  The
;; operands are first legitimized with ix86_fixup_binary_operands, which
;; may hand back a destination different from operand 0.
(define_expand "sse_loadhps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "nonimmediate_operand")))]
  "TARGET_SSE"
{
  /* The insn writes TARGET, which need not be operand 0 itself.  */
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadhps (target, operands[1], operands[2]));

  /* Copy the result to the requested destination when a substitute
     destination was used.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
6206
;; Build a V4SF whose low half is elements 0 and 1 of operand 1 and whose
;; high half is V2SF operand 2.  Alternatives:
;;   0/1: operand 2 in memory, movhps (SSE form / AVX form);
;;   2/3: operand 2 in a register, movlhps (SSE form / AVX form);
;;   4:   offsettable memory destination tied to operand 1; operand 2 is
;;        stored with movlps at %H0
;;        (NOTE(review): %H presumably addresses the upper 8 bytes of the
;;        memory operand -- confirm against the operand printer).
(define_insn "sse_loadhps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,o")
	(vec_concat:V4SF
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,x,0,x,0")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,x,x")))]
  "TARGET_SSE"
  "@
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   %vmovlps\t{%2, %H0|%H0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
6226
;; Extract the low two floats (elements 0 and 1) of V4SF operand 1 into
;; V2SF operand 0.  Alternatives:
;;   0: register -> memory, movlps;
;;   1: register -> register, movaps (whole-register copy);
;;   2: memory -> register, movlps.
(define_insn "sse_storelps"
  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,x,x")
	(vec_select:V2SF
	  (match_operand:V4SF 1 "nonimmediate_operand" " x,x,m")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_SSE"
  "@
   %vmovlps\t{%1, %0|%q0, %1}
   %vmovaps\t{%1, %0|%0, %1}
   %vmovlps\t{%1, %d0|%d0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
6240
;; Expander front end for sse_loadlps: build a V4SF from V2SF operand 2
;; as the low half and elements 2 and 3 of operand 1 as the high half.
;; The operands are first legitimized with ix86_fixup_binary_operands,
;; which may hand back a destination different from operand 0.
(define_expand "sse_loadlps_exp"
  [(set (match_operand:V4SF 0 "nonimmediate_operand")
	(vec_concat:V4SF
	  (match_operand:V2SF 2 "nonimmediate_operand")
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
{
  /* The insn writes TARGET, which need not be operand 0 itself.  */
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);

  emit_insn (gen_sse_loadlps (target, operands[1], operands[2]));

  /* Copy the result to the requested destination when a substitute
     destination was used.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
6260
;; Build a V4SF whose low half is V2SF operand 2 and whose high half is
;; elements 2 and 3 of operand 1.  Alternatives:
;;   0/1: both inputs in registers, shufps with immediate 0xe4
;;        (SSE form / AVX form);
;;   2/3: operand 2 in memory, movlps (SSE form / AVX form);
;;   4:   memory destination tied to operand 1; operand 2 is stored over
;;        its low half with movlps.
(define_insn "sse_loadlps"
  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,x,x,x,m")
	(vec_concat:V4SF
	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,x,m,m,x")
	  (vec_select:V2SF
	    (match_operand:V4SF 1 "nonimmediate_operand" " x,x,0,x,0")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_SSE"
  "@
   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
   movlps\t{%2, %0|%0, %q2}
   vmovlps\t{%2, %1, %0|%0, %1, %q2}
   %vmovlps\t{%2, %0|%q0, %2}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*")
   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "length_immediate" "1,1,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
6281
;; Register-to-register movss: the vec_merge mask of 1 takes element 0
;; from operand 2 and the remaining elements from operand 1.
;; Alternative 0 is the two-operand SSE form (operand 1 tied to operand
;; 0); alternative 1 is the three-operand AVX form.
(define_insn "sse_movss"
  [(set (match_operand:V4SF 0 "register_operand"   "=x,x")
	(vec_merge:V4SF
	  (match_operand:V4SF 2 "register_operand" " x,x")
	  (match_operand:V4SF 1 "register_operand" " 0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "SF")])
6296
;; AVX2 vbroadcastss from a register: element 0 of V4SF register operand 1
;; is replicated into every element of the destination, for each mode in
;; the VF1_128_256 iterator.
(define_insn "avx2_vec_dup<mode>"
  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
	(vec_duplicate:VF1_128_256
	  (vec_select:SF
	    (match_operand:V4SF 1 "register_operand" "x")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "<MODE>")])
6308
;; AVX2 vbroadcastss with a V8SF register source: element 0 of operand 1
;; is replicated into all eight elements of the destination.
;; NOTE(review): the %x1 modifier presumably prints the 128-bit (xmm)
;; name of the source register -- confirm against the operand printer.
(define_insn "avx2_vec_dupv8sf_1"
  [(set (match_operand:V8SF 0 "register_operand" "=x")
	(vec_duplicate:V8SF
	  (vec_select:SF
	    (match_operand:V8SF 1 "register_operand" "x")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastss\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
    (set_attr "prefix" "vex")
    (set_attr "mode" "V8SF")])
6320
;; AVX512F broadcast from a 512-bit register source: element 0 of operand
;; 1 is replicated into every element of the destination, for each mode in
;; the VF_512 iterator.  The mnemonic suffix comes from <bcstscalarsuff>.
;; NOTE(review): the %x1 modifier presumably prints the 128-bit (xmm)
;; name of the source register -- confirm against the operand printer.
(define_insn "avx512f_vec_dup<mode>_1"
  [(set (match_operand:VF_512 0 "register_operand" "=v")
	(vec_duplicate:VF_512
	  (vec_select:<ssescalarmode>
	    (match_operand:VF_512 1 "register_operand" "v")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "sselog1")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
6332
;; Although insertps takes register source, we prefer
;; unpcklps with register source since it is shorter.
;; Build a V2SF from SF operand 1 (low element) and operand 2 (high
;; element) on SSE4.1 targets.  Alternatives, in order:
;;   0-2: operand 2 in a register, unpcklps (two SSE forms, one AVX form);
;;   3-5: operand 2 in memory, insertps with immediate 0x10
;;        (two SSE forms, one AVX form);
;;   6:   operand 1 in memory and operand 2 matching "C", movss load;
;;   7:   MMX registers, punpckldq;
;;   8:   MMX destination, operand 1 in memory, operand 2 matching "C",
;;        movd.
(define_insn "*vec_concatv2sf_sse4_1"
  [(set (match_operand:V2SF 0 "register_operand"     "=Yr,*x,x,Yr,*x,x,x,*y ,*y")
	(vec_concat:V2SF
	  (match_operand:SF 1 "nonimmediate_operand" "  0, 0,x, 0,0, x,m, 0 , m")
	  (match_operand:SF 2 "vector_move_operand"  " Yr,*x,x, m,m, m,C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   unpcklps\t{%2, %0|%0, %2}
   unpcklps\t{%2, %0|%0, %2}
   vunpcklps\t{%2, %1, %0|%0, %1, %2}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
   %vmovss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "prefix_data16" "*,*,*,1,1,*,*,*,*")
   (set_attr "prefix_extra" "*,*,*,1,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,1,1,1,*,*,*")
   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
6358
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
;; Build a V2SF from SF operand 1 (low element) and operand 2 (high
;; element) on plain SSE targets.  Alternatives, in order:
;;   0: SSE registers, unpcklps;
;;   1: operand 1 in memory and operand 2 matching "C", movss load;
;;   2: MMX registers, punpckldq;
;;   3: MMX destination, operand 1 in memory, operand 2 matching "C",
;;      movd.
(define_insn "*vec_concatv2sf_sse"
  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
	(vec_concat:V2SF
	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
  "TARGET_SSE"
  "@
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "V4SF,SF,DI,DI")])
6375
;; Build a V4SF from two V2SF halves: operand 1 (register) is the low
;; half and operand 2 the high half.  Alternatives:
;;   0/1: operand 2 in a register, movlhps (SSE form / AVX form);
;;   2/3: operand 2 in memory, movhps (SSE form / AVX form).
(define_insn "*vec_concatv4sf"
  [(set (match_operand:V4SF 0 "register_operand"       "=x,x,x,x")
	(vec_concat:V4SF
	  (match_operand:V2SF 1 "register_operand"     " 0,x,0,x")
	  (match_operand:V2SF 2 "nonimmediate_operand" " x,x,m,m")))]
  "TARGET_SSE"
  "@
   movlhps\t{%2, %0|%0, %2}
   vmovlhps\t{%2, %1, %0|%0, %1, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
  [(set_attr "isa" "noavx,avx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "orig,vex,orig,vex")
   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
6391
;; Vector initialization for every mode in V_128: all of the work is
;; delegated to ix86_expand_vector_init, called with FALSE as its first
;; argument, operand 0 as the destination and operand 1 as the
;; initializer.
(define_expand "vec_init<mode>"
  [(match_operand:V_128 0 "register_operand")
   (match_operand 1)]
  "TARGET_SSE"
{
  /* No RTL is generated from the template above; the helper emits
     everything.  */
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
6400
;; Avoid combining registers from different units in a single alternative,
;; see comment above inline_secondary_memory_needed function in i386.c
;; Set element 0 of a VI4F_128 vector: the vec_merge mask of 1 takes
;; element 0 from scalar operand 2 and the other elements from operand 1.
;; Alternatives, in order:
;;   0-1:   operand 1 matches "C", scalar in a register, insertps 0xe;
;;   2:     operand 1 matches "C", scalar in memory, scalar move;
;;   3:     operand 1 matches "C", scalar in a general register, movd;
;;   4-6:   movss forms (SSE load, SSE register merge, AVX register
;;	    merge);
;;   7-9:   pinsrd element 0 (two SSE4 forms, one AVX form);
;;   10-12: memory destination tied to operand 1; these emit "#" and so
;;	    rely on being split into another insn.
(define_insn "vec_set<mode>_0"
  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
	  "=Yr,*v,v,Yi,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
	(vec_merge:VI4F_128
	  (vec_duplicate:VI4F_128
	    (match_operand:<ssescalarmode> 2 "general_operand"
	  " Yr,*v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
	  (match_operand:VI4F_128 1 "vector_move_operand"
	  " C , C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vinsertps\t{$0xe, %d2, %0|%0, %d2, 0xe}
   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   pinsrd\t{$0, %2, %0|%0, %2, 0}
   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
   #
   #
   #"
  [(set_attr "isa" "sse4,sse4,sse2,sse2,noavx,noavx,avx,sse4_noavx,sse4_noavx,avx,*,*,*")
   (set (attr "type")
     (cond [(eq_attr "alternative" "0,1,7,8,9")
	      (const_string "sselog")
	    (eq_attr "alternative" "11")
	      (const_string "imov")
	    (eq_attr "alternative" "12")
	      (const_string "fmov")
	   ]
	   (const_string "ssemov")))
   (set_attr "prefix_extra" "*,*,*,*,*,*,*,1,1,1,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,*,*,1,1,1,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex,*,*,*")
   (set_attr "mode" "SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")])
6442
;; A subset is vec_setv4sf.
;; Insert SF operand 2 into one element of V4SF operand 1 with insertps.
;; Operand 3 is the vec_merge mask and must be a single set bit naming one
;; of the four elements; the instruction immediate is that element number
;; shifted left by 4.
(define_insn "*vec_setv4sf_sse4_1"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,xm"))
	  (match_operand:V4SF 1 "register_operand" "0,0,x")
	  (match_operand:SI 3 "const_int_operand")))]
  "TARGET_SSE4_1
   && ((unsigned) exact_log2 (INTVAL (operands[3]))
       < GET_MODE_NUNITS (V4SFmode))"
{
  /* Turn the one-hot merge mask into the destination element number and
     place it in the immediate field used by the template.  */
  int dest_elt = exact_log2 (INTVAL (operands[3]));
  operands[3] = GEN_INT (dest_elt << 4);

  /* Alternatives 0 and 1 are the SSE forms, alternative 2 the AVX
     form.  */
  if (which_alternative == 0 || which_alternative == 1)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 2)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
6474
;; insertps with a caller-supplied 8-bit immediate (operand 3), modeled as
;; an unspec.  When source operand 2 is in memory, the top two bits of
;; the immediate are folded into the address: the memory reference is
;; narrowed to SFmode at byte offset 4 * (imm >> 6), and those bits are
;; cleared from the immediate when they were nonzero.
(define_insn "sse4_1_insertps"
  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,x")
	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,xm")
		      (match_operand:V4SF 1 "register_operand" "0,0,x")
		      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
		     UNSPEC_INSERTPS))]
  "TARGET_SSE4_1"
{
  if (MEM_P (operands[2]))
    {
      /* Value of immediate bits 7:6.  */
      unsigned src_elt = INTVAL (operands[3]) >> 6;

      if (src_elt != 0)
	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);

      /* Address the selected float directly.  */
      operands[2] = adjust_address_nv (operands[2], SFmode, src_elt * 4);
    }

  /* Alternatives 0 and 1 are the SSE forms, alternative 2 the AVX
     form.  */
  if (which_alternative == 0 || which_alternative == 1)
    return "insertps\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 2)
    return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "V4SF")])
6508
;; After reload, setting element 0 of a vector that lives in memory (the
;; merge source is the destination itself and the scalar is not in
;; memory) becomes a plain scalar store: the destination is re-addressed
;; in the scalar mode at offset 0 and operand 1 is moved into it.
(define_split
  [(set (match_operand:VI4F_128 0 "memory_operand")
	(vec_merge:VI4F_128
	  (vec_duplicate:VI4F_128
	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
	  (match_dup 0)
	  (const_int 1)))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
6519
;; Set one element of a vector, for every mode in the V iterator.
;; Operand 0 is the vector, operand 1 the scalar value (a register) and
;; operand 2 the constant element index.  All of the work is delegated to
;; ix86_expand_vector_set, called with FALSE as its first argument.
(define_expand "vec_set<mode>"
  [(match_operand:V 0 "register_operand")
   (match_operand:<ssescalarmode> 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  /* No RTL is generated from the template above; the helper emits
     everything.  */
  ix86_expand_vector_set (false, operands[0], operands[1],
			  INTVAL (operands[2]));
  DONE;
})
6530
;; Extract element 0 of a V4SF.  The insn always emits "#" and is split
;; after reload into a plain SFmode move: a register source is re-viewed
;; as the SFmode register with the same number, a memory source is
;; re-addressed in SFmode at offset 0.  Memory-to-memory is rejected by
;; the insn condition.
(define_insn_and_split "*vec_extractv4sf_0"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=x,m,f,r")
	(vec_select:SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "xm,x,m,m")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  if (!REG_P (src))
    /* Not a register: narrow the reference to its first float.  */
    operands[1] = adjust_address (src, SFmode, 0);
  else
    /* Same hard register, viewed in the scalar mode.  */
    operands[1] = gen_rtx_REG (SFmode, REGNO (src));
})
6546
;; Extract element 1, 2 or 3 of a V4SF register (operand 2 is the element
;; number).  Alternatives 0 and 1 write a general register or memory
;; with extractps.  Alternatives 2 and 3 have an SSE register destination;
;; they emit "#" and are split after reload into a shuffle that brings the
;; wanted element down to element 0 of the destination register.
(define_insn_and_split "*sse4_1_extractps"
  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,x,x")
	(vec_select:SF
	  (match_operand:V4SF 1 "register_operand" "Yr,*x,0,x")
	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n")])))]
  "TARGET_SSE4_1"
  "@
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   %vextractps\t{%2, %1, %0|%0, %1, %2}
   #
   #"
  "&& reload_completed && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  /* The destination register, viewed as a whole V4SF.  */
  rtx dest = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
  HOST_WIDE_INT elt = INTVAL (operands[2]);

  if (elt == 1 || elt == 3)
    /* Broadcast the wanted element with shufps, both sources being
       operand 1.  */
    emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
				    operands[2], operands[2],
				    GEN_INT (elt + 4),
				    GEN_INT (elt + 4)));
  else if (elt == 2)
    /* unpckhps of operand 1 with itself puts element 2 first.  */
    emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
  else
    /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
    gcc_unreachable ();

  DONE;
}
  [(set_attr "isa" "*,*,noavx,avx")
   (set_attr "type" "sselog,sselog,*,*")
   (set_attr "prefix_data16" "1,1,*,*")
   (set_attr "prefix_extra" "1,1,*,*")
   (set_attr "length_immediate" "1,1,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,*,*")
   (set_attr "mode" "V4SF,V4SF,*,*")])
6587
;; Extract element N (operand 2, 0-3) of a V4SF that lives in offsettable
;; memory.  The insn always emits "#" and is split after reload into a
;; plain SFmode load from the source address plus 4 * N bytes.
(define_insn_and_split "*vec_extractv4sf_mem"
  [(set (match_operand:SF 0 "register_operand" "=x,*r,f")
	(vec_select:SF
	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
  "TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Each element is 4 bytes wide.  */
  operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
})
6600
;; ISA name prefix of the 128-bit extract pattern for each 512-bit mode:
;; the 32-bit element modes use the avx512f patterns, the 64-bit element
;; modes the avx512dq ones.
(define_mode_attr extract_type
  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])

;; Element-size x element-count suffix of the 128-bit extract for each
;; 512-bit mode.
(define_mode_attr extract_suf
  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])

;; 512-bit modes that have a 128-bit extract; the 64-bit element modes
;; are only enabled with TARGET_AVX512DQ.
(define_mode_iterator AVX512_VEC
  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
6609
;; Masked extraction of one 128-bit quarter from a 512-bit vector.
;;   operand 0: destination (register or memory);
;;   operand 1: 512-bit source register;
;;   operand 2: quarter number, 0-3;
;;   operand 3: merge source for elements whose mask bit is clear;
;;   operand 4: mask register.
;; The quarter number is turned into explicit element indices: four
;; consecutive indices for the 32-bit element modes, two for the 64-bit
;; element modes.
;;
;; A memory destination can only be written directly when the merge
;; source is that very same memory location.  In every other case the
;; extract is done into a fresh register and the result is then copied
;; to operand 0, so that the store to memory is never lost.
(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC 1 "register_operand")
   (match_operand:SI 2 "const_0_to_3_operand")
   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  int mask;
  rtx dest;

  mask = INTVAL (operands[2]);
  dest = operands[0];

  /* Extract into a temporary unless the destination is a register or
     the merge source is the destination memory itself.  */
  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
    dest = gen_reg_rtx (<ssequartermode>mode);

  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
	GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
	operands[4]));
  else
    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
	operands[4]));

  /* Store the result when a temporary was used.  */
  if (dest != operands[0])
    emit_move_insn (operands[0], dest);
  DONE;
})
6635
;; Masked store form of vextract<shuffletype>64x2: the selected pair of
;; elements of operand 1 is merged into the memory destination under mask
;; operand 5.  The insn condition requires operand 2 to be even, operand 3
;; to be operand 2 + 1, and the merge source (operand 4) to be the
;; destination itself.
;; The mask operand uses the "Yk" constraint, the same one used by
;; avx512f_vextract<shuffletype>32x4_1_maskm, because the register is
;; printed as a write mask ({%5}) in the output template.
(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
	(vec_merge:<ssequartermode>
	  (vec_select:<ssequartermode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel [(match_operand 2  "const_0_to_7_operand")
	      (match_operand 3  "const_0_to_7_operand")]))
	  (match_operand:<ssequartermode> 4 "memory_operand" "0")
	  (match_operand:QI 5 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && (INTVAL (operands[2]) % 2 == 0)
   && (INTVAL (operands[2]) == INTVAL (operands[3]) - 1)
   && rtx_equal_p (operands[4], operands[0])"
{
  /* The immediate is the pair number: first element index divided by 2.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6659
;; Masked store form of vextract<shuffletype>32x4: four selected elements
;; of operand 1 are merged into the memory destination under mask
;; operand 7.  The insn condition requires operands 2..5 to be four
;; consecutive indices starting at a multiple of 4, and the merge source
;; (operand 6) to be the destination itself.
(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
	(vec_merge:<ssequartermode>
	  (vec_select:<ssequartermode>
	    (match_operand:V16FI 1 "register_operand" "v")
	    (parallel
	      [(match_operand 2 "const_0_to_15_operand")
	       (match_operand 3 "const_0_to_15_operand")
	       (match_operand 4 "const_0_to_15_operand")
	       (match_operand 5 "const_0_to_15_operand")]))
	  (match_operand:<ssequartermode> 6 "memory_operand" "0")
	  (match_operand:QI 7 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ((INTVAL (operands[2]) % 4 == 0)
       && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
       && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
       && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1))
   && rtx_equal_p (operands[6], operands[0])"
{
  /* The immediate is the group number: first element index divided by 4.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6687
;; vextract<shuffletype>64x2: select a pair of elements of operand 1.
;; Operands 2 and 3 are the selected element indices.  The emitted
;; immediate is operands[2] >> 1, so the pattern must only match when
;; operand 2 is even and operand 3 is its successor; otherwise a select of
;; e.g. elements {1, 2} would be emitted as an extract of elements {0, 1}.
;; This is the same alignment test the "_maskm" variant applies.
(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
	(vec_select:<ssequartermode>
	  (match_operand:V8FI 1 "register_operand" "v")
	  (parallel [(match_operand 2  "const_0_to_7_operand")
            (match_operand 3  "const_0_to_7_operand")])))]
  "TARGET_AVX512DQ
   && INTVAL (operands[2]) % 2 == 0
   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
{
  /* The immediate is the pair number: first element index divided by 2.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6704
;; vextract<shuffletype>32x4: select four elements of operand 1.
;; Operands 2..5 are the selected element indices.  The emitted immediate
;; is operands[2] >> 2, so the pattern must only match four consecutive
;; indices that start at a multiple of 4; otherwise a select of e.g.
;; elements {1, 2, 3, 4} would be emitted as an extract of {0, 1, 2, 3}.
;; This is the same alignment test the "_maskm" variant applies.
(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
	(vec_select:<ssequartermode>
	  (match_operand:V16FI 1 "register_operand" "v")
	  (parallel [(match_operand 2  "const_0_to_15_operand")
            (match_operand 3  "const_0_to_15_operand")
            (match_operand 4  "const_0_to_15_operand")
            (match_operand 5  "const_0_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) % 4 == 0
   && INTVAL (operands[2]) == (INTVAL (operands[3]) - 1)
   && INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
   && INTVAL (operands[4]) == (INTVAL (operands[5]) - 1)"
{
  /* The immediate is the group number: first element index divided by 4.  */
  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 2);
  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6726
;; ISA prefix that forms the first part of the half-vector extract
;; expander's name, per 512-bit mode.
(define_mode_attr extract_type_2
  [(V16SF "avx512dq")
   (V16SI "avx512dq")
   (V8DF "avx512f")
   (V8DI "avx512f")])
6729
;; Suffix appended to "vextract<shuffletype>" in the half-vector extract
;; expander's name, per 512-bit mode.
(define_mode_attr extract_suf_2
  [(V16SF "32x8")
   (V16SI "32x8")
   (V8DF "64x4")
   (V8DI "64x4")])
6732
;; 512-bit modes handled by the half-vector extract expander.  The
;; V16SF/V16SI entries are enabled only when TARGET_AVX512DQ holds.
(define_mode_iterator AVX512_VEC_2
  [(V16SF "TARGET_AVX512DQ")
   (V16SI "TARGET_AVX512DQ")
   V8DF
   V8DI])
6735
;; Expander for the masked half-vector extract.
;;   operand 0: destination (register or memory), mode <ssehalfvecmode>
;;   operand 1: 512-bit source register
;;   operand 2: half number, 0 (low) or 1 (high)
;;   operand 3: merge source, mode <ssehalfvecmode>
;;   operand 4: QImode mask register
;; Dispatches to the vec_extract_lo/hi "_mask" generator for the mode.
(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:AVX512_VEC_2 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  rtx (*insn)(rtx, rtx, rtx, rtx);

  /* Operand 0 is a half vector, so force_reg must be given
     <ssehalfvecmode>mode (not the quarter-vector mode), exactly as the
     avx512vl_vextractf128<mode> expander does.  */
  if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
    operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);

  switch (INTVAL (operands[2]))
    {
    case 0:
      insn = gen_vec_extract_lo_<mode>_mask;
      break;
    case 1:
      insn = gen_vec_extract_hi_<mode>_mask;
      break;
    default:
      gcc_unreachable ();
    }

  emit_insn (insn (operands[0], operands[1], operands[3], operands[4]));
  DONE;
})
6764
;; After reload, an extract of elements 0..3 of a V8FI operand is rewritten
;; as a plain move of that operand viewed in the half-vector mode.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V8FI 1 "nonimmediate_operand")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
  && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  /* Same hard register number for a register source, lowpart otherwise.  */
  rtx low_half = (REG_P (src)
		  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
		  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
6783
;; Masked store of elements 0..3 of a V8FI register into memory with
;; vextract<shuffletype>64x4 $0.  The merge source (operand 2) must be the
;; destination itself; operand 3 is the mask register.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 0) (const_int 1) (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6801
;; Select elements 0..3 of a V8FI operand.  When <mask_applied> is true the
;; vextract<shuffletype>64x4 $0 instruction is emitted; otherwise "#" is
;; returned so that the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,v")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V8FI 1 "nonimmediate_operand" "v,m")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!(<mask_applied>))
    return "#";

  return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6820
;; Masked store of elements 4..7 of a V8FI register into memory with
;; vextract<shuffletype>64x4 $1.  The merge source (operand 2) must be the
;; destination itself; operand 3 is the mask register.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V8FI 1 "register_operand" "v")
	    (parallel
	      [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6839
;; Select elements 4..7 of a V8FI register with
;; vextract<shuffletype>64x4 $1.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V8FI 1 "register_operand" "v")
	  (parallel
	    [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F"
  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6853
;; Masked store of elements 8..15 of a V16FI register into memory with
;; vextract<shuffletype>32x8 $1.  The merge source (operand 2) must be the
;; destination itself; operand 3 is the mask register.
;; The mask operand uses the "Yk" constraint, the same one used by the
;; V8FI "_maskm" patterns, because the register is printed as a write mask
;; ({%3}) in the output template.
(define_insn "vec_extract_hi_<mode>_maskm"
   [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:V16FI 1 "register_operand" "v")
	    (parallel [(const_int 8) (const_int 9)
	      (const_int 10) (const_int 11)
	      (const_int 12) (const_int 13)
	      (const_int 14) (const_int 15)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512DQ
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
6873
;; Select elements 8..15 of a V16FI register.  The first alternative
;; (isa avx512dq) emits vextract<shuffletype>32x8 $1; the second
;; (isa noavx512dq) emits vextracti64x4 $1.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V16FI 1 "register_operand" "v,v")
	  (parallel
	    [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
	     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && <mask_avx512dq_condition>"
  "@
   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "isa" "avx512dq,noavx512dq")
   (set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
6892
;; Expander for the masked 128-bit extract from a 256-bit vector.
;;   operand 0: destination (register or memory)
;;   operand 1: 256-bit source register
;;   operand 2: half number, 0 (low) or 1 (high)
;;   operand 3: merge source
;;   operand 4: QImode mask register
;; Dispatches to the vec_extract_lo/hi "_mask" generator for the mode.
(define_expand "avx512vl_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")
   (match_operand:<ssehalfvecmode> 3 "vector_move_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
{
  rtx (*gen_half) (rtx, rtx, rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[2]);

  if (MEM_P (operands[0]) && GET_CODE (operands[3]) == CONST_VECTOR)
    operands[0] = force_reg (<ssehalfvecmode>mode, operands[0]);

  if (which == 0)
    gen_half = gen_vec_extract_lo_<mode>_mask;
  else if (which == 1)
    gen_half = gen_vec_extract_hi_<mode>_mask;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1], operands[3], operands[4]));
  DONE;
})
6921
;; Expander for the unmasked 128-bit extract from a 256-bit vector.
;;   operand 0: destination (register or memory)
;;   operand 1: 256-bit source register
;;   operand 2: half number, 0 (low) or 1 (high)
;; Dispatches to the vec_extract_lo/hi generator for the mode.
(define_expand "avx_vextractf128<mode>"
  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:SI 2 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_half) (rtx, rtx);
  const HOST_WIDE_INT which = INTVAL (operands[2]);

  if (which == 0)
    gen_half = gen_vec_extract_lo_<mode>;
  else if (which == 1)
    gen_half = gen_vec_extract_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_half (operands[0], operands[1]));
  DONE;
})
6945
;; Select elements 0..7 of a V16FI operand.  When <mask_applied> is true
;; the vextract<shuffletype>32x8 $0 instruction is emitted; otherwise "#"
;; is returned so that the insn gets split.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=v,m")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V16FI 1 "nonimmediate_operand" "vm,v")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F
   && <mask_mode512bit_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (!(<mask_applied>))
    return "#";

  return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
})
6963
;; After reload, an extract of elements 0..7 of a V16FI operand is
;; rewritten as a plain move of that operand viewed in the half-vector mode.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
	(vec_select:<ssehalfvecmode>
	  (match_operand:V16FI 1 "nonimmediate_operand")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
   && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  /* Same hard register number for a register source, lowpart otherwise.  */
  rtx low_half = (REG_P (src)
		  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
		  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
6984
;; Select elements 0..1 of a VI8F_256 operand.  When <mask_applied> is true
;; the vextract<shuffletype>64x2 $0 instruction is emitted; otherwise "#"
;; is returned so that the insn gets split.
;; The masked template uses <mask_operand2>, as every other masked
;; vec_extract_lo pattern in this file does, instead of a hard-coded
;; "%{%3%}".  NOTE(review): <mask_operand2> presumably adds the %N2
;; zero-masking marker that the explicit vec_extract_hi_<mode>_mask pattern
;; prints; confirm against its definition.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,m")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "nonimmediate_operand" "vm,v")
	  (parallel [(const_int 0) (const_int 1)])))]
  "TARGET_AVX
   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
  if (<mask_applied>)
    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
  else
    return "#";
}
   [(set_attr "type" "sselog")
    (set_attr "prefix_extra" "1")
    (set_attr "length_immediate" "1")
    (set_attr "memory" "none,store")
    (set_attr "prefix" "evex")
    (set_attr "mode" "XI")])
7005
;; After reload, an extract of elements 0..1 of a VI8F_256 operand is
;; rewritten as a plain move of that operand viewed in the half-vector mode.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "nonimmediate_operand")
	  (parallel
	    [(const_int 0) (const_int 1)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
  && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  /* Same hard register number for a register source, lowpart otherwise.  */
  rtx low_half = (REG_P (src)
		  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
		  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
7023
;; Masked store of elements 2..3 of a VI8F_256 register into memory with
;; vextract<shuffletype>64x2 $1.  The merge source (operand 2) must be the
;; destination itself.
;; Operand 3 is the write mask.  It is a QImode mask register with the "Yk"
;; constraint, as in the other "_maskm" patterns of this file; a
;; vector-mode operand here would not correspond to the QImode mask that
;; the vextract expanders pass.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (parallel [(const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512DQ && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7039
;; Masked register form: elements 2..3 of a VI8F_256 register are merged
;; with operand 2 (either the destination or a constant, per the "0C"
;; constraint) under mask operand 3, using vextract<shuffletype>64x2 $1.
(define_insn "vec_extract_hi_<mode>_mask"
  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (parallel
	      [(const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL && TARGET_AVX512DQ"
  "vextract<shuffletype>64x2\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
7054
;; Select elements 2..3 of a VI8F_256 register.  The first alternative
;; emits the VEX vextract<i128> $1 form; the second (isa avx512dq) emits
;; the EVEX vextract<shuffletype>64x2 $1 form.
(define_insn "vec_extract_hi_<mode>"
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI8F_256 1 "register_operand" "x, v")
	  (parallel
	    [(const_int 2) (const_int 3)])))]
  "TARGET_AVX"
  "@
    vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
    vextract<shuffletype>64x2\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "isa" "*, avx512dq")
   (set_attr "prefix" "vex, evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
7069
;; After reload, an extract of elements 0..3 of a VI4F_256 operand is
;; rewritten as a plain move of that operand viewed in the half-vector mode.
(define_split
  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI4F_256 1 "nonimmediate_operand")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1])) && reload_completed"
  [(const_int 0)]
{
  rtx src = operands[1];
  /* Same hard register number for a register source, lowpart otherwise.  */
  rtx low_half = (REG_P (src)
		  ? gen_rtx_REG (<ssehalfvecmode>mode, REGNO (src))
		  : gen_lowpart (<ssehalfvecmode>mode, src));

  emit_move_insn (operands[0], low_half);
  DONE;
})
7087
7088
;; Select elements 0..3 of a VI4F_256 operand.  When <mask_applied> is true
;; the vextract<shuffletype>32x4 $0 instruction is emitted; otherwise "#"
;; is returned so that the insn gets split.
;; NOTE(review): operand 1's predicate accepts memory while its only
;; constraint is "v" -- confirm whether that is intentional.
(define_insn "vec_extract_lo_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI4F_256 1 "nonimmediate_operand" "v")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)])))]
  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
{
  if (!(<mask_applied>))
    return "#";

  return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
7107
;; Masked store of elements 0..3 of a VI4F_256 register into memory with
;; vextract<shuffletype>32x4 $0.  The merge source (operand 2) must be the
;; destination itself; operand 3 is the mask register.
;; The mask operand uses the "Yk" constraint, the same one used by the
;; V8FI "_maskm" patterns, because the register is printed as a write mask
;; ({%3}) in the output template.
(define_insn "vec_extract_lo_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		      (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512VL && TARGET_AVX512F
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7125
;; Masked store of elements 4..7 of a VI4F_256 register into memory with
;; vextract<shuffletype>32x4 $1.  The merge source (operand 2) must be the
;; destination itself.
;; Operand 3 is the write mask.  It is a QImode mask register with the "Yk"
;; constraint, matching vec_extract_lo_<mode>_maskm's QImode mask; a
;; vector-mode operand here would not correspond to the QImode mask that
;; the vextract expanders pass.
(define_insn "vec_extract_hi_<mode>_maskm"
  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
	(vec_merge:<ssehalfvecmode>
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (parallel [(const_int 4) (const_int 5)
		      (const_int 6) (const_int 7)]))
	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
  "TARGET_AVX512F && TARGET_AVX512VL
   && rtx_equal_p (operands[2], operands[0])"
  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
7143
;; Select elements 4..7 of a VI4F_256 register.  With TARGET_AVX512VL the
;; EVEX vextract<shuffletype>32x4 $1 form is emitted, otherwise the VEX
;; vextract<i128> $1 form; the "prefix" attribute follows the same test.
(define_insn "vec_extract_hi_<mode><mask_name>"
  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
	(vec_select:<ssehalfvecmode>
	  (match_operand:VI4F_256 1 "register_operand" "v")
	  (parallel
	    [(const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
{
  if (!TARGET_AVX512VL)
    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";

  return "vextract<shuffletype>32x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
}
  [(set_attr "type" "sselog1")
   (set (attr "prefix")
     (if_then_else
       (match_test "TARGET_AVX512VL")
       (const_string "evex")
       (const_string "vex")))
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<sseinsnmode>")])
7166
;; Select elements 0..15 of a V32HI operand.  Always emitted as "#" and,
;; after reload, split into a plain V16HImode move from operand 1 viewed
;; as V16HImode (same register number, or the memory at offset 0).
(define_insn_and_split "vec_extract_lo_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
	(vec_select:V16HI
	  (match_operand:V32HI 1 "nonimmediate_operand" "vm,v")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
	     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
	     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  operands[1] = (REG_P (src)
		 ? gen_rtx_REG (V16HImode, REGNO (src))
		 : adjust_address (src, V16HImode, 0));
})
7189
;; Select elements 16..31 of a V32HI register with vextracti64x4 $1.
;; Operand 1 uses register_operand because both alternatives constrain it
;; to "v"; this matches the sibling vec_extract_hi_v16hi pattern and stops
;; the predicate from accepting a memory source that no alternative
;; allows.
(define_insn "vec_extract_hi_v32hi"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,m")
	(vec_select:V16HI
	  (match_operand:V32HI 1 "register_operand" "v,v")
	  (parallel [(const_int 16) (const_int 17)
		     (const_int 18) (const_int 19)
		     (const_int 20) (const_int 21)
		     (const_int 22) (const_int 23)
		     (const_int 24) (const_int 25)
		     (const_int 26) (const_int 27)
		     (const_int 28) (const_int 29)
		     (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7210
;; Select elements 0..7 of a V16HI operand.  Always emitted as "#" and,
;; after reload, split into a plain V8HImode move from operand 1 viewed
;; as V8HImode (same register number, or the memory at offset 0).
(define_insn_and_split "vec_extract_lo_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
	(vec_select:V8HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "xm,x")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  operands[1] = (REG_P (src)
		 ? gen_rtx_REG (V8HImode, REGNO (src))
		 : adjust_address (src, V8HImode, 0));
})
7229
;; Select elements 8..15 of a V16HI register with vextract%~128 $1; the
;; destination is a register (first alternative) or memory (second).
(define_insn "vec_extract_hi_v16hi"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
	(vec_select:V8HI
	  (match_operand:V16HI 1 "register_operand" "x,x")
	  (parallel
	    [(const_int 8) (const_int 9) (const_int 10) (const_int 11)
	     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7246
;; Select elements 0..31 of a V64QI operand.  Always emitted as "#" and,
;; after reload, split into a plain V32QImode move from operand 1 viewed
;; as V32QImode (same register number, or the memory at offset 0).
(define_insn_and_split "vec_extract_lo_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
	(vec_select:V32QI
	  (match_operand:V64QI 1 "nonimmediate_operand" "vm,v")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
	     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
	     (const_int 12) (const_int 13) (const_int 14) (const_int 15)
	     (const_int 16) (const_int 17) (const_int 18) (const_int 19)
	     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
	     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
	     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  operands[1] = (REG_P (src)
		 ? gen_rtx_REG (V32QImode, REGNO (src))
		 : adjust_address (src, V32QImode, 0));
})
7277
;; Select elements 32..63 of a V64QI register with vextracti64x4 $1.
;; Operand 1 uses register_operand because both alternatives constrain it
;; to "v"; this matches the sibling vec_extract_hi_v32qi pattern and stops
;; the predicate from accepting a memory source that no alternative
;; allows.
(define_insn "vec_extract_hi_v64qi"
  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
	(vec_select:V32QI
	  (match_operand:V64QI 1 "register_operand" "v,v")
	  (parallel [(const_int 32) (const_int 33)
		     (const_int 34) (const_int 35)
		     (const_int 36) (const_int 37)
		     (const_int 38) (const_int 39)
		     (const_int 40) (const_int 41)
		     (const_int 42) (const_int 43)
		     (const_int 44) (const_int 45)
		     (const_int 46) (const_int 47)
		     (const_int 48) (const_int 49)
		     (const_int 50) (const_int 51)
		     (const_int 52) (const_int 53)
		     (const_int 54) (const_int 55)
		     (const_int 56) (const_int 57)
		     (const_int 58) (const_int 59)
		     (const_int 60) (const_int 61)
		     (const_int 62) (const_int 63)])))]
  "TARGET_AVX512F"
  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
7306
;; Select elements 0..15 of a V32QI operand.  Always emitted as "#" and,
;; after reload, split into a plain V16QImode move from operand 1 viewed
;; as V16QImode (same register number, or the memory at offset 0).
(define_insn_and_split "vec_extract_lo_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
	(vec_select:V16QI
	  (match_operand:V32QI 1 "nonimmediate_operand" "xm,x")
	  (parallel
	    [(const_int 0) (const_int 1) (const_int 2) (const_int 3)
	     (const_int 4) (const_int 5) (const_int 6) (const_int 7)
	     (const_int 8) (const_int 9) (const_int 10) (const_int 11)
	     (const_int 12) (const_int 13) (const_int 14) (const_int 15)])))]
  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  operands[1] = (REG_P (src)
		 ? gen_rtx_REG (V16QImode, REGNO (src))
		 : adjust_address (src, V16QImode, 0));
})
7329
;; Select elements 16..31 of a V32QI register with vextract%~128 $1; the
;; destination is a register (first alternative) or memory (second).
(define_insn "vec_extract_hi_v32qi"
  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
	(vec_select:V16QI
	  (match_operand:V32QI 1 "register_operand" "x,x")
	  (parallel
	    [(const_int 16) (const_int 17) (const_int 18) (const_int 19)
	     (const_int 20) (const_int 21) (const_int 22) (const_int 23)
	     (const_int 24) (const_int 25) (const_int 26) (const_int 27)
	     (const_int 28) (const_int 29) (const_int 30) (const_int 31)])))]
  "TARGET_AVX"
  "vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "vex")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
7350
;; Vector modes accepted by the vec_extract<mode> expander.  The 128-bit
;; modes are unconditional; each wider mode carries its own target test.
(define_mode_iterator VEC_EXTRACT_MODE
  [(V64QI "TARGET_AVX512BW")
   (V32QI "TARGET_AVX")
   V16QI
   (V32HI "TARGET_AVX512BW")
   (V16HI "TARGET_AVX")
   V8HI
   (V16SI "TARGET_AVX512F")
   (V8SI "TARGET_AVX")
   V4SI
   (V8DI "TARGET_AVX512F")
   (V4DI "TARGET_AVX")
   V2DI
   (V16SF "TARGET_AVX512F")
   (V8SF "TARGET_AVX")
   V4SF
   (V8DF "TARGET_AVX512F")
   (V4DF "TARGET_AVX")
   V2DF])
7359
;; Standard-name scalar element extract.
;;   operand 0: scalar destination register
;;   operand 1: vector source register
;;   operand 2: constant element index
;; All the work is delegated to ix86_expand_vector_extract.
(define_expand "vec_extract<mode>"
  [(match_operand:<ssescalarmode> 0 "register_operand")
   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
   (match_operand 2 "const_int_operand")]
  "TARGET_SSE"
{
  rtx target = operands[0];
  rtx vec = operands[1];

  ix86_expand_vector_extract (false, target, vec, INTVAL (operands[2]));
  DONE;
})
7370
7371;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7372;;
7373;; Parallel double-precision floating point element swizzling
7374;;
7375;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7376
;; 512-bit vunpckhpd: from the concatenation of operands 1 and 2, select
;; elements 1, 9, 3, 11, 5, 13, 7, 15 (the odd element of each pair,
;; alternating between the two sources).
;; Operand 1 uses register_operand because its only constraint is "v";
;; this matches the 256-bit avx_unpckhpd256 pattern and stops the predicate
;; from accepting a memory operand that the constraint does not allow.
(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
  [(set (match_operand:V8DF 0 "register_operand" "=v")
	(vec_select:V8DF
	  (vec_concat:V16DF
	    (match_operand:V8DF 1 "register_operand" "v")
	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 1) (const_int 9)
		     (const_int 3) (const_int 11)
		     (const_int 5) (const_int 13)
		     (const_int 7) (const_int 15)])))]
  "TARGET_AVX512F"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V8DF")])
7392
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; From the concatenation of operands 1 and 2 this selects elements
;; 1, 5, 3, 7.
(define_insn "avx_unpckhpd256<mask_name>"
  [(set (match_operand:V4DF 0 "register_operand" "=v")
	(vec_select:V4DF
	  (vec_concat:V8DF
	    (match_operand:V4DF 1 "register_operand" "v")
	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
	  (parallel
	    [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V4DF")
   (set_attr "prefix" "vex")])
7407
;; Interleave-high for V4DF, built from three selects:
;;   operand 3 (temporary) = elements 0, 4, 2, 6 of {op1, op2}
;;   operand 4 (temporary) = elements 1, 5, 3, 7 of {op1, op2}
;;   operand 0             = elements 2, 3, 6, 7 of {op3, op4}
;; The preparation code only allocates the two temporaries.
(define_expand "vec_interleave_highv4df"
  [(set (match_dup 3)
	(vec_select:V4DF
	  (vec_concat:V8DF
	    (match_operand:V4DF 1 "register_operand" "x")
	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
	  (parallel
	    [(const_int 0) (const_int 4) (const_int 2) (const_int 6)])))
   (set (match_dup 4)
	(vec_select:V4DF
	  (vec_concat:V8DF
	    (match_dup 1)
	    (match_dup 2))
	  (parallel
	    [(const_int 1) (const_int 5) (const_int 3) (const_int 7)])))
   (set (match_operand:V4DF 0 "register_operand")
	(vec_select:V4DF
	  (vec_concat:V8DF
	    (match_dup 3)
	    (match_dup 4))
	  (parallel
	    [(const_int 2) (const_int 3) (const_int 6) (const_int 7)])))]
 "TARGET_AVX"
{
  /* Fresh pseudos for the two intermediate shuffles.  */
  for (int tmp = 3; tmp <= 4; tmp++)
    operands[tmp] = gen_reg_rtx (V4DFmode);
})
7435
7436
;; Masked 128-bit vunpckhpd: elements 1 and 3 of {op1, op2} are merged
;; with operand 3 (either the destination or a constant, per the "0C"
;; constraint) under mask operand 4.
(define_insn "avx512vl_unpckhpd128_mask"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
	(vec_merge:V2DF
	  (vec_select:V2DF
	    (vec_concat:V4DF
	      (match_operand:V2DF 1 "register_operand" "v")
	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
	    (parallel
	      [(const_int 1) (const_int 3)]))
	  (match_operand:V2DF 3 "vector_move_operand" "0C")
	  (match_operand:QI 4 "register_operand" "Yk")))]
  "TARGET_AVX512VL"
  "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "mode" "V2DF")
   (set_attr "prefix" "evex")])
7452
;; Expander for the V2DF high interleave: operand 0 = { op1[1], op2[1] }.
;; Both inputs are accepted as register or memory at expand time.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 1) rejects the operand
;; combination, operand 2 is forced into a register before the pattern
;; is emitted.
7453(define_expand "vec_interleave_highv2df"
7454  [(set (match_operand:V2DF 0 "register_operand")
7455	(vec_select:V2DF
7456	  (vec_concat:V4DF
7457	    (match_operand:V2DF 1 "nonimmediate_operand")
7458	    (match_operand:V2DF 2 "nonimmediate_operand"))
7459	  (parallel [(const_int 1)
7460		     (const_int 3)])))]
7461  "TARGET_SSE2"
7462{
7463  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
7464    operands[2] = force_reg (V2DFmode, operands[2]);
7465})
7466
;; Matching insn for the V2DF high interleave { op1[1], op2[1] }.
;; The six alternatives (see the "isa" attribute for availability):
;;   0: unpckhpd   - operand 1 tied to the destination (noavx)
;;   1: vunpckhpd  - three-operand form (avx)
;;   2: movddup    - operand 1 is offsettable memory and operand 2 is the
;;                   same operand; loads from %H1 (sse3)
;;   3: movlpd     - operand 1 in memory, operand 2 tied to the
;;                   destination; loads from %H1 (noavx)
;;   4: vmovlpd    - as 3, but operand 2 is a separate register (avx)
;;   5: movhpd     - destination is memory, operand 2 is tied to it and
;;                   operand 1 is the register stored
;; %H1 presumably addresses the upper eight bytes of the memory operand;
;; confirm against the i386 operand-modifier documentation.
7467(define_insn "*vec_interleave_highv2df"
7468  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,m")
7469	(vec_select:V2DF
7470	  (vec_concat:V4DF
7471	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,o,o,o,x")
7472	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,0,x,0"))
7473	  (parallel [(const_int 1)
7474		     (const_int 3)])))]
7475  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
7476  "@
7477   unpckhpd\t{%2, %0|%0, %2}
7478   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
7479   %vmovddup\t{%H1, %0|%0, %H1}
7480   movlpd\t{%H1, %0|%0, %H1}
7481   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
7482   %vmovhpd\t{%1, %0|%q0, %1}"
7483  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7484   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7485   (set_attr "ssememalign" "64")
7486   (set_attr "prefix_data16" "*,*,*,1,*,1")
7487   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7488   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7489
;; Expander for a V8DF even-element duplicate.  Operand 1 is concatenated
;; with itself and elements 0, 8, 2, 10, 4, 12, 6, 14 are selected, so
;; the result is { op1[0], op1[0], op1[2], op1[2], op1[4], op1[4],
;; op1[6], op1[6] }.  There is no preparation code; the pattern is
;; emitted as written.
7490(define_expand "avx512f_movddup512<mask_name>"
7491  [(set (match_operand:V8DF 0 "register_operand")
7492	(vec_select:V8DF
7493	  (vec_concat:V16DF
7494	    (match_operand:V8DF 1 "nonimmediate_operand")
7495	    (match_dup 1))
7496	  (parallel [(const_int 0) (const_int 8)
7497		     (const_int 2) (const_int 10)
7498		     (const_int 4) (const_int 12)
7499		     (const_int 6) (const_int 14)])))]
7500  "TARGET_AVX512F")
7501
;; Expander for the V8DF low unpack: operand 0 receives elements
;; 0, 8, 2, 10, 4, 12, 6, 14 of (operand 1, operand 2), i.e. the even
;; elements of both inputs interleaved.  Operand 1 must be a register,
;; operand 2 may be memory.  No preparation code.
7502(define_expand "avx512f_unpcklpd512<mask_name>"
7503  [(set (match_operand:V8DF 0 "register_operand")
7504	(vec_select:V8DF
7505	  (vec_concat:V16DF
7506	    (match_operand:V8DF 1 "register_operand")
7507	    (match_operand:V8DF 2 "nonimmediate_operand"))
7508	  (parallel [(const_int 0) (const_int 8)
7509		     (const_int 2) (const_int 10)
7510		     (const_int 4) (const_int 12)
7511		     (const_int 6) (const_int 14)])))]
7512  "TARGET_AVX512F")
7513
;; Matching insn shared by the two V8DF expanders with this selection
;; (avx512f_movddup512 and avx512f_unpcklpd512).  Two alternatives:
;;   0: operand 2 is the same as operand 1 (constraint "1"), which may be
;;      a register or memory -> vmovddup
;;   1: distinct operands, operand 1 in a register -> vunpcklpd
7514(define_insn "*avx512f_unpcklpd512<mask_name>"
7515  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
7516	(vec_select:V8DF
7517	  (vec_concat:V16DF
7518	    (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
7519	    (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
7520	  (parallel [(const_int 0) (const_int 8)
7521		     (const_int 2) (const_int 10)
7522		     (const_int 4) (const_int 12)
7523		     (const_int 6) (const_int 14)])))]
7524  "TARGET_AVX512F"
7525  "@
7526   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
7527   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7528  [(set_attr "type" "sselog")
7529   (set_attr "prefix" "evex")
7530   (set_attr "mode" "V8DF")])
7531
7532;; Recall that the 256-bit unpck insns only shuffle within their lanes.
;; Expander for a V4DF even-element duplicate: operand 1 is concatenated
;; with itself and elements 0, 4, 2, 6 are selected, giving
;; { op1[0], op1[0], op1[2], op1[2] }.  No preparation code.
7533(define_expand "avx_movddup256<mask_name>"
7534  [(set (match_operand:V4DF 0 "register_operand")
7535	(vec_select:V4DF
7536	  (vec_concat:V8DF
7537	    (match_operand:V4DF 1 "nonimmediate_operand")
7538	    (match_dup 1))
7539	  (parallel [(const_int 0) (const_int 4)
7540		     (const_int 2) (const_int 6)])))]
7541  "TARGET_AVX && <mask_avx512vl_condition>")
7542
;; Expander for the V4DF low unpack: operand 0 =
;; { op1[0], op2[0], op1[2], op2[2] } (elements 0, 4, 2, 6 of the
;; concatenation).  Operand 1 must be a register, operand 2 may be
;; memory.  No preparation code.
7543(define_expand "avx_unpcklpd256<mask_name>"
7544  [(set (match_operand:V4DF 0 "register_operand")
7545	(vec_select:V4DF
7546	  (vec_concat:V8DF
7547	    (match_operand:V4DF 1 "register_operand")
7548	    (match_operand:V4DF 2 "nonimmediate_operand"))
7549	  (parallel [(const_int 0) (const_int 4)
7550		     (const_int 2) (const_int 6)])))]
7551  "TARGET_AVX && <mask_avx512vl_condition>")
7552
;; Matching insn shared by the two V4DF expanders with this selection
;; (avx_movddup256 and avx_unpcklpd256).  Two alternatives:
;;   0: operand 1 in a register, operand 2 register or memory
;;      -> vunpcklpd
;;   1: operand 1 in memory and operand 2 the same operand
;;      (constraint "1") -> vmovddup from that memory
7553(define_insn "*avx_unpcklpd256<mask_name>"
7554  [(set (match_operand:V4DF 0 "register_operand"         "=v,v")
7555	(vec_select:V4DF
7556	  (vec_concat:V8DF
7557	    (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
7558	    (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
7559	  (parallel [(const_int 0) (const_int 4)
7560		     (const_int 2) (const_int 6)])))]
7561  "TARGET_AVX && <mask_avx512vl_condition>"
7562  "@
7563   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
7564   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
7565  [(set_attr "type" "sselog")
7566   (set_attr "prefix" "vex")
7567   (set_attr "mode" "V4DF")])
7568
;; Expander for the V4DF low interleave, built from three vec_selects:
;;   operand 3 = { op1[0], op2[0], op1[2], op2[2] }
;;   operand 4 = { op1[1], op2[1], op1[3], op2[3] }
;;   operand 0 = elements 0, 1, 4, 5 of (operand 3, operand 4)
;;             = { op1[0], op2[0], op1[1], op2[1] }
;; Operands 3 and 4 are fresh V4DF pseudo registers allocated in the
;; preparation statements.  Enabled for TARGET_AVX.
7569(define_expand "vec_interleave_lowv4df"
;; Step 1: even elements of both inputs into temporary operand 3.
7570  [(set (match_dup 3)
7571	(vec_select:V4DF
7572	  (vec_concat:V8DF
7573	    (match_operand:V4DF 1 "register_operand" "x")
7574	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
7575	  (parallel [(const_int 0) (const_int 4)
7576		     (const_int 2) (const_int 6)])))
;; Step 2: odd elements of both inputs into temporary operand 4.
7577   (set (match_dup 4)
7578	(vec_select:V4DF
7579	  (vec_concat:V8DF
7580	    (match_dup 1)
7581	    (match_dup 2))
7582	  (parallel [(const_int 1) (const_int 5)
7583		     (const_int 3) (const_int 7)])))
;; Step 3: combine the lower halves of the two temporaries.
7584   (set (match_operand:V4DF 0 "register_operand")
7585	(vec_select:V4DF
7586	  (vec_concat:V8DF
7587	    (match_dup 3)
7588	    (match_dup 4))
7589	  (parallel [(const_int 0) (const_int 1)
7590		     (const_int 4) (const_int 5)])))]
7591 "TARGET_AVX"
7592{
7593  operands[3] = gen_reg_rtx (V4DFmode);
7594  operands[4] = gen_reg_rtx (V4DFmode);
7595})
7596
;; Masked V2DF vunpcklpd (TARGET_AVX512VL).  The shuffle result
;; { op1[0], op2[0] } is vec_merged with operand 3 under the QImode mask
;; in operand 4 (constraint "Yk"): elements whose mask bit is set come
;; from the shuffle, the others from operand 3.  Operand 3 is either
;; tied to the destination ("0") or satisfies constraint "C"
;; (presumably the all-zero vector, i.e. zero-masking -- confirm against
;; the constraint definitions); %N3 prints the matching suffix.
7597(define_insn "avx512vl_unpcklpd128_mask"
7598  [(set (match_operand:V2DF 0 "register_operand" "=v")
7599	(vec_merge:V2DF
7600	  (vec_select:V2DF
7601	    (vec_concat:V4DF
7602	      (match_operand:V2DF 1 "register_operand" "v")
7603	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
7604	    (parallel [(const_int 0) (const_int 2)]))
7605	  (match_operand:V2DF 3 "vector_move_operand" "0C")
7606	  (match_operand:QI 4 "register_operand" "Yk")))]
7607  "TARGET_AVX512VL"
7608  "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7609  [(set_attr "type" "sselog")
7610   (set_attr "prefix" "evex")
7611   (set_attr "mode" "V2DF")])
7612
;; Expander for the V2DF low interleave: operand 0 = { op1[0], op2[0] }.
;; Both inputs are accepted as register or memory at expand time.  If
;; ix86_vec_interleave_v2df_operator_ok (operands, 0) rejects the operand
;; combination, operand 1 is forced into a register before the pattern
;; is emitted (the high variant forces operand 2 instead).
7613(define_expand "vec_interleave_lowv2df"
7614  [(set (match_operand:V2DF 0 "register_operand")
7615	(vec_select:V2DF
7616	  (vec_concat:V4DF
7617	    (match_operand:V2DF 1 "nonimmediate_operand")
7618	    (match_operand:V2DF 2 "nonimmediate_operand"))
7619	  (parallel [(const_int 0)
7620		     (const_int 2)])))]
7621  "TARGET_SSE2"
7622{
7623  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
7624    operands[1] = force_reg (V2DFmode, operands[1]);
7625})
7626
;; Matching insn for the V2DF low interleave { op1[0], op2[0] }.
;; The six alternatives (see the "isa" attribute for availability):
;;   0: unpcklpd   - operand 1 tied to the destination (noavx)
;;   1: vunpcklpd  - three-operand form (avx)
;;   2: movddup    - operand 1 in memory and operand 2 the same operand
;;                   (sse3)
;;   3: movhpd     - operand 1 tied to the destination, operand 2 loaded
;;                   from memory (noavx)
;;   4: vmovhpd    - as 3, but operand 1 is a separate register (avx)
;;   5: movlpd     - destination is offsettable memory with operand 1
;;                   tied to it; operand 2 is stored to %H0
;; %H0 presumably addresses the upper eight bytes of the memory operand;
;; confirm against the i386 operand-modifier documentation.
7627(define_insn "*vec_interleave_lowv2df"
7628  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,x,x,x,x,o")
7629	(vec_select:V2DF
7630	  (vec_concat:V4DF
7631	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,m,0,x,0")
7632	    (match_operand:V2DF 2 "nonimmediate_operand" " x,x,1,m,m,x"))
7633	  (parallel [(const_int 0)
7634		     (const_int 2)])))]
7635  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
7636  "@
7637   unpcklpd\t{%2, %0|%0, %2}
7638   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
7639   %vmovddup\t{%1, %0|%0, %q1}
7640   movhpd\t{%2, %0|%0, %q2}
7641   vmovhpd\t{%2, %1, %0|%0, %1, %q2}
7642   %vmovlpd\t{%2, %H0|%H0, %2}"
7643  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
7644   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
7645   (set_attr "ssememalign" "64")
7646   (set_attr "prefix_data16" "*,*,*,1,*,1")
7647   (set_attr "prefix" "orig,vex,maybe_vex,orig,vex,maybe_vex")
7648   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
7649
;; Split a V2DF "duplicate element 0" whose destination is memory and
;; whose source is a register concatenated with itself.  After reload
;; (and only for TARGET_SSE3) it is replaced by two DFmode stores of the
;; same source register, viewed in DFmode, to byte offsets 0 and 8 of
;; the destination.  The replacement pattern (const_int 0) is a
;; placeholder; the preparation code emits the moves itself and ends
;; with DONE.
7650(define_split
7651  [(set (match_operand:V2DF 0 "memory_operand")
7652	(vec_select:V2DF
7653	  (vec_concat:V4DF
7654	    (match_operand:V2DF 1 "register_operand")
7655	    (match_dup 1))
7656	  (parallel [(const_int 0)
7657		     (const_int 2)])))]
7658  "TARGET_SSE3 && reload_completed"
7659  [(const_int 0)]
7660{
7661  rtx low = gen_rtx_REG (DFmode, REGNO (operands[1]));
7662  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
7663  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
7664  DONE;
7665})
7666
;; Split a V2DF selection from a memory operand concatenated with itself
;; when both selected indices name the same element: operand 2 is 0 or 1
;; and the condition requires operand 3 == operand 2 + 2.  The result is
;; rewritten as a vec_duplicate of the single DFmode element located at
;; byte offset operand 2 * 8 within the original memory operand.
;; Only for TARGET_SSE3.
7667(define_split
7668  [(set (match_operand:V2DF 0 "register_operand")
7669	(vec_select:V2DF
7670	  (vec_concat:V4DF
7671	    (match_operand:V2DF 1 "memory_operand")
7672	    (match_dup 1))
7673	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
7674		     (match_operand:SI 3 "const_int_operand")])))]
7675  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
7676  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
7677{
7678  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
7679})
7680
;; Scalar vscalef on a 128-bit vector mode (VF_128 iterator).  The
;; UNSPEC_SCALEF of operands 1 and 2 supplies element 0 of the result
;; (vec_merge mask const_int 1); all other elements are copied from
;; operand 1.  <round_name>, <round_nimm_predicate>, <round_constraint>
;; and <round_op3> are substitution attributes defined outside this
;; chunk.
7681(define_insn "avx512f_vmscalef<mode><round_name>"
7682  [(set (match_operand:VF_128 0 "register_operand" "=v")
7683	(vec_merge:VF_128
7684	  (unspec:VF_128
7685	    [(match_operand:VF_128 1 "register_operand" "v")
7686	     (match_operand:VF_128 2 "<round_nimm_predicate>" "<round_constraint>")]
7687	    UNSPEC_SCALEF)
7688	  (match_dup 1)
7689	  (const_int 1)))]
7690  "TARGET_AVX512F"
7691  "vscalef<ssescalarmodesuffix>\t{<round_op3>%2, %1, %0|%0, %1, %2<round_op3>}"
7692  [(set_attr "prefix" "evex")
7693   (set_attr "mode"  "<ssescalarmode>")])
7694
;; Packed vscalef over the VF_AVX512VL modes: operand 0 is the
;; UNSPEC_SCALEF of register operand 1 and operand 2.  Operand 2 uses
;; the plain "nonimmediate_operand" predicate; its constraint comes from
;; <round_constraint>.  The <mask_name>/<round_name> suffixes and the
;; <round_mask_op3>/<mask_operand3> template pieces are substitution
;; attributes defined outside this chunk.
7695(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
7696  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7697	(unspec:VF_AVX512VL
7698	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
7699	   (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
7700	  UNSPEC_SCALEF))]
7701  "TARGET_AVX512F"
7702  "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
7703  [(set_attr "prefix" "evex")
7704   (set_attr "mode"  "<MODE>")])
7705
;; Expander for the "maskz" form of vpternlog.  It takes the destination
;; (0), three vector sources (1-3), an 8-bit immediate (4) and a mask
;; register (5), and forwards them to the generated
;; <avx512>_vternlog<mode>_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) between the immediate and the mask.  The role of that
;; zero vector is defined by the _maskz_1 pattern, which is produced by
;; substitution outside this chunk.
7706(define_expand "<avx512>_vternlog<mode>_maskz"
7707  [(match_operand:VI48_AVX512VL 0 "register_operand")
7708   (match_operand:VI48_AVX512VL 1 "register_operand")
7709   (match_operand:VI48_AVX512VL 2 "register_operand")
7710   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
7711   (match_operand:SI 4 "const_0_to_255_operand")
7712   (match_operand:<avx512fmaskmode> 5 "register_operand")]
7713  "TARGET_AVX512F"
7714{
7715  emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
7716    operands[0], operands[1], operands[2], operands[3],
7717    operands[4], CONST0_RTX (<MODE>mode), operands[5]));
7718  DONE;
7719})
7720
;; vpternlog: operand 0 is the UNSPEC_VTERNLOG of three vector inputs
;; and an immediate in the range 0-255 (operand 4).  Operand 1 is tied
;; to the destination (constraint "0"), so the template prints only
;; %0, %2, %3 and %4.  Operand 3 may be a register or memory.
;; <sd_maskz_name> and <sd_mask_op5> are substitution attributes defined
;; outside this chunk.
7721(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
7722  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7723	(unspec:VI48_AVX512VL
7724	  [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7725	   (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7726	   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7727	   (match_operand:SI 4 "const_0_to_255_operand")]
7728	  UNSPEC_VTERNLOG))]
7729  "TARGET_AVX512F"
7730  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
7731  [(set_attr "type" "sselog")
7732   (set_attr "prefix" "evex")
7733   (set_attr "mode" "<sseinsnmode>")])
7734
;; Merge-masked vpternlog.  The UNSPEC_VTERNLOG result is vec_merged
;; with operand 1 (which is also tied to the destination) under the mask
;; register in operand 5: elements whose mask bit is set take the
;; ternary-logic result, the others keep operand 1's value.  The mask is
;; printed as {%5} after the destination.
7735(define_insn "<avx512>_vternlog<mode>_mask"
7736  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7737	(vec_merge:VI48_AVX512VL
7738	  (unspec:VI48_AVX512VL
7739	    [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
7740	     (match_operand:VI48_AVX512VL 2 "register_operand" "v")
7741	     (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
7742	     (match_operand:SI 4 "const_0_to_255_operand")]
7743	    UNSPEC_VTERNLOG)
7744	  (match_dup 1)
7745	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7746  "TARGET_AVX512F"
7747  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
7748  [(set_attr "type" "sselog")
7749   (set_attr "prefix" "evex")
7750   (set_attr "mode" "<sseinsnmode>")])
7751
;; Packed vgetexp over the VF_AVX512VL modes: operand 0 is the
;; UNSPEC_GETEXP of the single input operand 1.  The predicate and
;; constraint of operand 1, and the <round_saeonly_mask_op2> /
;; <mask_operand2> template pieces, come from substitution attributes
;; defined outside this chunk.
;; The ';' following the output template starts an (empty) comment.
7752(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
7753  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7754        (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7755                        UNSPEC_GETEXP))]
7756   "TARGET_AVX512F"
7757   "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
7758    [(set_attr "prefix" "evex")
7759     (set_attr "mode" "<MODE>")])
7760
;; Scalar vgetexp on a 128-bit vector mode (VF_128 iterator).  The
;; UNSPEC_GETEXP of operands 1 and 2 supplies element 0 of the result
;; (vec_merge mask const_int 1); all other elements are copied from
;; operand 1.  The <round_saeonly_*> pieces are substitution attributes
;; defined outside this chunk.
;; The ';' following the output template starts an (empty) comment.
7761(define_insn "avx512f_sgetexp<mode><round_saeonly_name>"
7762  [(set (match_operand:VF_128 0 "register_operand" "=v")
7763	(vec_merge:VF_128
7764	  (unspec:VF_128
7765	    [(match_operand:VF_128 1 "register_operand" "v")
7766	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
7767	    UNSPEC_GETEXP)
7768	  (match_dup 1)
7769	  (const_int 1)))]
7770   "TARGET_AVX512F"
7771   "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}";
7772    [(set_attr "prefix" "evex")
7773     (set_attr "mode" "<ssescalarmode>")])
7774
;; valign over the VI48_AVX512VL modes: operand 0 is the UNSPEC_ALIGN of
;; register operand 1, register-or-memory operand 2 and an immediate in
;; the range 0-255 (operand 3).  <mask_codefor>, <mask_name> and
;; <mask_operand4> are substitution attributes defined outside this
;; chunk.
;; The ';' following the output template starts an (empty) comment.
7775(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
7776  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
7777        (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
7778			       (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
7779			       (match_operand:SI 3 "const_0_to_255_operand")]
7780			      UNSPEC_ALIGN))]
7781  "TARGET_AVX512F"
7782  "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
7783  [(set_attr "prefix" "evex")
7784   (set_attr "mode" "<sseinsnmode>")])
7785
;; Expander that turns the 8-bit vshufps immediate (operand 3) into the
;; sixteen explicit element indices expected by
;; avx512f_shufps512_1_mask.  For each group of four result elements
;; (k = 0..3) it passes:
;;   ((imm >> 0) & 3) + 4*k        - index into operand 1
;;   ((imm >> 2) & 3) + 4*k        - index into operand 1
;;   ((imm >> 4) & 3) + 16 + 4*k   - index into operand 2
;;   ((imm >> 6) & 3) + 16 + 4*k   - index into operand 2
;; Operands 4 (V16SF) and 5 (HImode) are passed through unchanged as the
;; last two arguments; presumably the merge source and the mask --
;; confirm against the substituted _mask pattern.
7786(define_expand "avx512f_shufps512_mask"
7787  [(match_operand:V16SF 0 "register_operand")
7788   (match_operand:V16SF 1 "register_operand")
7789   (match_operand:V16SF 2 "nonimmediate_operand")
7790   (match_operand:SI 3 "const_0_to_255_operand")
7791   (match_operand:V16SF 4 "register_operand")
7792   (match_operand:HI 5 "register_operand")]
7793  "TARGET_AVX512F"
7794{
7795  int mask = INTVAL (operands[3]);
7796  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
7797					  GEN_INT ((mask >> 0) & 3),
7798					  GEN_INT ((mask >> 2) & 3),
7799					  GEN_INT (((mask >> 4) & 3) + 16),
7800					  GEN_INT (((mask >> 6) & 3) + 16),
7801					  GEN_INT (((mask >> 0) & 3) + 4),
7802					  GEN_INT (((mask >> 2) & 3) + 4),
7803					  GEN_INT (((mask >> 4) & 3) + 20),
7804					  GEN_INT (((mask >> 6) & 3) + 20),
7805					  GEN_INT (((mask >> 0) & 3) + 8),
7806					  GEN_INT (((mask >> 2) & 3) + 8),
7807					  GEN_INT (((mask >> 4) & 3) + 24),
7808					  GEN_INT (((mask >> 6) & 3) + 24),
7809					  GEN_INT (((mask >> 0) & 3) + 12),
7810					  GEN_INT (((mask >> 2) & 3) + 12),
7811					  GEN_INT (((mask >> 4) & 3) + 28),
7812					  GEN_INT (((mask >> 6) & 3) + 28),
7813					  operands[4], operands[5]));
7814  DONE;
7815})
7816
7817
;; Expander for the "maskz" form of packed vfixupimm.  It forwards the
;; destination (0), two float-vector sources (1, 2), the integer-vector
;; table operand (3), the immediate (4) and the mask (5) to the generated
;; <avx512>_fixupimm<mode>_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) between the immediate and the mask.
;; <round_saeonly_expand_operand6> appends any extra argument required
;; by the substituted variant; it is defined outside this chunk.
7818(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
7819  [(match_operand:VF_AVX512VL 0 "register_operand")
7820   (match_operand:VF_AVX512VL 1 "register_operand")
7821   (match_operand:VF_AVX512VL 2 "register_operand")
7822   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7823   (match_operand:SI 4 "const_0_to_255_operand")
7824   (match_operand:<avx512fmaskmode> 5 "register_operand")]
7825  "TARGET_AVX512F"
7826{
7827  emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7828	operands[0], operands[1], operands[2], operands[3],
7829	operands[4], CONST0_RTX (<MODE>mode), operands[5]
7830	<round_saeonly_expand_operand6>));
7831  DONE;
7832})
7833
;; Packed vfixupimm: operand 0 is the UNSPEC_FIXUPIMM of two float
;; vectors (operand 1 tied to the destination, operand 2 in a register),
;; an integer vector of mode <sseintvecmode> (operand 3) and an
;; immediate in the range 0-255 (operand 4).  Because operand 1 is tied
;; to operand 0, the template prints only %0, %2, %3 and %4.
;; The ';' following the output template starts an (empty) comment.
7834(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
7835  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7836        (unspec:VF_AVX512VL
7837          [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7838	   (match_operand:VF_AVX512VL 2 "register_operand" "v")
7839           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7840           (match_operand:SI 4 "const_0_to_255_operand")]
7841           UNSPEC_FIXUPIMM))]
7842  "TARGET_AVX512F"
7843  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7844  [(set_attr "prefix" "evex")
7845   (set_attr "mode" "<MODE>")])
7846
;; Merge-masked packed vfixupimm.  The UNSPEC_FIXUPIMM result is
;; vec_merged with operand 1 (also tied to the destination) under the
;; mask register in operand 5: elements whose mask bit is set take the
;; fixup result, the others keep operand 1's value.  The mask is printed
;; as {%5} after the destination.
;; The ';' following the output template starts an (empty) comment.
7847(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
7848  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7849	(vec_merge:VF_AVX512VL
7850          (unspec:VF_AVX512VL
7851            [(match_operand:VF_AVX512VL 1 "register_operand" "0")
7852	     (match_operand:VF_AVX512VL 2 "register_operand" "v")
7853             (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
7854             (match_operand:SI 4 "const_0_to_255_operand")]
7855             UNSPEC_FIXUPIMM)
7856	  (match_dup 1)
7857	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7858  "TARGET_AVX512F"
7859  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7860  [(set_attr "prefix" "evex")
7861   (set_attr "mode" "<MODE>")])
7862
;; Expander for the "maskz" form of scalar vfixupimm on VF_128 modes.
;; It forwards operands 0-4 and the mask (5) to the generated
;; avx512f_sfixupimm<mode>_maskz_1 pattern, inserting an all-zero vector
;; (CONST0_RTX) between the immediate and the mask.
;; <round_saeonly_expand_operand6> appends any extra argument required
;; by the substituted variant; it is defined outside this chunk.
7863(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
7864  [(match_operand:VF_128 0 "register_operand")
7865   (match_operand:VF_128 1 "register_operand")
7866   (match_operand:VF_128 2 "register_operand")
7867   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
7868   (match_operand:SI 4 "const_0_to_255_operand")
7869   (match_operand:<avx512fmaskmode> 5 "register_operand")]
7870  "TARGET_AVX512F"
7871{
7872  emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
7873	operands[0], operands[1], operands[2], operands[3],
7874	operands[4], CONST0_RTX (<MODE>mode), operands[5]
7875	<round_saeonly_expand_operand6>));
7876  DONE;
7877})
7878
;; Scalar vfixupimm on a 128-bit vector mode (VF_128 iterator).  The
;; UNSPEC_FIXUPIMM result supplies element 0 (vec_merge mask
;; const_int 1); all other elements are copied from operand 1, which is
;; also tied to the destination.  Operand 3 is an integer vector of mode
;; <sseintvecmode>; operand 4 is an immediate in the range 0-255.
;; The ';' following the output template starts an (empty) comment.
7879(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
7880  [(set (match_operand:VF_128 0 "register_operand" "=v")
7881	(vec_merge:VF_128
7882          (unspec:VF_128
7883            [(match_operand:VF_128 1 "register_operand" "0")
7884	     (match_operand:VF_128 2 "register_operand" "v")
7885	     (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7886	     (match_operand:SI 4 "const_0_to_255_operand")]
7887	    UNSPEC_FIXUPIMM)
7888	  (match_dup 1)
7889	  (const_int 1)))]
7890   "TARGET_AVX512F"
7891   "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
7892   [(set_attr "prefix" "evex")
7893   (set_attr "mode" "<ssescalarmode>")])
7894
;; Merge-masked scalar vfixupimm.  Two nested vec_merges:
;;   inner: element 0 from the UNSPEC_FIXUPIMM result, the remaining
;;          elements from operand 1 (mask const_int 1);
;;   outer: that value merged with operand 1 again under the mask
;;          register in operand 5.
;; Operand 1 is tied to the destination.  The mask is printed as {%5}
;; after the destination.
;; The ';' following the output template starts an (empty) comment.
7895(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
7896  [(set (match_operand:VF_128 0 "register_operand" "=v")
7897	(vec_merge:VF_128
7898	  (vec_merge:VF_128
7899	    (unspec:VF_128
7900	       [(match_operand:VF_128 1 "register_operand" "0")
7901		(match_operand:VF_128 2 "register_operand" "v")
7902		(match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7903		(match_operand:SI 4 "const_0_to_255_operand")]
7904	       UNSPEC_FIXUPIMM)
7905	    (match_dup 1)
7906	    (const_int 1))
7907	  (match_dup 1)
7908	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
7909  "TARGET_AVX512F"
7910  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
7911  [(set_attr "prefix" "evex")
7912   (set_attr "mode" "<ssescalarmode>")])
7913
;; Packed vrndscale over the VF_AVX512VL modes: operand 0 is the
;; UNSPEC_ROUND of operand 1 and an immediate in the range 0-255
;; (operand 2).  The immediate occupies one byte ("length_immediate").
;; The <mask_name>/<round_saeonly_name> suffixes and their template
;; pieces are substitution attributes defined outside this chunk.
7914(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
7915  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
7916	(unspec:VF_AVX512VL
7917	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
7918	   (match_operand:SI 2 "const_0_to_255_operand")]
7919	  UNSPEC_ROUND))]
7920  "TARGET_AVX512F"
7921  "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
7922  [(set_attr "length_immediate" "1")
7923   (set_attr "prefix" "evex")
7924   (set_attr "mode" "<MODE>")])
7925
;; Scalar vrndscale on a 128-bit vector mode (VF_128 iterator).  The
;; UNSPEC_ROUND of operands 1, 2 and the immediate in operand 3 supplies
;; element 0 of the result (vec_merge mask const_int 1); all other
;; elements are copied from operand 1.
;; NOTE(review): the "mode" attribute is <MODE> here, whereas the other
;; scalar VF_128 patterns in this chunk use <ssescalarmode> -- confirm
;; this is intentional.
7926(define_insn "avx512f_rndscale<mode><round_saeonly_name>"
7927  [(set (match_operand:VF_128 0 "register_operand" "=v")
7928	(vec_merge:VF_128
7929	  (unspec:VF_128
7930	    [(match_operand:VF_128 1 "register_operand" "v")
7931	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
7932	     (match_operand:SI 3 "const_0_to_255_operand")]
7933	    UNSPEC_ROUND)
7934	  (match_dup 1)
7935	  (const_int 1)))]
7936  "TARGET_AVX512F"
7937  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
7938  [(set_attr "length_immediate" "1")
7939   (set_attr "prefix" "evex")
7940   (set_attr "mode" "<MODE>")])
7941
7942;; One bit in mask selects 2 elements.
;; V16SF vshufps written as a vec_select with sixteen index operands
;; (operands 3-18) over the 32-element concatenation of operands 1
;; and 2.  Operands 3-6 choose the first four result elements: two from
;; operand 1 (indices 0-3) and two from operand 2 (indices 16-19).  The
;; insn condition requires operands 7-18 to repeat exactly that choice
;; at offsets +4, +8 and +12, so the same selection is applied to every
;; group of four elements.
;; The output code packs operands 3-6 into one 8-bit immediate (two bits
;; per selector, operands 5 and 6 taken relative to 16) and stores it
;; back into operands[3] so that %3 prints the immediate.
7943(define_insn "avx512f_shufps512_1<mask_name>"
7944  [(set (match_operand:V16SF 0 "register_operand" "=v")
7945	(vec_select:V16SF
7946	  (vec_concat:V32SF
7947	    (match_operand:V16SF 1 "register_operand" "v")
7948	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7949	  (parallel [(match_operand 3  "const_0_to_3_operand")
7950		     (match_operand 4  "const_0_to_3_operand")
7951		     (match_operand 5  "const_16_to_19_operand")
7952		     (match_operand 6  "const_16_to_19_operand")
7953		     (match_operand 7  "const_4_to_7_operand")
7954		     (match_operand 8  "const_4_to_7_operand")
7955		     (match_operand 9  "const_20_to_23_operand")
7956		     (match_operand 10  "const_20_to_23_operand")
7957		     (match_operand 11  "const_8_to_11_operand")
7958		     (match_operand 12  "const_8_to_11_operand")
7959		     (match_operand 13  "const_24_to_27_operand")
7960		     (match_operand 14  "const_24_to_27_operand")
7961		     (match_operand 15  "const_12_to_15_operand")
7962		     (match_operand 16  "const_12_to_15_operand")
7963		     (match_operand 17  "const_28_to_31_operand")
7964		     (match_operand 18  "const_28_to_31_operand")])))]
7965  "TARGET_AVX512F
7966   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7967       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7968       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7969       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
7970       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
7971       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
7972       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
7973       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
7974       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
7975       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
7976       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
7977       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
7978{
7979  int mask;
7980  mask = INTVAL (operands[3]);
7981  mask |= INTVAL (operands[4]) << 2;
7982  mask |= (INTVAL (operands[5]) - 16) << 4;
7983  mask |= (INTVAL (operands[6]) - 16) << 6;
7984  operands[3] = GEN_INT (mask);
7985
7986  return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
7987}
7988  [(set_attr "type" "sselog")
7989   (set_attr "length_immediate" "1")
7990   (set_attr "prefix" "evex")
7991   (set_attr "mode" "V16SF")])
7992
;; Expander that turns the 8-bit vshufpd immediate (operand 3) into the
;; eight explicit element indices expected by avx512f_shufpd512_1_mask.
;; Bit i of the immediate picks one of two adjacent candidates for
;; result element i:
;;   bit 0: 0/1    bit 1: 8/9    bit 2: 2/3    bit 3: 10/11
;;   bit 4: 4/5    bit 5: 12/13  bit 6: 6/7    bit 7: 14/15
;; (indices 0-7 refer to operand 1, 8-15 to operand 2).
;; Operands 4 (V8DF) and 5 (QImode) are passed through unchanged as the
;; last two arguments; presumably the merge source and the mask --
;; confirm against the substituted _mask pattern.
7993(define_expand "avx512f_shufpd512_mask"
7994  [(match_operand:V8DF 0 "register_operand")
7995   (match_operand:V8DF 1 "register_operand")
7996   (match_operand:V8DF 2 "nonimmediate_operand")
7997   (match_operand:SI 3 "const_0_to_255_operand")
7998   (match_operand:V8DF 4 "register_operand")
7999   (match_operand:QI 5 "register_operand")]
8000  "TARGET_AVX512F"
8001{
8002  int mask = INTVAL (operands[3]);
8003  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
8004					GEN_INT (mask & 1),
8005					GEN_INT (mask & 2 ? 9 : 8),
8006					GEN_INT (mask & 4 ? 3 : 2),
8007					GEN_INT (mask & 8 ? 11 : 10),
8008					GEN_INT (mask & 16 ? 5 : 4),
8009					GEN_INT (mask & 32 ? 13 : 12),
8010					GEN_INT (mask & 64 ? 7 : 6),
8011					GEN_INT (mask & 128 ? 15 : 14),
8012					operands[4], operands[5]));
8013  DONE;
8014})
8015
;; V8DF vshufpd written as a vec_select with eight index operands
;; (operands 3-10) over the 16-element concatenation of operands 1
;; and 2.  Each index operand is restricted by its predicate to one of
;; two adjacent values (0-1, 8-9, 2-3, 10-11, 4-5, 12-13, 6-7, 14-15).
;; The output code subtracts each range's base to obtain a single bit,
;; packs the eight bits into the immediate (operand 3 -> bit 0 ...
;; operand 10 -> bit 7) and stores it back into operands[3] so that %3
;; prints the immediate.
8016(define_insn "avx512f_shufpd512_1<mask_name>"
8017  [(set (match_operand:V8DF 0 "register_operand" "=v")
8018	(vec_select:V8DF
8019	  (vec_concat:V16DF
8020	    (match_operand:V8DF 1 "register_operand" "v")
8021	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
8022	  (parallel [(match_operand 3 "const_0_to_1_operand")
8023		     (match_operand 4 "const_8_to_9_operand")
8024		     (match_operand 5 "const_2_to_3_operand")
8025		     (match_operand 6 "const_10_to_11_operand")
8026		     (match_operand 7 "const_4_to_5_operand")
8027		     (match_operand 8 "const_12_to_13_operand")
8028		     (match_operand 9 "const_6_to_7_operand")
8029		     (match_operand 10 "const_14_to_15_operand")])))]
8030  "TARGET_AVX512F"
8031{
8032  int mask;
8033  mask = INTVAL (operands[3]);
8034  mask |= (INTVAL (operands[4]) - 8) << 1;
8035  mask |= (INTVAL (operands[5]) - 2) << 2;
8036  mask |= (INTVAL (operands[6]) - 10) << 3;
8037  mask |= (INTVAL (operands[7]) - 4) << 4;
8038  mask |= (INTVAL (operands[8]) - 12) << 5;
8039  mask |= (INTVAL (operands[9]) - 6) << 6;
8040  mask |= (INTVAL (operands[10]) - 14) << 7;
8041  operands[3] = GEN_INT (mask);
8042
8043  return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
8044}
8045  [(set_attr "type" "sselog")
8046   (set_attr "length_immediate" "1")
8047   (set_attr "prefix" "evex")
8048   (set_attr "mode" "V8DF")])
8049
;; Expander that turns the vshufpd immediate (operand 3, any const_int)
;; into the four explicit element indices expected by
;; avx_shufpd256_1.  Only the low four bits are examined:
;;   bit 0: 0/1   bit 1: 4/5   bit 2: 2/3   bit 3: 6/7
;; (indices 0-3 refer to operand 1, 4-7 to operand 2).
;; <mask_expand4_name> and <mask_expand4_args> are substitution
;; attributes defined outside this chunk; the latter appends any extra
;; arguments of the substituted variant.
8050(define_expand "avx_shufpd256<mask_expand4_name>"
8051  [(match_operand:V4DF 0 "register_operand")
8052   (match_operand:V4DF 1 "register_operand")
8053   (match_operand:V4DF 2 "nonimmediate_operand")
8054   (match_operand:SI 3 "const_int_operand")]
8055  "TARGET_AVX"
8056{
8057  int mask = INTVAL (operands[3]);
8058  emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
8059						     operands[1],
8060						     operands[2],
8061						     GEN_INT (mask & 1),
8062						     GEN_INT (mask & 2 ? 5 : 4),
8063						     GEN_INT (mask & 4 ? 3 : 2),
8064						     GEN_INT (mask & 8 ? 7 : 6)
8065						     <mask_expand4_args>));
8066  DONE;
8067})
8068
;; V4DF vshufpd written as a vec_select with four index operands
;; (operands 3-6) over the 8-element concatenation of operands 1 and 2.
;; Each index operand is restricted by its predicate to one of two
;; adjacent values (0-1, 4-5, 2-3, 6-7).  The output code subtracts each
;; range's base to obtain a single bit, packs the four bits into the
;; immediate (operand 3 -> bit 0 ... operand 6 -> bit 3) and stores it
;; back into operands[3] so that %3 prints the immediate.
8069(define_insn "avx_shufpd256_1<mask_name>"
8070  [(set (match_operand:V4DF 0 "register_operand" "=v")
8071	(vec_select:V4DF
8072	  (vec_concat:V8DF
8073	    (match_operand:V4DF 1 "register_operand" "v")
8074	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
8075	  (parallel [(match_operand 3 "const_0_to_1_operand")
8076		     (match_operand 4 "const_4_to_5_operand")
8077		     (match_operand 5 "const_2_to_3_operand")
8078		     (match_operand 6 "const_6_to_7_operand")])))]
8079  "TARGET_AVX && <mask_avx512vl_condition>"
8080{
8081  int mask;
8082  mask = INTVAL (operands[3]);
8083  mask |= (INTVAL (operands[4]) - 4) << 1;
8084  mask |= (INTVAL (operands[5]) - 2) << 2;
8085  mask |= (INTVAL (operands[6]) - 6) << 3;
8086  operands[3] = GEN_INT (mask);
8087
8088  return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
8089}
8090  [(set_attr "type" "sseshuf")
8091   (set_attr "length_immediate" "1")
8092   (set_attr "prefix" "vex")
8093   (set_attr "mode" "V4DF")])
8094
;; Expander that turns the shufpd immediate (operand 3, any const_int)
;; into the two explicit element indices expected by
;; sse2_shufpd_v2df.  Only the low two bits are examined:
;;   bit 0: 0/1 (element of operand 1)   bit 1: 2/3 (element of operand 2)
;; <mask_expand4_name> and <mask_expand4_args> are substitution
;; attributes defined outside this chunk; the latter appends any extra
;; arguments of the substituted variant.
8095(define_expand "sse2_shufpd<mask_expand4_name>"
8096  [(match_operand:V2DF 0 "register_operand")
8097   (match_operand:V2DF 1 "register_operand")
8098   (match_operand:V2DF 2 "nonimmediate_operand")
8099   (match_operand:SI 3 "const_int_operand")]
8100  "TARGET_SSE2"
8101{
8102  int mask = INTVAL (operands[3]);
8103  emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
8104						      operands[2], GEN_INT (mask & 1),
8105						      GEN_INT (mask & 2 ? 3 : 2)
8106						      <mask_expand4_args>));
8107  DONE;
8108})
8109
;; Masked V2DF vshufpd (TARGET_AVX512VL).  The shuffle selects one
;; element of operand 1 (index operand 3, 0 or 1) and one element of
;; operand 2 (index operand 4, 2 or 3); the result is vec_merged with
;; operand 5 under the QImode mask in operand 6 (constraint "Yk").
;; Operand 5 is either tied to the destination ("0") or satisfies
;; constraint "C"; %N5 prints the matching suffix.
;; The output code packs the two selectors into a 2-bit immediate
;; (operand 3 -> bit 0, operand 4 - 2 -> bit 1) and stores it back into
;; operands[3] so that %3 prints the immediate.
;; Both assembler dialects must print the mask register as %{%6%}; the
;; Intel-syntax half previously contained "%{6%}", which emitted the
;; literal text "{6}" instead of the mask register.
8110(define_insn "sse2_shufpd_v2df_mask"
8111  [(set (match_operand:V2DF 0 "register_operand" "=v")
8112    (vec_merge:V2DF
8113	  (vec_select:V2DF
8114	    (vec_concat:V4DF
8115	      (match_operand:V2DF 1 "register_operand" "v")
8116	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
8117	    (parallel [(match_operand 3 "const_0_to_1_operand")
8118		           (match_operand 4 "const_2_to_3_operand")]))
8119      (match_operand:V2DF 5 "vector_move_operand" "0C")
8120      (match_operand:QI 6 "register_operand" "Yk")))]
8121  "TARGET_AVX512VL"
8122{
8123  int mask;
8124  mask = INTVAL (operands[3]);
8125  mask |= (INTVAL (operands[4]) - 2) << 1;
8126  operands[3] = GEN_INT (mask);
8127
8128  return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
8129}
8130  [(set_attr "type" "sseshuf")
8131   (set_attr "length_immediate" "1")
8132   (set_attr "prefix" "evex")
8133   (set_attr "mode" "V2DF")])
8134
8135;; punpcklqdq and punpckhqdq are shorter than shufpd.
;; V4DI vpunpckhqdq.  Operand 0 receives elements 1, 5, 3 and 7 of the
;; eight-element concatenation (operand 1, operand 2), i.e.
;; { op1[1], op2[1], op1[3], op2[3] }.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  Requires TARGET_AVX2.
8136(define_insn "avx2_interleave_highv4di<mask_name>"
8137  [(set (match_operand:V4DI 0 "register_operand" "=v")
8138	(vec_select:V4DI
8139	  (vec_concat:V8DI
8140	    (match_operand:V4DI 1 "register_operand" "v")
8141	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8142	  (parallel [(const_int 1)
8143		     (const_int 5)
8144		     (const_int 3)
8145		     (const_int 7)])))]
8146  "TARGET_AVX2 && <mask_avx512vl_condition>"
8147  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8148  [(set_attr "type" "sselog")
8149   (set_attr "prefix" "vex")
8150   (set_attr "mode" "OI")])
8151
;; V8DI vpunpckhqdq.  Operand 0 receives elements 1, 9, 3, 11, 5, 13,
;; 7 and 15 of the sixteen-element concatenation (operand 1, operand 2),
;; i.e. the odd elements of both inputs interleaved.  Operand 1 must be
;; a register; operand 2 may be a register or memory.
8152(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
8153  [(set (match_operand:V8DI 0 "register_operand" "=v")
8154	(vec_select:V8DI
8155	  (vec_concat:V16DI
8156	    (match_operand:V8DI 1 "register_operand" "v")
8157	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
8158	  (parallel [(const_int 1) (const_int 9)
8159		     (const_int 3) (const_int 11)
8160		     (const_int 5) (const_int 13)
8161		     (const_int 7) (const_int 15)])))]
8162  "TARGET_AVX512F"
8163  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8164  [(set_attr "type" "sselog")
8165   (set_attr "prefix" "evex")
8166   (set_attr "mode" "XI")])
8167
;; V2DI high interleave: operand 0 = { op1[1], op2[1] }.
;; Two alternatives, selected by the "isa" attribute:
;;   0: punpckhqdq  - two-operand form, operand 1 tied to the destination
;;                    (noavx)
;;   1: vpunpckhqdq - three-operand form (avx)
;; The second alternative's "prefix" attribute comes from <mask_prefix>,
;; a substitution attribute defined outside this chunk.
8168(define_insn "vec_interleave_highv2di<mask_name>"
8169  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
8170	(vec_select:V2DI
8171	  (vec_concat:V4DI
8172	    (match_operand:V2DI 1 "register_operand" "0,v")
8173	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
8174	  (parallel [(const_int 1)
8175		     (const_int 3)])))]
8176  "TARGET_SSE2 && <mask_avx512vl_condition>"
8177  "@
8178   punpckhqdq\t{%2, %0|%0, %2}
8179   vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8180  [(set_attr "isa" "noavx,avx")
8181   (set_attr "type" "sselog")
8182   (set_attr "prefix_data16" "1,*")
8183   (set_attr "prefix" "orig,<mask_prefix>")
8184   (set_attr "mode" "TI")])
8185
;; V4DI vpunpcklqdq.  Operand 0 receives elements 0, 4, 2 and 6 of the
;; eight-element concatenation (operand 1, operand 2), i.e.
;; { op1[0], op2[0], op1[2], op2[2] }.  Operand 1 must be a register;
;; operand 2 may be a register or memory.  Requires TARGET_AVX2.
8186(define_insn "avx2_interleave_lowv4di<mask_name>"
8187  [(set (match_operand:V4DI 0 "register_operand" "=v")
8188	(vec_select:V4DI
8189	  (vec_concat:V8DI
8190	    (match_operand:V4DI 1 "register_operand" "v")
8191	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
8192	  (parallel [(const_int 0)
8193		     (const_int 4)
8194		     (const_int 2)
8195		     (const_int 6)])))]
8196  "TARGET_AVX2 && <mask_avx512vl_condition>"
8197  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
8198  [(set_attr "type" "sselog")
8199   (set_attr "prefix" "vex")
8200   (set_attr "mode" "OI")])
8201
;; 512-bit vpunpcklqdq: pick the even-numbered DImode elements of the
;; concatenation {operand 1, operand 2} in the order 0,8,2,10,4,12,6,14.
(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
  [(set
     (match_operand:V8DI 0 "register_operand" "=v")
     (vec_select:V8DI
       (vec_concat:V16DI
         (match_operand:V8DI 1 "register_operand" "v")
         (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
       (parallel
         [(const_int 0) (const_int 8) (const_int 2) (const_int 10)
          (const_int 4) (const_int 12) (const_int 6) (const_int 14)])))]
  "TARGET_AVX512F"
  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8217
;; 128-bit [v]punpcklqdq: the result is element 0 of operand 1 followed by
;; element 0 of operand 2 (indices 0 and 2 of the V4DI concatenation).
;; Alternative 0 is the two-operand SSE2 form (operand 1 tied to the
;; destination); alternative 1 is the three-operand AVX form.
;; The AVX alternative's prefix is <mask_prefix>, matching
;; vec_interleave_highv2di<mask_name>, so the masked variant is not
;; described as VEX-encoded.
(define_insn "vec_interleave_lowv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
	(vec_select:V2DI
	  (vec_concat:V4DI
	    (match_operand:V2DI 1 "register_operand" "0,v")
	    (match_operand:V2DI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 0)
		     (const_int 2)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
8235
;; [v]shufpd on a 128-bit vector of 64-bit elements.  Operand 3 (0..1)
;; selects an element of operand 1 and operand 4 (2..3) selects an element
;; of operand 2 within the concatenated vector.
(define_insn "sse2_shufpd_<mode>"
  [(set
     (match_operand:VI8F_128 0 "register_operand" "=x,x")
     (vec_select:VI8F_128
       (vec_concat:<ssedoublevecmode>
         (match_operand:VI8F_128 1 "register_operand" "0,x")
         (match_operand:VI8F_128 2 "nonimmediate_operand" "xm,xm"))
       (parallel [(match_operand 3 "const_0_to_1_operand")
                  (match_operand 4 "const_2_to_3_operand")])))]
  "TARGET_SSE2"
{
  /* Fold both selectors into one immediate: bit 0 comes from operand 3,
     bit 1 from operand 4 rebased to 0..1.  */
  int low_sel = INTVAL (operands[3]);
  int high_sel = INTVAL (operands[4]) - 2;

  operands[3] = GEN_INT (low_sel | (high_sel << 1));

  if (which_alternative == 0)
    return "shufpd\t{%3, %2, %0|%0, %2, %3}";
  if (which_alternative == 1)
    return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseshuf")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "V2DF")])
8266
;; Extract element 1 (the high double) of a V2DF value into a DFmode
;; destination.
;; Registers from different units are kept in separate alternatives; see
;; the comment above the inline_secondary_memory_needed function in i386.c.
;; Alternatives 3-5 take the source from offsettable memory and output "#".
(define_insn "sse2_storehpd"
  [(set
     (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,x,*f,r")
     (vec_select:DF
       (match_operand:V2DF 1 "nonimmediate_operand" " x,0,x,o,o,o")
       (parallel [(const_int 1)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovhpd\t{%1, %0|%0, %1}
   unpckhpd\t%0, %0
   vunpckhpd\t{%d1, %0|%0, %d1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
   ;; Only the non-AVX encoding of alternative 0 carries the data16 prefix.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (not (match_test "TARGET_AVX")))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "maybe_vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
8292
;; After reload, extracting element 1 of a V2DF memory operand into a
;; register becomes a plain DFmode move from the same address plus 8.
(define_split
  [(set
     (match_operand:DF 0 "register_operand")
     (vec_select:DF
       (match_operand:V2DF 1 "memory_operand")
       (parallel [(const_int 1)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-address the source as the second DFmode slot.  */
  operands[1] = adjust_address (operands[1], DFmode, 8);
})
8301
;; Extract element 1 of a V2DF value when only SSE (not SSE2) is enabled,
;; using the single-precision move instructions movhps/movhlps/movlps.
;; Alternative 2 reads the upper half of the memory operand via %H1.
(define_insn "*vec_extractv2df_1_sse"
  [(set
     (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
     (vec_select:DF
       (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
       (parallel [(const_int 1)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movhps\t{%1, %0|%q0, %1}
   movhlps\t{%1, %0|%0, %1}
   movlps\t{%H1, %0|%0, %H1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
8316
;; Extract element 0 (the low double) of a V2DF value into a DFmode
;; destination.
;; Registers from different units are kept in separate alternatives; see
;; the comment above the inline_secondary_memory_needed function in i386.c.
;; Only alternative 0 (store to memory) emits an instruction directly; the
;; remaining alternatives output "#".
(define_insn "sse2_storelpd"
  [(set
     (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
     (vec_select:DF
       (match_operand:V2DF 1 "nonimmediate_operand" " x,x,m,m,m")
       (parallel [(const_int 0)])))]
  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vmovlpd\t{%1, %0|%0, %1}
   #
   #
   #
   #"
  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
   (set_attr "prefix_data16" "1,*,*,*,*")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
8335
;; After reload, extracting element 0 of a V2DF value into a register
;; becomes a plain DFmode move: from the same hard register viewed in
;; DFmode, or from the memory operand at offset 0.
(define_split
  [(set
     (match_operand:DF 0 "register_operand")
     (vec_select:DF
       (match_operand:V2DF 1 "nonimmediate_operand")
       (parallel [(const_int 0)])))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  rtx src = operands[1];

  operands[1] = (REG_P (src)
		 ? gen_rtx_REG (DFmode, REGNO (src))
		 : adjust_address (src, DFmode, 0));
})
8349
;; Extract element 0 of a V2DF value when only SSE (not SSE2) is enabled,
;; using movlps for the memory alternatives and movaps between registers.
(define_insn "*vec_extractv2df_0_sse"
  [(set
     (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
     (vec_select:DF
       (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
       (parallel [(const_int 0)])))]
  "!TARGET_SSE2 && TARGET_SSE
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   movlps\t{%1, %0|%0, %1}
   movaps\t{%1, %0|%0, %1}
   movlps\t{%1, %0|%0, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "V2SF,V4SF,V2SF")])
8363
;; Expander for sse2_loadhpd: keep element 0 of operand 1 and place the
;; DFmode operand 2 in element 1.  The operands are first passed through
;; ix86_fixup_binary_operands; when that returns a destination other than
;; operand 0, the result is copied back afterwards.
(define_expand "sse2_loadhpd_exp"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand")
     (vec_concat:V2DF
       (vec_select:DF
         (match_operand:V2DF 1 "nonimmediate_operand")
         (parallel [(const_int 0)]))
       (match_operand:DF 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadhpd (target, operands[1], operands[2]));

  /* The fixup may have substituted a different destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
8383
;; Build a V2DF from element 0 of operand 1 (low half) and the DFmode
;; operand 2 (high half).
;; Registers from different units are kept in separate alternatives; see
;; the comment above the inline_secondary_memory_needed function in i386.c.
;; Alternatives 4-6 have an offsettable memory destination tied to
;; operand 1 and output "#".
(define_insn "sse2_loadhpd"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand"
	  "=x,x,x,x,o,o ,o")
     (vec_concat:V2DF
       (vec_select:DF
         (match_operand:V2DF 1 "nonimmediate_operand"
	  " 0,x,0,x,0,0 ,0")
         (parallel [(const_int 0)]))
       (match_operand:DF 2 "nonimmediate_operand"
	  " m,m,x,x,x,*f,r")))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   #
   #
   #"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "1,*,*,*,*,*,*")
   (set_attr "prefix" "orig,vex,orig,vex,*,*,*")
   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
8411
;; After reload, replacing the high element of a V2DF in memory with a
;; DFmode register (low element unchanged) becomes a DFmode store at
;; offset 8 of that memory operand.
(define_split
  [(set
     (match_operand:V2DF 0 "memory_operand")
     (vec_concat:V2DF
       (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
       (match_operand:DF 1 "register_operand")))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-address the destination as the second DFmode slot.  */
  operands[0] = adjust_address (operands[0], DFmode, 8);
})
8420
;; Expander for sse2_loadlpd: place the DFmode operand 2 in element 0 and
;; keep element 1 of operand 1.  The operands are first passed through
;; ix86_fixup_binary_operands; when that returns a destination other than
;; operand 0, the result is copied back afterwards.
(define_expand "sse2_loadlpd_exp"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand")
     (vec_concat:V2DF
       (match_operand:DF 2 "nonimmediate_operand")
       (vec_select:DF
         (match_operand:V2DF 1 "nonimmediate_operand")
         (parallel [(const_int 1)]))))]
  "TARGET_SSE2"
{
  rtx target = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);

  emit_insn (gen_sse2_loadlpd (target, operands[1], operands[2]));

  /* The fixup may have substituted a different destination.  */
  if (target != operands[0])
    emit_move_insn (operands[0], target);

  DONE;
})
8440
;; Build a V2DF from the DFmode operand 2 (low half) and element 1 of
;; operand 1 (high half).
;; Registers from different units are kept in separate alternatives; see
;; the comment above the inline_secondary_memory_needed function in i386.c.
;; Alternatives 8-10 have a memory destination tied to operand 1 and
;; output "#".
(define_insn "sse2_loadlpd"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand"
	  "=x,x,x,x,x,x,x,x,m,m ,m")
     (vec_concat:V2DF
       (match_operand:DF 2 "nonimmediate_operand"
	  " m,m,m,x,x,0,0,x,x,*f,r")
       (vec_select:DF
         (match_operand:V2DF 1 "vector_move_operand"
	  " C,0,x,0,x,x,o,o,0,0 ,0")
         (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
  "@
   %vmovsd\t{%2, %0|%0, %2}
   movlpd\t{%2, %0|%0, %2}
   vmovlpd\t{%2, %1, %0|%0, %1, %2}
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhpd\t{%H1, %0|%0, %H1}
   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
   #
   #
   #"
  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
   ;; shufpd is a shuffle; alternatives 9 and 10 take operand 2 from an
   ;; x87 or general register; everything else is an SSE move.
   (set (attr "type")
        (cond [(eq_attr "alternative" "5")
                 (const_string "sselog")
               (eq_attr "alternative" "9")
                 (const_string "fmov")
               (eq_attr "alternative" "10")
                 (const_string "imov")
              ]
              (const_string "ssemov")))
   (set_attr "ssememalign" "64")
   (set_attr "prefix_data16" "*,1,*,*,*,*,1,*,*,*,*")
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*,*,*")
   (set_attr "prefix" "maybe_vex,orig,vex,orig,vex,orig,orig,vex,*,*,*")
   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
8481
;; After reload, replacing the low element of a V2DF in memory with a
;; DFmode register (high element unchanged) becomes a DFmode store at
;; offset 0 of that memory operand.
(define_split
  [(set
     (match_operand:V2DF 0 "memory_operand")
     (vec_concat:V2DF
       (match_operand:DF 1 "register_operand")
       (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
  "TARGET_SSE2 && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Re-address the destination as the first DFmode slot.  */
  operands[0] = adjust_address (operands[0], DFmode, 0);
})
8490
;; vec_merge with mask 1: element 0 of the result comes from operand 2 and
;; element 1 from operand 1.  The nine alternatives cover register and
;; memory placements of the operands, in SSE2 and AVX encodings.
(define_insn "sse2_movsd"
  [(set
     (match_operand:V2DF 0 "nonimmediate_operand"   "=x,x,x,x,m,x,x,x,o")
     (vec_merge:V2DF
       (match_operand:V2DF 2 "nonimmediate_operand" " x,x,m,m,x,0,0,x,0")
       (match_operand:V2DF 1 "nonimmediate_operand" " 0,x,0,x,0,x,o,o,x")
       (const_int 1)))]
  "TARGET_SSE2"
  "@
   movsd\t{%2, %0|%0, %2}
   vmovsd\t{%2, %1, %0|%0, %1, %2}
   movlpd\t{%2, %0|%0, %q2}
   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
   %vmovlpd\t{%2, %0|%q0, %2}
   shufpd\t{$2, %1, %0|%0, %1, 2}
   movhps\t{%H1, %0|%0, %H1}
   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
   %vmovhps\t{%1, %H0|%H0, %1}"
  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
   ;; Only the shufpd alternative is a shuffle; the rest are moves.
   (set (attr "type")
        (if_then_else
          (eq_attr "alternative" "5")
          (const_string "sselog")
          (const_string "ssemov")))
   ;; The non-AVX encodings of the movlpd alternatives 2 and 4 carry the
   ;; data16 prefix.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "2,4")
               (not (match_test "TARGET_AVX")))
          (const_string "1")
          (const_string "*")))
   (set_attr "length_immediate" "*,*,*,*,*,1,*,*,*")
   (set_attr "ssememalign" "64")
   (set_attr "prefix" "orig,vex,orig,vex,maybe_vex,orig,orig,vex,maybe_vex")
   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
8524
;; Broadcast a DFmode value into both elements of a V2DF register:
;; unpcklpd on the destination itself without AVX, [v]movddup otherwise.
(define_insn "vec_dupv2df<mask_name>"
  [(set
     (match_operand:V2DF 0 "register_operand"     "=x,x,v")
     (vec_duplicate:V2DF
       (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "@
   unpcklpd\t%0, %0
   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "isa" "noavx,sse3,avx512vl")
   (set_attr "type" "sselog1")
   (set_attr "prefix" "orig,maybe_vex,evex")
   (set_attr "mode" "V2DF,DF,DF")])
8538
;; Concatenate two DFmode values into a V2DF register.  Alternatives 3
;; and 4 require operand 2 to match operand 1 (constraint "1") and use
;; movddup; alternative 7 takes constraint "C" for operand 2 and uses
;; movsd; the last two alternatives use movlhps/movhps and are "noavx".
(define_insn "*vec_concatv2df"
  [(set
     (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x,v,x,x")
     (vec_concat:V2DF
       (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,m,0,0")
       (match_operand:DF 2 "vector_move_operand"  " x,x,v,1,1,m,m,C,x,m")))]
  "TARGET_SSE"
  "@
   unpcklpd\t{%2, %0|%0, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
   %vmovddup\t{%1, %0|%0, %1}
   vmovddup\t{%1, %0|%0, %1}
   movhpd\t{%2, %0|%0, %2}
   vmovhpd\t{%2, %1, %0|%0, %1, %2}
   %vmovsd\t{%1, %0|%0, %1}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}"
  [(set_attr "isa" "sse2_noavx,avx,avx512vl,sse3,avx512vl,sse2_noavx,avx,sse2,noavx,noavx")
   ;; The unpcklpd and movddup alternatives are shuffles; the rest are moves.
   (set (attr "type")
        (if_then_else
          (eq_attr "alternative" "0,1,2,3,4")
          (const_string "sselog")
          (const_string "ssemov")))
   ;; Only the SSE2 movhpd alternative carries the data16 prefix.
   (set (attr "prefix_data16")
        (if_then_else
          (eq_attr "alternative" "5")
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex,evex,maybe_vex,evex,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
8568
8569;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8570;;
8571;; Parallel integer down-conversion operations
8572;;
8573;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8574
;; Destination modes of the truncations from a 512-bit source.
(define_mode_iterator PMOV_DST_MODE_1
  [V16QI V16HI V8SI V8HI])

;; Source mode for each destination mode, in upper and lower case.
(define_mode_attr pmov_src_mode
  [(V16QI "V16SI")
   (V16HI "V16SI")
   (V8SI "V8DI")
   (V8HI "V8DI")])
(define_mode_attr pmov_src_lower
  [(V16QI "v16si")
   (V16HI "v16si")
   (V8SI "v8di")
   (V8HI "v8di")])

;; Two-letter suffix appended to the vpmov mnemonic for each destination.
(define_mode_attr pmov_suff_1
  [(V16QI "db")
   (V16HI "dw")
   (V8SI "qd")
   (V8HI "qw")])
8582
;; Truncate a 512-bit register (any_truncate) into a narrower vector held
;; in a register (alternative 0) or stored to memory (alternative 1).
(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
     (any_truncate:PMOV_DST_MODE_1
       (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8593
;; Masked truncation of a 512-bit register: the truncated elements are
;; merged with operand 2 under mask register operand 3.  The destination
;; is a register (alternative 0) or memory (alternative 1).
(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
     (vec_merge:PMOV_DST_MODE_1
       (any_truncate:PMOV_DST_MODE_1
         (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
       (match_operand:PMOV_DST_MODE_1 2 "vector_move_operand" "0C,0")
       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8607
;; Masked truncating store: the truncated elements of operand 1 are merged
;; with the current contents of the memory destination under mask
;; operand 2.
(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
     (vec_merge:PMOV_DST_MODE_1
       (any_truncate:PMOV_DST_MODE_1
         (match_operand:<pmov_src_mode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512F")
8616
;; Truncate a V32HI register to V32QI (any_truncate), into a register
;; (alternative 0) or memory (alternative 1).
(define_insn "avx512bw_<code>v32hiv32qi2"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
     (any_truncate:V32QI
       (match_operand:V32HI 1 "register_operand" "v,v")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8627
;; Masked V32HI -> V32QI truncation: the truncated elements are merged
;; with operand 2 under the SImode mask register operand 3.
(define_insn "avx512bw_<code>v32hiv32qi2_mask"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
     (vec_merge:V32QI
       (any_truncate:V32QI
         (match_operand:V32HI 1 "register_operand" "v,v"))
       (match_operand:V32QI 2 "vector_move_operand" "0C,0")
       (match_operand:SI 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512BW"
  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
8641
;; Masked V32HI -> V32QI truncation whose pass-through value is the
;; destination itself (match_dup 0), selected by the SImode mask operand 2.
(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
  [(set
     (match_operand:V32QI 0 "nonimmediate_operand")
     (vec_merge:V32QI
       (any_truncate:V32QI
         (match_operand:V32HI 1 "register_operand"))
       (match_dup 0)
       (match_operand:SI 2 "register_operand")))]
  "TARGET_AVX512BW")
8650
;; 128-bit destination modes of the truncations from a 256-bit source;
;; V16QI is only enabled with TARGET_AVX512BW.
(define_mode_iterator PMOV_DST_MODE_2
  [V4SI
   V8HI
   (V16QI "TARGET_AVX512BW")])

;; Two-letter suffix appended to the vpmov mnemonic for each destination.
(define_mode_attr pmov_suff_2
  [(V16QI "wb")
   (V8HI "dw")
   (V4SI "qd")])
8655
;; Truncate a <ssedoublemode> register to half-width elements
;; (any_truncate), into a register (alternative 0) or memory
;; (alternative 1).
(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
     (any_truncate:PMOV_DST_MODE_2
       (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8666
;; Masked form of the <ssedoublemode> truncation: the truncated elements
;; are merged with operand 2 under mask register operand 3.
(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
     (vec_merge:PMOV_DST_MODE_2
       (any_truncate:PMOV_DST_MODE_2
         (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
       (match_operand:PMOV_DST_MODE_2 2 "vector_move_operand" "0C,0")
       (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "none,store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
8680
;; Masked <ssedoublemode> truncation whose pass-through value is the
;; destination itself (match_dup 0), selected by mask operand 2.
(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
  [(set
     (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
     (vec_merge:PMOV_DST_MODE_2
       (any_truncate:PMOV_DST_MODE_2
         (match_operand:<ssedoublemode> 1 "register_operand"))
       (match_dup 0)
       (match_operand:<avx512fmaskmode> 2 "register_operand")))]
  "TARGET_AVX512VL")
8689
;; Source modes of the truncations to QImode elements whose result fills
;; only the low part of a V16QI register; V8HI needs TARGET_AVX512BW.
(define_mode_iterator PMOV_SRC_MODE_3
  [V4DI
   V2DI
   V8SI
   V4SI
   (V8HI "TARGET_AVX512BW")])

;; Mode of the truncated (low) part of the V16QI result.
(define_mode_attr pmov_dst_3
  [(V4DI "V4QI")
   (V2DI "V2QI")
   (V8SI "V8QI")
   (V4SI "V4QI")
   (V8HI "V8QI")])

;; Mode of the zeroed remainder; together with pmov_dst_3 it adds up to
;; 16 QImode elements.
(define_mode_attr pmov_dst_zeroed_3
  [(V4DI "V12QI")
   (V2DI "V14QI")
   (V8SI "V8QI")
   (V4SI "V12QI")
   (V8HI "V8QI")])

;; Two-letter suffix appended to the vpmov mnemonic for each source mode.
(define_mode_attr pmov_suff_3
  [(V4DI "qb")
   (V2DI "qb")
   (V8SI "db")
   (V4SI "db")
   (V8HI "wb")])
8697
;; Truncate operand 1 to QImode elements in the low part of a V16QI
;; register; the remaining elements are the all-zero operand 2.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (any_truncate:<pmov_dst_3>
         (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
       (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8709
;; Store the V2DI -> V2QI truncation of operand 1 into the first two bytes
;; of the V16QI memory operand 0; bytes 2..15 keep their previous contents
;; (vec_select of match_dup 0).
;; Only two bytes are written, so the Intel-syntax memory operand is
;; printed with the %w size override instead of the size of the V16QI mode.
(define_insn "*avx512vl_<code>v2div2qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (any_truncate:V2QI
	      (match_operand:V2DI 1 "register_operand" "v"))
      (vec_select:V14QI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)
                   (const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)
                   (const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8730
;; Masked V2DI -> V2QI truncation into a V16QI register.  The two low
;; bytes are the truncation result merged with bytes 0..1 of operand 2
;; under the QImode mask operand 3; the upper 14 bytes are zero.
(define_insn "avx512vl_<code>v2div2qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V2QI
         (any_truncate:V2QI
           (match_operand:V2DI 1 "register_operand" "v"))
         (vec_select:V2QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V14QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8753
;; Masked store of the V2DI -> V2QI truncation of operand 1 into the first
;; two bytes of the V16QI memory operand 0.  Under the QImode mask
;; operand 2 the result is merged with the bytes already in memory; bytes
;; 2..15 keep their previous contents.
;; At most two bytes are written, so the Intel-syntax memory operand is
;; printed with the %w size override instead of the size of the V16QI mode.
(define_insn "avx512vl_<code>v2div2qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (vec_merge:V2QI
        (any_truncate:V2QI
          (match_operand:V2DI 1 "register_operand" "v"))
        (vec_select:V2QI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V14QI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)
                   (const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)
                   (const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8779
;; Store the four-element truncation to V4QI of operand 1 into the first
;; four bytes of the V16QI memory operand 0; bytes 4..15 keep their
;; previous contents (vec_select of match_dup 0).
;; Only four bytes are written, so the Intel-syntax memory operand is
;; printed with the %k size override instead of the size of the V16QI mode.
(define_insn "*avx512vl_<code><mode>v4qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (any_truncate:V4QI
	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
      (vec_select:V12QI
        (match_dup 0)
        (parallel [(const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)
                   (const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8799
;; Masked four-element truncation to V4QI into a V16QI register.  The four
;; low bytes are the truncation result merged with bytes 0..3 of operand 2
;; under the QImode mask operand 3; the upper 12 bytes are zero.
(define_insn "avx512vl_<code><mode>v4qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V4QI
         (any_truncate:V4QI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V12QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8822
;; Masked store of the four-element truncation to V4QI of operand 1 into
;; the first four bytes of the V16QI memory operand 0.  Under the QImode
;; mask operand 2 the result is merged with the bytes already in memory;
;; bytes 4..15 keep their previous contents.
;; At most four bytes are written, so the Intel-syntax memory operand is
;; printed with the %k size override instead of the size of the V16QI mode.
(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (vec_merge:V4QI
        (any_truncate:V4QI
          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
        (vec_select:V4QI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V12QI
        (match_dup 0)
        (parallel [(const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)
                   (const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8848
;; Eight-element source modes for the truncations to V8QI; the V8HI
;; variant is only enabled with TARGET_AVX512BW.
(define_mode_iterator VI2_128_BW_4_256
  [(V8HI "TARGET_AVX512BW")
   V8SI])
8851
;; Store the eight-element truncation to V8QI of operand 1 into the first
;; eight bytes of the V16QI memory operand 0; bytes 8..15 keep their
;; previous contents (vec_select of match_dup 0).
;; Only eight bytes are written, so the Intel-syntax memory operand is
;; printed with the %q size override instead of the size of the V16QI mode.
(define_insn "*avx512vl_<code><mode>v8qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (any_truncate:V8QI
	      (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
      (vec_select:V8QI
        (match_dup 0)
        (parallel [(const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8869
;; Masked eight-element truncation to V8QI into a V16QI register.  The
;; eight low bytes are the truncation result merged with bytes 0..7 of
;; operand 2 under the QImode mask operand 3; the upper 8 bytes are zero.
(define_insn "avx512vl_<code><mode>v8qi2_mask"
  [(set
     (match_operand:V16QI 0 "register_operand" "=v")
     (vec_concat:V16QI
       (vec_merge:V8QI
         (any_truncate:V8QI
           (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
         (vec_select:V8QI
           (match_operand:V16QI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)
                      (const_int 4) (const_int 5)
                      (const_int 6) (const_int 7)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V8QI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)
          (const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8892
;; Masked store of the eight-element truncation to V8QI of operand 1 into
;; the first eight bytes of the V16QI memory operand 0.  Under the QImode
;; mask operand 2 the result is merged with the bytes already in memory;
;; bytes 8..15 keep their previous contents.
;; At most eight bytes are written, so the Intel-syntax memory operand is
;; printed with the %q size override instead of the size of the V16QI mode.
(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (vec_merge:V8QI
        (any_truncate:V8QI
          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
        (vec_select:V8QI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V8QI
        (match_dup 0)
        (parallel [(const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8918
;; Source modes of the truncations to HImode elements whose result fills
;; only the low part of a V8HI register.
(define_mode_iterator PMOV_SRC_MODE_4
  [V4DI V2DI V4SI])

;; Mode of the truncated (low) part of the V8HI result.
(define_mode_attr pmov_dst_4
  [(V4DI "V4HI")
   (V2DI "V2HI")
   (V4SI "V4HI")])

;; Mode of the zeroed remainder; together with pmov_dst_4 it adds up to
;; 8 HImode elements.
(define_mode_attr pmov_dst_zeroed_4
  [(V4DI "V4HI")
   (V2DI "V6HI")
   (V4SI "V4HI")])

;; Two-letter suffix appended to the vpmov mnemonic for each source mode.
(define_mode_attr pmov_suff_4
  [(V4DI "qw")
   (V2DI "qw")
   (V4SI "dw")])
8926
;; Truncate operand 1 to HImode elements in the low part of a V8HI
;; register; the remaining elements are the all-zero operand 2.
(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
  [(set
     (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (any_truncate:<pmov_dst_4>
         (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
       (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8938
;; Store the four-element truncation to V4HI of operand 1 into the first
;; four HImode elements of the V8HI memory operand 0; elements 4..7 keep
;; their previous contents (vec_select of match_dup 0).
;; Only eight bytes are written, so the Intel-syntax memory operand is
;; printed with the %q size override instead of the size of the V8HI mode.
(define_insn "*avx512vl_<code><mode>v4hi2_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
    (vec_concat:V8HI
      (any_truncate:V4HI
	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
      (vec_select:V4HI
        (match_dup 0)
        (parallel [(const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%q0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8954
;; Masked four-element truncation to V4HI into a V8HI register.  The four
;; low elements are the truncation result merged with elements 0..3 of
;; operand 2 under the QImode mask operand 3; the upper four are zero.
(define_insn "avx512vl_<code><mode>v4hi2_mask"
  [(set
     (match_operand:V8HI 0 "register_operand" "=v")
     (vec_concat:V8HI
       (vec_merge:V4HI
         (any_truncate:V4HI
           (match_operand:VI4_128_8_256 1 "register_operand" "v"))
         (vec_select:V4HI
           (match_operand:V8HI 2 "vector_move_operand" "0C")
           (parallel [(const_int 0) (const_int 1)
                      (const_int 2) (const_int 3)]))
         (match_operand:QI 3 "register_operand" "Yk"))
       (const_vector:V4HI
         [(const_int 0) (const_int 0) (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8973
;; Masked store of the four-element truncation to V4HI of operand 1 into
;; the first four HImode elements of the V8HI memory operand 0.  Under the
;; QImode mask operand 2 the result is merged with the elements already in
;; memory; elements 4..7 keep their previous contents.
;; At most eight bytes are written, so the Intel-syntax memory operand is
;; printed with the %q size override instead of the size of the V8HI mode.
(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
    (vec_concat:V8HI
      (vec_merge:V4HI
        (any_truncate:V4HI
          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
        (vec_select:V4HI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V4HI
        (match_dup 0)
        (parallel [(const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
8995
;; Unmasked store form of V2DI -> V2HI narrowing.  Lanes 0-1 of the V8HI
;; memory operand 0 receive any_truncate of operand 1; lanes 2-7 are
;; written in the RTL as the existing contents of operand 0 (match_dup 0),
;; so the pattern models them as unchanged.
(define_insn "*avx512vl_<code>v2div2hi2_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
    (vec_concat:V8HI
      (any_truncate:V2HI
	      (match_operand:V2DI 1 "register_operand" "v"))
      (vec_select:V6HI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)
                   (const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9012
;; Masked register form of V2DI -> V2HI narrowing.  Lanes 0-1 of operand 0
;; are any_truncate of operand 1 where the mask in operand 3 selects it and
;; lanes 0-1 of operand 2 otherwise; lanes 2-7 are zero.  Operand 2 is
;; tied to the destination ("0") or matched by "C" (presumably the zero
;; vector, selecting zero-masking via %N2 -- TODO confirm).
(define_insn "avx512vl_<code>v2div2hi2_mask"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
    (vec_concat:V8HI
      (vec_merge:V2HI
        (any_truncate:V2HI
          (match_operand:V2DI 1 "register_operand" "v"))
        (vec_select:V2HI
          (match_operand:V8HI 2 "vector_move_operand" "0C")
          (parallel [(const_int 0) (const_int 1)]))
        (match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V6HI [(const_int 0) (const_int 0)
                          (const_int 0) (const_int 0)
                          (const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9031
;; Masked store form of V2DI -> V2HI narrowing.  Lanes 0-1 of the V8HI
;; memory operand 0 receive any_truncate of operand 1 where the mask in
;; operand 2 selects it and keep their previous value otherwise; lanes 2-7
;; are modelled as the previous contents of operand 0 (match_dup 0).
(define_insn "avx512vl_<code>v2div2hi2_mask_store"
  [(set (match_operand:V8HI 0 "memory_operand" "=m")
    (vec_concat:V8HI
      (vec_merge:V2HI
        (any_truncate:V2HI
          (match_operand:V2DI 1 "register_operand" "v"))
        (vec_select:V2HI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V6HI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)
                   (const_int 4) (const_int 5)
                   (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9053
;; Unmasked register form of V2DI -> V2SI narrowing.  The V4SI result is
;; the concatenation of any_truncate of operand 1 (lanes 0-1) with operand
;; 2, which must satisfy const0_operand (lanes 2-3).  Operand 2 has no
;; constraint string and does not appear in the output template.
(define_insn "*avx512vl_<code>v2div2si2"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
    (vec_concat:V4SI
      (any_truncate:V2SI
	      (match_operand:V2DI 1 "register_operand" "v"))
      (match_operand:V2SI 2 "const0_operand")))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9065
;; Unmasked store form of V2DI -> V2SI narrowing.  Lanes 0-1 of the V4SI
;; memory operand 0 receive any_truncate of operand 1; lanes 2-3 are
;; modelled as the previous contents of operand 0 (match_dup 0).
(define_insn "*avx512vl_<code>v2div2si2_store"
  [(set (match_operand:V4SI 0 "memory_operand" "=m")
    (vec_concat:V4SI
      (any_truncate:V2SI
	      (match_operand:V2DI 1 "register_operand" "v"))
      (vec_select:V2SI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9080
;; Masked register form of V2DI -> V2SI narrowing.  Lanes 0-1 of operand 0
;; are any_truncate of operand 1 where the mask in operand 3 selects it and
;; lanes 0-1 of operand 2 otherwise; lanes 2-3 are zero.  Operand 2 is
;; tied to the destination ("0") or matched by "C" (presumably the zero
;; vector, selecting zero-masking via %N2 -- TODO confirm).
(define_insn "avx512vl_<code>v2div2si2_mask"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
    (vec_concat:V4SI
      (vec_merge:V2SI
        (any_truncate:V2SI
          (match_operand:V2DI 1 "register_operand" "v"))
        (vec_select:V2SI
          (match_operand:V4SI 2 "vector_move_operand" "0C")
          (parallel [(const_int 0) (const_int 1)]))
        (match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9097
;; Masked store form of V2DI -> V2SI narrowing.  Lanes 0-1 of the V4SI
;; memory operand 0 receive any_truncate of operand 1 where the mask in
;; operand 2 selects it and keep their previous value otherwise; lanes 2-3
;; are modelled as the previous contents of operand 0 (match_dup 0).
(define_insn "avx512vl_<code>v2div2si2_mask_store"
  [(set (match_operand:V4SI 0 "memory_operand" "=m")
    (vec_concat:V4SI
      (vec_merge:V2SI
        (any_truncate:V2SI
          (match_operand:V2DI 1 "register_operand" "v"))
        (vec_select:V2SI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V2SI
        (match_dup 0)
        (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX512VL"
  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9117
;; Unmasked register form of V8DI -> V8QI narrowing.  Lanes 0-7 of the
;; V16QI destination are any_truncate of operand 1; lanes 8-15 are the
;; literal zero vector.  Requires TARGET_AVX512F.
(define_insn "*avx512f_<code>v8div16qi2"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
	(vec_concat:V16QI
	  (any_truncate:V8QI
	    (match_operand:V8DI 1 "register_operand" "v"))
	  (const_vector:V8QI [(const_int 0) (const_int 0)
			      (const_int 0) (const_int 0)
			      (const_int 0) (const_int 0)
			      (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9132
;; Unmasked store form of V8DI -> V8QI narrowing.  Lanes 0-7 of the V16QI
;; memory operand 0 receive any_truncate of operand 1; lanes 8-15 are
;; modelled as the previous contents of operand 0 (match_dup 0).
(define_insn "*avx512f_<code>v8div16qi2_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
	(vec_concat:V16QI
	  (any_truncate:V8QI
	    (match_operand:V8DI 1 "register_operand" "v"))
	  (vec_select:V8QI
	    (match_dup 0)
	    (parallel [(const_int 8) (const_int 9)
		       (const_int 10) (const_int 11)
		       (const_int 12) (const_int 13)
		       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9150
;; Masked register form of V8DI -> V8QI narrowing.  Lanes 0-7 of operand 0
;; are any_truncate of operand 1 where the mask in operand 3 selects it and
;; lanes 0-7 of operand 2 otherwise; lanes 8-15 are zero.  Operand 2 is
;; tied to the destination ("0") or matched by "C" (presumably the zero
;; vector, selecting zero-masking via %N2 -- TODO confirm).
(define_insn "avx512f_<code>v8div16qi2_mask"
  [(set (match_operand:V16QI 0 "register_operand" "=v")
    (vec_concat:V16QI
      (vec_merge:V8QI
        (any_truncate:V8QI
          (match_operand:V8DI 1 "register_operand" "v"))
        (vec_select:V8QI
          (match_operand:V16QI 2 "vector_move_operand" "0C")
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)]))
        (match_operand:QI 3 "register_operand" "Yk"))
      (const_vector:V8QI [(const_int 0) (const_int 0)
                          (const_int 0) (const_int 0)
                          (const_int 0) (const_int 0)
                          (const_int 0) (const_int 0)])))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9173
;; Masked store form of V8DI -> V8QI narrowing.  Lanes 0-7 of the V16QI
;; memory operand 0 receive any_truncate of operand 1 where the mask in
;; operand 2 selects it and keep their previous value otherwise; lanes
;; 8-15 are modelled as the previous contents of operand 0 (match_dup 0).
(define_insn "avx512f_<code>v8div16qi2_mask_store"
  [(set (match_operand:V16QI 0 "memory_operand" "=m")
    (vec_concat:V16QI
      (vec_merge:V8QI
        (any_truncate:V8QI
          (match_operand:V8DI 1 "register_operand" "v"))
        (vec_select:V8QI
          (match_dup 0)
          (parallel [(const_int 0) (const_int 1)
                     (const_int 2) (const_int 3)
                     (const_int 4) (const_int 5)
                     (const_int 6) (const_int 7)]))
        (match_operand:QI 2 "register_operand" "Yk"))
      (vec_select:V8QI
        (match_dup 0)
        (parallel [(const_int 8) (const_int 9)
                   (const_int 10) (const_int 11)
                   (const_int 12) (const_int 13)
                   (const_int 14) (const_int 15)]))))]
  "TARGET_AVX512F"
  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "memory" "store")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])
9199
9200;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9201;;
9202;; Parallel integral arithmetic
9203;;
9204;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9205
;; Vector integer negation, expanded as (0 - operand 1).  Operand 2 is not
;; supplied by the caller: the preparation statement creates it as a
;; register holding CONST0_RTX of the vector mode, and the template refers
;; to it through match_dup.
(define_expand "neg<mode>2"
  [(set (match_operand:VI_AVX2 0 "register_operand")
	(minus:VI_AVX2
	  (match_dup 2)
	  (match_operand:VI_AVX2 1 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
9213
;; Expander for vector integer plus/minus (the plusminus code iterator) on
;; the VI_AVX2 modes, enabled under TARGET_SSE2.  Before the template is
;; emitted the operands are passed through
;; ix86_fixup_binary_operands_no_copy -- presumably to legitimize operand
;; combinations the insn cannot encode (helper is defined outside this
;; file; TODO confirm).
(define_expand "<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand")
	(plusminus:VI_AVX2
	  (match_operand:VI_AVX2 1 "nonimmediate_operand")
	  (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9221
;; Masked plus/minus expander for the VI48_AVX512VL modes, enabled under
;; TARGET_AVX512F.  The result is a vec_merge of the plus/minus of
;; operands 1 and 2 with operand 3, selected per lane by the mask register
;; in operand 4.  Operands are run through
;; ix86_fixup_binary_operands_no_copy before emission.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
	(vec_merge:VI48_AVX512VL
	  (plusminus:VI48_AVX512VL
	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9232
;; Masked plus/minus expander for the VI12_AVX512VL modes, enabled under
;; TARGET_AVX512BW.  Same shape as the VI48_AVX512VL expander: a vec_merge
;; of the plus/minus of operands 1 and 2 with operand 3 under the mask in
;; operand 4, with operands fixed up before emission.
(define_expand "<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
	(vec_merge:VI12_AVX512VL
	  (plusminus:VI12_AVX512VL
	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
	  (match_operand:VI12_AVX512VL 3 "vector_move_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9243
;; Unmasked vector integer plus/minus on the VI_AVX2 modes.
;; Alternative 0 is the two-operand legacy SSE form (operand 1 tied to the
;; destination); alternative 1 is the three-operand V-prefixed form.
;; This pattern is deliberately free of mask subst attributes: its name
;; carries no <mask_name>, and the masked forms are written out explicitly
;; as the *<plusminus_insn><mode>3_mask patterns that follow.
(define_insn "*<plusminus_insn><mode>3"
  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
	(plusminus:VI_AVX2
	  (match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
	  (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9259
;; Masked vector integer plus/minus on the VI48_AVX512VL modes.  Each
;; result lane is the plus/minus of operands 1 and 2 where the mask in
;; operand 4 selects it, and the corresponding lane of operand 3
;; otherwise.  Operand 3 is tied to the destination ("0") or matched by
;; "C" (presumably the zero vector, selecting zero-masking via %N3 --
;; TODO confirm).
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI48_AVX512VL
	  (plusminus:VI48_AVX512VL
	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9274
;; Masked vector integer plus/minus on the VI12_AVX512VL modes; same
;; shape as the VI48_AVX512VL pattern but gated on TARGET_AVX512BW.
;; Lanes not selected by the mask in operand 4 take the value of operand 3.
(define_insn "*<plusminus_insn><mode>3_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI12_AVX512VL
	  (plusminus:VI12_AVX512VL
	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
	  (match_operand:VI12_AVX512VL 3 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9288
;; Expander for saturating plus/minus (the sat_plusminus code iterator) on
;; the VI12_AVX2 modes.  The name carries <mask_name>, so a masked variant
;; is presumably generated through the mask define_subst (defined outside
;; this chunk); the two <mask_..._condition> terms in the condition
;; restrict when each variant is enabled.
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand")
	(sat_plusminus:VI12_AVX2
	  (match_operand:VI12_AVX2 1 "nonimmediate_operand")
	  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
9296
;; Saturating vector plus/minus on the VI12_AVX2 modes.  Alternative 0 is
;; the two-operand legacy SSE form, alternative 1 the three-operand
;; V-prefixed form, whose destination is followed by <mask_operand3>.
;; NOTE(review): the "mode" attribute is fixed at TI although VI12_AVX2
;; presumably also covers wider vectors -- confirm whether <sseinsnmode>
;; was intended.
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
	(sat_plusminus:VI12_AVX2
	  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
	  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
  "@
   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
9312
;; Vector multiply expander for the VI1_AVX512 modes.  The RTL template
;; is never emitted: the preparation statements hand operands 0-2 to
;; ix86_expand_vecop_qihi and finish with DONE.
;; NOTE(review): the name carries <mask_name>, yet only operands 0-2 are
;; forwarded to the helper -- confirm how a masked variant, if one is
;; generated, is expected to honour its mask operands.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand")
	(mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
		       (match_operand:VI1_AVX512 2 "register_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
  DONE;
})
9322
;; Vector multiply expander for the VI2_AVX2 modes.  Operands are run
;; through ix86_fixup_binary_operands_no_copy, after which the mult
;; template itself is emitted.
(define_expand "mul<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand")
	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand")
		       (match_operand:VI2_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9329
;; Low-part vector multiply on the VI2_AVX2 modes, emitted as pmullw
;; (two-operand, alternative 0) or vpmullw (three-operand, alternative 1).
;; The "%" on operand 1 marks operands 1 and 2 as commutative for the
;; register allocator.
(define_insn "*mul<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v")
		       (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pmullw\t{%2, %0|%0, %2}
   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9345
;; High-part multiply expander for the VI2_AVX2 modes.  Both inputs are
;; extended (sign or zero, per the any_extend iterator) to the
;; double-width vector mode, multiplied, shifted right logically by 16 and
;; truncated back, i.e. the upper 16 bits of each 32-bit product are kept.
(define_expand "<s>mul<mode>3_highpart<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand")
	(truncate:VI2_AVX2
	  (lshiftrt:<ssedoublemode>
	    (mult:<ssedoublemode>
	      (any_extend:<ssedoublemode>
		(match_operand:VI2_AVX2 1 "nonimmediate_operand"))
	      (any_extend:<ssedoublemode>
		(match_operand:VI2_AVX2 2 "nonimmediate_operand")))
	    (const_int 16))))]
  "TARGET_SSE2
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
9359
;; High-part multiply insn for the VI2_AVX2 modes, matching the RTL built
;; by the expander above it in this file: extend both inputs, multiply,
;; shift right by 16, truncate.  Emitted as pmulh<u>w (alternative 0) or
;; vpmulh<u>w (alternative 1); "%" marks operands 1 and 2 as commutative.
(define_insn "*<s>mul<mode>3_highpart<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
	(truncate:VI2_AVX2
	  (lshiftrt:<ssedoublemode>
	    (mult:<ssedoublemode>
	      (any_extend:<ssedoublemode>
		(match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
	      (any_extend:<ssedoublemode>
		(match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
	    (const_int 16))))]
  "TARGET_SSE2
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)
   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pmulh<u>w\t{%2, %0|%0, %2}
   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
9381
;; Unsigned widening multiply of the even-numbered SImode lanes of two
;; V16SI operands: lanes 0,2,...,14 of each input are selected,
;; zero-extended to DImode and multiplied, giving a V8DI result.
;; Operands are fixed up before the template is emitted.
(define_expand "vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9401
;; Insn for the unsigned even-lane widening multiply V16SI x V16SI -> V8DI,
;; emitted as vpmuludq with <mask_operand3> appended to the destination.
;; "%" marks operands 1 and 2 as commutative.
(define_insn "*vec_widen_umult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (zero_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "avx512f")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9426
;; Unsigned widening multiply of the even SImode lanes (0,2,4,6) of two
;; V8SI operands, giving a V4DI result.  Enabled under TARGET_AVX2 together
;; with <mask_avx512vl_condition>.
(define_expand "vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
	(mult:V4DI
	  (zero_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 1 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (zero_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 2 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9442
;; Insn for the unsigned even-lane widening multiply V8SI x V8SI -> V4DI,
;; emitted as vpmuludq with <mask_operand3> appended to the destination.
;; "%" marks operands 1 and 2 as commutative.
(define_insn "*vec_widen_umult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(mult:V4DI
	  (zero_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (zero_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
9462
;; Unsigned widening multiply of the even SImode lanes (0 and 2) of two
;; V4SI operands, giving a V2DI result.  Enabled under TARGET_SSE2 together
;; with <mask_avx512vl_condition>.
(define_expand "vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
	(mult:V2DI
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)])))
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 2 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9476
;; Insn for the unsigned even-lane widening multiply V4SI x V4SI -> V2DI.
;; Alternative 0 is the two-operand pmuludq (operand 1 tied to the
;; destination); alternative 1 is the three-operand vpmuludq with
;; <mask_operand3> appended to the destination.
(define_insn "*vec_widen_umult_even_v4si<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
	(mult:V2DI
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,v")
	      (parallel [(const_int 0) (const_int 2)])))
	  (zero_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm")
	      (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuludq\t{%2, %0|%0, %2}
   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "TI")])
9498
;; Signed widening multiply of the even-numbered SImode lanes of two V16SI
;; operands: lanes 0,2,...,14 of each input are selected, sign-extended to
;; DImode and multiplied, giving a V8DI result.
(define_expand "vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F"
  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
9518
;; Insn for the signed even-lane widening multiply V16SI x V16SI -> V8DI,
;; emitted as vpmuldq with <mask_operand3> appended to the destination.
;; "%" marks operands 1 and 2 as commutative.
(define_insn "*vec_widen_smult_even_v16si<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
        (mult:V8DI
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))
          (sign_extend:V8DI
            (vec_select:V8SI
              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
              (parallel [(const_int 0) (const_int 2)
                         (const_int 4) (const_int 6)
                         (const_int 8) (const_int 10)
                         (const_int 12) (const_int 14)])))))]
  "TARGET_AVX512F && ix86_binary_operator_ok (MULT, V16SImode, operands)"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "avx512f")
   (set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
9543
;; Signed widening multiply of the even SImode lanes (0,2,4,6) of two V8SI
;; operands, giving a V4DI result.  Enabled under TARGET_AVX2 together with
;; <mask_avx512vl_condition>.
(define_expand "vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand")
	(mult:V4DI
	  (sign_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 1 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (sign_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 2 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
9559
;; Insn for the signed even-lane widening multiply V8SI x V8SI -> V4DI,
;; emitted as vpmuldq with <mask_operand3> appended to the destination.
;; The condition carries <mask_avx512vl_condition>, matching the expander
;; of the same name and the unsigned V8SI sibling, so the 256-bit masked
;; variant is not enabled without the VL check.  The prefix is maybe_evex
;; (as on the unsigned sibling) because the operands use the "v"
;; constraint and the destination may carry a mask.
(define_insn "*vec_widen_smult_even_v8si<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(mult:V4DI
	  (sign_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))
	  (sign_extend:V4DI
	    (vec_select:V4SI
	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
	      (parallel [(const_int 0) (const_int 2)
			 (const_int 4) (const_int 6)])))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V8SImode, operands)"
  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
9580
;; Signed widening multiply of the even SImode lanes (0 and 2) of two V4SI
;; operands, giving a V2DI result.  Enabled under TARGET_SSE4_1 together
;; with <mask_avx512vl_condition>.
(define_expand "sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand")
	(mult:V2DI
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)])))
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 2 "nonimmediate_operand")
	      (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
9594
;; Insn for the signed even-lane widening multiply V4SI x V4SI -> V2DI.
;; Alternatives 0 and 1 are both the two-operand pmuldq and differ only in
;; register class ("Yr" versus "*x"); alternative 2 is the three-operand
;; vpmuldq with <mask_operand3> appended to the destination.
(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
	(mult:V2DI
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 1 "nonimmediate_operand" "%0,0,v")
	      (parallel [(const_int 0) (const_int 2)])))
	  (sign_extend:V2DI
	    (vec_select:V2SI
	      (match_operand:V4SI 2 "nonimmediate_operand" "Yrm,*xm,vm")
	      (parallel [(const_int 0) (const_int 2)])))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>
   && ix86_binary_operator_ok (MULT, V4SImode, operands)"
  "@
   pmuldq\t{%2, %0|%0, %2}
   pmuldq\t{%2, %0|%0, %2}
   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
9618
;; vpmaddwd on the VI2_AVX2 modes, modelled opaquely as UNSPEC_PMADDWD512
;; of operands 1 and 2 with a result in <sseunpackmode>.  Enabled under
;; TARGET_AVX512BW together with <mask_mode512bit_condition>.  The "mode"
;; attribute follows the vector width through <sseinsnmode> rather than
;; being fixed at XI, since the mode iterator is not limited to one width.
(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
	(unspec:<sseunpackmode>
	  [(match_operand:VI2_AVX2 1 "register_operand" "v")
	   (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
	  UNSPEC_PMADDWD512))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
9630
;; Multiply-and-add-pairs expander for V16HI inputs, V8SI result.  Each
;; result lane is the sum of two products: the sign-extended even lanes
;; (0,2,...,14) of operands 1 and 2 multiplied together, plus the
;; sign-extended odd lanes (1,3,...,15) multiplied together.
(define_expand "avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand")
	(plus:V8SI
	  (mult:V8SI
	    (sign_extend:V8SI
	      (vec_select:V8HI
		(match_operand:V16HI 1 "nonimmediate_operand")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)])))
	    (sign_extend:V8SI
	      (vec_select:V8HI
		(match_operand:V16HI 2 "nonimmediate_operand")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)]))))
	  (mult:V8SI
	    (sign_extend:V8SI
	      (vec_select:V8HI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)])))
	    (sign_extend:V8SI
	      (vec_select:V8HI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2"
  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
9664
;; Insn matching the avx2_pmaddwd RTL (even-lane products plus odd-lane
;; products of two V16HI operands, V8SI result), emitted as the
;; three-operand vpmaddwd.  "%" marks operands 1 and 2 as commutative.
(define_insn "*avx2_pmaddwd"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(plus:V8SI
	  (mult:V8SI
	    (sign_extend:V8SI
	      (vec_select:V8HI
		(match_operand:V16HI 1 "nonimmediate_operand" "%x")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)])))
	    (sign_extend:V8SI
	      (vec_select:V8HI
		(match_operand:V16HI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)]))))
	  (mult:V8SI
	    (sign_extend:V8SI
	      (vec_select:V8HI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)])))
	    (sign_extend:V8SI
	      (vec_select:V8HI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)]))))))]
  "TARGET_AVX2 && ix86_binary_operator_ok (MULT, V16HImode, operands)"
  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
9701
;; Multiply-and-add-pairs expander for V8HI inputs, V4SI result.  Each
;; result lane is the product of the sign-extended even lanes (0,2,4,6) of
;; operands 1 and 2 plus the product of their sign-extended odd lanes
;; (1,3,5,7).
(define_expand "sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand")
	(plus:V4SI
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 1 "nonimmediate_operand")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 2 "nonimmediate_operand")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)]))))
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2"
  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
9727
;; Insn matching the sse2_pmaddwd RTL (even-lane products plus odd-lane
;; products of two V8HI operands, V4SI result).  Alternative 0 is the
;; two-operand pmaddwd with operand 1 tied to the destination; alternative
;; 1 is the three-operand vpmaddwd.
(define_insn "*sse2_pmaddwd"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(plus:V4SI
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI
		(match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)]))))
	  (mult:V4SI
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)])))
	    (sign_extend:V4SI
	      (vec_select:V4HI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)]))))))]
  "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
  "@
   pmaddwd\t{%2, %0|%0, %2}
   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
9761
;; Multiply of two VI8-iterator vectors, printed as vpmullq.  Operand 1 must
;; be a register, operand 2 may be a register or memory.  Enabled only with
;; TARGET_AVX512DQ together with <mask_mode512bit_condition>.
;; NOTE(review): <mask_name>, <mask_operand3> and <mask_mode512bit_condition>
;; are substitution attributes defined outside this part of the file;
;; presumably they instantiate a write-masked variant -- confirm there.
9762(define_insn "avx512dq_mul<mode>3<mask_name>"
9763  [(set (match_operand:VI8 0 "register_operand" "=v")
9764	(mult:VI8
9765	  (match_operand:VI8 1 "register_operand" "v")
9766	  (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
9767  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
9768  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9769  [(set_attr "type" "sseimul")
9770   (set_attr "prefix" "evex")
9771   (set_attr "mode" "<sseinsnmode>")])
9772
;; Multiply expander for the VI4_AVX512F modes.
;; With TARGET_SSE4_1: each input that is not already a nonimmediate_operand
;; is forced into a register, the operands are passed through
;; ix86_fixup_binary_operands_no_copy, and the (mult ...) template is emitted.
;; Without SSE4.1: the expansion is delegated entirely to
;; ix86_expand_sse2_mulv4si3 and DONE suppresses the template.
9773(define_expand "mul<mode>3<mask_name>"
9774  [(set (match_operand:VI4_AVX512F 0 "register_operand")
9775	(mult:VI4_AVX512F
9776	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
9777	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
9778  "TARGET_SSE2 && <mask_mode512bit_condition>"
9779{
9780  if (TARGET_SSE4_1)
9781    {
9782      if (!nonimmediate_operand (operands[1], <MODE>mode))
9783	operands[1] = force_reg (<MODE>mode, operands[1]);
9784      if (!nonimmediate_operand (operands[2], <MODE>mode))
9785	operands[2] = force_reg (<MODE>mode, operands[2]);
9786      ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
9787    }
9788  else
9789    {
9790      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
9791      DONE;
9792    }
9793})
9794
;; Insn for (mult ...) on VI4_AVX512F modes.  The two noavx alternatives tie
;; operand 1 to the destination and print pmulld; the avx alternative prints
;; three-operand vpmulld.  Requires TARGET_SSE4_1, operands accepted by
;; ix86_binary_operator_ok, and <mask_mode512bit_condition>.  The "%" on
;; operand 1 declares operands 1 and 2 commutative.
9795(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
9796  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
9797	(mult:VI4_AVX512F
9798	  (match_operand:VI4_AVX512F 1 "nonimmediate_operand" "%0,0,v")
9799	  (match_operand:VI4_AVX512F 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
9800  "TARGET_SSE4_1 && ix86_binary_operator_ok (MULT, <MODE>mode, operands) && <mask_mode512bit_condition>"
9801  "@
9802   pmulld\t{%2, %0|%0, %2}
9803   pmulld\t{%2, %0|%0, %2}
9804   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9805  [(set_attr "isa" "noavx,noavx,avx")
9806   (set_attr "type" "sseimul")
9807   (set_attr "prefix_extra" "1")
9808   (set_attr "prefix" "<mask_prefix4>")
9809   (set_attr "btver2_decode" "vector,vector,vector")
9810   (set_attr "mode" "<sseinsnmode>")])
9811
;; Multiply expander for the VI8_AVX2_AVX512F modes.  All operands must be
;; registers.  The template is never emitted: the expansion is done by
;; ix86_expand_sse2_mulvxdi3 and followed by DONE.
9812(define_expand "mul<mode>3"
9813  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
9814	(mult:VI8_AVX2_AVX512F
9815	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
9816	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
9817  "TARGET_SSE2"
9818{
9819  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
9820  DONE;
9821})
9822
;; Widening multiply (signed and unsigned via the any_extend iterator) for
;; the VI124_AVX2 modes, result in <sseunpackmode>.  The pattern list only
;; declares the operands; the code is produced by ix86_expand_mul_widen_hilo,
;; called with <u_bool> and a final argument of true.
;; NOTE(review): the final argument presumably selects the high half, as the
;; pattern name suggests -- confirm against ix86_expand_mul_widen_hilo.
9823(define_expand "vec_widen_<s>mult_hi_<mode>"
9824  [(match_operand:<sseunpackmode> 0 "register_operand")
9825   (any_extend:<sseunpackmode>
9826     (match_operand:VI124_AVX2 1 "register_operand"))
9827   (match_operand:VI124_AVX2 2 "register_operand")]
9828  "TARGET_SSE2"
9829{
9830  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9831			      <u_bool>, true);
9832  DONE;
9833})
9834
;; Widening multiply (signed and unsigned via the any_extend iterator) for
;; the VI124_AVX2 modes, result in <sseunpackmode>.  The pattern list only
;; declares the operands; the code is produced by ix86_expand_mul_widen_hilo,
;; called with <u_bool> and a final argument of false.
;; NOTE(review): the final argument presumably selects the low half, as the
;; pattern name suggests -- confirm against ix86_expand_mul_widen_hilo.
9835(define_expand "vec_widen_<s>mult_lo_<mode>"
9836  [(match_operand:<sseunpackmode> 0 "register_operand")
9837   (any_extend:<sseunpackmode>
9838     (match_operand:VI124_AVX2 1 "register_operand"))
9839   (match_operand:VI124_AVX2 2 "register_operand")]
9840  "TARGET_SSE2"
9841{
9842  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
9843			      <u_bool>, false);
9844  DONE;
9845})
9846
9847;; Most widen_<s>mult_even_<mode> can be handled directly from other
9848;; named patterns, but signed V4SI needs special help for plain SSE2.
;; Operands: 0 = V2DI result register, 1 and 2 = V4SI inputs (register or
;; memory).  The expansion is done entirely by ix86_expand_mul_widen_evenodd,
;; called with both trailing flag arguments false, then DONE.
9849(define_expand "vec_widen_smult_even_v4si"
9850  [(match_operand:V2DI 0 "register_operand")
9851   (match_operand:V4SI 1 "nonimmediate_operand")
9852   (match_operand:V4SI 2 "nonimmediate_operand")]
9853  "TARGET_SSE2"
9854{
9855  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9856				 false, false);
9857  DONE;
9858})
9859
;; Widening multiply (signed and unsigned via the any_extend iterator) for
;; the VI4_AVX512F modes, result in <sseunpackmode>.  The expansion is done
;; by ix86_expand_mul_widen_evenodd, called with <u_bool> and a final
;; argument of true, then DONE.
;; NOTE(review): the final argument presumably selects the odd elements, as
;; the pattern name suggests -- confirm against the helper.
9860(define_expand "vec_widen_<s>mult_odd_<mode>"
9861  [(match_operand:<sseunpackmode> 0 "register_operand")
9862   (any_extend:<sseunpackmode>
9863     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
9864   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
9865  "TARGET_SSE2"
9866{
9867  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
9868				 <u_bool>, true);
9869  DONE;
9870})
9871
;; Suffix appended to the pmaddwd generator name used by sdot_prod<mode>:
;; "512v32hi" for V32HI, empty for V16HI and V8HI.
9872(define_mode_attr SDOT_PMADD_SUF
9873  [(V32HI "512v32hi") (V16HI "") (V8HI "")])
9874
;; Signed dot-product expander for the VI2_AVX2 modes.  Emits the pmaddwd
;; pattern on operands 1 and 2 into a fresh <sseunpackmode> pseudo, then sets
;; operand 0 to operand 3 plus that pseudo.  The pattern list only declares
;; the operands; DONE suppresses any template output.
9875(define_expand "sdot_prod<mode>"
9876  [(match_operand:<sseunpackmode> 0 "register_operand")
9877   (match_operand:VI2_AVX2 1 "register_operand")
9878   (match_operand:VI2_AVX2 2 "register_operand")
9879   (match_operand:<sseunpackmode> 3 "register_operand")]
9880  "TARGET_SSE2"
9881{
9882  rtx t = gen_reg_rtx (<sseunpackmode>mode);
9883  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
9884  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9885			  gen_rtx_PLUS (<sseunpackmode>mode,
9886					operands[3], t)));
9887  DONE;
9888})
9889
9890;; Normally we use widen_mul_even/odd, but combine can't quite get it all
9891;; back together when madd is available.
;; XOP-only expansion using two insns: xop_pmacsdqh takes operands 1, 2 and 3
;; and writes a fresh V2DI pseudo; xop_pmacsdql takes operands 1, 2 and that
;; pseudo and writes operand 0.
9892(define_expand "sdot_prodv4si"
9893  [(match_operand:V2DI 0 "register_operand")
9894   (match_operand:V4SI 1 "register_operand")
9895   (match_operand:V4SI 2 "register_operand")
9896   (match_operand:V2DI 3 "register_operand")]
9897  "TARGET_XOP"
9898{
9899  rtx t = gen_reg_rtx (V2DImode);
9900  emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
9901  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
9902  DONE;
9903})
9904
;; Sum-of-absolute-differences expander for V16QI inputs.  sse2_psadbw of
;; operands 1 and 2 is written to a V2DI pseudo, convert_move copies that
;; into a V4SI pseudo, and addv4si3 adds operand 3 to produce operand 0.
9905(define_expand "usadv16qi"
9906  [(match_operand:V4SI 0 "register_operand")
9907   (match_operand:V16QI 1 "register_operand")
9908   (match_operand:V16QI 2 "nonimmediate_operand")
9909   (match_operand:V4SI 3 "nonimmediate_operand")]
9910  "TARGET_SSE2"
9911{
9912  rtx t1 = gen_reg_rtx (V2DImode);
9913  rtx t2 = gen_reg_rtx (V4SImode);
9914  emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
9915  convert_move (t2, t1, 0);
9916  emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
9917  DONE;
9918})
9919
;; Sum-of-absolute-differences expander for V32QI inputs (TARGET_AVX2).
;; avx2_psadbw of operands 1 and 2 is written to a V4DI pseudo, convert_move
;; copies that into a V8SI pseudo, and addv8si3 adds operand 3 to produce
;; operand 0.
9920(define_expand "usadv32qi"
9921  [(match_operand:V8SI 0 "register_operand")
9922   (match_operand:V32QI 1 "register_operand")
9923   (match_operand:V32QI 2 "nonimmediate_operand")
9924   (match_operand:V8SI 3 "nonimmediate_operand")]
9925  "TARGET_AVX2"
9926{
9927  rtx t1 = gen_reg_rtx (V4DImode);
9928  rtx t2 = gen_reg_rtx (V8SImode);
9929  emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
9930  convert_move (t2, t1, 0);
9931  emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
9932  DONE;
9933})
9934
;; Arithmetic right shift of VI24_AVX2 vectors by an SImode count held in a
;; register or given as a constant (constraint "xN").  Alternative 0 (noavx)
;; ties operand 1 to the destination and prints psra<suffix>; alternative 1
;; (avx) prints three-operand vpsra<suffix>.  length_immediate is 1 when
;; operand 2 is a const_int and 0 otherwise.
9935(define_insn "ashr<mode>3"
9936  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
9937	(ashiftrt:VI24_AVX2
9938	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
9939	  (match_operand:SI 2 "nonmemory_operand" "xN,xN")))]
9940  "TARGET_SSE2"
9941  "@
9942   psra<ssemodesuffix>\t{%2, %0|%0, %2}
9943   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
9944  [(set_attr "isa" "noavx,avx")
9945   (set_attr "type" "sseishft")
9946   (set (attr "length_immediate")
9947     (if_then_else (match_operand 2 "const_int_operand")
9948       (const_string "1")
9949       (const_string "0")))
9950   (set_attr "prefix_data16" "1,*")
9951   (set_attr "prefix" "orig,vex")
9952   (set_attr "mode" "<sseinsnmode>")])
9953
;; Arithmetic right shift for the VI24_AVX512BW_1 modes under
;; TARGET_AVX512VL, printed as vpsra<suffix>.  Alternative 0 takes the count
;; from a vector register with operand 1 in a register; alternative 1 takes a
;; constant count ("N") and also allows operand 1 in memory.
;; length_immediate is 1 when operand 2 is a const_int and 0 otherwise.
9954(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
9955  [(set (match_operand:VI24_AVX512BW_1 0 "register_operand" "=v,v")
9956	(ashiftrt:VI24_AVX512BW_1
9957	  (match_operand:VI24_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
9958	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9959  "TARGET_AVX512VL"
9960  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9961  [(set_attr "type" "sseishft")
9962   (set (attr "length_immediate")
9963     (if_then_else (match_operand 2 "const_int_operand")
9964       (const_string "1")
9965       (const_string "0")))
9966   (set_attr "mode" "<sseinsnmode>")])
9967
;; Arithmetic right shift of V2DI under TARGET_AVX512VL, printed as vpsraq.
;; The count is DImode here.  Alternative 0 takes the count from a vector
;; register; alternative 1 takes a constant count ("N") and also allows
;; operand 1 in memory.  length_immediate is 1 when operand 2 is a const_int
;; and 0 otherwise.
9968(define_insn "<mask_codefor>ashrv2di3<mask_name>"
9969  [(set (match_operand:V2DI 0 "register_operand" "=v,v")
9970	(ashiftrt:V2DI
9971	  (match_operand:V2DI 1 "nonimmediate_operand" "v,vm")
9972	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
9973  "TARGET_AVX512VL"
9974  "vpsraq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9975  [(set_attr "type" "sseishft")
9976   (set (attr "length_immediate")
9977     (if_then_else (match_operand 2 "const_int_operand")
9978       (const_string "1")
9979       (const_string "0")))
9980   (set_attr "mode" "TI")])
9981
;; Arithmetic right shift for the VI248_AVX512BW_AVX512VL modes under
;; TARGET_AVX512F, printed as vpsra<suffix>.  Alternative 0 takes the SImode
;; count from a vector register; alternative 1 takes a constant count ("N")
;; and also allows operand 1 in memory.  length_immediate is 1 when operand 2
;; is a const_int and 0 otherwise.
9982(define_insn "ashr<mode>3<mask_name>"
9983  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
9984	(ashiftrt:VI248_AVX512BW_AVX512VL
9985	  (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
9986	  (match_operand:SI 2 "nonmemory_operand" "v,N")))]
9987  "TARGET_AVX512F"
9988  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9989  [(set_attr "type" "sseishft")
9990   (set (attr "length_immediate")
9991     (if_then_else (match_operand 2 "const_int_operand")
9992       (const_string "1")
9993       (const_string "0")))
9994   (set_attr "mode" "<sseinsnmode>")])
9995
;; Shift (any_lshift code iterator) for the VI2_AVX2_AVX512BW modes by an
;; SImode count in a register or given as a constant.  Alternative 0 (noavx)
;; ties operand 1 to the destination and prints p<vshift><suffix>;
;; alternative 1 (avx) prints three-operand vp<vshift><suffix>.  The
;; condition combines TARGET_SSE2 with <mask_mode512bit_condition> and
;; <mask_avx512bw_condition>.
9996(define_insn "<shift_insn><mode>3<mask_name>"
9997  [(set (match_operand:VI2_AVX2_AVX512BW 0 "register_operand" "=x,v")
9998	(any_lshift:VI2_AVX2_AVX512BW
9999	  (match_operand:VI2_AVX2_AVX512BW 1 "register_operand" "0,v")
10000	  (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10001  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
10002  "@
10003   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10004   vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10005  [(set_attr "isa" "noavx,avx")
10006   (set_attr "type" "sseishft")
10007   (set (attr "length_immediate")
10008     (if_then_else (match_operand 2 "const_int_operand")
10009       (const_string "1")
10010       (const_string "0")))
10011   (set_attr "prefix_data16" "1,*")
10012   (set_attr "prefix" "orig,vex")
10013   (set_attr "mode" "<sseinsnmode>")])
10014
;; Shift (any_lshift code iterator) for the VI48_AVX2 modes by an SImode
;; count in a register or given as a constant.  Alternative 0 (noavx) ties
;; operand 1 to the destination and prints p<vshift><suffix>; alternative 1
;; (avx) prints three-operand vp<vshift><suffix>.  The condition combines
;; TARGET_SSE2 with <mask_mode512bit_condition>.
10015(define_insn "<shift_insn><mode>3<mask_name>"
10016  [(set (match_operand:VI48_AVX2 0 "register_operand" "=x,v")
10017	(any_lshift:VI48_AVX2
10018	  (match_operand:VI48_AVX2 1 "register_operand" "0,v")
10019	  (match_operand:SI 2 "nonmemory_operand" "xN,vN")))]
10020  "TARGET_SSE2 && <mask_mode512bit_condition>"
10021  "@
10022   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
10023   vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10024  [(set_attr "isa" "noavx,avx")
10025   (set_attr "type" "sseishft")
10026   (set (attr "length_immediate")
10027     (if_then_else (match_operand 2 "const_int_operand")
10028       (const_string "1")
10029       (const_string "0")))
10030   (set_attr "prefix_data16" "1,*")
10031   (set_attr "prefix" "orig,vex")
10032   (set_attr "mode" "<sseinsnmode>")])
10033
;; Shift (any_lshift code iterator) for the VI48_512 modes under
;; TARGET_AVX512F, printed as vp<vshift><suffix>.  Alternative 0 has
;; operand 1 in a register with the count in a register or constant;
;; alternative 1 has operand 1 in memory and requires a constant count.
;; length_immediate is 1 when operand 2 is a const_int and 0 otherwise.
10034(define_insn "<shift_insn><mode>3<mask_name>"
10035  [(set (match_operand:VI48_512 0 "register_operand" "=v,v")
10036	(any_lshift:VI48_512
10037	  (match_operand:VI48_512 1 "nonimmediate_operand" "v,m")
10038	  (match_operand:SI 2 "nonmemory_operand" "vN,N")))]
10039  "TARGET_AVX512F && <mask_mode512bit_condition>"
10040  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10041  [(set_attr "isa" "avx512f")
10042   (set_attr "type" "sseishft")
10043   (set (attr "length_immediate")
10044     (if_then_else (match_operand 2 "const_int_operand")
10045       (const_string "1")
10046       (const_string "0")))
10047   (set_attr "prefix" "evex")
10048   (set_attr "mode" "<sseinsnmode>")])
10049
10050
;; Whole-vector left shift for the VI_128 modes.  The preparation code
;; replaces operand 1 by its V1TI lowpart, creates a fresh V1TI pseudo as
;; operand 3 (the shift destination), and makes operand 4 the <MODE> lowpart
;; of that pseudo, which the second set copies into operand 0.  The count
;; must satisfy const_0_to_255_mul_8_operand.
10051(define_expand "vec_shl_<mode>"
10052  [(set (match_dup 3)
10053	(ashift:V1TI
10054	 (match_operand:VI_128 1 "register_operand")
10055	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10056   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10057  "TARGET_SSE2"
10058{
10059  operands[1] = gen_lowpart (V1TImode, operands[1]);
10060  operands[3] = gen_reg_rtx (V1TImode);
10061  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10062})
10063
;; Left shift of a VIMAX_AVX2 value by a constant accepted by
;; const_0_to_255_mul_8_operand.  The output code divides the RTL count by 8
;; before printing it as the instruction immediate, then returns pslldq for
;; alternative 0 (noavx, operand 1 tied to the destination) or vpslldq for
;; alternative 1 (avx).
10064(define_insn "<sse2_avx2>_ashl<mode>3"
10065  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10066	(ashift:VIMAX_AVX2
10067	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10068	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10069  "TARGET_SSE2"
10070{
10071  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10072
10073  switch (which_alternative)
10074    {
10075    case 0:
10076      return "pslldq\t{%2, %0|%0, %2}";
10077    case 1:
10078      return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
10079    default:
10080      gcc_unreachable ();
10081    }
10082}
10083  [(set_attr "isa" "noavx,avx")
10084   (set_attr "type" "sseishft")
10085   (set_attr "length_immediate" "1")
10086   (set_attr "prefix_data16" "1,*")
10087   (set_attr "prefix" "orig,vex")
10088   (set_attr "mode" "<sseinsnmode>")])
10089
;; Whole-vector logical right shift for the VI_128 modes.  The preparation
;; code replaces operand 1 by its V1TI lowpart, creates a fresh V1TI pseudo
;; as operand 3 (the shift destination), and makes operand 4 the <MODE>
;; lowpart of that pseudo, which the second set copies into operand 0.  The
;; count must satisfy const_0_to_255_mul_8_operand.
10090(define_expand "vec_shr_<mode>"
10091  [(set (match_dup 3)
10092	(lshiftrt:V1TI
10093	 (match_operand:VI_128 1 "register_operand")
10094	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
10095   (set (match_operand:VI_128 0 "register_operand") (match_dup 4))]
10096  "TARGET_SSE2"
10097{
10098  operands[1] = gen_lowpart (V1TImode, operands[1]);
10099  operands[3] = gen_reg_rtx (V1TImode);
10100  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
10101})
10102
;; Logical right shift of a VIMAX_AVX2 value by a constant accepted by
;; const_0_to_255_mul_8_operand.  The output code divides the RTL count by 8
;; before printing it as the instruction immediate, then returns psrldq for
;; alternative 0 (noavx, operand 1 tied to the destination) or vpsrldq for
;; alternative 1 (avx).
10103(define_insn "<sse2_avx2>_lshr<mode>3"
10104  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
10105	(lshiftrt:VIMAX_AVX2
10106	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
10107	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
10108  "TARGET_SSE2"
10109{
10110  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
10111
10112  switch (which_alternative)
10113    {
10114    case 0:
10115      return "psrldq\t{%2, %0|%0, %2}";
10116    case 1:
10117      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
10118    default:
10119      gcc_unreachable ();
10120    }
10121}
10122  [(set_attr "isa" "noavx,avx")
10123   (set_attr "type" "sseishft")
10124   (set_attr "length_immediate" "1")
10125   (set_attr "atom_unit" "sishuf")
10126   (set_attr "prefix_data16" "1,*")
10127   (set_attr "prefix" "orig,vex")
10128   (set_attr "mode" "<sseinsnmode>")])
10129
;; Rotate (any_rotate code iterator) of VI48_AVX512VL vectors where the
;; rotate amounts come from a vector operand of the same mode (register or
;; memory).  Printed as vp<rotate>v<suffix>; enabled with TARGET_AVX512F.
10130(define_insn "<avx512>_<rotate>v<mode><mask_name>"
10131  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10132	(any_rotate:VI48_AVX512VL
10133	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
10134	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10135  "TARGET_AVX512F"
10136  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10137  [(set_attr "prefix" "evex")
10138   (set_attr "mode" "<sseinsnmode>")])
10139
;; Rotate (any_rotate code iterator) of VI48_AVX512VL vectors by an SImode
;; constant accepted by const_0_to_255_operand.  Operand 1 may be a register
;; or memory.  Printed as vp<rotate><suffix>; enabled with TARGET_AVX512F.
10140(define_insn "<avx512>_<rotate><mode><mask_name>"
10141  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10142	(any_rotate:VI48_AVX512VL
10143	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
10144	  (match_operand:SI 2 "const_0_to_255_operand")))]
10145  "TARGET_AVX512F"
10146  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10147  [(set_attr "prefix" "evex")
10148   (set_attr "mode" "<sseinsnmode>")])
10149
;; Max/min expander (maxmin code iterator) for the
;; VI124_256_AVX512F_AVX512BW modes under TARGET_AVX2.  The operands are
;; passed through ix86_fixup_binary_operands_no_copy and the template is
;; then emitted unchanged.
10150(define_expand "<code><mode>3"
10151  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
10152	(maxmin:VI124_256_AVX512F_AVX512BW
10153	  (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
10154	  (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
10155  "TARGET_AVX2"
10156  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10157
;; Insn for max/min on the VI124_256 modes, printed as
;; vp<maxmin_int><suffix>.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok; the "%" on operand 1 declares operands 1 and 2
;; commutative.
10158(define_insn "*avx2_<code><mode>3"
10159  [(set (match_operand:VI124_256 0 "register_operand" "=v")
10160	(maxmin:VI124_256
10161	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
10162	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
10163  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10164  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10165  [(set_attr "type" "sseiadd")
10166   (set_attr "prefix_extra" "1")
10167   (set_attr "prefix" "vex")
10168   (set_attr "mode" "OI")])
10169
;; Masked max/min expander for the VI48_AVX512VL modes.  The result is a
;; vec_merge of the max/min of operands 1 and 2 with operand 3
;; (vector_move_operand) under the <avx512fmaskmode> mask in operand 4.
;; The operands are passed through ix86_fixup_binary_operands_no_copy before
;; the template is emitted.
10170(define_expand "<code><mode>3_mask"
10171  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
10172	(vec_merge:VI48_AVX512VL
10173	  (maxmin:VI48_AVX512VL
10174	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
10175	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
10176	  (match_operand:VI48_AVX512VL 3 "vector_move_operand")
10177	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
10178  "TARGET_AVX512F"
10179  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
10180
;; Insn for max/min on the VI48_AVX512VL modes, printed as
;; vp<maxmin_int><suffix>.  Requires TARGET_AVX512F and operands accepted by
;; ix86_binary_operator_ok; the "%" on operand 1 declares operands 1 and 2
;; commutative.
10181(define_insn "*avx512bw_<code><mode>3<mask_name>"
10182  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
10183	(maxmin:VI48_AVX512VL
10184	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
10185	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
10186  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10187  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10188  [(set_attr "type" "sseiadd")
10189   (set_attr "prefix_extra" "1")
10190   (set_attr "prefix" "maybe_evex")
10191   (set_attr "mode" "<sseinsnmode>")])
10192
;; Insn for max/min on the VI12_AVX512VL modes under TARGET_AVX512BW,
;; printed as vp<maxmin_int><suffix>.  Operand 1 must be a register;
;; operand 2 may be a register or memory.
10193(define_insn "<mask_codefor><code><mode>3<mask_name>"
10194  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
10195        (maxmin:VI12_AVX512VL
10196          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
10197          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
10198  "TARGET_AVX512BW"
10199  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10200  [(set_attr "type" "sseiadd")
10201   (set_attr "prefix" "evex")
10202   (set_attr "mode" "<sseinsnmode>")])
10203
;; Max/min expander (maxmin code iterator) for the VI8_AVX2_AVX512BW modes.
;; When TARGET_AVX512F holds and the mode is V8DImode or TARGET_AVX512VL is
;; set, the operands are fixed up and the template is emitted.
;; Otherwise the operation is expressed as a vector conditional for
;; ix86_expand_int_vcond via the xops[] array:
;;   xops[0]    destination
;;   xops[1,2]  operands 1,2 for SMAX/UMAX, swapped (2,1) for the min codes
;;   xops[3]    comparison of operands 1 and 2: GTU for UMAX/UMIN, else GT
;;   xops[4,5]  the compared operands 1 and 2
;; The helper's success is asserted and DONE suppresses the template.
10204(define_expand "<code><mode>3"
10205  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand")
10206	(maxmin:VI8_AVX2_AVX512BW
10207	  (match_operand:VI8_AVX2_AVX512BW 1 "register_operand")
10208	  (match_operand:VI8_AVX2_AVX512BW 2 "register_operand")))]
10209  "TARGET_SSE4_2"
10210{
10211  if (TARGET_AVX512F
10212      && (<MODE>mode == V8DImode || TARGET_AVX512VL))
10213    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10214  else 
10215    {
10216      enum rtx_code code;
10217      rtx xops[6];
10218      bool ok;
10219
10220
10221      xops[0] = operands[0];
10222
10223      if (<CODE> == SMAX || <CODE> == UMAX)
10224	{
10225	  xops[1] = operands[1];
10226	  xops[2] = operands[2];
10227	}
10228      else
10229	{
10230	  xops[1] = operands[2];
10231	  xops[2] = operands[1];
10232	}
10233
10234      code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
10235
10236      xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
10237      xops[4] = operands[1];
10238      xops[5] = operands[2];
10239
10240      ok = ix86_expand_int_vcond (xops);
10241      gcc_assert (ok);
10242      DONE;
10243    }
10244})
10245
;; Signed max/min expander (smaxmin code iterator) for the VI124_128 modes.
;; With TARGET_SSE4_1, or for V8HImode on any SSE2 target, the operands are
;; fixed up and the template is emitted.
;; Otherwise both inputs are forced into registers and the operation is
;; expressed as a vector conditional for ix86_expand_int_vcond:
;;   xops[0]    destination
;;   xops[1,2]  operands 1,2 for SMAX, swapped (2,1) for SMIN
;;   xops[3]    (gt operand1 operand2)
;;   xops[4,5]  the compared operands 1 and 2
;; The helper's success is asserted and DONE suppresses the template.
10246(define_expand "<code><mode>3"
10247  [(set (match_operand:VI124_128 0 "register_operand")
10248	(smaxmin:VI124_128
10249	  (match_operand:VI124_128 1 "nonimmediate_operand")
10250	  (match_operand:VI124_128 2 "nonimmediate_operand")))]
10251  "TARGET_SSE2"
10252{
10253  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
10254    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10255  else
10256    {
10257      rtx xops[6];
10258      bool ok;
10259
10260      xops[0] = operands[0];
10261      operands[1] = force_reg (<MODE>mode, operands[1]);
10262      operands[2] = force_reg (<MODE>mode, operands[2]);
10263
10264      if (<CODE> == SMAX)
10265	{
10266	  xops[1] = operands[1];
10267	  xops[2] = operands[2];
10268	}
10269      else
10270	{
10271	  xops[1] = operands[2];
10272	  xops[2] = operands[1];
10273	}
10274
10275      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
10276      xops[4] = operands[1];
10277      xops[5] = operands[2];
10278
10279      ok = ix86_expand_int_vcond (xops);
10280      gcc_assert (ok);
10281      DONE;
10282    }
10283})
10284
;; Insn for signed max/min on the VI14_128 modes.  The two noavx
;; alternatives tie operand 1 to the destination and print
;; p<maxmin_int><suffix>; the avx alternative prints three-operand
;; vp<maxmin_int><suffix>.  Requires TARGET_SSE4_1,
;; <mask_mode512bit_condition> and operands accepted by
;; ix86_binary_operator_ok.
10285(define_insn "*sse4_1_<code><mode>3<mask_name>"
10286  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
10287	(smaxmin:VI14_128
10288	  (match_operand:VI14_128 1 "nonimmediate_operand" "%0,0,v")
10289	  (match_operand:VI14_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10290  "TARGET_SSE4_1
10291   && <mask_mode512bit_condition>
10292   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10293  "@
10294   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10295   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10296   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10297  [(set_attr "isa" "noavx,noavx,avx")
10298   (set_attr "type" "sseiadd")
10299   (set_attr "prefix_extra" "1,1,*")
10300   (set_attr "prefix" "orig,orig,vex")
10301   (set_attr "mode" "TI")])
10302
;; Insn for signed max/min on V8HI, available from plain SSE2.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; p<maxmin_int>w; alternative 1 (avx) prints three-operand vp<maxmin_int>w.
;; Requires operands accepted by ix86_binary_operator_ok.
10303(define_insn "*<code>v8hi3"
10304  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10305	(smaxmin:V8HI
10306	  (match_operand:V8HI 1 "nonimmediate_operand" "%0,x")
10307	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")))]
10308  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V8HImode, operands)"
10309  "@
10310   p<maxmin_int>w\t{%2, %0|%0, %2}
10311   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
10312  [(set_attr "isa" "noavx,avx")
10313   (set_attr "type" "sseiadd")
10314   (set_attr "prefix_data16" "1,*")
10315   (set_attr "prefix_extra" "*,1")
10316   (set_attr "prefix" "orig,vex")
10317   (set_attr "mode" "TI")])
10318
;; Unsigned max/min expander (umaxmin code iterator) for the VI124_128
;; modes.  Three cases:
;;  1. TARGET_SSE4_1, or V16QImode on any SSE2 target: fix up the operands
;;     and emit the template.
;;  2. UMAX on V8HImode without SSE4.1: force operand 1 into a register,
;;     emit sse2_ussubv8hi3 (operand 1, operand 2) into op3, then addv8hi3
;;     of op3 and operand 2 into operand 0.  op3 is operand 0 itself unless
;;     operand 0 is rtx_equal_p to operand 2, in which case a fresh pseudo is
;;     used so operand 2 is still intact for the add.
;;  3. Everything else: force both inputs into registers and express the
;;     operation as a vector conditional for ix86_expand_int_vcond, with
;;     xops[1,2] = operands 1,2 for UMAX (swapped for UMIN), xops[3] =
;;     (gtu operand1 operand2) and xops[4,5] = the compared operands.
;; Cases 2 and 3 end with DONE, so the template is not emitted.
10319(define_expand "<code><mode>3"
10320  [(set (match_operand:VI124_128 0 "register_operand")
10321	(umaxmin:VI124_128
10322	  (match_operand:VI124_128 1 "nonimmediate_operand")
10323	  (match_operand:VI124_128 2 "nonimmediate_operand")))]
10324  "TARGET_SSE2"
10325{
10326  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
10327    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
10328  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
10329    {
10330      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
10331      operands[1] = force_reg (<MODE>mode, operands[1]);
10332      if (rtx_equal_p (op3, op2))
10333	op3 = gen_reg_rtx (V8HImode);
10334      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
10335      emit_insn (gen_addv8hi3 (op0, op3, op2));
10336      DONE;
10337    }
10338  else
10339    {
10340      rtx xops[6];
10341      bool ok;
10342
10343      operands[1] = force_reg (<MODE>mode, operands[1]);
10344      operands[2] = force_reg (<MODE>mode, operands[2]);
10345
10346      xops[0] = operands[0];
10347
10348      if (<CODE> == UMAX)
10349	{
10350	  xops[1] = operands[1];
10351	  xops[2] = operands[2];
10352	}
10353      else
10354	{
10355	  xops[1] = operands[2];
10356	  xops[2] = operands[1];
10357	}
10358
10359      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
10360      xops[4] = operands[1];
10361      xops[5] = operands[2];
10362
10363      ok = ix86_expand_int_vcond (xops);
10364      gcc_assert (ok);
10365      DONE;
10366    }
10367})
10368
;; Insn for unsigned max/min on the VI24_128 modes.  The two noavx
;; alternatives tie operand 1 to the destination and print
;; p<maxmin_int><suffix>; the avx alternative prints three-operand
;; vp<maxmin_int><suffix>.  Requires TARGET_SSE4_1,
;; <mask_mode512bit_condition> and operands accepted by
;; ix86_binary_operator_ok.
10369(define_insn "*sse4_1_<code><mode>3<mask_name>"
10370  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
10371	(umaxmin:VI24_128
10372	  (match_operand:VI24_128 1 "nonimmediate_operand" "%0,0,v")
10373	  (match_operand:VI24_128 2 "nonimmediate_operand" "Yrm,*xm,vm")))]
10374  "TARGET_SSE4_1
10375   && <mask_mode512bit_condition>
10376   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
10377  "@
10378   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10379   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
10380   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10381  [(set_attr "isa" "noavx,noavx,avx")
10382   (set_attr "type" "sseiadd")
10383   (set_attr "prefix_extra" "1,1,*")
10384   (set_attr "prefix" "orig,orig,vex")
10385   (set_attr "mode" "TI")])
10386
;; Insn for unsigned max/min on V16QI, available from plain SSE2.
;; Alternative 0 (noavx) ties operand 1 to the destination and prints
;; p<maxmin_int>b; alternative 1 (avx) prints three-operand vp<maxmin_int>b.
;; Requires operands accepted by ix86_binary_operator_ok.
10387(define_insn "*<code>v16qi3"
10388  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10389	(umaxmin:V16QI
10390	  (match_operand:V16QI 1 "nonimmediate_operand" "%0,x")
10391	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")))]
10392  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, V16QImode, operands)"
10393  "@
10394   p<maxmin_int>b\t{%2, %0|%0, %2}
10395   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
10396  [(set_attr "isa" "noavx,avx")
10397   (set_attr "type" "sseiadd")
10398   (set_attr "prefix_data16" "1,*")
10399   (set_attr "prefix_extra" "*,1")
10400   (set_attr "prefix" "orig,vex")
10401   (set_attr "mode" "TI")])
10402
10403;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10404;;
10405;; Parallel integral comparisons
10406;;
10407;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10408
;; Named expander for element-wise equality on the VI_256 modes; the result
;; has the same vector mode as the inputs.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
10409(define_expand "avx2_eq<mode>3"
10410  [(set (match_operand:VI_256 0 "register_operand")
10411	(eq:VI_256
10412	  (match_operand:VI_256 1 "nonimmediate_operand")
10413	  (match_operand:VI_256 2 "nonimmediate_operand")))]
10414  "TARGET_AVX2"
10415  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10416
;; Insn for element-wise equality on the VI_256 modes, printed as
;; vpcmpeq<suffix>.  Requires TARGET_AVX2 and operands accepted by
;; ix86_binary_operator_ok; the "%" on operand 1 declares operands 1 and 2
;; commutative.
10417(define_insn "*avx2_eq<mode>3"
10418  [(set (match_operand:VI_256 0 "register_operand" "=x")
10419	(eq:VI_256
10420	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
10421	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10422  "TARGET_AVX2 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10423  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10424  [(set_attr "type" "ssecmp")
10425   (set_attr "prefix_extra" "1")
10426   (set_attr "prefix" "vex")
10427   (set_attr "mode" "OI")])
10428
;; Expander for the equality compare of VI12_AVX512VL vectors whose result
;; is an <avx512fmaskmode> value, represented as UNSPEC_MASKED_EQ.  Requires
;; TARGET_AVX512BW.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
10429(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10430  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10431	(unspec:<avx512fmaskmode>
10432	  [(match_operand:VI12_AVX512VL 1 "register_operand")
10433	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
10434	  UNSPEC_MASKED_EQ))]
10435  "TARGET_AVX512BW"
10436  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10437
;; Expander for the equality compare of VI48_AVX512VL vectors whose result
;; is an <avx512fmaskmode> value, represented as UNSPEC_MASKED_EQ.  Requires
;; TARGET_AVX512F.  The operands are passed through
;; ix86_fixup_binary_operands_no_copy before the template is emitted.
10438(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
10439  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
10440	(unspec:<avx512fmaskmode>
10441	  [(match_operand:VI48_AVX512VL 1 "register_operand")
10442	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
10443	  UNSPEC_MASKED_EQ))]
10444  "TARGET_AVX512F"
10445  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10446
;; Insn for the equality compare of VI12_AVX512VL (byte and word element)
;; vectors into an <avx512fmaskmode> destination (constraint "Yk"), printed
;; as vpcmpeq<suffix>.  The "%" on operand 1 declares operands 1 and 2
;; commutative.
;; The condition requires TARGET_AVX512BW, the same requirement used by the
;; expander of this name for the VI12_AVX512VL iterator and by the
;; VI12_AVX512VL greater-than insn; with only TARGET_AVX512F this insn could
;; be recognised on targets that lack the byte/word mask compares.
10447(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10448  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10449	(unspec:<avx512fmaskmode>
10450	  [(match_operand:VI12_AVX512VL 1 "register_operand" "%v")
10451	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
10452	  UNSPEC_MASKED_EQ))]
10453  "TARGET_AVX512BW && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10454  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10455  [(set_attr "type" "ssecmp")
10456   (set_attr "prefix_extra" "1")
10457   (set_attr "prefix" "evex")
10458   (set_attr "mode" "<sseinsnmode>")])
10459
;; Insn for the equality compare of VI48_AVX512VL vectors into an
;; <avx512fmaskmode> destination (constraint "Yk"), printed as
;; vpcmpeq<suffix>.  Requires TARGET_AVX512F and operands accepted by
;; ix86_binary_operator_ok; the "%" on operand 1 declares operands 1 and 2
;; commutative.
10460(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
10461  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10462	(unspec:<avx512fmaskmode>
10463	  [(match_operand:VI48_AVX512VL 1 "register_operand" "%v")
10464	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
10465	  UNSPEC_MASKED_EQ))]
10466  "TARGET_AVX512F && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10467  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10468  [(set_attr "type" "ssecmp")
10469   (set_attr "prefix_extra" "1")
10470   (set_attr "prefix" "evex")
10471   (set_attr "mode" "<sseinsnmode>")])
10472
;; Insn for element-wise equality on V2DI.  The two noavx alternatives tie
;; operand 1 to the destination and print pcmpeqq; the avx alternative prints
;; three-operand vpcmpeqq.  Requires TARGET_SSE4_1 and operands accepted by
;; ix86_binary_operator_ok.
10473(define_insn "*sse4_1_eqv2di3"
10474  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10475	(eq:V2DI
10476	  (match_operand:V2DI 1 "nonimmediate_operand" "%0,0,x")
10477	  (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10478  "TARGET_SSE4_1 && ix86_binary_operator_ok (EQ, V2DImode, operands)"
10479  "@
10480   pcmpeqq\t{%2, %0|%0, %2}
10481   pcmpeqq\t{%2, %0|%0, %2}
10482   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
10483  [(set_attr "isa" "noavx,noavx,avx")
10484   (set_attr "type" "ssecmp")
10485   (set_attr "prefix_extra" "1")
10486   (set_attr "prefix" "orig,orig,vex")
10487   (set_attr "mode" "TI")])
10488
;; Insn for element-wise equality on the VI124_128 modes.  Alternative 0
;; (noavx) ties operand 1 to the destination and prints pcmpeq<suffix>;
;; alternative 1 (avx) prints three-operand vpcmpeq<suffix>.  Enabled for
;; TARGET_SSE2 only when TARGET_XOP is not set, and requires operands
;; accepted by ix86_binary_operator_ok.
10489(define_insn "*sse2_eq<mode>3"
10490  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
10491	(eq:VI124_128
10492	  (match_operand:VI124_128 1 "nonimmediate_operand" "%0,x")
10493	  (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
10494  "TARGET_SSE2 && !TARGET_XOP
10495   && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
10496  "@
10497   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
10498   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10499  [(set_attr "isa" "noavx,avx")
10500   (set_attr "type" "ssecmp")
10501   (set_attr "prefix_data16" "1,*")
10502   (set_attr "prefix" "orig,vex")
10503   (set_attr "mode" "TI")])
10504
;; Named expander for element-wise equality on the VI124_128 modes, enabled
;; for TARGET_SSE2 when TARGET_XOP is not set.  The operands are passed
;; through ix86_fixup_binary_operands_no_copy before the template is emitted.
10505(define_expand "sse2_eq<mode>3"
10506  [(set (match_operand:VI124_128 0 "register_operand")
10507	(eq:VI124_128
10508	  (match_operand:VI124_128 1 "nonimmediate_operand")
10509	  (match_operand:VI124_128 2 "nonimmediate_operand")))]
10510  "TARGET_SSE2 && !TARGET_XOP "
10511  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
10512
;; Named expander for element-wise equality on V2DI (TARGET_SSE4_1).  The
;; operands are passed through ix86_fixup_binary_operands_no_copy before the
;; template is emitted.
10513(define_expand "sse4_1_eqv2di3"
10514  [(set (match_operand:V2DI 0 "register_operand")
10515	(eq:V2DI
10516	  (match_operand:V2DI 1 "nonimmediate_operand")
10517	  (match_operand:V2DI 2 "nonimmediate_operand")))]
10518  "TARGET_SSE4_1"
10519  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
10520
;; Insn for element-wise signed greater-than on V2DI (TARGET_SSE4_2).
;; Operand 1 must be a register and is not marked commutative.  The two
;; noavx alternatives tie operand 1 to the destination and print pcmpgtq; the
;; avx alternative prints three-operand vpcmpgtq.
10521(define_insn "sse4_2_gtv2di3"
10522  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
10523	(gt:V2DI
10524	  (match_operand:V2DI 1 "register_operand" "0,0,x")
10525	  (match_operand:V2DI 2 "nonimmediate_operand" "Yrm,*xm,xm")))]
10526  "TARGET_SSE4_2"
10527  "@
10528   pcmpgtq\t{%2, %0|%0, %2}
10529   pcmpgtq\t{%2, %0|%0, %2}
10530   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
10531  [(set_attr "isa" "noavx,noavx,avx")
10532   (set_attr "type" "ssecmp")
10533   (set_attr "prefix_extra" "1")
10534   (set_attr "prefix" "orig,orig,vex")
10535   (set_attr "mode" "TI")])
10536
;; Insn for element-wise signed greater-than on the VI_256 modes
;; (TARGET_AVX2), printed as vpcmpgt<suffix>.  Operand 1 must be a register;
;; operand 2 may be a register or memory.
10537(define_insn "avx2_gt<mode>3"
10538  [(set (match_operand:VI_256 0 "register_operand" "=x")
10539	(gt:VI_256
10540	  (match_operand:VI_256 1 "register_operand" "x")
10541	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
10542  "TARGET_AVX2"
10543  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
10544  [(set_attr "type" "ssecmp")
10545   (set_attr "prefix_extra" "1")
10546   (set_attr "prefix" "vex")
10547   (set_attr "mode" "OI")])
10548
;; Insn for the greater-than compare of VI48_AVX512VL vectors into an
;; <avx512fmaskmode> destination (constraint "Yk"), represented as
;; UNSPEC_MASKED_GT and printed as vpcmpgt<suffix>.  Requires TARGET_AVX512F.
10549(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10550  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10551	(unspec:<avx512fmaskmode>
10552	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
10553	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10554  "TARGET_AVX512F"
10555  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10556  [(set_attr "type" "ssecmp")
10557   (set_attr "prefix_extra" "1")
10558   (set_attr "prefix" "evex")
10559   (set_attr "mode" "<sseinsnmode>")])
10560
;; Insn for the greater-than compare of VI12_AVX512VL vectors into an
;; <avx512fmaskmode> destination (constraint "Yk"), represented as
;; UNSPEC_MASKED_GT and printed as vpcmpgt<suffix>.  Requires
;; TARGET_AVX512BW.
10561(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
10562  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
10563	(unspec:<avx512fmaskmode>
10564	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
10565	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
10566  "TARGET_AVX512BW"
10567  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
10568  [(set_attr "type" "ssecmp")
10569   (set_attr "prefix_extra" "1")
10570   (set_attr "prefix" "evex")
10571   (set_attr "mode" "<sseinsnmode>")])
10572
;; Signed greater-than compare for the VI124_128 modes.  Disabled when
;; TARGET_XOP is set.  Alternative 0 is the two-operand form (operand 1
;; tied to the destination); alternative 1 is the three-operand VEX form.
(define_insn "sse2_gt<mode>3"
  [(set
     (match_operand:VI124_128 0 "register_operand" "=x,x")
     (gt:VI124_128
       (match_operand:VI124_128 1 "register_operand" "0,x")
       (match_operand:VI124_128 2 "nonimmediate_operand" "xm,xm")))]
  "TARGET_SSE2 && !TARGET_XOP"
  "@
   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa"           "noavx,avx")
   (set_attr "type"          "ssecmp")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix"        "orig,vex")
   (set_attr "mode"          "TI")])
10587
;; Vector conditional select, 512-bit: operand 0 is chosen from operands 1
;; and 2 according to comparison operator 3 applied to operands 4 and 5.
;; Enabled only when the data mode and the comparison mode have the same
;; number of elements.  Expansion is delegated to ix86_expand_int_vcond.
(define_expand "vcond<V_512:mode><VI_512:mode>"
  [(set
     (match_operand:V_512 0 "register_operand")
     (if_then_else:V_512
       (match_operator 3 ""
         [(match_operand:VI_512 4 "nonimmediate_operand")
          (match_operand:VI_512 5 "general_operand")])
       (match_operand:V_512 1)
       (match_operand:V_512 2)))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VI_512:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10604
;; Vector conditional select, 256-bit: operand 0 is chosen from operands 1
;; and 2 according to comparison operator 3 applied to operands 4 and 5.
;; Enabled only when the data mode and the comparison mode have the same
;; number of elements.  Expansion is delegated to ix86_expand_int_vcond.
(define_expand "vcond<V_256:mode><VI_256:mode>"
  [(set
     (match_operand:V_256 0 "register_operand")
     (if_then_else:V_256
       (match_operator 3 ""
         [(match_operand:VI_256 4 "nonimmediate_operand")
          (match_operand:VI_256 5 "general_operand")])
       (match_operand:V_256 1)
       (match_operand:V_256 2)))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10621
;; Vector conditional select, 128-bit, comparing VI124_128 operands:
;; operand 0 is chosen from operands 1 and 2 according to comparison
;; operator 3 applied to operands 4 and 5.  Enabled only when the data mode
;; and the comparison mode have the same number of elements.
(define_expand "vcond<V_128:mode><VI124_128:mode>"
  [(set
     (match_operand:V_128 0 "register_operand")
     (if_then_else:V_128
       (match_operator 3 ""
         [(match_operand:VI124_128 4 "nonimmediate_operand")
          (match_operand:VI124_128 5 "general_operand")])
       (match_operand:V_128 1)
       (match_operand:V_128 2)))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10638
;; Vector conditional select on VI8F_128 data with a V2DI comparison:
;; operand 0 is chosen from operands 1 and 2 according to comparison
;; operator 3 applied to operands 4 and 5.  Requires TARGET_SSE4_2.
(define_expand "vcond<VI8F_128:mode>v2di"
  [(set
     (match_operand:VI8F_128 0 "register_operand")
     (if_then_else:VI8F_128
       (match_operator 3 ""
         [(match_operand:V2DI 4 "nonimmediate_operand")
          (match_operand:V2DI 5 "general_operand")])
       (match_operand:VI8F_128 1)
       (match_operand:VI8F_128 2)))]
  "TARGET_SSE4_2"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10653
;; vcondu counterpart of the 512-bit conditional select.  Operands 4 and 5
;; of the comparison must both be nonimmediate; the two data operands
;; accept any general operand.  Enabled only when the data mode and the
;; comparison mode have the same number of elements.
(define_expand "vcondu<V_512:mode><VI_512:mode>"
  [(set
     (match_operand:V_512 0 "register_operand")
     (if_then_else:V_512
       (match_operator 3 ""
         [(match_operand:VI_512 4 "nonimmediate_operand")
          (match_operand:VI_512 5 "nonimmediate_operand")])
       (match_operand:V_512 1 "general_operand")
       (match_operand:V_512 2 "general_operand")))]
  "TARGET_AVX512F
   && (GET_MODE_NUNITS (<V_512:MODE>mode)
       == GET_MODE_NUNITS (<VI_512:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10670
;; vcondu counterpart of the 256-bit conditional select.  Operands 4 and 5
;; of the comparison must both be nonimmediate; the two data operands
;; accept any general operand.  Enabled only when the data mode and the
;; comparison mode have the same number of elements.
(define_expand "vcondu<V_256:mode><VI_256:mode>"
  [(set
     (match_operand:V_256 0 "register_operand")
     (if_then_else:V_256
       (match_operator 3 ""
         [(match_operand:VI_256 4 "nonimmediate_operand")
          (match_operand:VI_256 5 "nonimmediate_operand")])
       (match_operand:V_256 1 "general_operand")
       (match_operand:V_256 2 "general_operand")))]
  "TARGET_AVX2
   && (GET_MODE_NUNITS (<V_256:MODE>mode)
       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10687
;; vcondu counterpart of the 128-bit conditional select with VI124_128
;; comparison operands.  Operands 4 and 5 must both be nonimmediate; the
;; two data operands accept any general operand.  Enabled only when the
;; data mode and the comparison mode have the same number of elements.
(define_expand "vcondu<V_128:mode><VI124_128:mode>"
  [(set
     (match_operand:V_128 0 "register_operand")
     (if_then_else:V_128
       (match_operator 3 ""
         [(match_operand:VI124_128 4 "nonimmediate_operand")
          (match_operand:VI124_128 5 "nonimmediate_operand")])
       (match_operand:V_128 1 "general_operand")
       (match_operand:V_128 2 "general_operand")))]
  "TARGET_SSE2
   && (GET_MODE_NUNITS (<V_128:MODE>mode)
       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10704
;; vcondu counterpart of the VI8F_128 conditional select with a V2DI
;; comparison.  Operands 4 and 5 must both be nonimmediate; the two data
;; operands accept any general operand.  Requires TARGET_SSE4_2.
(define_expand "vcondu<VI8F_128:mode>v2di"
  [(set
     (match_operand:VI8F_128 0 "register_operand")
     (if_then_else:VI8F_128
       (match_operator 3 ""
         [(match_operand:V2DI 4 "nonimmediate_operand")
          (match_operand:V2DI 5 "nonimmediate_operand")])
       (match_operand:VI8F_128 1 "general_operand")
       (match_operand:VI8F_128 2 "general_operand")))]
  "TARGET_SSE4_2"
{
  /* The helper is required to succeed for every accepted comparison.  */
  const bool expanded = ix86_expand_int_vcond (operands);
  gcc_assert (expanded);
  DONE;
})
10719
;; Modes handled by the variable-selector vec_perm<mode> expander.  The
;; first six modes carry no per-mode condition; every other mode is gated
;; on the ISA flag written next to it.
(define_mode_iterator VEC_PERM_AVX2
  [V16QI
   V8HI
   V4SI
   V2DI
   V4SF
   V2DF
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V8SI "TARGET_AVX2")
   (V4DI "TARGET_AVX2")
   (V8SF "TARGET_AVX2")
   (V4DF "TARGET_AVX2")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V32HI "TARGET_AVX512BW")
   (V64QI "TARGET_AVX512VBMI")])
10728
;; Variable permutation.  Operand 0 is the destination, operands 1 and 2
;; are the register inputs, and operand 3 is a register in the matching
;; <sseintvecmode>.  All work is done by ix86_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VEC_PERM_AVX2   0 "register_operand")
   (match_operand:VEC_PERM_AVX2   1 "register_operand")
   (match_operand:VEC_PERM_AVX2   2 "register_operand")
   (match_operand:<sseintvecmode> 3 "register_operand")]
  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
{
  ix86_expand_vec_perm (operands);
  DONE;
})
10739
;; Modes handled by the vec_perm_const<mode> expander.  Every mode is gated
;; on the ISA flag written next to it.
(define_mode_iterator VEC_PERM_CONST
  [(V4SF "TARGET_SSE")
   (V4SI "TARGET_SSE")
   (V2DF "TARGET_SSE")
   (V2DI "TARGET_SSE")
   (V16QI "TARGET_SSE2")
   (V8HI "TARGET_SSE2")
   (V8SF "TARGET_AVX")
   (V4DF "TARGET_AVX")
   (V8SI "TARGET_AVX")
   (V4DI "TARGET_AVX")
   (V32QI "TARGET_AVX2")
   (V16HI "TARGET_AVX2")
   (V16SI "TARGET_AVX512F")
   (V8DI "TARGET_AVX512F")
   (V16SF "TARGET_AVX512F")
   (V8DF "TARGET_AVX512F")
   (V32HI "TARGET_AVX512BW")
   (V64QI "TARGET_AVX512BW")])
10750
;; Permutation expander whose operand 3 has no predicate.  The expander has
;; no condition of its own; availability comes from the per-mode conditions
;; in VEC_PERM_CONST.  It FAILs when ix86_expand_vec_perm_const reports that
;; it could not expand the permutation.
(define_expand "vec_perm_const<mode>"
  [(match_operand:VEC_PERM_CONST  0 "register_operand")
   (match_operand:VEC_PERM_CONST  1 "register_operand")
   (match_operand:VEC_PERM_CONST  2 "register_operand")
   (match_operand:<sseintvecmode> 3)]
  ""
{
  if (!ix86_expand_vec_perm_const (operands))
    FAIL;
  DONE;
})
10763
10764;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10765;;
10766;; Parallel bitwise logical operations
10767;;
10768;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10769
;; One's complement of an integer vector, expressed as an XOR of operand 1
;; with a vector whose elements are all -1.  The preparation code builds
;; that constant and forces it into a register as operand 2.
(define_expand "one_cmpl<mode>2"
  [(set
     (match_operand:VI 0 "register_operand")
     (xor:VI (match_operand:VI 1 "nonimmediate_operand")
             (match_dup 2)))]
  "TARGET_SSE"
{
  const int nelts = GET_MODE_NUNITS (<MODE>mode);
  rtvec all_ones = rtvec_alloc (nelts);
  int elt;

  /* Every element is -1, so the XOR flips every bit of operand 1.  */
  for (elt = 0; elt < nelts; elt++)
    RTVEC_ELT (all_ones, elt) = constm1_rtx;

  operands[2]
    = force_reg (<MODE>mode, gen_rtx_CONST_VECTOR (<MODE>mode, all_ones));
})
10784
;; Named AND-NOT expander for the VI_AVX2 modes:
;; operand 0 = (NOT operand 1) AND operand 2.  It has no preparation code.
(define_expand "<sse2_avx2>_andnot<mode>3"
  [(set
     (match_operand:VI_AVX2 0 "register_operand")
     (and:VI_AVX2
       (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
       (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
  "TARGET_SSE2")
10791
;; Masked AND-NOT expander for the VI48_AVX512VL modes.  The AND-NOT of
;; operands 1 and 2 is merged with operand 3 under the mask register in
;; operand 4 (vec_merge).
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set
     (match_operand:VI48_AVX512VL 0 "register_operand")
     (vec_merge:VI48_AVX512VL
       (and:VI48_AVX512VL
         (not:VI48_AVX512VL
           (match_operand:VI48_AVX512VL 1 "register_operand"))
         (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
       (match_operand:VI48_AVX512VL 3 "vector_move_operand")
       (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512F")
10802
;; Masked AND-NOT expander for the VI12_AVX512VL modes.  The AND-NOT of
;; operands 1 and 2 is merged with operand 3 under the mask register in
;; operand 4 (vec_merge).  Enabled by TARGET_AVX512BW.
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
  [(set
     (match_operand:VI12_AVX512VL 0 "register_operand")
     (vec_merge:VI12_AVX512VL
       (and:VI12_AVX512VL
         (not:VI12_AVX512VL
           (match_operand:VI12_AVX512VL 1 "register_operand"))
         (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
       (match_operand:VI12_AVX512VL 3 "vector_move_operand")
       (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW")
10813
;; AND-NOT for every mode in VI: operand 0 = (NOT operand 1) AND operand 2.
;; The mnemonic is picked at output time from the insn "mode" attribute and
;; the vector mode; the operand layout is picked from the alternative
;; (0: two-operand form, 1: three-operand form with a "v" prefix).
(define_insn "*andnot<mode>3"
  [(set
     (match_operand:VI 0 "register_operand" "=x,v")
     (and:VI
       (not:VI (match_operand:VI 1 "register_operand" "0,v"))
       (match_operand:VI 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE"
{
  static char insn_text[64];
  const char *mnemonic;
  const char *fmt;

  /* Step 1: choose the mnemonic.  The integer cases fall through so that
     a wider mode also checks the requirements of the narrower ones.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2);

      /* There is no vpandnb or vpandnw instruction, nor vpandn for
         512-bit vectors.  Use vpandnq instead.  */
      if (<MODE>mode == V64QImode || <MODE>mode == V32HImode)
        mnemonic = "pandnq";
      else if (<MODE>mode == V16SImode || <MODE>mode == V8DImode)
        mnemonic = "pandn<ssemodesuffix>";
      else if (<MODE>mode == V8SImode || <MODE>mode == V4DImode
               || <MODE>mode == V4SImode || <MODE>mode == V2DImode)
        mnemonic = TARGET_AVX512VL ? "pandn<ssemodesuffix>" : "pandn";
      else
        mnemonic = TARGET_AVX512VL ? "pandnq" : "pandn";
      break;

    case MODE_V16SF:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      mnemonic = "andnps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Step 2: choose the operand layout for the matched alternative.  */
  if (which_alternative == 0)
    fmt = "%s\t{%%2, %%0|%%0, %%2}";
  else if (which_alternative == 1)
    fmt = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (insn_text, sizeof (insn_text), fmt, mnemonic);
  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; Only alternative 0 in TI mode gets prefix_data16 = 1.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "orig,vex")
   ;; The first matching clause decides the insn mode.
   (set (attr "mode")
        (cond
          [(and (match_test "<MODE_SIZE> == 16")
                (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
             (const_string "<ssePSmode>")
           (match_test "TARGET_AVX2")
             (const_string "<sseinsnmode>")
           (match_test "TARGET_AVX")
             (if_then_else
               (match_test "<MODE_SIZE> > 16")
               (const_string "V8SF")
               (const_string "<sseinsnmode>"))
           (ior (not (match_test "TARGET_SSE2"))
                (match_test "optimize_function_for_size_p (cfun)"))
             (const_string "V4SF")
          ]
          (const_string "<sseinsnmode>")))])
10911
;; Masked AND-NOT for the VI48_AVX512VL modes.  The AND-NOT of operands 1
;; and 2 is merged with operand 3 under the mask register in operand 4.
;; Operand 3 is either tied to the destination ("0") or matches the "C"
;; constraint; the template prints it through the %N3 operand modifier.
;; The output template is a plain string: no trailing `;' belongs after it
;; (the md reader would only treat it as the start of an empty comment).
(define_insn "*andnot<mode>3_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI48_AVX512VL
	  (and:VI48_AVX512VL
	    (not:VI48_AVX512VL
	      (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
	  (match_operand:VI48_AVX512VL 3 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
10926
;; Named expander for each any_logic code on every VI mode.  Both inputs
;; may be nonimmediate or constant-vector operands; the actual expansion is
;; done by ix86_expand_vector_logical_operator.
(define_expand "<code><mode>3"
  [(set
     (match_operand:VI 0 "register_operand")
     (any_logic:VI
       (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
       (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
  "TARGET_SSE"
{
  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
  DONE;
})
10937
;; any_logic operation on the VI48_AVX_AVX512F modes, with an optional
;; masked variant via <mask_name>.  The mnemonic comes from the insn "mode"
;; attribute and the vector mode; the operand layout comes from the
;; alternative and from whether masking is applied.
(define_insn "<mask_codefor><code><mode>3<mask_name>"
  [(set
     (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
     (any_logic:VI48_AVX_AVX512F
       (match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
       (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && <mask_mode512bit_condition>
   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char insn_text[64];
  const char *mnemonic;
  const char *fmt;

  /* Step 1: choose the mnemonic.  The integer cases fall through so that
     a wider mode also checks the requirements of the narrower ones.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
        {
        case V16SImode:
        case V8DImode:
          if (TARGET_AVX512F)
            {
              mnemonic = "p<logic><ssemodesuffix>";
              break;
            }
          /* FALLTHRU */
        case V8SImode:
        case V4DImode:
        case V4SImode:
        case V2DImode:
          mnemonic = (TARGET_AVX512VL
                      ? "p<logic><ssemodesuffix>"
                      : "p<logic>");
          break;
        default:
          gcc_unreachable ();
        }
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      /* The packed-single form is never used for the masked variant.  */
      gcc_assert (!<mask_applied>);
      mnemonic = "<logic>ps";
      break;

    default:
      gcc_unreachable ();
    }

  /* Step 2: choose the operand layout.  For alternative 0 the masked
     variant prints operand 0 as both source and destination.  */
  if (which_alternative == 0)
    {
      if (<mask_applied>)
        fmt = "v%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
      else
        fmt = "%s\t{%%2, %%0|%%0, %%2}";
    }
  else if (which_alternative == 1)
    fmt = "v%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
  else
    gcc_unreachable ();

  snprintf (insn_text, sizeof (insn_text), fmt, mnemonic);
  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; Only alternative 0 in TI mode gets prefix_data16 = 1.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "<mask_prefix3>")
   ;; The first matching clause decides the insn mode.
   (set (attr "mode")
        (cond
          [(and (match_test "<MODE_SIZE> == 16")
                (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
             (const_string "<ssePSmode>")
           (match_test "TARGET_AVX2")
             (const_string "<sseinsnmode>")
           (match_test "TARGET_AVX")
             (if_then_else
               (match_test "<MODE_SIZE> > 16")
               (const_string "V8SF")
               (const_string "<sseinsnmode>"))
           (ior (not (match_test "TARGET_SSE2"))
                (match_test "optimize_function_for_size_p (cfun)"))
             (const_string "V4SF")
          ]
          (const_string "<sseinsnmode>")))])
11033
;; any_logic operation on the VI12_AVX_AVX512F modes.  The integer
;; mnemonic is always p<logic>; alternative 1 may append a "q" suffix
;; depending on the vector mode and the enabled ISA.  Alternative 0 is the
;; two-operand form and never prints the suffix.
(define_insn "*<code><mode>3"
  [(set
     (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
     (any_logic:VI12_AVX_AVX512F
       (match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
       (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
  static char insn_text[64];
  const char *mnemonic;
  const char *suffix;
  const char *fmt;

  /* Step 1: choose mnemonic and suffix.  The integer cases fall through
     so that a wider mode also checks the narrower requirements.  */
  switch (get_attr_mode (insn))
    {
    case MODE_XI:
      gcc_assert (TARGET_AVX512F);
      /* FALLTHRU */
    case MODE_OI:
      gcc_assert (TARGET_AVX2 || TARGET_AVX512VL);
      /* FALLTHRU */
    case MODE_TI:
      gcc_assert (TARGET_SSE2 || TARGET_AVX512VL);
      switch (<MODE>mode)
        {
        case V64QImode:
        case V32HImode:
          if (TARGET_AVX512F)
            {
              suffix = "q";
              break;
            }
          /* FALLTHRU */
        case V32QImode:
        case V16HImode:
        case V16QImode:
        case V8HImode:
          if (TARGET_AVX512VL || TARGET_AVX2 || TARGET_SSE2)
            {
              suffix = TARGET_AVX512VL ? "q" : "";
              break;
            }
          /* FALLTHRU */
        default:
          gcc_unreachable ();
        }
      /* Every path that reaches this point uses the same mnemonic.  */
      mnemonic = "p<logic>";
      break;

    case MODE_V8SF:
      gcc_assert (TARGET_AVX);
      /* FALLTHRU */
    case MODE_V4SF:
      gcc_assert (TARGET_SSE);
      mnemonic = "<logic>ps";
      suffix = "";
      break;

    default:
      gcc_unreachable ();
    }

  /* Step 2: format according to the matched alternative.  */
  if (which_alternative == 0)
    {
      fmt = "%s\t{%%2, %%0|%%0, %%2}";
      snprintf (insn_text, sizeof (insn_text), fmt, mnemonic);
    }
  else if (which_alternative == 1)
    {
      fmt = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
      snprintf (insn_text, sizeof (insn_text), fmt, mnemonic, suffix);
    }
  else
    gcc_unreachable ();

  return insn_text;
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   ;; Only alternative 0 in TI mode gets prefix_data16 = 1.
   (set (attr "prefix_data16")
        (if_then_else
          (and (eq_attr "alternative" "0")
               (eq_attr "mode" "TI"))
          (const_string "1")
          (const_string "*")))
   (set_attr "prefix" "<mask_prefix3>")
   ;; The first matching clause decides the insn mode.
   (set (attr "mode")
        (cond
          [(and (match_test "<MODE_SIZE> == 16")
                (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
             (const_string "<ssePSmode>")
           (match_test "TARGET_AVX2")
             (const_string "<sseinsnmode>")
           (match_test "TARGET_AVX")
             (if_then_else
               (match_test "<MODE_SIZE> > 16")
               (const_string "V8SF")
               (const_string "<sseinsnmode>"))
           (ior (not (match_test "TARGET_SSE2"))
                (match_test "optimize_function_for_size_p (cfun)"))
             (const_string "V4SF")
          ]
          (const_string "<sseinsnmode>")))])
11132
;; vptestm for the VI12_AVX512VL modes.  The result is a mask register
;; (constraint Yk) computed from operands 1 and 2, represented as
;; UNSPEC_TESTM.  Enabled by TARGET_AVX512BW.
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
        (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
       UNSPEC_TESTM))]
  "TARGET_AVX512BW"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11143
;; vptestm for the VI48_AVX512VL modes.  The result is a mask register
;; (constraint Yk) computed from operands 1 and 2, represented as
;; UNSPEC_TESTM.  Enabled by TARGET_AVX512F.
(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
        (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
       UNSPEC_TESTM))]
  "TARGET_AVX512F"
  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11154
;; vptestnm for the VI12_AVX512VL modes.  The result is a mask register
;; (constraint Yk) computed from operands 1 and 2, represented as
;; UNSPEC_TESTNM.  Enabled by TARGET_AVX512BW.
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
        (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")]
       UNSPEC_TESTNM))]
  "TARGET_AVX512BW"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11165
;; vptestnm for the VI48_AVX512VL modes.  The result is a mask register
;; (constraint Yk) computed from operands 1 and 2, represented as
;; UNSPEC_TESTNM.  Enabled by TARGET_AVX512F.
(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
  [(set
     (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
     (unspec:<avx512fmaskmode>
       [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
        (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")]
       UNSPEC_TESTNM))]
  "TARGET_AVX512F"
  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
11176
11177;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11178;;
11179;; Parallel integral element swizzling
11180;;
11181;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11182
;; Truncating pack of two vectors into one vector of <ssepackmode>.  Both
;; inputs are reinterpreted in the pack mode with gen_lowpart and handed
;; to ix86_expand_vec_extract_even_odd with a final argument of 0.
(define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<ssepackmode> 0 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
  "TARGET_SSE2"
{
  rtx first_src = gen_lowpart (<ssepackmode>mode, operands[1]);
  rtx second_src = gen_lowpart (<ssepackmode>mode, operands[2]);

  ix86_expand_vec_extract_even_odd (operands[0], first_src, second_src, 0);
  DONE;
})
11194
;; packsswb: concatenation of the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, for the VI1_AVX512 modes, with an
;; optional masked variant via <mask_name>.  Alternative 0 is the
;; two-operand form; alternative 1 is the three-operand "v" form.
(define_insn "<sse2_avx2>_packsswb<mask_name>"
  [(set
     (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
     (vec_concat:VI1_AVX512
       (ss_truncate:<ssehalfvecmode>
         (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
       (ss_truncate:<ssehalfvecmode>
         (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packsswb\t{%2, %0|%0, %2}
   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa"           "noavx,avx")
   (set_attr "type"          "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix"        "orig,maybe_evex")
   (set_attr "mode"          "<sseinsnmode>")])
11211
;; packssdw: concatenation of the signed-saturating truncations
;; (ss_truncate) of operands 1 and 2, for the VI2_AVX2 modes, with an
;; optional masked variant via <mask_name>.  Alternative 0 is the
;; two-operand form; alternative 1 is the three-operand "v" form.
;;
;; The "prefix" attribute of alternative 1 is "maybe_evex", matching the
;; packsswb pattern in this file: the masked variant prints
;; <mask_operand3> and so cannot be described as a plain VEX encoding.
(define_insn "<sse2_avx2>_packssdw<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
	(vec_concat:VI2_AVX2
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
	  (ss_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packssdw\t{%2, %0|%0, %2}
   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
11228
;; packuswb: concatenation of the unsigned-saturating truncations
;; (us_truncate) of operands 1 and 2, for the VI1_AVX512 modes, with an
;; optional masked variant via <mask_name>.  Alternative 0 is the
;; two-operand form; alternative 1 is the three-operand "v" form.
;;
;; The "prefix" attribute of alternative 1 is "maybe_evex", matching the
;; packsswb pattern in this file: the masked variant prints
;; <mask_operand3> and so cannot be described as a plain VEX encoding.
(define_insn "<sse2_avx2>_packuswb<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x")
	(vec_concat:VI1_AVX512
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,v"))
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "xm,vm"))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packuswb\t{%2, %0|%0, %2}
   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
11245
;; vpunpckhbw on V64QI.  The selector indexes the 128-element
;; concatenation of operands 1 and 2: indices 0-63 are operand 1, indices
;; 64-127 are operand 2.  For each group of 16 bytes, bytes 8-15 of
;; operand 1 are interleaved with the same-numbered bytes of operand 2.
(define_insn "avx512bw_interleave_highv64qi<mask_name>"
  [(set
     (match_operand:V64QI 0 "register_operand" "=v")
     (vec_select:V64QI
       (vec_concat:V128QI
         (match_operand:V64QI 1 "register_operand" "v")
         (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
       (parallel
         [;; Bytes 8-15.
          (const_int 8)  (const_int 72)  (const_int 9)  (const_int 73)
          (const_int 10) (const_int 74)  (const_int 11) (const_int 75)
          (const_int 12) (const_int 76)  (const_int 13) (const_int 77)
          (const_int 14) (const_int 78)  (const_int 15) (const_int 79)
          ;; Bytes 24-31.
          (const_int 24) (const_int 88)  (const_int 25) (const_int 89)
          (const_int 26) (const_int 90)  (const_int 27) (const_int 91)
          (const_int 28) (const_int 92)  (const_int 29) (const_int 93)
          (const_int 30) (const_int 94)  (const_int 31) (const_int 95)
          ;; Bytes 40-47.
          (const_int 40) (const_int 104) (const_int 41) (const_int 105)
          (const_int 42) (const_int 106) (const_int 43) (const_int 107)
          (const_int 44) (const_int 108) (const_int 45) (const_int 109)
          (const_int 46) (const_int 110) (const_int 47) (const_int 111)
          ;; Bytes 56-63.
          (const_int 56) (const_int 120) (const_int 57) (const_int 121)
          (const_int 58) (const_int 122) (const_int 59) (const_int 123)
          (const_int 60) (const_int 124) (const_int 61) (const_int 125)
          (const_int 62) (const_int 126) (const_int 63) (const_int 127)])))]
  "TARGET_AVX512BW"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode"   "XI")])
11289
;; vpunpckhbw on V32QI.  The selector indexes the 64-element concatenation
;; of operands 1 and 2: indices 0-31 are operand 1, indices 32-63 are
;; operand 2.  For each group of 16 bytes, bytes 8-15 of operand 1 are
;; interleaved with the same-numbered bytes of operand 2.
;;
;; The condition includes <mask_avx512bw_condition> in addition to
;; <mask_avx512vl_condition>, exactly as avx2_interleave_lowv32qi does:
;; the masked variant is a byte-element operation under a mask register.
(define_insn "avx2_interleave_highv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 8)  (const_int 40)
		     (const_int 9)  (const_int 41)
		     (const_int 10) (const_int 42)
		     (const_int 11) (const_int 43)
		     (const_int 12) (const_int 44)
		     (const_int 13) (const_int 45)
		     (const_int 14) (const_int 46)
		     (const_int 15) (const_int 47)
		     (const_int 24) (const_int 56)
		     (const_int 25) (const_int 57)
		     (const_int 26) (const_int 58)
		     (const_int 27) (const_int 59)
		     (const_int 28) (const_int 60)
		     (const_int 29) (const_int 61)
		     (const_int 30) (const_int 62)
		     (const_int 31) (const_int 63)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
11317
;; punpckhbw / vpunpckhbw on V16QI.  The selector indexes the 32-element
;; concatenation of operands 1 and 2: indices 0-15 are operand 1, indices
;; 16-31 are operand 2.  Bytes 8-15 of operand 1 are interleaved with the
;; same-numbered bytes of operand 2.  Alternative 0 is the two-operand
;; form; alternative 1 is the three-operand "v" form.
;;
;; The condition includes <mask_avx512bw_condition> in addition to
;; <mask_avx512vl_condition>, exactly as vec_interleave_lowv16qi does:
;; the masked variant is a byte-element operation under a mask register.
(define_insn "vec_interleave_highv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 8)  (const_int 24)
		     (const_int 9)  (const_int 25)
		     (const_int 10) (const_int 26)
		     (const_int 11) (const_int 27)
		     (const_int 12) (const_int 28)
		     (const_int 13) (const_int 29)
		     (const_int 14) (const_int 30)
		     (const_int 15) (const_int 31)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpckhbw\t{%2, %0|%0, %2}
   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
11341
;; vpunpcklbw on V64QI.  The selector indexes the 128-element
;; concatenation of operands 1 and 2: indices 0-63 are operand 1, indices
;; 64-127 are operand 2.  For each group of 16 bytes, bytes 0-7 of
;; operand 1 are interleaved with the same-numbered bytes of operand 2.
(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
  [(set
     (match_operand:V64QI 0 "register_operand" "=v")
     (vec_select:V64QI
       (vec_concat:V128QI
         (match_operand:V64QI 1 "register_operand" "v")
         (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
       (parallel
         [;; Bytes 0-7.
          (const_int 0)  (const_int 64)  (const_int 1)  (const_int 65)
          (const_int 2)  (const_int 66)  (const_int 3)  (const_int 67)
          (const_int 4)  (const_int 68)  (const_int 5)  (const_int 69)
          (const_int 6)  (const_int 70)  (const_int 7)  (const_int 71)
          ;; Bytes 16-23.
          (const_int 16) (const_int 80)  (const_int 17) (const_int 81)
          (const_int 18) (const_int 82)  (const_int 19) (const_int 83)
          (const_int 20) (const_int 84)  (const_int 21) (const_int 85)
          (const_int 22) (const_int 86)  (const_int 23) (const_int 87)
          ;; Bytes 32-39.
          (const_int 32) (const_int 96)  (const_int 33) (const_int 97)
          (const_int 34) (const_int 98)  (const_int 35) (const_int 99)
          (const_int 36) (const_int 100) (const_int 37) (const_int 101)
          (const_int 38) (const_int 102) (const_int 39) (const_int 103)
          ;; Bytes 48-55.
          (const_int 48) (const_int 112) (const_int 49) (const_int 113)
          (const_int 50) (const_int 114) (const_int 51) (const_int 115)
          (const_int 52) (const_int 116) (const_int 53) (const_int 117)
          (const_int 54) (const_int 118) (const_int 55) (const_int 119)])))]
  "TARGET_AVX512BW"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type"   "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode"   "XI")])
11385
;; vpunpcklbw on V32QI.  The selector indexes the 64-element concatenation
;; of operands 1 and 2: indices 0-31 are operand 1, indices 32-63 are
;; operand 2.  For each group of 16 bytes, bytes 0-7 of operand 1 are
;; interleaved with the same-numbered bytes of operand 2.
;;
;; The "prefix" attribute uses <mask_prefix>, as avx2_interleave_highv32qi
;; does, so that the masked variant is not described as VEX-encoded.
(define_insn "avx2_interleave_lowv32qi<mask_name>"
  [(set (match_operand:V32QI 0 "register_operand" "=v")
	(vec_select:V32QI
	  (vec_concat:V64QI
	    (match_operand:V32QI 1 "register_operand" "v")
	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
	  (parallel [(const_int 0) (const_int 32)
		     (const_int 1) (const_int 33)
		     (const_int 2) (const_int 34)
		     (const_int 3) (const_int 35)
		     (const_int 4) (const_int 36)
		     (const_int 5) (const_int 37)
		     (const_int 6) (const_int 38)
		     (const_int 7) (const_int 39)
		     (const_int 16) (const_int 48)
		     (const_int 17) (const_int 49)
		     (const_int 18) (const_int 50)
		     (const_int 19) (const_int 51)
		     (const_int 20) (const_int 52)
		     (const_int 21) (const_int 53)
		     (const_int 22) (const_int 54)
		     (const_int 23) (const_int 55)])))]
  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])
11413
;; punpcklbw / vpunpcklbw on V16QI.  The selector indexes the 32-element
;; concatenation of operands 1 and 2: indices 0-15 are operand 1, indices
;; 16-31 are operand 2.  Bytes 0-7 of operand 1 are interleaved with the
;; same-numbered bytes of operand 2.  Alternative 0 is the two-operand
;; form; alternative 1 is the three-operand "v" form.
;;
;; The "prefix" attribute of alternative 1 uses <mask_prefix>, as
;; vec_interleave_highv16qi does, so that the masked variant is not
;; described as VEX-encoded.
(define_insn "vec_interleave_lowv16qi<mask_name>"
  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
	(vec_select:V16QI
	  (vec_concat:V32QI
	    (match_operand:V16QI 1 "register_operand" "0,v")
	    (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm"))
	  (parallel [(const_int 0) (const_int 16)
		     (const_int 1) (const_int 17)
		     (const_int 2) (const_int 18)
		     (const_int 3) (const_int 19)
		     (const_int 4) (const_int 20)
		     (const_int 5) (const_int 21)
		     (const_int 6) (const_int 22)
		     (const_int 7) (const_int 23)])))]
  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
  "@
   punpcklbw\t{%2, %0|%0, %2}
   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "TI")])
11437
;; High word interleave of two V32HI vectors, done separately in each
;; group of eight elements: elements 4-7 of every group of operand 1
;; alternate with the same-numbered elements of operand 2 (offset 32 in
;; the V64HI concatenation).  Emitted as vpunpckhwd; gated on
;; TARGET_AVX512BW.
11438(define_insn "avx512bw_interleave_highv32hi<mask_name>"
11439  [(set (match_operand:V32HI 0 "register_operand" "=v")
11440	(vec_select:V32HI
11441	  (vec_concat:V64HI
11442	    (match_operand:V32HI 1 "register_operand" "v")
11443	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11444	  (parallel [(const_int 4) (const_int 36)
11445		     (const_int 5) (const_int 37)
11446		     (const_int 6) (const_int 38)
11447		     (const_int 7) (const_int 39)
11448		     (const_int 12) (const_int 44)
11449		     (const_int 13) (const_int 45)
11450		     (const_int 14) (const_int 46)
11451		     (const_int 15) (const_int 47)
11452		     (const_int 20) (const_int 52)
11453		     (const_int 21) (const_int 53)
11454		     (const_int 22) (const_int 54)
11455		     (const_int 23) (const_int 55)
11456		     (const_int 28) (const_int 60)
11457		     (const_int 29) (const_int 61)
11458		     (const_int 30) (const_int 62)
11459		     (const_int 31) (const_int 63)])))]
11460  "TARGET_AVX512BW"
11461  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11462  [(set_attr "type" "sselog")
11463   (set_attr "prefix" "evex")
11464   (set_attr "mode" "XI")])
11465
;; High word interleave of two V16HI vectors, done separately in each
;; group of eight elements: elements 4-7 and 12-15 of operand 1 alternate
;; with the same-numbered elements of operand 2 (offset 16 in the V32HI
;; concatenation).  Emitted as vpunpckhwd.
11466(define_insn "avx2_interleave_highv16hi<mask_name>"
11467  [(set (match_operand:V16HI 0 "register_operand" "=v")
11468	(vec_select:V16HI
11469	  (vec_concat:V32HI
11470	    (match_operand:V16HI 1 "register_operand" "v")
11471	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11472	  (parallel [(const_int 4) (const_int 20)
11473		     (const_int 5) (const_int 21)
11474		     (const_int 6) (const_int 22)
11475		     (const_int 7) (const_int 23)
11476		     (const_int 12) (const_int 28)
11477		     (const_int 13) (const_int 29)
11478		     (const_int 14) (const_int 30)
11479		     (const_int 15) (const_int 31)])))]
11480  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11481  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11482  [(set_attr "type" "sselog")
11483   (set_attr "prefix" "maybe_evex")
11484   (set_attr "mode" "OI")])
11485
;; High word interleave of two V8HI vectors: elements 4-7 of operand 1
;; alternate with elements 4-7 of operand 2 (indices 12-15 of the V16HI
;; concatenation).  Alternative 0 is the two-operand SSE2 form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; AVX form.
11486(define_insn "vec_interleave_highv8hi<mask_name>"
11487  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11488	(vec_select:V8HI
11489	  (vec_concat:V16HI
11490	    (match_operand:V8HI 1 "register_operand" "0,v")
11491	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11492	  (parallel [(const_int 4) (const_int 12)
11493		     (const_int 5) (const_int 13)
11494		     (const_int 6) (const_int 14)
11495		     (const_int 7) (const_int 15)])))]
11496  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11497  "@
11498   punpckhwd\t{%2, %0|%0, %2}
11499   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11500  [(set_attr "isa" "noavx,avx")
11501   (set_attr "type" "sselog")
11502   (set_attr "prefix_data16" "1,*")
11503   (set_attr "prefix" "orig,maybe_vex")
11504   (set_attr "mode" "TI")])
11505
;; Low word interleave of two V32HI vectors, done separately in each
;; group of eight elements: elements 0-3 of every group of operand 1
;; alternate with the same-numbered elements of operand 2 (offset 32 in
;; the V64HI concatenation).  Emitted as vpunpcklwd; gated on
;; TARGET_AVX512BW.
11506(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
11507  [(set (match_operand:V32HI 0 "register_operand" "=v")
11508	(vec_select:V32HI
11509	  (vec_concat:V64HI
11510	    (match_operand:V32HI 1 "register_operand" "v")
11511	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
11512	  (parallel [(const_int 0) (const_int 32)
11513		     (const_int 1) (const_int 33)
11514		     (const_int 2) (const_int 34)
11515		     (const_int 3) (const_int 35)
11516		     (const_int 8) (const_int 40)
11517		     (const_int 9) (const_int 41)
11518		     (const_int 10) (const_int 42)
11519		     (const_int 11) (const_int 43)
11520		     (const_int 16) (const_int 48)
11521		     (const_int 17) (const_int 49)
11522		     (const_int 18) (const_int 50)
11523		     (const_int 19) (const_int 51)
11524		     (const_int 24) (const_int 56)
11525		     (const_int 25) (const_int 57)
11526		     (const_int 26) (const_int 58)
11527		     (const_int 27) (const_int 59)])))]
11528  "TARGET_AVX512BW"
11529  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11530  [(set_attr "type" "sselog")
11531   (set_attr "prefix" "evex")
11532   (set_attr "mode" "XI")])
11533
;; Low word interleave of two V16HI vectors, done separately in each
;; group of eight elements: elements 0-3 and 8-11 of operand 1 alternate
;; with the same-numbered elements of operand 2 (offset 16 in the V32HI
;; concatenation).  Emitted as vpunpcklwd.
11534(define_insn "avx2_interleave_lowv16hi<mask_name>"
11535  [(set (match_operand:V16HI 0 "register_operand" "=v")
11536	(vec_select:V16HI
11537	  (vec_concat:V32HI
11538	    (match_operand:V16HI 1 "register_operand" "v")
11539	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
11540	  (parallel [(const_int 0) (const_int 16)
11541		     (const_int 1) (const_int 17)
11542		     (const_int 2) (const_int 18)
11543		     (const_int 3) (const_int 19)
11544		     (const_int 8) (const_int 24)
11545		     (const_int 9) (const_int 25)
11546		     (const_int 10) (const_int 26)
11547		     (const_int 11) (const_int 27)])))]
11548  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11549  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11550  [(set_attr "type" "sselog")
11551   (set_attr "prefix" "maybe_evex")
11552   (set_attr "mode" "OI")])
11553
;; Low word interleave of two V8HI vectors: elements 0-3 of operand 1
;; alternate with elements 0-3 of operand 2 (indices 8-11 of the V16HI
;; concatenation).  Alternative 0 is the two-operand SSE2 form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; AVX form.
11554(define_insn "vec_interleave_lowv8hi<mask_name>"
11555  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
11556	(vec_select:V8HI
11557	  (vec_concat:V16HI
11558	    (match_operand:V8HI 1 "register_operand" "0,v")
11559	    (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm"))
11560	  (parallel [(const_int 0) (const_int 8)
11561		     (const_int 1) (const_int 9)
11562		     (const_int 2) (const_int 10)
11563		     (const_int 3) (const_int 11)])))]
11564  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
11565  "@
11566   punpcklwd\t{%2, %0|%0, %2}
11567   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11568  [(set_attr "isa" "noavx,avx")
11569   (set_attr "type" "sselog")
11570   (set_attr "prefix_data16" "1,*")
11571   (set_attr "prefix" "orig,maybe_evex")
11572   (set_attr "mode" "TI")])
11573
;; High doubleword interleave of two V8SI vectors, done separately in each
;; group of four elements: elements 2-3 and 6-7 of operand 1 alternate
;; with the same-numbered elements of operand 2 (offset 8 in the V16SI
;; concatenation).  Emitted as vpunpckhdq.
11574(define_insn "avx2_interleave_highv8si<mask_name>"
11575  [(set (match_operand:V8SI 0 "register_operand" "=v")
11576	(vec_select:V8SI
11577	  (vec_concat:V16SI
11578	    (match_operand:V8SI 1 "register_operand" "v")
11579	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11580	  (parallel [(const_int 2) (const_int 10)
11581		     (const_int 3) (const_int 11)
11582		     (const_int 6) (const_int 14)
11583		     (const_int 7) (const_int 15)])))]
11584  "TARGET_AVX2 && <mask_avx512vl_condition>"
11585  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11586  [(set_attr "type" "sselog")
11587   (set_attr "prefix" "maybe_evex")
11588   (set_attr "mode" "OI")])
11589
;; High doubleword interleave of two V16SI vectors, done separately in
;; each group of four elements: elements 2-3 of every group of operand 1
;; alternate with the same-numbered elements of operand 2 (offset 16 in
;; the V32SI concatenation).  Emitted as vpunpckhdq; gated on
;; TARGET_AVX512F.
11590(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
11591  [(set (match_operand:V16SI 0 "register_operand" "=v")
11592	(vec_select:V16SI
11593	  (vec_concat:V32SI
11594	    (match_operand:V16SI 1 "register_operand" "v")
11595	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11596	  (parallel [(const_int 2) (const_int 18)
11597		     (const_int 3) (const_int 19)
11598		     (const_int 6) (const_int 22)
11599		     (const_int 7) (const_int 23)
11600		     (const_int 10) (const_int 26)
11601		     (const_int 11) (const_int 27)
11602		     (const_int 14) (const_int 30)
11603		     (const_int 15) (const_int 31)])))]
11604  "TARGET_AVX512F"
11605  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11606  [(set_attr "type" "sselog")
11607   (set_attr "prefix" "evex")
11608   (set_attr "mode" "XI")])
11609
11610
;; High doubleword interleave of two V4SI vectors: elements 2-3 of
;; operand 1 alternate with elements 2-3 of operand 2 (indices 6-7 of the
;; V8SI concatenation).  Alternative 0 is the two-operand SSE2 form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; AVX form.
11611(define_insn "vec_interleave_highv4si<mask_name>"
11612  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11613	(vec_select:V4SI
11614	  (vec_concat:V8SI
11615	    (match_operand:V4SI 1 "register_operand" "0,v")
11616	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11617	  (parallel [(const_int 2) (const_int 6)
11618		     (const_int 3) (const_int 7)])))]
11619  "TARGET_SSE2 && <mask_avx512vl_condition>"
11620  "@
11621   punpckhdq\t{%2, %0|%0, %2}
11622   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11623  [(set_attr "isa" "noavx,avx")
11624   (set_attr "type" "sselog")
11625   (set_attr "prefix_data16" "1,*")
11626   (set_attr "prefix" "orig,maybe_vex")
11627   (set_attr "mode" "TI")])
11628
;; Low doubleword interleave of two V8SI vectors, done separately in each
;; group of four elements: elements 0-1 and 4-5 of operand 1 alternate
;; with the same-numbered elements of operand 2 (offset 8 in the V16SI
;; concatenation).  Emitted as vpunpckldq.
11629(define_insn "avx2_interleave_lowv8si<mask_name>"
11630  [(set (match_operand:V8SI 0 "register_operand" "=v")
11631	(vec_select:V8SI
11632	  (vec_concat:V16SI
11633	    (match_operand:V8SI 1 "register_operand" "v")
11634	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
11635	  (parallel [(const_int 0) (const_int 8)
11636		     (const_int 1) (const_int 9)
11637		     (const_int 4) (const_int 12)
11638		     (const_int 5) (const_int 13)])))]
11639  "TARGET_AVX2 && <mask_avx512vl_condition>"
11640  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11641  [(set_attr "type" "sselog")
11642   (set_attr "prefix" "maybe_evex")
11643   (set_attr "mode" "OI")])
11644
;; Low doubleword interleave of two V16SI vectors, done separately in each
;; group of four elements: elements 0-1 of every group of operand 1
;; alternate with the same-numbered elements of operand 2 (offset 16 in
;; the V32SI concatenation).  Emitted as vpunpckldq; gated on
;; TARGET_AVX512F.
11645(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
11646  [(set (match_operand:V16SI 0 "register_operand" "=v")
11647	(vec_select:V16SI
11648	  (vec_concat:V32SI
11649	    (match_operand:V16SI 1 "register_operand" "v")
11650	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
11651	  (parallel [(const_int 0) (const_int 16)
11652		     (const_int 1) (const_int 17)
11653		     (const_int 4) (const_int 20)
11654		     (const_int 5) (const_int 21)
11655		     (const_int 8) (const_int 24)
11656		     (const_int 9) (const_int 25)
11657		     (const_int 12) (const_int 28)
11658		     (const_int 13) (const_int 29)])))]
11659  "TARGET_AVX512F"
11660  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11661  [(set_attr "type" "sselog")
11662   (set_attr "prefix" "evex")
11663   (set_attr "mode" "XI")])
11664
;; Low doubleword interleave of two V4SI vectors: elements 0-1 of
;; operand 1 alternate with elements 0-1 of operand 2 (indices 4-5 of the
;; V8SI concatenation).  Alternative 0 is the two-operand SSE2 form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; AVX form.  The AVX alternative accepts "v" registers and the optional
;; mask operand, so its prefix is maybe_evex (as in
;; vec_interleave_lowv8hi) rather than a fixed vex.
11665(define_insn "vec_interleave_lowv4si<mask_name>"
11666  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
11667	(vec_select:V4SI
11668	  (vec_concat:V8SI
11669	    (match_operand:V4SI 1 "register_operand" "0,v")
11670	    (match_operand:V4SI 2 "nonimmediate_operand" "xm,vm"))
11671	  (parallel [(const_int 0) (const_int 4)
11672		     (const_int 1) (const_int 5)])))]
11673  "TARGET_SSE2 && <mask_avx512vl_condition>"
11674  "@
11675   punpckldq\t{%2, %0|%0, %2}
11676   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11677  [(set_attr "isa" "noavx,avx")
11678   (set_attr "type" "sselog")
11679   (set_attr "prefix_data16" "1,*")
11680   (set_attr "prefix" "orig,maybe_evex")
11681   (set_attr "mode" "TI")])
11682
;; Expander for the high interleave of two 256-bit integer vectors
;; (VI_256).  It emits both the avx2 low and high interleaves, which each
;; work on separate halves of the vector, and then combines one half of
;; each result with avx2_permv2ti.
11683(define_expand "vec_interleave_high<mode>"
11684  [(match_operand:VI_256 0 "register_operand" "=x")
11685   (match_operand:VI_256 1 "register_operand" "x")
11686   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11687 "TARGET_AVX2"
11688{
  /* t1 and t2 receive the low and high interleaves; t3 is the V4DImode
     destination of the permute.  */
11689  rtx t1 = gen_reg_rtx (<MODE>mode);
11690  rtx t2 = gen_reg_rtx (<MODE>mode);
11691  rtx t3 = gen_reg_rtx (V4DImode);
11692  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11693  emit_insn (gen_avx2_interleave_high<mode> (t2,  operands[1], operands[2]));
  /* NOTE(review): immediate 0x31 presumably selects the upper 128 bits
     of t1 and of t2 (vperm2i128 encoding) -- confirm against
     avx2_permv2ti.  */
11694  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11695				gen_lowpart (V4DImode, t2),
11696				GEN_INT (1 + (3 << 4))));
11697  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11698  DONE;
11699})
11700
;; Expander for the low interleave of two 256-bit integer vectors
;; (VI_256).  It emits both the avx2 low and high interleaves, which each
;; work on separate halves of the vector, and then combines one half of
;; each result with avx2_permv2ti.
11701(define_expand "vec_interleave_low<mode>"
11702  [(match_operand:VI_256 0 "register_operand" "=x")
11703   (match_operand:VI_256 1 "register_operand" "x")
11704   (match_operand:VI_256 2 "nonimmediate_operand" "xm")]
11705 "TARGET_AVX2"
11706{
  /* t1 and t2 receive the low and high interleaves; t3 is the V4DImode
     destination of the permute.  */
11707  rtx t1 = gen_reg_rtx (<MODE>mode);
11708  rtx t2 = gen_reg_rtx (<MODE>mode);
11709  rtx t3 = gen_reg_rtx (V4DImode);
11710  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
11711  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
  /* NOTE(review): immediate 0x20 presumably selects the lower 128 bits
     of t1 and of t2 (vperm2i128 encoding) -- confirm against
     avx2_permv2ti.  */
11712  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
11713				gen_lowpart (V4DImode, t2),
11714				GEN_INT (0 + (2 << 4))));
11715  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
11716  DONE;
11717})
11718
11719;; Modes handled by pinsr patterns.
;; V8HI carries no extra condition; V16QI and V4SI require TARGET_SSE4_1,
;; and V2DI additionally requires TARGET_64BIT.
11720(define_mode_iterator PINSR_MODE
11721  [(V16QI "TARGET_SSE4_1") V8HI
11722   (V4SI "TARGET_SSE4_1")
11723   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
11724
;; Name prefix of the pinsr pattern for each PINSR_MODE mode: "sse2" for
;; V8HI, "sse4_1" for the others.
11725(define_mode_attr sse2p4_1
11726  [(V16QI "sse4_1") (V8HI "sse2")
11727   (V4SI "sse4_1") (V2DI "sse4_1")])
11728
11729;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
;; Insert scalar operand 2 into one element of vector operand 1.  The RTL
;; selector (operand 3) is a one-bit vec_merge mask; the insn condition
;; only accepts a power of two whose log2 is a valid element index.
;; Alternatives 0/1 are the two-operand forms (register / memory source),
;; alternatives 2/3 the three-operand AVX forms.
11730(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
11731  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x")
11732	(vec_merge:PINSR_MODE
11733	  (vec_duplicate:PINSR_MODE
11734	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m"))
11735	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x")
11736	  (match_operand:SI 3 "const_int_operand")))]
11737  "TARGET_SSE2
11738   && ((unsigned) exact_log2 (INTVAL (operands[3]))
11739       < GET_MODE_NUNITS (<MODE>mode))"
11740{
  /* Turn the one-bit merge mask into the element index that is printed
     as the instruction's immediate.  */
11741  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
11742
11743  switch (which_alternative)
11744    {
11745    case 0:
      /* Register source narrower than SImode: print it with the %k
	 operand modifier.  */
11746      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11747	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
11748      /* FALLTHRU */
11749    case 1:
11750      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
11751    case 2:
      /* Same narrow-register handling for the AVX form.  */
11752      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
11753	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
11754      /* FALLTHRU */
11755    case 3:
11756      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11757    default:
11758      gcc_unreachable ();
11759    }
11760}
11761  [(set_attr "isa" "noavx,noavx,avx,avx")
11762   (set_attr "type" "sselog")
   ;; prefix_rex is "1" only for the non-AVX V2DI variant.
11763   (set (attr "prefix_rex")
11764     (if_then_else
11765       (and (not (match_test "TARGET_AVX"))
11766	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
11767       (const_string "1")
11768       (const_string "*")))
   ;; prefix_data16 is "1" only for the non-AVX V8HI variant.
11769   (set (attr "prefix_data16")
11770     (if_then_else
11771       (and (not (match_test "TARGET_AVX"))
11772	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11773       (const_string "1")
11774       (const_string "*")))
   ;; prefix_extra is "1" for everything except the non-AVX V8HI variant.
11775   (set (attr "prefix_extra")
11776     (if_then_else
11777       (and (not (match_test "TARGET_AVX"))
11778	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
11779       (const_string "*")
11780       (const_string "1")))
11781   (set_attr "length_immediate" "1")
11782   (set_attr "prefix" "orig,orig,vex,vex")
11783   (set_attr "mode" "TI")])
11784
;; Masked insert of a <ssequartermode> value (operand 2) into position
;; operands[3] (0-3) of an AVX512_VEC vector (operand 1).  Operand 4 is
;; the merge source and operand 5 the mask register passed through to the
;; _1_mask insn.  The position is converted into a vec_merge selector
;; here and converted back into the instruction immediate by the insn
;; below; the two encodings must stay in sync.
11785(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
11786  [(match_operand:AVX512_VEC 0 "register_operand")
11787   (match_operand:AVX512_VEC 1 "register_operand")
11788   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
11789   (match_operand:SI 3 "const_0_to_3_operand")
11790   (match_operand:AVX512_VEC 4 "register_operand")
11791   (match_operand:<avx512fmaskmode> 5 "register_operand")]
11792  "TARGET_AVX512F"
11793{
11794  int mask,selector;
11795  mask = INTVAL (operands[3]);
  /* vec_merge takes an element from its first operand (operands[1])
     where the selector bit is set and from the duplicated operands[2]
     where it is clear.  So clear exactly the bits of the position being
     replaced, counting from the least significant bit: 4 bits per
     position for 4-byte elements, 2 bits per position otherwise.  */
11796  selector = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4 ?
11797    0xFFFF ^ (0x000F << mask * 4)
11798    : 0xFF ^ (0x03 << mask * 2);
11799  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
11800    (operands[0], operands[1], operands[2], GEN_INT (selector),
11801     operands[4], operands[5]));
11802  DONE;
11803})
11804
;; The insn behind the expander above.  Operand 3 is the vec_merge
;; selector: set bits keep the element of operand 1, clear bits take the
;; duplicated operand 2.  Only the four selectors with one cleared
;; position are handled; anything else reaches gcc_unreachable.
11805(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
11806  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
11807	(vec_merge:AVX512_VEC
11808	  (match_operand:AVX512_VEC 1 "register_operand" "v")
11809	  (vec_duplicate:AVX512_VEC
11810		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
11811	  (match_operand:SI 3 "const_int_operand" "n")))]
11812  "TARGET_AVX512F"
11813{
11814  int mask;
11815  int selector = INTVAL (operands[3]);
11816
  /* Map the selector back to the position immediate.  The first value of
     each pair is the 16-element form, the second the 8-element form.  */
11817  if (selector == 0xFFF0 || selector == 0xFC)
11818    mask = 0;
11819  else if ( selector == 0xFF0F || selector == 0xF3)
11820    mask = 1;
11821  else if ( selector == 0xF0FF || selector == 0xCF)
11822    mask = 2;
11823  else if ( selector == 0x0FFF || selector == 0x3F)
11824    mask = 3;
11825  else
11826      gcc_unreachable ();
11827
11828  operands[3] = GEN_INT (mask);
11829
11830  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
11831}
11832  [(set_attr "type" "sselog")
11833   (set_attr "length_immediate" "1")
11834   (set_attr "prefix" "evex")
11835   (set_attr "mode" "<sseinsnmode>")])
11836
;; Masked insert of a half-width vector (operand 2, <ssehalfvecmode>)
;; into an AVX512_VEC_2 vector (operand 1).  Operand 3 is 0 or 1 and picks
;; between the vec_set_lo and vec_set_hi masked patterns; operands 4 and 5
;; are passed through to them unchanged.
11837(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
11838  [(match_operand:AVX512_VEC_2 0 "register_operand")
11839   (match_operand:AVX512_VEC_2 1 "register_operand")
11840   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
11841   (match_operand:SI 3 "const_0_to_1_operand")
11842   (match_operand:AVX512_VEC_2 4 "register_operand")
11843   (match_operand:<avx512fmaskmode> 5 "register_operand")]
11844  "TARGET_AVX512F"
11845{
11846  int mask = INTVAL (operands[3]);
  /* Immediate 0 replaces the low half, 1 replaces the high half.  */
11847  if (mask == 0)
11848    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
11849					   operands[2], operands[4],
11850					   operands[5]));
11851  else
11852    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
11853					   operands[2], operands[4],
11854					   operands[5]));
11855  DONE;
11856})
11857
;; Replace the low half of a V16FI vector: the result is operand 2
;; followed by elements 8-15 of operand 1.  Emitted as
;; vinsert<shuffletype>32x8 with immediate 0; gated on TARGET_AVX512DQ.
;; The Intel-syntax half of the template prints the immediate without the
;; AT&T "$" marker.
11858(define_insn "vec_set_lo_<mode><mask_name>"
11859  [(set (match_operand:V16FI 0 "register_operand" "=v")
11860	(vec_concat:V16FI
11861	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11862	  (vec_select:<ssehalfvecmode>
11863	    (match_operand:V16FI 1 "register_operand" "v")
11864	    (parallel [(const_int 8) (const_int 9)
11865		       (const_int 10) (const_int 11)
11866		       (const_int 12) (const_int 13)
11867		       (const_int 14) (const_int 15)]))))]
11868  "TARGET_AVX512DQ"
11869  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11870  [(set_attr "type" "sselog")
11871   (set_attr "length_immediate" "1")
11872   (set_attr "prefix" "evex")
11873   (set_attr "mode" "<sseinsnmode>")])
11874
;; Replace the high half of a V16FI vector: the result is elements 0-7 of
;; operand 1 followed by operand 2.  Emitted as vinsert<shuffletype>32x8
;; with immediate 1; gated on TARGET_AVX512DQ.  The Intel-syntax half of
;; the template prints the immediate without the AT&T "$" marker.
11875(define_insn "vec_set_hi_<mode><mask_name>"
11876  [(set (match_operand:V16FI 0 "register_operand" "=v")
11877	(vec_concat:V16FI
11878	  (vec_select:<ssehalfvecmode>
11879	    (match_operand:V16FI 1 "register_operand" "v")
11880	    (parallel [(const_int 0) (const_int 1)
11881		       (const_int 2) (const_int 3)
11882		       (const_int 4) (const_int 5)
11883		       (const_int 6) (const_int 7)]))
11884	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
11885  "TARGET_AVX512DQ"
11886  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11887  [(set_attr "type" "sselog")
11888   (set_attr "length_immediate" "1")
11889   (set_attr "prefix" "evex")
11890   (set_attr "mode" "<sseinsnmode>")])
11891
;; Replace the low half of a V8FI vector: the result is operand 2
;; followed by elements 4-7 of operand 1.  Emitted as
;; vinsert<shuffletype>64x4 with immediate 0; gated on TARGET_AVX512F.
;; The Intel-syntax half of the template prints the immediate without the
;; AT&T "$" marker.
11892(define_insn "vec_set_lo_<mode><mask_name>"
11893  [(set (match_operand:V8FI 0 "register_operand" "=v")
11894	(vec_concat:V8FI
11895	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
11896	  (vec_select:<ssehalfvecmode>
11897	    (match_operand:V8FI 1 "register_operand" "v")
11898	    (parallel [(const_int 4) (const_int 5)
11899		       (const_int 6) (const_int 7)]))))]
11900  "TARGET_AVX512F"
11901  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
11902  [(set_attr "type" "sselog")
11903   (set_attr "length_immediate" "1")
11904   (set_attr "prefix" "evex")
11905   (set_attr "mode" "XI")])
11906
;; Replace the high half of a V8FI vector: the result is elements 0-3 of
;; operand 1 followed by operand 2.  Emitted as vinsert<shuffletype>64x4
;; with immediate 1; gated on TARGET_AVX512F.  The Intel-syntax half of
;; the template prints the immediate without the AT&T "$" marker.
11907(define_insn "vec_set_hi_<mode><mask_name>"
11908  [(set (match_operand:V8FI 0 "register_operand" "=v")
11909	(vec_concat:V8FI
11910	  (vec_select:<ssehalfvecmode>
11911	    (match_operand:V8FI 1 "register_operand" "v")
11912	    (parallel [(const_int 0) (const_int 1)
11913		       (const_int 2) (const_int 3)]))
11914	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
11915  "TARGET_AVX512F"
11916  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
11917  [(set_attr "type" "sselog")
11918   (set_attr "length_immediate" "1")
11919   (set_attr "prefix" "evex")
11920   (set_attr "mode" "XI")])
11921
;; Masked vshuf{f,i}64x2 expander for VI8F_256 modes.  The 2-bit
;; immediate (operand 3) is expanded into the four explicit vec_select
;; indices expected by the _1_mask insn; operands 4 and 5 are the merge
;; source and mask register.
11922(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
11923  [(match_operand:VI8F_256 0 "register_operand")
11924   (match_operand:VI8F_256 1 "register_operand")
11925   (match_operand:VI8F_256 2 "nonimmediate_operand")
11926   (match_operand:SI 3 "const_0_to_3_operand")
11927   (match_operand:VI8F_256 4 "register_operand")
11928   (match_operand:QI 5 "register_operand")]
11929  "TARGET_AVX512DQ"
11930{
11931  int mask = INTVAL (operands[3]);
  /* Bit 0 picks element pair 0-1 or 2-3 of operand 1; bit 1 picks pair
     4-5 or 6-7 of the concatenation, i.e. a pair of operand 2.  */
11932  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
11933      (operands[0], operands[1], operands[2],
11934       GEN_INT (((mask >> 0) & 1) * 2 + 0),
11935       GEN_INT (((mask >> 0) & 1) * 2 + 1),
11936       GEN_INT (((mask >> 1) & 1) * 2 + 4),
11937       GEN_INT (((mask >> 1) & 1) * 2 + 5),
11938       operands[4], operands[5]));
11939  DONE;
11940})
11941
;; vshuf{f,i}64x2 on VI8F_256 modes, written as a vec_select over the
;; concatenation of operands 1 and 2.  The insn only matches when each
;; index pair is consecutive (operand 4 = operand 3 + 1 and operand 6 =
;; operand 5 + 1); the indices are then folded back into the 2-bit
;; immediate.
;; NOTE(review): the expander avx512dq_shuf_<shuffletype>64x2_mask is
;; gated on TARGET_AVX512DQ while this insn tests TARGET_AVX512VL, and the
;; mode attribute is XI although the sibling 32x4 pattern uses
;; <sseinsnmode> -- confirm both are intended.
11942(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
11943  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
11944	(vec_select:VI8F_256
11945	  (vec_concat:<ssedoublemode>
11946	    (match_operand:VI8F_256 1 "register_operand" "v")
11947	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
11948	  (parallel [(match_operand 3  "const_0_to_3_operand")
11949		     (match_operand 4  "const_0_to_3_operand")
11950		     (match_operand 5  "const_4_to_7_operand")
11951		     (match_operand 6  "const_4_to_7_operand")])))]
11952  "TARGET_AVX512VL
11953   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
11954       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1))"
11955{
11956  int mask;
  /* Bit 0 comes from the first index pair, bit 1 from the second pair
     after removing its offset of 4.  */
11957  mask = INTVAL (operands[3]) / 2;
11958  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
11959  operands[3] = GEN_INT (mask);
11960  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
11961}
11962  [(set_attr "type" "sselog")
11963   (set_attr "length_immediate" "1")
11964   (set_attr "prefix" "evex")
11965   (set_attr "mode" "XI")])
11966
;; Masked vshuf{f,i}64x2 expander for V8FI modes.  The 8-bit immediate
;; (operand 3) is split into four 2-bit fields, each expanded into a
;; consecutive pair of vec_select indices for the _1_mask insn; operands
;; 4 and 5 are the merge source and mask register.
11967(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
11968  [(match_operand:V8FI 0 "register_operand")
11969   (match_operand:V8FI 1 "register_operand")
11970   (match_operand:V8FI 2 "nonimmediate_operand")
11971   (match_operand:SI 3 "const_0_to_255_operand")
11972   (match_operand:V8FI 4 "register_operand")
11973   (match_operand:QI 5 "register_operand")]
11974  "TARGET_AVX512F"
11975{
11976  int mask = INTVAL (operands[3]);
  /* Fields at bits 0 and 2 index pairs of operand 1 (indices 0-7);
     fields at bits 4 and 6 index pairs of operand 2 (indices 8-15 of the
     concatenation).  */
11977  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
11978      (operands[0], operands[1], operands[2],
11979       GEN_INT (((mask >> 0) & 3) * 2),
11980       GEN_INT (((mask >> 0) & 3) * 2 + 1),
11981       GEN_INT (((mask >> 2) & 3) * 2),
11982       GEN_INT (((mask >> 2) & 3) * 2 + 1),
11983       GEN_INT (((mask >> 4) & 3) * 2 + 8),
11984       GEN_INT (((mask >> 4) & 3) * 2 + 9),
11985       GEN_INT (((mask >> 6) & 3) * 2 + 8),
11986       GEN_INT (((mask >> 6) & 3) * 2 + 9),
11987       operands[4], operands[5]));
11988  DONE;
11989})
11990
;; vshuf{f,i}64x2 on V8FI modes, written as a vec_select over the
;; concatenation of operands 1 and 2.  The first four indices must fall in
;; 0-7 and the last four in 8-15, and the insn only matches when each
;; index pair is consecutive; the pairs are then folded back into the
;; 8-bit immediate.
11991(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
11992  [(set (match_operand:V8FI 0 "register_operand" "=v")
11993	(vec_select:V8FI
11994	  (vec_concat:<ssedoublemode>
11995	    (match_operand:V8FI 1 "register_operand" "v")
11996	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
11997	  (parallel [(match_operand 3  "const_0_to_7_operand")
11998		     (match_operand 4  "const_0_to_7_operand")
11999		     (match_operand 5  "const_0_to_7_operand")
12000		     (match_operand 6  "const_0_to_7_operand")
12001		     (match_operand 7  "const_8_to_15_operand")
12002		     (match_operand 8  "const_8_to_15_operand")
12003		     (match_operand 9  "const_8_to_15_operand")
12004		     (match_operand 10  "const_8_to_15_operand")])))]
12005  "TARGET_AVX512F
12006   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12007       && INTVAL (operands[5]) == (INTVAL (operands[6]) - 1)
12008       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12009       && INTVAL (operands[9]) == (INTVAL (operands[10]) - 1))"
12010{
12011  int mask;
  /* One 2-bit field per index pair; the operand 2 pairs carry an offset
     of 8 that is removed first.  */
12012  mask = INTVAL (operands[3]) / 2;
12013  mask |= INTVAL (operands[5]) / 2 << 2;
12014  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
12015  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
12016  operands[3] = GEN_INT (mask);
12017
12018  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12019}
12020  [(set_attr "type" "sselog")
12021   (set_attr "length_immediate" "1")
12022   (set_attr "prefix" "evex")
12023   (set_attr "mode" "<sseinsnmode>")])
12024
;; Masked vshuf{f,i}32x4 expander for VI4F_256 modes.  The 2-bit
;; immediate (operand 3) is expanded into eight explicit vec_select
;; indices for the _1_mask insn; operands 4 and 5 are the merge source
;; and mask register.
12025(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
12026  [(match_operand:VI4F_256 0 "register_operand")
12027   (match_operand:VI4F_256 1 "register_operand")
12028   (match_operand:VI4F_256 2 "nonimmediate_operand")
12029   (match_operand:SI 3 "const_0_to_3_operand")
12030   (match_operand:VI4F_256 4 "register_operand")
12031   (match_operand:QI 5 "register_operand")]
12032  "TARGET_AVX512VL"
12033{
12034  int mask = INTVAL (operands[3]);
  /* Bit 0 picks elements 0-3 or 4-7 of operand 1; bit 1 picks elements
     8-11 or 12-15 of the concatenation, i.e. a group of operand 2.  */
12035  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
12036      (operands[0], operands[1], operands[2],
12037       GEN_INT (((mask >> 0) & 1) * 4 + 0),
12038       GEN_INT (((mask >> 0) & 1) * 4 + 1),
12039       GEN_INT (((mask >> 0) & 1) * 4 + 2),
12040       GEN_INT (((mask >> 0) & 1) * 4 + 3),
12041       GEN_INT (((mask >> 1) & 1) * 4 + 8),
12042       GEN_INT (((mask >> 1) & 1) * 4 + 9),
12043       GEN_INT (((mask >> 1) & 1) * 4 + 10),
12044       GEN_INT (((mask >> 1) & 1) * 4 + 11),
12045       operands[4], operands[5]));
12046  DONE;
12047})
12048
;; vshuf{f,i}32x4 on VI4F_256 modes, written as a vec_select over the
;; concatenation of operands 1 and 2.  The insn only matches when
;; operands 3-6 are four consecutive indices and operands 7-10 are four
;; consecutive indices; the two group starts are then folded back into
;; the 2-bit immediate.
12049(define_insn "<mask_codefor>avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
12050  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
12051	(vec_select:VI4F_256
12052	  (vec_concat:<ssedoublemode>
12053	    (match_operand:VI4F_256 1 "register_operand" "v")
12054	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
12055	  (parallel [(match_operand 3  "const_0_to_7_operand")
12056		     (match_operand 4  "const_0_to_7_operand")
12057		     (match_operand 5  "const_0_to_7_operand")
12058		     (match_operand 6  "const_0_to_7_operand")
12059		     (match_operand 7  "const_8_to_15_operand")
12060		     (match_operand 8  "const_8_to_15_operand")
12061		     (match_operand 9  "const_8_to_15_operand")
12062		     (match_operand 10 "const_8_to_15_operand")])))]
12063  "TARGET_AVX512VL
12064   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12065       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12066       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12067       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12068       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12069       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3))"
12070{
12071  int mask;
  /* Bit 0 comes from the first group, bit 1 from the second group after
     removing its offset of 8.  */
12072  mask = INTVAL (operands[3]) / 4;
12073  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
12074  operands[3] = GEN_INT (mask);
12075
12076  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
12077}
12078  [(set_attr "type" "sselog")
12079   (set_attr "length_immediate" "1")
12080   (set_attr "prefix" "evex")
12081   (set_attr "mode" "<sseinsnmode>")])
12082
;; Masked vshuf{f,i}32x4 expander for V16FI modes.  The 8-bit immediate
;; (operand 3) is split into four 2-bit fields, each expanded into four
;; consecutive vec_select indices for the _1_mask insn; operands 4 and 5
;; are the merge source and mask register.
12083(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
12084  [(match_operand:V16FI 0 "register_operand")
12085   (match_operand:V16FI 1 "register_operand")
12086   (match_operand:V16FI 2 "nonimmediate_operand")
12087   (match_operand:SI 3 "const_0_to_255_operand")
12088   (match_operand:V16FI 4 "register_operand")
12089   (match_operand:HI 5 "register_operand")]
12090  "TARGET_AVX512F"
12091{
12092  int mask = INTVAL (operands[3]);
  /* Fields at bits 0 and 2 index groups of operand 1 (indices 0-15);
     fields at bits 4 and 6 index groups of operand 2 (indices 16-31 of
     the concatenation).  */
12093  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
12094      (operands[0], operands[1], operands[2],
12095       GEN_INT (((mask >> 0) & 3) * 4),
12096       GEN_INT (((mask >> 0) & 3) * 4 + 1),
12097       GEN_INT (((mask >> 0) & 3) * 4 + 2),
12098       GEN_INT (((mask >> 0) & 3) * 4 + 3),
12099       GEN_INT (((mask >> 2) & 3) * 4),
12100       GEN_INT (((mask >> 2) & 3) * 4 + 1),
12101       GEN_INT (((mask >> 2) & 3) * 4 + 2),
12102       GEN_INT (((mask >> 2) & 3) * 4 + 3),
12103       GEN_INT (((mask >> 4) & 3) * 4 + 16),
12104       GEN_INT (((mask >> 4) & 3) * 4 + 17),
12105       GEN_INT (((mask >> 4) & 3) * 4 + 18),
12106       GEN_INT (((mask >> 4) & 3) * 4 + 19),
12107       GEN_INT (((mask >> 6) & 3) * 4 + 16),
12108       GEN_INT (((mask >> 6) & 3) * 4 + 17),
12109       GEN_INT (((mask >> 6) & 3) * 4 + 18),
12110       GEN_INT (((mask >> 6) & 3) * 4 + 19),
12111       operands[4], operands[5]));
12112  DONE;
12113})
12114
;; vshuf{f,i}32x4 on V16FI modes, written as a vec_select over the
;; concatenation of operands 1 and 2.  The first eight indices must fall
;; in 0-15 and the last eight in 16-31, and the insn only matches when
;; every group of four indices is consecutive; the four group starts are
;; then folded back into the 8-bit immediate.
12115(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
12116  [(set (match_operand:V16FI 0 "register_operand" "=v")
12117	(vec_select:V16FI
12118	  (vec_concat:<ssedoublemode>
12119	    (match_operand:V16FI 1 "register_operand" "v")
12120	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
12121	  (parallel [(match_operand 3  "const_0_to_15_operand")
12122		     (match_operand 4  "const_0_to_15_operand")
12123		     (match_operand 5  "const_0_to_15_operand")
12124		     (match_operand 6  "const_0_to_15_operand")
12125		     (match_operand 7  "const_0_to_15_operand")
12126		     (match_operand 8  "const_0_to_15_operand")
12127		     (match_operand 9  "const_0_to_15_operand")
12128		     (match_operand 10  "const_0_to_15_operand")
12129		     (match_operand 11  "const_16_to_31_operand")
12130		     (match_operand 12  "const_16_to_31_operand")
12131		     (match_operand 13  "const_16_to_31_operand")
12132		     (match_operand 14  "const_16_to_31_operand")
12133		     (match_operand 15  "const_16_to_31_operand")
12134		     (match_operand 16  "const_16_to_31_operand")
12135		     (match_operand 17  "const_16_to_31_operand")
12136		     (match_operand 18  "const_16_to_31_operand")])))]
12137  "TARGET_AVX512F
12138   && (INTVAL (operands[3]) == (INTVAL (operands[4]) - 1)
12139       && INTVAL (operands[3]) == (INTVAL (operands[5]) - 2)
12140       && INTVAL (operands[3]) == (INTVAL (operands[6]) - 3)
12141       && INTVAL (operands[7]) == (INTVAL (operands[8]) - 1)
12142       && INTVAL (operands[7]) == (INTVAL (operands[9]) - 2)
12143       && INTVAL (operands[7]) == (INTVAL (operands[10]) - 3)
12144       && INTVAL (operands[11]) == (INTVAL (operands[12]) - 1)
12145       && INTVAL (operands[11]) == (INTVAL (operands[13]) - 2)
12146       && INTVAL (operands[11]) == (INTVAL (operands[14]) - 3)
12147       && INTVAL (operands[15]) == (INTVAL (operands[16]) - 1)
12148       && INTVAL (operands[15]) == (INTVAL (operands[17]) - 2)
12149       && INTVAL (operands[15]) == (INTVAL (operands[18]) - 3))"
12150{
12151  int mask;
  /* One 2-bit field per group of four indices; the operand 2 groups
     carry an offset of 16 that is removed first.  */
12152  mask = INTVAL (operands[3]) / 4;
12153  mask |= INTVAL (operands[7]) / 4 << 2;
12154  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
12155  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
12156  operands[3] = GEN_INT (mask);
12157
12158  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
12159}
12160  [(set_attr "type" "sselog")
12161   (set_attr "length_immediate" "1")
12162   (set_attr "prefix" "evex")
12163   (set_attr "mode" "<sseinsnmode>")])
12164
;; Expand an immediate-controlled V16SI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of avx512f_pshufd_1.
;; Operand 2 is the 8-bit immediate; operands 3 and 4 are forwarded
;; unchanged as the trailing arguments (presumably the merge source and
;; the write mask -- confirm against the mask_name substitution).
(define_expand "avx512f_pshufdv3_mask"
  [(match_operand:V16SI 0 "register_operand")
   (match_operand:V16SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16SI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[16];
  int i;

  /* Result element I is chosen by 2-bit field (I % 4) of the immediate,
     biased by the first index of the group of four that I belongs to.  */
  for (i = 0; i < 16; i++)
    sel[i] = GEN_INT (((imm >> (2 * (i % 4))) & 3) + (i & ~3));

  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
					sel[0], sel[1], sel[2], sel[3],
					sel[4], sel[5], sel[6], sel[7],
					sel[8], sel[9], sel[10], sel[11],
					sel[12], sel[13], sel[14], sel[15],
					operands[3], operands[4]));
  DONE;
})
12194
;; V16SI element shuffle expressed as a vec_select.  The insn condition
;; requires selectors 6..17 to equal selectors 2..5 plus 4, 8 and 12
;; respectively, i.e. every group of four elements is permuted the same
;; way, so operands 2..5 alone determine the immediate.
(define_insn "avx512f_pshufd_1<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(vec_select:V16SI
	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (match_operand 6 "const_4_to_7_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_4_to_7_operand")
		     (match_operand 10 "const_8_to_11_operand")
		     (match_operand 11 "const_8_to_11_operand")
		     (match_operand 12 "const_8_to_11_operand")
		     (match_operand 13 "const_8_to_11_operand")
		     (match_operand 14 "const_12_to_15_operand")
		     (match_operand 15 "const_12_to_15_operand")
		     (match_operand 16 "const_12_to_15_operand")
		     (match_operand 17 "const_12_to_15_operand")])))]
  "TARGET_AVX512F
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
{
  int imm = 0;
  int i;

  /* Pack selectors 2..5 as consecutive 2-bit fields, lowest first.
     All four are read before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12242
;; Expand an immediate-controlled V8SI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of avx2_pshufd_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshufdv3_mask"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[8];
  int i;

  /* Elements 0..3 use the four 2-bit fields of the immediate directly;
     elements 4..7 use the same fields offset by 4.  */
  for (i = 0; i < 8; i++)
    sel[i] = GEN_INT (((imm >> (2 * (i % 4))) & 3) + (i & ~3));

  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
				     sel[0], sel[1], sel[2], sel[3],
				     sel[4], sel[5], sel[6], sel[7],
				     operands[3], operands[4]));
  DONE;
})
12264
;; Expand an immediate-controlled V8SI shuffle into the explicit
;; vec_select form matched by avx2_pshufd_1.
(define_expand "avx2_pshufdv3"
  [(match_operand:V8SI 0 "register_operand")
   (match_operand:V8SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Each 2-bit field of the immediate yields one selector for elements
     0..3 and the same selector plus 4 for elements 4..7.  */
  for (i = 0; i < 4; i++)
    {
      const int field = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (field);
      hi[i] = GEN_INT (field + 4);
    }

  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
				lo[0], lo[1], lo[2], lo[3],
				hi[0], hi[1], hi[2], hi[3]));
  DONE;
})
12283
;; V8SI element shuffle expressed as a vec_select.  The insn condition
;; requires selectors 6..9 to equal selectors 2..5 plus 4, so operands
;; 2..5 alone determine the immediate.
(define_insn "avx2_pshufd_1<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(vec_select:V8SI
	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (match_operand 6 "const_4_to_7_operand")
		     (match_operand 7 "const_4_to_7_operand")
		     (match_operand 8 "const_4_to_7_operand")
		     (match_operand 9 "const_4_to_7_operand")])))]
  "TARGET_AVX2
   && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
{
  int imm = 0;
  int i;

  /* Pack selectors 2..5 as consecutive 2-bit fields, lowest first.
     All four are read before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12316
;; Expand an immediate-controlled V4SI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of sse2_pshufd_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshufd_mask"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V4SI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, lowest field first.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
				     sel[0], sel[1], sel[2], sel[3],
				     operands[3], operands[4]));
  DONE;
})
12334
;; Expand an immediate-controlled V4SI shuffle into the explicit
;; vec_select form matched by sse2_pshufd_1.  Only the low eight bits of
;; the immediate are used.
(define_expand "sse2_pshufd"
  [(match_operand:V4SI 0 "register_operand")
   (match_operand:V4SI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, lowest field first.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
				sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
12349
;; V4SI element shuffle expressed as a vec_select with four arbitrary
;; selectors in 0..3; the output code folds them into the immediate.
(define_insn "sse2_pshufd_1<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(vec_select:V4SI
	  (match_operand:V4SI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
{
  int imm = 0;
  int i;

  /* Pack selectors 2..5 as consecutive 2-bit fields, lowest first.
     All four are read before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12374
;; V32HI shuffle controlled by an 8-bit immediate (operand 2).  The
;; operation is kept opaque as UNSPEC_PSHUFLW rather than spelled out as a
;; vec_select.
(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(unspec:V32HI
	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_PSHUFLW))]
  "TARGET_AVX512BW"
  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   ;; The template always prints immediate operand 2, so account for its
   ;; byte in the length computation, as the narrower vpshuflw patterns
   ;; (avx2_pshuflw_1, sse2_pshuflw_1) already do.
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12386
;; Expand an immediate-controlled V16HI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of avx2_pshuflw_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshuflwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Each 2-bit field of the immediate yields one selector for elements
     0..3 and the same selector plus 8 for elements 8..11.  */
  for (i = 0; i < 4; i++)
    {
      const int field = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (field);
      hi[i] = GEN_INT (field + 8);
    }

  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
				      lo[0], lo[1], lo[2], lo[3],
				      hi[0], hi[1], hi[2], hi[3],
				      operands[3], operands[4]));
  DONE;
})
12408
;; Expand an immediate-controlled V16HI shuffle into the explicit
;; vec_select form matched by avx2_pshuflw_1.
(define_expand "avx2_pshuflwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Each 2-bit field of the immediate yields one selector for elements
     0..3 and the same selector plus 8 for elements 8..11.  */
  for (i = 0; i < 4; i++)
    {
      const int field = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (field);
      hi[i] = GEN_INT (field + 8);
    }

  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
				 lo[0], lo[1], lo[2], lo[3],
				 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})
12427
;; V16HI shuffle expressed as a vec_select: elements 0..3 and 8..11 are
;; selected by operands, while elements 4..7 and 12..15 are fixed to their
;; own indices.  The insn condition requires selectors 6..9 to equal
;; selectors 2..5 plus 8, so operands 2..5 alone determine the immediate.
(define_insn "avx2_pshuflw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)
		     (match_operand 6 "const_8_to_11_operand")
		     (match_operand 7 "const_8_to_11_operand")
		     (match_operand 8 "const_8_to_11_operand")
		     (match_operand 9 "const_8_to_11_operand")
		     (const_int 12)
		     (const_int 13)
		     (const_int 14)
		     (const_int 15)])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = 0;
  int i;

  /* Pack selectors 2..5 as consecutive 2-bit fields, lowest first.
     All four are read before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12468
;; Expand an immediate-controlled V8HI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of sse2_pshuflw_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshuflw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, lowest field first.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
				      sel[0], sel[1], sel[2], sel[3],
				      operands[3], operands[4]));
  DONE;
})
12486
;; Expand an immediate-controlled V8HI shuffle into the explicit
;; vec_select form matched by sse2_pshuflw_1.  Only the low eight bits of
;; the immediate are used.
(define_expand "sse2_pshuflw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, lowest field first.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT ((imm >> (2 * i)) & 3);

  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
12501
;; V8HI shuffle expressed as a vec_select: elements 0..3 are selected by
;; operands 2..5 (each in 0..3), elements 4..7 are fixed to their own
;; indices.
(define_insn "sse2_pshuflw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")
		     (const_int 4)
		     (const_int 5)
		     (const_int 6)
		     (const_int 7)])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  int imm = 0;
  int i;

  /* Pack selectors 2..5 as consecutive 2-bit fields, lowest first.
     All four are read before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= INTVAL (operands[2 + i]) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12531
;; Expand an immediate-controlled V16HI shuffle into the explicit
;; vec_select form matched by avx2_pshufhw_1.
(define_expand "avx2_pshufhwv3"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Each 2-bit field of the immediate yields a selector offset by 4 for
     elements 4..7 and the same field offset by 12 for elements 12..15.  */
  for (i = 0; i < 4; i++)
    {
      const int field = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (field + 4);
      hi[i] = GEN_INT (field + 12);
    }

  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
				 lo[0], lo[1], lo[2], lo[3],
				 hi[0], hi[1], hi[2], hi[3]));
  DONE;
})
12550
;; V32HI shuffle controlled by an 8-bit immediate (operand 2).  The
;; operation is kept opaque as UNSPEC_PSHUFHW rather than spelled out as a
;; vec_select.
(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(unspec:V32HI
	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_PSHUFHW))]
  "TARGET_AVX512BW"
  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   ;; The template always prints immediate operand 2, so account for its
   ;; byte in the length computation, as the narrower vpshufhw patterns
   ;; (avx2_pshufhw_1, sse2_pshufhw_1) already do.
   (set_attr "length_immediate" "1")
   (set_attr "mode" "XI")])
12562
;; Expand an immediate-controlled V16HI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of avx2_pshufhw_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshufhwv3_mask"
  [(match_operand:V16HI 0 "register_operand")
   (match_operand:V16HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V16HI 3 "register_operand")
   (match_operand:HI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  rtx lo[4], hi[4];
  int i;

  /* Each 2-bit field of the immediate yields a selector offset by 4 for
     elements 4..7 and the same field offset by 12 for elements 12..15.  */
  for (i = 0; i < 4; i++)
    {
      const int field = (imm >> (2 * i)) & 3;

      lo[i] = GEN_INT (field + 4);
      hi[i] = GEN_INT (field + 12);
    }

  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
				      lo[0], lo[1], lo[2], lo[3],
				      hi[0], hi[1], hi[2], hi[3],
				      operands[3], operands[4]));
  DONE;
})
12584
;; V16HI shuffle expressed as a vec_select: elements 0..3 and 8..11 are
;; fixed to their own indices, while elements 4..7 and 12..15 are selected
;; by operands.  The insn condition requires selectors 6..9 to equal
;; selectors 2..5 plus 8, so operands 2..5 alone determine the immediate.
(define_insn "avx2_pshufhw_1<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(vec_select:V16HI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand")
		     (match_operand 3 "const_4_to_7_operand")
		     (match_operand 4 "const_4_to_7_operand")
		     (match_operand 5 "const_4_to_7_operand")
		     (const_int 8)
		     (const_int 9)
		     (const_int 10)
		     (const_int 11)
		     (match_operand 6 "const_12_to_15_operand")
		     (match_operand 7 "const_12_to_15_operand")
		     (match_operand 8 "const_12_to_15_operand")
		     (match_operand 9 "const_12_to_15_operand")])))]
  "TARGET_AVX2
   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
{
  int imm = 0;
  int i;

  /* Selectors 2..5 are element indices starting at 4; rebase each to
     zero and pack it as a 2-bit field, lowest first.  All four are read
     before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "maybe_evex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "OI")])
12625
;; Expand an immediate-controlled V8HI shuffle into the explicit
;; vec_select form matched by the "_mask" variant of sse2_pshufhw_1.
;; Operands 3 and 4 are forwarded unchanged as the trailing arguments.
(define_expand "avx512vl_pshufhw_mask"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:V8HI 3 "register_operand")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512VL && TARGET_AVX512BW"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, offset by 4 so that
     it indexes elements 4..7.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
				      sel[0], sel[1], sel[2], sel[3],
				      operands[3], operands[4]));
  DONE;
})
12643
;; Expand an immediate-controlled V8HI shuffle into the explicit
;; vec_select form matched by sse2_pshufhw_1.  Only the low eight bits of
;; the immediate are used.
(define_expand "sse2_pshufhw"
  [(match_operand:V8HI 0 "register_operand")
   (match_operand:V8HI 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_int_operand")]
  "TARGET_SSE2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int i;

  /* One selector per 2-bit field of the immediate, offset by 4 so that
     it indexes elements 4..7.  */
  for (i = 0; i < 4; i++)
    sel[i] = GEN_INT (((imm >> (2 * i)) & 3) + 4);

  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
				 sel[0], sel[1], sel[2], sel[3]));
  DONE;
})
12658
;; V8HI shuffle expressed as a vec_select: elements 0..3 are fixed to
;; their own indices, elements 4..7 are selected by operands 2..5 (each in
;; 4..7).
(define_insn "sse2_pshufhw_1<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_select:V8HI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")
	  (parallel [(const_int 0)
		     (const_int 1)
		     (const_int 2)
		     (const_int 3)
		     (match_operand 2 "const_4_to_7_operand")
		     (match_operand 3 "const_4_to_7_operand")
		     (match_operand 4 "const_4_to_7_operand")
		     (match_operand 5 "const_4_to_7_operand")])))]
  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
{
  int imm = 0;
  int i;

  /* Selectors 2..5 are element indices starting at 4; rebase each to
     zero and pack it as a 2-bit field, lowest first.  All four are read
     before operand 2 is overwritten below.  */
  for (i = 0; i < 4; i++)
    imm |= (INTVAL (operands[2 + i]) - 4) << (2 * i);

  /* Operand 2 is reused to carry the immediate into the template.  */
  operands[2] = GEN_INT (imm);

  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "maybe_vex")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12688
;; Build a V4SI whose element 0 is SI operand 1 and whose remaining
;; elements come from operand 2, which the preparation code sets to the
;; all-zero V4SI constant.
(define_expand "sse2_loadd"
  [(set (match_operand:V4SI 0 "register_operand")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 1 "nonimmediate_operand"))
	  (match_dup 2)
	  (const_int 1)))]
  "TARGET_SSE"
{
  /* Supply the zero vector referenced by the match_dup above.  */
  operands[2] = CONST0_RTX (V4SImode);
})
12698
;; Merge SI operand 2 into element 0 of a V4SI; the other elements come
;; from operand 1, which is either a register or zero (constraint "C").
;; Alternatives, in order:
;;   0: memory source, zero background      -> %vmovd (sse2)
;;   1: general-register source, zero bg    -> %vmovd (sse2)
;;   2: memory source, zero background      -> movss (noavx)
;;   3: SSE-register source, dest tied to operand 1 -> movss (noavx)
;;   4: SSE-register source, separate operand 1     -> vmovss (avx)
(define_insn "sse2_loadld"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,Yi,x,x,x")
	(vec_merge:V4SI
	  (vec_duplicate:V4SI
	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,x"))
	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,x")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   %vmovd\t{%2, %0|%0, %2}
   %vmovd\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   movss\t{%2, %0|%0, %2}
   vmovss\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,SF,SF")])
12717
;; Extract one scalar element (index operand 2) from a VI12_128 vector
;; register into a general register (alternative 0) or memory
;; (alternative 1).  The register alternative prints the destination with
;; the %k modifier.
(define_insn "*vec_extract<mode>"
  [(set (match_operand:<ssescalarmode> 0 "nonimmediate_operand" "=r,m")
	(vec_select:<ssescalarmode>
	  (match_operand:VI12_128 1 "register_operand" "x,x")
	  (parallel
	    [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE4_1"
  "@
   %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
   %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   ;; The V8HI register-destination form is the only combination that gets
   ;; prefix_data16 "1" and no prefix_extra; every other combination gets
   ;; prefix_extra "1" and the default prefix_data16.
   (set (attr "prefix_data16")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "1")
       (const_string "*")))
   (set (attr "prefix_extra")
     (if_then_else
       (and (eq_attr "alternative" "0")
	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
       (const_string "*")
       (const_string "1")))
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
12744
;; Extract HI element operand 2 (0..7) of a V8HI register into a general
;; register using pextrw.  Enabled only for SSE2 targets without SSE4.1.
(define_insn "*vec_extractv8hi_sse2"
  [(set (match_operand:HI 0 "register_operand" "=r")
	(vec_select:HI
	  (match_operand:V8HI 1 "register_operand" "x")
	  (parallel
	    [(match_operand:SI 2 "const_0_to_7_operand")])))]
  "TARGET_SSE2 && !TARGET_SSE4_1"
  "pextrw\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
12757
;; Extract QI element operand 2 (0..15) of a V16QI register and
;; zero-extend it into an SWI48 general register, using a single pextrb.
(define_insn "*vec_extractv16qi_zext"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(zero_extend:SWI48
	  (vec_select:QI
	    (match_operand:V16QI 1 "register_operand" "x")
	    (parallel
	      [(match_operand:SI 2 "const_0_to_15_operand")]))))]
  "TARGET_SSE4_1"
  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
12772
;; Extract HI element operand 2 (0..7) of a V8HI register and zero-extend
;; it into an SWI48 general register, using a single pextrw.
(define_insn "*vec_extractv8hi_zext"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(zero_extend:SWI48
	  (vec_select:HI
	    (match_operand:V8HI 1 "register_operand" "x")
	    (parallel
	      [(match_operand:SI 2 "const_0_to_7_operand")]))))]
  "TARGET_SSE2"
  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
12787
;; Extract one scalar element from a VI12_128 vector held in offsettable
;; memory (constraint "o") into a general register.  The "#" template
;; means no assembly is emitted directly; the insn has to be split.
(define_insn "*vec_extract<mode>_mem"
  [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
	(vec_select:<ssescalarmode>
	  (match_operand:VI12_128 1 "memory_operand" "o")
	  (parallel
	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE"
  "#")
12796
;; Extract element 0 of a vector as an SWI48 scalar.  The insn condition
;; rejects the memory-to-memory combination.  The "#" template means no
;; assembly is emitted directly; the insn has to be split.
(define_insn "*vec_extract<ssevecmodelower>_0"
  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=r ,r,x ,m")
	(vec_select:SWI48
	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "mYj,x,xm,x")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#"
  ;; Only the second alternative is restricted to the "sse4" isa.
  [(set_attr "isa" "*,sse4,*,*")])
12805
;; Zero-extend element 0 of a V4SI register into a DI general register.
;; After reload the insn is split into a plain SI-to-DI zero extension
;; whose source is the same hard register viewed in SImode.
(define_insn_and_split "*vec_extractv4si_0_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "x")
	    (parallel [(const_int 0)]))))]
  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
  /* Keep the register number, change only the mode it is viewed in.  */
  const unsigned int vec_regno = REGNO (operands[1]);

  operands[1] = gen_rtx_REG (SImode, vec_regno);
})
12817
;; Extract element 0 of a V2DI as DImode on non-64-bit targets, with the
;; result in an SSE register or in memory.  The insn condition rejects the
;; memory-to-memory combination.  The "#" template means no assembly is
;; emitted directly; the insn has to be split.
(define_insn "*vec_extractv2di_0_sse"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=x,m")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand" "xm,x")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && !TARGET_64BIT
   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "#")
12826
;; After reload, extracting element 0 from a vector held in a register
;; becomes a plain scalar move from the same hard register viewed in the
;; scalar mode.
(define_split
  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
	(vec_select:SWI48x
	  (match_operand:<ssevecmode> 1 "register_operand")
	  (parallel [(const_int 0)])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Keep the register number, change only the mode it is viewed in.  */
  const unsigned int vec_regno = REGNO (operands[1]);

  operands[1] = gen_rtx_REG (<MODE>mode, vec_regno);
})
12835
;; Extract SI element operand 2 (0..3) of a V4SI register.
;; Alternative 0 stores to a general register or memory with pextrd.
;; Alternatives 1-3 leave the result in an SSE register by shifting the
;; whole source right by a byte count derived from the element index.
(define_insn "*vec_extractv4si"
  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,Yr,*x,x")
	(vec_select:SI
	  (match_operand:V4SI 1 "register_operand" "x,0,0,x")
	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
  "TARGET_SSE4_1"
{
  const char *shift_templ;

  switch (which_alternative)
    {
    case 0:
      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";

    case 1:
    case 2:
      /* Destination is tied to the source: two-operand form.  */
      shift_templ = "psrldq\t{%2, %0|%0, %2}";
      break;

    case 3:
      shift_templ = "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
      break;

    default:
      gcc_unreachable ();
    }

  /* The shift count is the element index times four.  */
  operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
  return shift_templ;
}
  [(set_attr "isa" "*,noavx,noavx,avx")
   (set_attr "type" "sselog1,sseishft1,sseishft1,sseishft1")
   (set_attr "prefix_extra" "1,*,*,*")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex,orig,orig,vex")
   (set_attr "mode" "TI")])
12867
;; Extract SI element operand 2 (0..3) of a V4SI register and zero-extend
;; it into a DI general register, using a single pextrd whose destination
;; is printed with the %k modifier.  64-bit targets only.
(define_insn "*vec_extractv4si_zext"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "register_operand" "x")
	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
  "TARGET_64BIT && TARGET_SSE4_1"
  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
12881
;; Extract SI element operand 2 (0..3) from a V4SI held in offsettable
;; memory into an SSE or general register.  The "#" template means no
;; assembly is emitted directly; the insn has to be split.
(define_insn "*vec_extractv4si_mem"
  [(set (match_operand:SI 0 "register_operand" "=x,r")
	(vec_select:SI
	  (match_operand:V4SI 1 "memory_operand" "o,o")
	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
  "TARGET_SSE"
  "#")
12889
;; Zero-extend SI element operand 2 (0..3) of a V4SI held in offsettable
;; memory into a DI register.  After reload the insn is split into a
;; zero-extending load of the addressed SImode element.
(define_insn_and_split "*vec_extractv4si_zext_mem"
  [(set (match_operand:DI 0 "register_operand" "=x,r")
	(zero_extend:DI
	  (vec_select:SI
	    (match_operand:V4SI 1 "memory_operand" "o,o")
	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
  "TARGET_64BIT && TARGET_SSE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
{
  /* Byte offset of the selected element: index times four.  */
  const HOST_WIDE_INT byte_off = INTVAL (operands[2]) * 4;

  operands[1] = adjust_address (operands[1], SImode, byte_off);
})
12903
;; Extract element 1 of a V2DI as DImode.  The insn condition rejects the
;; memory-to-memory combination.  Alternatives, in order:
;;   0: register source -> general register or memory, pextrq (x64_sse4)
;;   1: register source -> memory, movhps
;;   2: source tied to destination, psrldq by 8 (sse2_noavx)
;;   3: register source -> SSE register, vpsrldq by 8 (avx)
;;   4: register source -> SSE register, movhlps (noavx)
;;   5: offsettable memory source -> SSE register, "#" (split later)
;;   6: offsettable memory source -> general register, "#" (x64)
(define_insn "*vec_extractv2di_1"
  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,m,x,x,x,x,r")
	(vec_select:DI
	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,x,0,x,x,o,o")
	  (parallel [(const_int 1)])))]
  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
  "@
   %vpextrq\t{$1, %1, %0|%0, %1, 1}
   %vmovhps\t{%1, %0|%0, %1}
   psrldq\t{$8, %0|%0, 8}
   vpsrldq\t{$8, %1, %0|%0, %1, 8}
   movhlps\t{%1, %0|%0, %1}
   #
   #"
  [(set_attr "isa" "x64_sse4,*,sse2_noavx,avx,noavx,*,x64")
   (set_attr "type" "sselog1,ssemov,sseishft1,sseishft1,ssemov,ssemov,imov")
   (set_attr "length_immediate" "1,*,1,1,*,*,*")
   (set_attr "prefix_rex" "1,*,*,*,*,*,*")
   (set_attr "prefix_extra" "1,*,*,*,*,*,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,orig,vex,orig,*,*")
   (set_attr "mode" "TI,V2SF,TI,TI,V4SF,DI,DI")])
12925
;; After reload, extracting an element from a VI_128 vector in memory
;; becomes a plain scalar load from the element's own address.
(define_split
  [(set (match_operand:<ssescalarmode> 0 "register_operand")
	(vec_select:<ssescalarmode>
	  (match_operand:VI_128 1 "memory_operand")
	  (parallel
	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
  "TARGET_SSE && reload_completed"
  [(set (match_dup 0) (match_dup 1))]
{
  /* Byte offset of the element: its index times the scalar size.  */
  const int elt_size = GET_MODE_SIZE (<ssescalarmode>mode);
  const int byte_off = INTVAL (operands[2]) * elt_size;

  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, byte_off);
})
12939
;; Rewrite an SImode or DImode move whose source is the lowest part
;; (byte offset 0) of a 16-, 32- or 64-byte SSE/AVX/AVX512F vector
;; register into a vec_select of element 0 (the vec_extract* patterns).
;; Wider vectors are first narrowed by extracting their low half, once
;; for 32-byte vectors and twice for 64-byte vectors.
(define_split
  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
	(match_operand:SWI48x 1 "register_operand"))]
  "can_create_pseudo_p ()
   && GET_CODE (operands[1]) == SUBREG
   && REG_P (SUBREG_REG (operands[1]))
   && (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1]))) == MODE_VECTOR_INT
       || (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[1])))
	   == MODE_VECTOR_FLOAT))
   && SUBREG_BYTE (operands[1]) == 0
   && TARGET_SSE
   && (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 16
       || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 32
	   && TARGET_AVX)
       || (GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1]))) == 64
	   && TARGET_AVX512F))
   && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
  [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
					 (parallel [(const_int 0)])))]
{
  const bool simode_p = (<MODE>mode == SImode);
  rtx src = SUBREG_REG (operands[1]);
  const int vec_size = GET_MODE_SIZE (GET_MODE (src));

  /* 64-byte source: take the low 32-byte half first.  */
  if (vec_size == 64)
    {
      rtx half;

      if (simode_p)
	{
	  half = gen_reg_rtx (V8SImode);
	  emit_insn (gen_vec_extract_lo_v16si (half,
					       gen_lowpart (V16SImode, src)));
	}
      else
	{
	  half = gen_reg_rtx (V4DImode);
	  emit_insn (gen_vec_extract_lo_v8di (half,
					      gen_lowpart (V8DImode, src)));
	}
      src = half;
    }

  if (vec_size == 64 || vec_size == 32)
    {
      /* 32-byte source (original or just produced above): take the low
	 half into a fresh <ssevecmode> register.  */
      rtx low = gen_reg_rtx (<ssevecmode>mode);

      if (simode_p)
	emit_insn (gen_vec_extract_lo_v8si (low,
					    gen_lowpart (V8SImode, src)));
      else
	emit_insn (gen_vec_extract_lo_v4di (low,
					    gen_lowpart (V4DImode, src)));
      src = low;
    }
  else if (vec_size == 16)
    /* Already the right width: only reinterpret the mode.  */
    src = gen_lowpart (<ssevecmode>mode, src);

  operands[1] = src;
})
12998
;; Build a V2SI vector from two SImode values when SSE4.1 is enabled.
;; Alternatives, in order:
;;   0-1  pinsrd of operand 2 into operand 0, which is tied to operand 1
;;   2    vpinsrd, three-operand AVX form
;;   3-4  punpckldq with operand 1 tied to operand 0
;;   5    vpunpckldq, three-operand AVX form
;;   6    %vmovd of operand 1; operand 2 must satisfy constraint "C"
;;	  (presumably the zero constant -- confirm in constraints.md)
;;   7    MMX punpckldq, destination in a "y" register
;;   8    MMX movd of operand 1; operand 2 must satisfy constraint "C"
(define_insn "*vec_concatv2si_sse4_1"
  [(set (match_operand:V2SI 0 "register_operand"     "=Yr,*x,x, Yr,*x,x, x, *y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" "  0, 0,x,  0,0, x,rm,  0,rm")
	  (match_operand:SI 2 "vector_move_operand"  " rm,rm,rm,Yr,*x,x, C,*ym, C")))]
  "TARGET_SSE4_1"
  "@
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   pinsrd\t{$1, %2, %0|%0, %2, 1}
   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
   punpckldq\t{%2, %0|%0, %2}
   punpckldq\t{%2, %0|%0, %2}
   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
   %vmovd\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  ;; The per-alternative attribute lists below follow the same order as the
  ;; nine output templates above.
  [(set_attr "isa" "noavx,noavx,avx,noavx,noavx,avx,*,*,*")
   (set_attr "type" "sselog,sselog,sselog,sselog,sselog,sselog,ssemov,mmxcvt,mmxmov")
   ;; Only the pinsrd/vpinsrd alternatives carry an immediate byte.
   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*")
   (set_attr "prefix" "orig,orig,vex,orig,orig,vex,maybe_vex,orig,orig")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,DI,DI")])
13021
;; Build a V2SI vector from two SImode values on SSE targets without SSE4.1.
;; Alternatives 0-2 are limited to SSE2 by the "isa" attribute; alternatives
;; 3-4 use the single-precision forms (unpcklps/movss); alternatives 2, 5
;; and 6 produce the result in an MMX ("y") register.
;;
;; ??? In theory we can match memory for the MMX alternative, but allowing
;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
;; alternatives pretty much forces the MMX alternative to be chosen.
(define_insn "*vec_concatv2si"
  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,*y,x,x,*y,*y")
	(vec_concat:V2SI
	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm,rm,0,m, 0,*rm")
	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,C, x,C,*y,C")))]
  "TARGET_SSE && !TARGET_SSE4_1"
  "@
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}
   movd\t{%1, %0|%0, %1}
   unpcklps\t{%2, %0|%0, %2}
   movss\t{%1, %0|%0, %1}
   punpckldq\t{%2, %0|%0, %2}
   movd\t{%1, %0|%0, %1}"
  ;; Attribute lists are ordered like the seven templates above.
  [(set_attr "isa" "sse2,sse2,sse2,*,*,*,*")
   (set_attr "type" "sselog,ssemov,mmxmov,sselog,ssemov,mmxcvt,mmxmov")
   (set_attr "mode" "TI,TI,DI,V4SF,SF,DI,DI")])
13042
;; Concatenate two V2SI halves into a V4SI vector.
;; Alternatives, in order:
;;   0  punpcklqdq   (SSE2, non-AVX; operand 1 tied to operand 0)
;;   1  vpunpcklqdq  (AVX three-operand form)
;;   2  movlhps      (non-AVX, register operand 2)
;;   3  movhps       (non-AVX, operand 2 in memory)
;;   4  vmovhps      (AVX, operand 2 in memory)
(define_insn "*vec_concatv4si"
  [(set (match_operand:V4SI 0 "register_operand"       "=x,x,x,x,x")
	(vec_concat:V4SI
	  (match_operand:V2SI 1 "register_operand"     " 0,x,0,0,x")
	  (match_operand:V2SI 2 "nonimmediate_operand" " x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %q2}
   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
  ;; Attribute lists are ordered like the five templates above.
  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
   (set_attr "prefix" "orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
13059
;; Build a V2DI vector from two DImode values.
;; Alternatives, in order:
;;   0-1   pinsrq   (64-bit SSE4.1, non-AVX; operand 1 tied to operand 0)
;;   2     vpinsrq  (64-bit AVX)
;;   3     general register to vector register; operand 2 satisfies "C"
;;   4     %vmovq from a vector register or memory; operand 2 satisfies "C"
;;   5     movq2dq from an MMX register; operand 2 satisfies "C"
;;   6-7   punpcklqdq / vpunpcklqdq
;;   8     movlhps
;;   9-10  movhps / vmovhps with operand 2 in memory
;;
;; movd instead of movq is required to handle broken assemblers.
(define_insn "vec_concatv2di"
  [(set (match_operand:V2DI 0 "register_operand"
	  "=Yr,*x,x ,Yi,x ,!x,x,x,x,x,x")
	(vec_concat:V2DI
	  (match_operand:DI 1 "nonimmediate_operand"
	  "  0, 0,x ,r ,xm,*y,0,x,0,0,x")
	  (match_operand:DI 2 "vector_move_operand"
	  "*rm,rm,rm,C ,C ,C ,x,x,x,m,m")))]
  "TARGET_SSE"
  "@
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   pinsrq\t{$1, %2, %0|%0, %2, 1}
   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
   %vmovq\t{%1, %0|%0, %1}
   movq2dq\t{%1, %0|%0, %1}
   punpcklqdq\t{%2, %0|%0, %2}
   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
   movlhps\t{%2, %0|%0, %2}
   movhps\t{%2, %0|%0, %2}
   vmovhps\t{%2, %1, %0|%0, %1, %2}"
  ;; Attribute lists are ordered like the eleven templates above.
  [(set_attr "isa" "x64_sse4_noavx,x64_sse4_noavx,x64_avx,x64,sse2,sse2,sse2_noavx,avx,noavx,noavx,avx")
   ;; The insert and unpack alternatives are "sselog"; all others "ssemov".
   (set (attr "type")
     (if_then_else
       (eq_attr "alternative" "0,1,2,6,7")
       (const_string "sselog")
       (const_string "ssemov")))
   (set_attr "prefix_rex" "1,1,1,1,*,*,*,*,*,*,*")
   (set_attr "prefix_extra" "1,1,1,*,*,*,*,*,*,*,*")
   (set_attr "length_immediate" "1,1,1,*,*,*,*,*,*,*,*")
   (set_attr "prefix" "orig,orig,vex,maybe_vex,maybe_vex,orig,orig,vex,orig,orig,vex")
   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
13093
;; Expander for vec_unpacks_lo: hands operands 0 and 1 to
;; ix86_expand_sse_unpack with both flags false (flag meanings are
;; presumably "unsigned" and "high half" -- confirm in i386.c).
(define_expand "vec_unpacks_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], false, false);
  DONE;
})
13099
;; Expander for vec_unpacks_hi: hands operands 0 and 1 to
;; ix86_expand_sse_unpack with flags (false, true) (flag meanings are
;; presumably "unsigned" and "high half" -- confirm in i386.c).
(define_expand "vec_unpacks_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], false, true);
  DONE;
})
13105
;; Expander for vec_unpacku_lo: hands operands 0 and 1 to
;; ix86_expand_sse_unpack with flags (true, false) (flag meanings are
;; presumably "unsigned" and "high half" -- confirm in i386.c).
(define_expand "vec_unpacku_lo_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], true, false);
  DONE;
})
13111
;; Expander for vec_unpacku_hi: hands operands 0 and 1 to
;; ix86_expand_sse_unpack with both flags true (flag meanings are
;; presumably "unsigned" and "high half" -- confirm in i386.c).
(define_expand "vec_unpacku_hi_<mode>"
  [(match_operand:<sseunpackmode> 0 "register_operand")
   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
  "TARGET_SSE2"
{
  ix86_expand_sse_unpack (operands[0], operands[1], true, true);
  DONE;
})
13117
13118;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13119;;
13120;; Miscellaneous
13121;;
13122;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13123
;; Expander for the unsigned average of byte/word vectors.  The RTL widens
;; both inputs with zero_extend, adds them together with a constant-one
;; vector, shifts the sum right by one and truncates back to the element
;; mode.
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand")
	(truncate:VI12_AVX2
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (plus:<ssedoublemode>
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2 1 "nonimmediate_operand"))
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2 2 "nonimmediate_operand")))
	      (match_dup <mask_expand_op3>))
	    (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
{
  /* In the masked variant operands[3] is already in use, so remember it
     while slot 3 temporarily carries the constant-one vector.  */
  const bool masked_p = <mask_applied>;
  rtx saved_op3 = NULL_RTX;

  if (masked_p)
    saved_op3 = operands[3];

  operands[3] = CONST1_RTX (<MODE>mode);
  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);

  /* Masked variant: move the constant to slot 5 and put the caller's
     operand 3 back.  */
  if (masked_p)
    {
      operands[5] = operands[3];
      operands[3] = saved_op3;
    }
})
13150
;; Matching insn for the unsigned average: (zext (op1) + zext (op2) + 1) >> 1
;; truncated back to the element mode.  Operand 1 is marked commutative
;; ("%").  Alternative 0 is the two-operand pavg form (non-AVX), alternative
;; 1 the three-operand vpavg form, which also prints <mask_operand3>.
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
	(truncate:VI12_AVX2
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (plus:<ssedoublemode>
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2 1 "nonimmediate_operand" "%0,v"))
		(zero_extend:<ssedoublemode>
		  (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))
	      ;; The rounding term must be the constant-one vector.
	      (match_operand:VI12_AVX2 <mask_expand_op3> "const1_operand"))
	    (const_int 1))))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
  "@
   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
   vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,<mask_prefix>")
   (set_attr "mode" "<sseinsnmode>")])
13173
;; psadbw / vpsadbw.  The operation is modelled as an opaque
;; UNSPEC_PSADBW over two byte-vector inputs rather than as explicit RTL:
;; the correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; Alternative 0 is the two-operand SSE2 form, alternative 1 the AVX form.
(define_insn "<sse2_avx2>_psadbw"
  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
	(unspec:VI8_AVX2_AVX512BW
	  [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
	   (match_operand:<ssebytemode> 2 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_PSADBW))]
  "TARGET_SSE2"
  "@
   psadbw\t{%2, %0|%0, %2}
   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
13192
;; movmskps/movmskpd (and their VEX forms via %v): produce an SImode value
;; in a general register from a 128- or 256-bit floating-point vector
;; register.  Modelled as UNSPEC_MOVMSK.
(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI
	  [(match_operand:VF_128_256 1 "register_operand" "x")]
	  UNSPEC_MOVMSK))]
  "TARGET_SSE"
  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])
13203
;; 256-bit vpmovmskb: produce an SImode value in a general register from a
;; V32QI vector register.  Modelled as UNSPEC_MOVMSK; requires AVX2.
;; NOTE(review): the "mode" attribute is DI although the source is a
;; 256-bit vector -- confirm this is intentional.
(define_insn "avx2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V32QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_AVX2"
  "vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "DI")])
13213
;; 128-bit pmovmskb (vpmovmskb via %v): produce an SImode value in a
;; general register from a V16QI vector register.  Modelled as
;; UNSPEC_MOVMSK; requires SSE2.
(define_insn "sse2_pmovmskb"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
		   UNSPEC_MOVMSK))]
  "TARGET_SSE2"
  "%vpmovmskb\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "SI")])
13224
;; Expander for the masked byte store.  The destination memory (operand 0)
;; also appears as an input of the unspec via match_dup, so the store is
;; represented as depending on the previous memory contents.
(define_expand "sse2_maskmovdqu"
  [(set (match_operand:V16QI 0 "memory_operand")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
		       (match_operand:V16QI 2 "register_operand")
		       (match_dup 0)]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2")
13232
;; maskmovdqu insn.  The destination address (operand 0) must be in the
;; register selected by constraint "D"; it is not printed in the assembly
;; template, which only names operands 1 and 2.  When Pmode differs from
;; word_mode an "addr32" prefix is written out by hand before the mnemonic.
(define_insn "*sse2_maskmovdqu"
  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
		       (match_operand:V16QI 2 "register_operand" "x")
		       (mem:V16QI (match_dup 0))]
		      UNSPEC_MASKMOV))]
  "TARGET_SSE2"
{
  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
     that requires %v to be at the beginning of the opcode name.  */
  if (Pmode != word_mode)
    fputs ("\taddr32", asm_out_file);
  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_data16" "1")
   ;; One extra byte of length is accounted for when the addr32 prefix is
   ;; emitted.
   (set (attr "length_address")
     (symbol_ref ("Pmode != word_mode")))
   ;; The implicit %rdi operand confuses default length_vex computation.
   (set (attr "length_vex")
     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
13256
;; ldmxcsr (vldmxcsr via %v): takes an SImode memory operand.  Represented
;; as an unspec_volatile (UNSPECV_LDMXCSR) with no set destination.
(define_insn "sse_ldmxcsr"
  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
		    UNSPECV_LDMXCSR)]
  "TARGET_SSE"
  "%vldmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "load")])
13266
;; stmxcsr (vstmxcsr via %v): stores an SImode value to memory.  The source
;; is an unspec_volatile (UNSPECV_STMXCSR) with a dummy (const_int 0) input.
(define_insn "sse_stmxcsr"
  [(set (match_operand:SI 0 "memory_operand" "=m")
	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
  "TARGET_SSE"
  "%vstmxcsr\t%0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "mxcsr")
   (set_attr "prefix" "maybe_vex")
   (set_attr "memory" "store")])
13276
;; clflush: takes an address operand (printed with the %a modifier) and is
;; represented as an unspec_volatile (UNSPECV_CLFLUSH).  Its "memory"
;; attribute is "unknown".
(define_insn "sse2_clflush"
  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
		    UNSPECV_CLFLUSH)]
  "TARGET_SSE2"
  "clflush\t%a0"
  [(set_attr "type" "sse")
   (set_attr "atom_sse_attr" "fence")
   (set_attr "memory" "unknown")])
13285
;; mwait.  As per the AMD and Intel ISA manuals, the first operand is the
;; extensions word and goes to %ecx; the second operand is the hints word
;; and goes to %eax.  Both registers are implicit: the template prints no
;; operands.
(define_insn "sse3_mwait"
  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
		     (match_operand:SI 1 "register_operand" "a")]
		    UNSPECV_MWAIT)]
  "TARGET_SSE3"
;; The 64-bit version is "mwait %rax,%rcx", but only the lower 32 bits are
;; used.  Since 32-bit register operands are implicitly zero-extended to
;; 64 bits, we only need to set up the 32-bit registers.
  "mwait"
  [(set_attr "length" "3")])
13299
;; monitor.  Operand 0 is a pointer-mode value in the "a" register;
;; operands 1 and 2 are SImode values in the "c" and "d" registers.  All
;; three are implicit: the template prints no operands.
(define_insn "sse3_monitor_<mode>"
  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
		     (match_operand:SI 1 "register_operand" "c")
		     (match_operand:SI 2 "register_operand" "d")]
		    UNSPECV_MONITOR)]
  "TARGET_SSE3"
;; The 64-bit version is "monitor %rax,%rcx,%rdx", but only the lower
;; 32 bits of RCX and RDX are used.  Since 32-bit register operands are
;; implicitly zero-extended to 64 bits, we only need to set up the 32-bit
;; registers.
  "%^monitor"
  ;; Three bytes, plus one when Pmode differs from word_mode.
  [(set (attr "length")
     (symbol_ref ("(Pmode != word_mode) + 3")))])
13312
13313;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13314;;
13315;; SSSE3 instructions
13316;;
13317;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13318
;; Arithmetic codes used by the 16-bit horizontal add/subtract patterns:
;; plain and signed-saturating addition and subtraction.
(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
13320
;; 256-bit horizontal add/subtract of adjacent 16-bit pairs
;; (vphaddw, vphaddsw, vphsubw, vphsubsw).
;;
;; The instruction works independently on each 128-bit lane: the four low
;; results of a lane come from pairs of operand 1's lane and the four high
;; results from pairs of operand 2's lane.  The result is therefore
;;
;;   elements  0-3   pairs (0,1) .. (6,7)     of operand 1
;;   elements  4-7   pairs (0,1) .. (6,7)     of operand 2
;;   elements  8-11  pairs (8,9) .. (14,15)   of operand 1
;;   elements 12-15  pairs (8,9) .. (14,15)   of operand 2
;;
;; and NOT "all of operand 1's pairs followed by all of operand 2's pairs".
(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_concat:V16HI
	  ;; Low 128-bit lane of the result.
	  (vec_concat:V8HI
	    (vec_concat:V4HI
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI
		    (match_operand:V16HI 1 "register_operand" "x")
		    (parallel [(const_int 0)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	    (vec_concat:V4HI
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI
		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
		    (parallel [(const_int 0)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
	  ;; High 128-bit lane of the result.
	  (vec_concat:V8HI
	    (vec_concat:V4HI
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
	    (vec_concat:V4HI
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
	      (vec_concat:V2HI
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
		(ssse3_plusminus:HI
		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13396
;; 128-bit horizontal add/subtract of adjacent 16-bit pairs.  Result
;; elements 0-3 are formed from the pairs (0,1) .. (6,7) of operand 1 and
;; elements 4-7 from the same pairs of operand 2.  The arithmetic code
;; comes from the ssse3_plusminus iterator.
;; Alternative 0 is the two-operand SSSE3 form, alternative 1 the AVX form.
(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(vec_concat:V8HI
	  ;; Low half: pairs of operand 1.
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ssse3_plusminus:HI
		(vec_select:HI
		  (match_operand:V8HI 1 "register_operand" "0,x")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
	  ;; High half: pairs of operand 2.
	  (vec_concat:V4HI
	    (vec_concat:V2HI
	      (ssse3_plusminus:HI
		(vec_select:HI
		  (match_operand:V8HI 2 "nonimmediate_operand" "xm,xm")
		  (parallel [(const_int 0)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
	    (vec_concat:V2HI
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
	      (ssse3_plusminus:HI
		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13445
;; 64-bit (MMX register) horizontal add/subtract of adjacent 16-bit pairs.
;; Result elements 0-1 are formed from the pairs (0,1) and (2,3) of
;; operand 1, elements 2-3 from the same pairs of operand 2.  Operand 1 is
;; tied to the destination.
(define_insn "ssse3_ph<plusminus_mnemonic>wv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(vec_concat:V4HI
	  ;; Low half: pairs of operand 1.
	  (vec_concat:V2HI
	    (ssse3_plusminus:HI
	      (vec_select:HI
		(match_operand:V4HI 1 "register_operand" "0")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
	    (ssse3_plusminus:HI
	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
	  ;; High half: pairs of operand 2.
	  (vec_concat:V2HI
	    (ssse3_plusminus:HI
	      (vec_select:HI
		(match_operand:V4HI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
	    (ssse3_plusminus:HI
	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is counted when the insn mentions an extended register.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13474
;; 256-bit horizontal add/subtract of adjacent 32-bit pairs
;; (vphaddd, vphsubd).
;;
;; The instruction works independently on each 128-bit lane: the two low
;; results of a lane come from pairs of operand 1's lane and the two high
;; results from pairs of operand 2's lane.  The result is therefore
;;
;;   elements 0-1  pairs (0,1), (2,3)  of operand 1
;;   elements 2-3  pairs (0,1), (2,3)  of operand 2
;;   elements 4-5  pairs (4,5), (6,7)  of operand 1
;;   elements 6-7  pairs (4,5), (6,7)  of operand 2
;;
;; and NOT "all of operand 1's pairs followed by all of operand 2's pairs".
(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
  [(set (match_operand:V8SI 0 "register_operand" "=x")
	(vec_concat:V8SI
	  ;; Low 128-bit lane of the result.
	  (vec_concat:V4SI
	    (vec_concat:V2SI
	      (plusminus:SI
		(vec_select:SI
		  (match_operand:V8SI 1 "register_operand" "x")
		  (parallel [(const_int 0)]))
		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	      (plusminus:SI
		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
	    (vec_concat:V2SI
	      (plusminus:SI
		(vec_select:SI
		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
		  (parallel [(const_int 0)]))
		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
	      (plusminus:SI
		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
	  ;; High 128-bit lane of the result.
	  (vec_concat:V4SI
	    (vec_concat:V2SI
	      (plusminus:SI
		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
	      (plusminus:SI
		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
	    (vec_concat:V2SI
	      (plusminus:SI
		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
	      (plusminus:SI
		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
  "TARGET_AVX2"
  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13518
;; 128-bit horizontal add/subtract of adjacent 32-bit pairs.  Result
;; elements 0-1 are formed from the pairs (0,1) and (2,3) of operand 1,
;; elements 2-3 from the same pairs of operand 2.
;; Alternative 0 is the two-operand SSSE3 form, alternative 1 the AVX form.
(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
	(vec_concat:V4SI
	  ;; Low half: pairs of operand 1.
	  (vec_concat:V2SI
	    (plusminus:SI
	      (vec_select:SI
		(match_operand:V4SI 1 "register_operand" "0,x")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	    (plusminus:SI
	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
	  ;; High half: pairs of operand 2.
	  (vec_concat:V2SI
	    (plusminus:SI
	      (vec_select:SI
		(match_operand:V4SI 2 "nonimmediate_operand" "xm,xm")
		(parallel [(const_int 0)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
	    (plusminus:SI
	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
  "TARGET_SSSE3"
  "@
   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13551
;; 64-bit (MMX register) horizontal add/subtract of 32-bit pairs.  Result
;; element 0 combines elements 0 and 1 of operand 1; result element 1
;; combines elements 0 and 1 of operand 2.  Operand 1 is tied to the
;; destination.
(define_insn "ssse3_ph<plusminus_mnemonic>dv2si3"
  [(set (match_operand:V2SI 0 "register_operand" "=y")
	(vec_concat:V2SI
	  (plusminus:SI
	    (vec_select:SI
	      (match_operand:V2SI 1 "register_operand" "0")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
	  (plusminus:SI
	    (vec_select:SI
	      (match_operand:V2SI 2 "nonimmediate_operand" "ym")
	      (parallel [(const_int 0)]))
	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
  "TARGET_SSSE3"
  "ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "complex")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is counted when the insn mentions an extended register.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13572
;; 256-bit vpmaddubsw.  For each 16-bit result element the even-indexed
;; byte of operand 1 (zero-extended) is multiplied by the even-indexed byte
;; of operand 2 (sign-extended), likewise for the odd-indexed bytes, and
;; the two products are combined with a signed-saturating add (ss_plus).
(define_insn "avx2_pmaddubsw256"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(ss_plus:V16HI
	  ;; Products of the even-indexed bytes.
	  (mult:V16HI
	    (zero_extend:V16HI
	      (vec_select:V16QI
		(match_operand:V32QI 1 "register_operand" "x")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)
			   (const_int 16) (const_int 18)
			   (const_int 20) (const_int 22)
			   (const_int 24) (const_int 26)
			   (const_int 28) (const_int 30)])))
	    (sign_extend:V16HI
	      (vec_select:V16QI
		(match_operand:V32QI 2 "nonimmediate_operand" "xm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)
			   (const_int 16) (const_int 18)
			   (const_int 20) (const_int 22)
			   (const_int 24) (const_int 26)
			   (const_int 28) (const_int 30)]))))
	  ;; Products of the odd-indexed bytes.
	  (mult:V16HI
	    (zero_extend:V16HI
	      (vec_select:V16QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)
			   (const_int 17) (const_int 19)
			   (const_int 21) (const_int 23)
			   (const_int 25) (const_int 27)
			   (const_int 29) (const_int 31)])))
	    (sign_extend:V16HI
	      (vec_select:V16QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)
			   (const_int 17) (const_int 19)
			   (const_int 21) (const_int 23)
			   (const_int 25) (const_int 27)
			   (const_int 29) (const_int 31)]))))))]
  "TARGET_AVX2"
  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
13626
;; AVX512BW vpmaddubsw.  Modelled as an opaque UNSPEC_PMADDUBSW512 over its
;; two inputs: the correct representation for this is absolutely enormous,
;; and surely not generally useful.
(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
	  UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13640
;; 512-bit vpmulhrsw.  Each pair of 16-bit elements is sign-extended and
;; multiplied; the product is shifted right by 14, a constant one is added,
;; the sum is shifted right by one more bit and truncated back to 16 bits.
;; Operand 1 is marked commutative ("%").
;; NOTE(review): the constant-one vector is written in V32HI although the
;; enclosing plus is V32SI -- confirm this mode mismatch is intended.
(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(truncate:V32HI
	  (lshiftrt:V32SI
	    (plus:V32SI
	      (lshiftrt:V32SI
		(mult:V32SI
		  (sign_extend:V32SI
		    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
		  (sign_extend:V32SI
		    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
		(const_int 14))
	      (const_vector:V32HI [(const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)
				   (const_int 1) (const_int 1)]))
	    (const_int 1))))]
  "TARGET_AVX512BW"
  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
13675
;; 128-bit pmaddubsw / vpmaddubsw.  For each 16-bit result element the
;; even-indexed byte of operand 1 (zero-extended) is multiplied by the
;; even-indexed byte of operand 2 (sign-extended), likewise for the
;; odd-indexed bytes, and the two products are combined with a
;; signed-saturating add (ss_plus).
;; Alternative 0 is the two-operand SSSE3 form, alternative 1 the AVX form.
(define_insn "ssse3_pmaddubsw128"
  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
	(ss_plus:V8HI
	  ;; Products of the even-indexed bytes.
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 1 "register_operand" "0,x")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI
		(match_operand:V16QI 2 "nonimmediate_operand" "xm,xm")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)
			   (const_int 8) (const_int 10)
			   (const_int 12) (const_int 14)]))))
	  ;; Products of the odd-indexed bytes.
	  (mult:V8HI
	    (zero_extend:V8HI
	      (vec_select:V8QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)])))
	    (sign_extend:V8HI
	      (vec_select:V8QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)
			   (const_int 9) (const_int 11)
			   (const_int 13) (const_int 15)]))))))]
  "TARGET_SSSE3"
  "@
   pmaddubsw\t{%2, %0|%0, %2}
   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
13718
;; 64-bit (MMX register) pmaddubsw.  For each 16-bit result element the
;; even-indexed byte of operand 1 (zero-extended) is multiplied by the
;; even-indexed byte of operand 2 (sign-extended), likewise for the
;; odd-indexed bytes, and the two products are combined with a
;; signed-saturating add (ss_plus).  Operand 1 is tied to the destination.
(define_insn "ssse3_pmaddubsw"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(ss_plus:V4HI
	  ;; Products of the even-indexed bytes.
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 1 "register_operand" "0")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI
		(match_operand:V8QI 2 "nonimmediate_operand" "ym")
		(parallel [(const_int 0) (const_int 2)
			   (const_int 4) (const_int 6)]))))
	  ;; Products of the odd-indexed bytes.
	  (mult:V4HI
	    (zero_extend:V4HI
	      (vec_select:V4QI (match_dup 1)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)])))
	    (sign_extend:V4HI
	      (vec_select:V4QI (match_dup 2)
		(parallel [(const_int 1) (const_int 3)
			   (const_int 5) (const_int 7)]))))))]
  "TARGET_SSSE3"
  "pmaddubsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "atom_unit" "simul")
   (set_attr "prefix_extra" "1")
   ;; A REX prefix is counted when the insn mentions an extended register.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13749
;; Modes iterated over by the pmulhrsw expanders below.  V4HI and V8HI
;; are always present; V16HI is only enabled with TARGET_AVX2.
(define_mode_iterator PMULHRSW
  [V4HI V8HI (V16HI "TARGET_AVX2")])
13752
;; Masked pmulhrsw expander.  The computed value is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op5) >> 1)
;; which is then vec_merge'd with operand 3 under mask operand 4.
;; Operand 5 is not supplied by the caller; the preparation code sets it
;; to the constant-one vector.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
  [(set (match_operand:PMULHRSW 0 "register_operand")
	(vec_merge:PMULHRSW
	  (truncate:PMULHRSW
	    (lshiftrt:<ssedoublemode>
	      (plus:<ssedoublemode>
	        (lshiftrt:<ssedoublemode>
		  (mult:<ssedoublemode>
		    (sign_extend:<ssedoublemode>
		      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
		    (sign_extend:<ssedoublemode>
		      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
		  (const_int 14))
	        (match_dup 5))
	      (const_int 1)))
	  (match_operand:PMULHRSW 3 "register_operand")
	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
  "TARGET_AVX512BW && TARGET_AVX512VL"
{
  /* Rounding addend referenced by (match_dup 5) above.  */
  operands[5] = CONST1_RTX(<MODE>mode);
  /* Let the common helper adjust operands 0-2 for a MULT-style binary
     operation.  */
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
13775
;; Unmasked pmulhrsw expander.  The computed value is
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; where operand 3 is the constant-one vector created by the preparation
;; code.
;;
;; The condition is TARGET_SSSE3 rather than TARGET_AVX2: the PMULHRSW
;; iterator already restricts V16HI to TARGET_AVX2, while the V4HI and
;; V8HI instances are matched by insns that only require TARGET_SSSE3.
(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
  [(set (match_operand:PMULHRSW 0 "register_operand")
	(truncate:PMULHRSW
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (lshiftrt:<ssedoublemode>
		(mult:<ssedoublemode>
		  (sign_extend:<ssedoublemode>
		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
		  (sign_extend:<ssedoublemode>
		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
		(const_int 14))
	      (match_dup 3))
	    (const_int 1))))]
  "TARGET_SSSE3"
{
  /* Rounding addend referenced by (match_dup 3) above.  */
  operands[3] = CONST1_RTX(<MODE>mode);
  /* Let the common helper adjust operands 0-2 for a MULT-style binary
     operation.  */
  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
})
13795
;; pmulhrsw / vpmulhrsw on VI2_AVX2 modes:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; with operand 3 required to satisfy const1_operand.
;; Alternative 0 is the two-operand form with operand 1 tied to the
;; destination; alternative 1 is the three-operand "v" form, which also
;; carries the <mask_operand4> text.  Operand 1 is marked commutative
;; ("%").
(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
	(truncate:VI2_AVX2
	  (lshiftrt:<ssedoublemode>
	    (plus:<ssedoublemode>
	      (lshiftrt:<ssedoublemode>
		(mult:<ssedoublemode>
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 1 "nonimmediate_operand" "%0,v"))
		  (sign_extend:<ssedoublemode>
		    (match_operand:VI2_AVX2 2 "nonimmediate_operand" "xm,vm")))
		(const_int 14))
	      (match_operand:VI2_AVX2 3 "const1_operand"))
	    (const_int 1))))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
   && ix86_binary_operator_ok (MULT, <MODE>mode, operands)"
  "@
   pmulhrsw\t{%2, %0|%0, %2}
   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseimul")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
13821
;; 64-bit ("y" constraint, V4HI) form of pmulhrsw:
;;   truncate ((((sext (op1) * sext (op2)) >> 14) + op3) >> 1)
;; with operand 3 required to satisfy const1_operand.  Operand 1 is tied
;; to the destination and marked commutative ("%0").
(define_insn "*ssse3_pmulhrswv4hi3"
  [(set (match_operand:V4HI 0 "register_operand" "=y")
	(truncate:V4HI
	  (lshiftrt:V4SI
	    (plus:V4SI
	      (lshiftrt:V4SI
		(mult:V4SI
		  (sign_extend:V4SI
		    (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
		  (sign_extend:V4SI
		    (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
		(const_int 14))
	      (match_operand:V4HI 3 "const1_operand"))
	    (const_int 1))))]
  "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
  "pmulhrsw\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseimul")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn from the registers it mentions.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13842
;; pshufb / vpshufb on VI1_AVX512 modes, modelled as UNSPEC_PSHUFB of
;; operands 1 and 2.  Alternative 0 is the two-operand form with
;; operand 1 tied to the destination; alternative 1 is the three-operand
;; "v" form, which also carries the <mask_operand3> text.
(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,v")
	(unspec:VI1_AVX512
	  [(match_operand:VI1_AVX512 1 "register_operand" "0,v")
	   (match_operand:VI1_AVX512 2 "nonimmediate_operand" "xm,vm")]
	  UNSPEC_PSHUFB))]
  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   pshufb\t{%2, %0|%0, %2}
   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,maybe_evex")
   (set_attr "btver2_decode" "vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
13860
;; 64-bit ("y" constraint, V8QI) form of pshufb, modelled as
;; UNSPEC_PSHUFB of operands 1 and 2.  Operand 1 is tied to the
;; destination.
(define_insn "ssse3_pshufbv8qi3"
  [(set (match_operand:V8QI 0 "register_operand" "=y")
	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
		     UNSPEC_PSHUFB))]
  "TARGET_SSSE3"
  "pshufb\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn from the registers it mentions.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13872
;; psign<suffix> / vpsign<suffix> on VI124_AVX2 modes, modelled as
;; UNSPEC_PSIGN of operands 1 and 2.  Alternative 0 is the two-operand
;; form with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX form.
(define_insn "<ssse3_avx2>_psign<mode>3"
  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
	(unspec:VI124_AVX2
	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
	   (match_operand:VI124_AVX2 2 "nonimmediate_operand" "xm,xm")]
	  UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "@
   psign<ssemodesuffix>\t{%2, %0|%0, %2}
   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
13889
;; 64-bit ("y" constraint, MMXMODEI modes) form of psign, modelled as
;; UNSPEC_PSIGN of operands 1 and 2.  Operand 1 is tied to the
;; destination; the mnemonic suffix comes from <mmxvecsize>.
(define_insn "ssse3_psign<mode>3"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
	(unspec:MMXMODEI
	  [(match_operand:MMXMODEI 1 "register_operand" "0")
	   (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
	  UNSPEC_PSIGN))]
  "TARGET_SSSE3"
  "psign<mmxvecsize>\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn from the registers it mentions.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13902
;; Masked vpalignr on VI1_AVX512 modes.  The UNSPEC_PALIGNR result of
;; operands 1-3 is vec_merge'd with operand 4 (tied to the destination
;; or matching "C") under mask operand 5.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
(define_insn "<ssse3_avx2>_palignr<mode>_mask"
  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
        (vec_merge:VI1_AVX512
	  (unspec:VI1_AVX512
	    [(match_operand:VI1_AVX512 1 "register_operand" "v")
	     (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
	     (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
	    UNSPEC_PALIGNR)
	(match_operand:VI1_AVX512 4 "vector_move_operand" "0C")
	(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
{
  /* Convert the multiple-of-8 RTL constant into the printed immediate.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
13924
;; palignr / vpalignr on SSESCALARMODE modes, modelled as UNSPEC_PALIGNR
;; of operands 1-3.  Alternative 0 is the two-operand form with operand 1
;; tied to the destination; alternative 1 is the three-operand form.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
(define_insn "<ssse3_avx2>_palignr<mode>"
  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,v")
	(unspec:SSESCALARMODE
	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,v")
	   (match_operand:SSESCALARMODE 2 "nonimmediate_operand" "xm,vm")
	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n")]
	  UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Convert the multiple-of-8 RTL constant into the printed immediate.  */
  HOST_WIDE_INT rtl_amount = INTVAL (operands[3]);
  operands[3] = GEN_INT (rtl_amount / 8);

  /* Alternative 0: two-operand encoding.  */
  if (which_alternative == 0)
    return "palignr\t{%3, %2, %0|%0, %2, %3}";

  /* Alternative 1: three-operand encoding.  */
  if (which_alternative == 1)
    return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";

  /* The pattern has exactly two alternatives.  */
  gcc_unreachable ();
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "<sseinsnmode>")])
13954
;; 64-bit ("y" constraint, DImode) form of palignr, modelled as
;; UNSPEC_PALIGNR of operands 1-3.  Operand 1 is tied to the destination.
;; Operand 3 must satisfy const_0_to_255_mul_8_operand; the output code
;; divides it by 8 before printing it as the instruction immediate.
(define_insn "ssse3_palignrdi"
  [(set (match_operand:DI 0 "register_operand" "=y")
	(unspec:DI [(match_operand:DI 1 "register_operand" "0")
		    (match_operand:DI 2 "nonimmediate_operand" "ym")
		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
		   UNSPEC_PALIGNR))]
  "TARGET_SSSE3"
{
  /* Convert the multiple-of-8 RTL constant into the printed immediate.  */
  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
  return "palignr\t{%3, %2, %0|%0, %2, %3}";
}
  [(set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   ;; prefix_rex is computed per insn from the registers it mentions.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
13972
;; Mode iterator for the vector abs patterns.  It accounts for the
;; absence of V2DI and V4DI abs instructions on pre-AVX-512 targets:
;; those two modes are only enabled with TARGET_AVX512VL, whereas the
;; 128-bit QI/HI/SI modes are unconditional.
(define_mode_iterator VI1248_AVX512VL_AVX512BW
  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13980
;; Vector absolute value (pabs<suffix>) of operand 1 for every mode in
;; VI1248_AVX512VL_AVX512BW.  The "%v" in the template lets the same
;; pattern emit either the legacy or the v-prefixed mnemonic.
(define_insn "*abs<mode>2"
  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
	(abs:VI1248_AVX512VL_AVX512BW
	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand" "vm")))]
  "TARGET_SSSE3"
  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_data16" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])
13992
;; Masked vpabs<suffix> for VI48_AVX512VL modes.  The abs of operand 1 is
;; vec_merge'd with operand 2 (tied to the destination or matching "C")
;; under mask operand 3.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI48_AVX512VL
	  (abs:VI48_AVX512VL
	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
	  (match_operand:VI48_AVX512VL 2 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14005
;; Masked vpabs<suffix> for VI12_AVX512VL modes; same shape as the
;; VI48_AVX512VL pattern but conditioned on TARGET_AVX512BW.  The abs of
;; operand 1 is vec_merge'd with operand 2 under mask operand 3.
(define_insn "abs<mode>2_mask"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI12_AVX512VL
	  (abs:VI12_AVX512VL
	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
	  (match_operand:VI12_AVX512VL 2 "vector_move_operand" "0C")
	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
14018
;; Vector abs expander.  It is available from TARGET_SSE2 onwards, but
;; the pabs insn pattern itself needs TARGET_SSSE3, so without SSSE3 the
;; expansion is delegated to ix86_expand_sse2_abs.
(define_expand "abs<mode>2"
  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
	(abs:VI1248_AVX512VL_AVX512BW
	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "nonimmediate_operand")))]
  "TARGET_SSE2"
{
  /* No pabs available: emit the SSE2 sequence and skip the RTL template.  */
  if (!TARGET_SSSE3)
    {
      ix86_expand_sse2_abs (operands[0], operands[1]);
      DONE;
    }
})
14031
;; 64-bit ("y" constraint, MMXMODEI modes) absolute value of operand 1.
;; The mnemonic suffix comes from <mmxvecsize>.
(define_insn "abs<mode>2"
  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
	(abs:MMXMODEI
	  (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
  "TARGET_SSSE3"
  "pabs<mmxvecsize>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "1")
   ;; prefix_rex is computed per insn from the registers it mentions.
   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
   (set_attr "mode" "DI")])
14043
14044;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14045;;
14046;; AMD SSE4A instructions
14047;;
14048;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14049
;; SSE4A scalar store movnt<suffix>: register operand 1 (MODEF modes) is
;; stored to memory operand 0, modelled as UNSPEC_MOVNT.
(define_insn "sse4a_movnt<mode>"
  [(set (match_operand:MODEF 0 "memory_operand" "=m")
	(unspec:MODEF
	  [(match_operand:MODEF 1 "register_operand" "x")]
	  UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<MODE>")])
14059
;; SSE4A scalar store taking a vector source: element 0 of VF_128
;; operand 1 is selected and stored to memory operand 0, modelled as
;; UNSPEC_MOVNT.
(define_insn "sse4a_vmmovnt<mode>"
  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
	(unspec:<ssescalarmode>
	  [(vec_select:<ssescalarmode>
	     (match_operand:VF_128 1 "register_operand" "x")
	     (parallel [(const_int 0)]))]
	  UNSPEC_MOVNT))]
  "TARGET_SSE4A"
  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "mode" "<ssescalarmode>")])
14071
;; SSE4A extrq, immediate form: UNSPEC_EXTRQI of operand 1 (tied to the
;; destination) and two constants in the range accepted by
;; const_0_to_255_operand.  length_immediate is 2 because both constants
;; appear in the instruction.
(define_insn "sse4a_extrqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand 2 "const_0_to_255_operand")
		      (match_operand 3 "const_0_to_255_operand")]
		     UNSPEC_EXTRQI))]
  "TARGET_SSE4A"
  "extrq\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
14084
;; SSE4A extrq, register form: UNSPEC_EXTRQ of operand 1 (tied to the
;; destination) and V16QI register operand 2.
(define_insn "sse4a_extrq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V16QI 2 "register_operand" "x")]
		     UNSPEC_EXTRQ))]
  "TARGET_SSE4A"
  "extrq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sse")
   (set_attr "prefix_data16" "1")
   (set_attr "mode" "TI")])
14095
;; SSE4A insertq, immediate form: UNSPEC_INSERTQI of operand 1 (tied to
;; the destination), register operand 2 and two constants in the range
;; accepted by const_0_to_255_operand.  length_immediate is 2 because
;; both constants appear in the instruction.
(define_insn "sse4a_insertqi"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "x")
		      (match_operand 3 "const_0_to_255_operand")
		      (match_operand 4 "const_0_to_255_operand")]
		     UNSPEC_INSERTQI))]
  "TARGET_SSE4A"
  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "length_immediate" "2")
   (set_attr "mode" "TI")])
14110
;; SSE4A insertq, register form: UNSPEC_INSERTQ of operand 1 (tied to the
;; destination) and register operand 2.
(define_insn "sse4a_insertq"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "x")]
		     UNSPEC_INSERTQ))]
  "TARGET_SSE4A"
  "insertq\t{%2, %0|%0, %2}"
  [(set_attr "type" "sseins")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "1")
   (set_attr "mode" "TI")])
14122
14123;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14124;;
14125;; Intel SSE4.1 instructions
14126;;
14127;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14128
;; Mapping of immediate bits for blend instructions: for each mode, the
;; largest immediate value, used below to build the predicate name
;; const_0_to_<blendbits>_operand.
(define_mode_attr blendbits
  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
14132
;; blendps/blendpd and their v-prefixed forms, expressed as a vec_merge
;; of operand 2 and operand 1 selected by immediate operand 3.
;; Note the operand order inside the vec_merge: operand 2 comes first.
;; Alternatives 0 and 1 are the two-operand forms with operand 1 tied to
;; the destination; alternative 2 is the three-operand VEX form.
(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
	(vec_merge:VF_128_256
	  (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
	  (match_operand:VF_128_256 1 "register_operand" "0,0,x")
	  (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
  "TARGET_SSE4_1"
  "@
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "<MODE>")])
14151
;; blendvps/blendvpd and their v-prefixed forms, modelled as
;; UNSPEC_BLENDV of operands 1, 2 and register operand 3.
;; In the two non-AVX alternatives operand 1 is tied to the destination
;; and operand 3 uses the "Yz" constraint; the AVX alternative takes all
;; three sources in "x" registers.
(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
	   (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector") 
   (set_attr "mode" "<MODE>")])
14172
;; dpps/dppd and their v-prefixed forms, modelled as UNSPEC_DP of
;; operands 1 and 2 with an 8-bit immediate operand 3.
;; Operand 1 is marked commutative ("%") in the first alternative.
;; Alternatives 0 and 1 tie operand 1 to the destination; alternative 2
;; is the three-operand VEX form.
(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "Yrm,*xm,xm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_DP))]
  "TARGET_SSE4_1"
  "@
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemul")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_data16" "1,1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<MODE>")])
14193
;; Mode attribute used by the `vmovntdqa' pattern: supplies the pattern
;; name prefix for each of the three 64-bit-element vector modes.
(define_mode_attr vi8_sse4_1_avx2_avx512
   [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
14197
;; movntdqa / vmovntdqa: load memory operand 1 into register operand 0,
;; modelled as UNSPEC_MOVNTDQA.  The three alternatives differ only in
;; the destination register constraint; the third ("v") is the one
;; marked with the evex prefix attribute.
(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x, v")
	(unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m, m, m")]
		     UNSPEC_MOVNTDQA))]
  "TARGET_SSE4_1"
  "%vmovntdqa\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1,1,*")
   (set_attr "prefix" "maybe_vex,maybe_vex,evex")
   (set_attr "mode" "<sseinsnmode>")])
14208
;; mpsadbw / vmpsadbw on VI1_AVX2 modes, modelled as UNSPEC_MPSADBW of
;; operands 1 and 2 with an 8-bit immediate operand 3.
;; Alternatives 0 and 1 tie operand 1 to the destination; alternative 2
;; is the three-operand VEX form.
(define_insn "<sse4_1_avx2>_mpsadbw"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
	  UNSPEC_MPSADBW))]
  "TARGET_SSE4_1"
  "@
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   mpsadbw\t{%3, %2, %0|%0, %2, %3}
   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
14228
;; packusdw / vpackusdw: operands 1 and 2 (in <sseunpackmode>) are each
;; narrowed with unsigned saturation (us_truncate) and the two halves are
;; concatenated into the VI2_AVX2 result.
;; Alternatives 0 and 1 tie operand 1 to the destination; alternative 2
;; is the three-operand "v" form, which also carries the <mask_operand3>
;; text.
(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,v")
	(vec_concat:VI2_AVX2
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 1 "register_operand" "0,0,v"))
	  (us_truncate:<ssehalfvecmode>
	    (match_operand:<sseunpackmode> 2 "nonimmediate_operand" "Yrm,*xm,vm"))))]
  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
  "@
   packusdw\t{%2, %0|%0, %2}
   packusdw\t{%2, %0|%0, %2}
   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
14246
;; pblendvb / vpblendvb on VI1_AVX2 modes, modelled as UNSPEC_BLENDV of
;; operands 1, 2 and register operand 3.
;; In the two non-AVX alternatives operand 1 is tied to the destination
;; and operand 3 uses the "Yz" constraint; the AVX alternative takes all
;; three sources in "x" registers.  Only the AVX alternative is given a
;; length_immediate of 1.
(define_insn "<sse4_1_avx2>_pblendvb"
  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
	(unspec:VI1_AVX2
	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
	   (match_operand:VI1_AVX2 2 "nonimmediate_operand" "Yrm,*xm,xm")
	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
	  UNSPEC_BLENDV))]
  "TARGET_SSE4_1"
  "@
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   pblendvb\t{%3, %2, %0|%0, %2, %3}
   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "*,*,1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "btver2_decode" "vector,vector,vector")
   (set_attr "mode" "<sseinsnmode>")])
14266
;; 128-bit pblendw / vpblendw, expressed as a vec_merge of operand 2 and
;; operand 1 selected by 8-bit immediate operand 3.
;; Note the operand order inside the vec_merge: operand 2 comes first.
;; Alternatives 0 and 1 tie operand 1 to the destination; alternative 2
;; is the three-operand VEX form.
(define_insn "sse4_1_pblendw"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
	(vec_merge:V8HI
	  (match_operand:V8HI 2 "nonimmediate_operand" "Yrm,*xm,xm")
	  (match_operand:V8HI 1 "register_operand" "0,0,x")
	  (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
  "TARGET_SSE4_1"
  "@
   pblendw\t{%3, %2, %0|%0, %2, %3}
   pblendw\t{%3, %2, %0|%0, %2, %3}
   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,orig,vex")
   (set_attr "mode" "TI")])
14284
;; The builtin passes an 8-bit immediate, but the V16HI vec_merge needs
;; one selector bit per element.  Expand the immediate by copying its
;; low eight bits into bits 8-15.
(define_expand "avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand")
	  (match_operand:V16HI 1 "register_operand")
	  (match_operand:SI 3 "const_0_to_255_operand")))]
  "TARGET_AVX2"
{
  /* Low byte of the caller's immediate.  */
  HOST_WIDE_INT low_byte = INTVAL (operands[3]) & 0xff;

  /* Same byte in both halves of the 16-bit selector.  */
  operands[3] = GEN_INT ((low_byte << 8) | low_byte);
})
14297
;; 256-bit vpblendw.  Operand 3 must satisfy avx2_pblendw_operand; only
;; its low eight bits are printed as the instruction immediate.
;; Note the operand order inside the vec_merge: operand 2 comes first.
(define_insn "*avx2_pblendw"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_merge:V16HI
	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
	  (match_operand:V16HI 1 "register_operand" "x")
	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
  "TARGET_AVX2"
{
  /* Reduce the RTL selector to the byte that is actually encoded.  */
  HOST_WIDE_INT selector = INTVAL (operands[3]);
  operands[3] = GEN_INT (selector & 0xff);

  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
14314
;; vpblendd on VI4_AVX2 modes, expressed as a vec_merge of operand 2 and
;; operand 1 selected by 8-bit immediate operand 3.
;; Note the operand order inside the vec_merge: operand 2 comes first.
(define_insn "avx2_pblendd<mode>"
  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
	(vec_merge:VI4_AVX2
	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
	  (match_operand:VI4_AVX2 1 "register_operand" "x")
	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
  "TARGET_AVX2"
  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
14328
;; phminposuw / vphminposuw: one-source V8HI operation modelled as
;; UNSPEC_PHMINPOSUW.  The two alternatives differ only in the register
;; constraints ("Yr" versus "*x").
(define_insn "sse4_1_phminposuw"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x")
	(unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*xm")]
		     UNSPEC_PHMINPOSUW))]
  "TARGET_SSE4_1"
  "%vphminposuw\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14339
;; vpmovsxbw / vpmovzxbw, 256-bit result: sign- or zero-extends
;; (any_extend) all sixteen bytes of V16QI operand 1 to V16HI.
(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
  [(set (match_operand:V16HI 0 "register_operand" "=v")
	(any_extend:V16HI
	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14350
;; vpmovsxbw / vpmovzxbw, 512-bit result: sign- or zero-extends
;; (any_extend) all thirty-two bytes of V32QI operand 1 to V32HI.
(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
  [(set (match_operand:V32HI 0 "register_operand" "=v")
	(any_extend:V32HI
	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14361
;; pmovsxbw / pmovzxbw, 128-bit result: sign- or zero-extends
;; (any_extend) bytes 0-7 of V16QI operand 1 to V8HI.  Only eight bytes
;; of the source are used, hence the %q1 operand in the Intel-syntax
;; template and the ssememalign value of 64.
(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*v")
	(any_extend:V8HI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14378
;; vpmovsxbd / vpmovzxbd, 512-bit result: sign- or zero-extends
;; (any_extend) all sixteen bytes of V16QI operand 1 to V16SI.
;; The whole 128-bit source is consumed (there is no vec_select), so the
;; Intel-syntax template prints it with plain %1, as the other
;; full-width extends in this family do; a %q1 modifier would describe a
;; 64-bit source.
(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14388
;; vpmovsxbd / vpmovzxbd, 256-bit result: sign- or zero-extends
;; (any_extend) bytes 0-7 of V16QI operand 1 to V8SI.  Only eight bytes
;; of the source are used, hence the %q1 operand in the Intel-syntax
;; template.
(define_insn "avx2_<code>v8qiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14404
;; pmovsxbd / pmovzxbd, 128-bit result: sign- or zero-extends
;; (any_extend) bytes 0-3 of V16QI operand 1 to V4SI.  Only four bytes
;; of the source are used, hence the %k1 operand in the Intel-syntax
;; template and the ssememalign value of 32.
(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
	(any_extend:V4SI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "32")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14419
;; vpmovsxwd / vpmovzxwd, 512-bit result: sign- or zero-extends
;; (any_extend) all sixteen words of V16HI operand 1 to V16SI.
(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_extend:V16SI
	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14429
;; vpmovsxwd / vpmovzxwd, 256-bit result: sign- or zero-extends
;; (any_extend) all eight words of V8HI operand 1 to V8SI.
(define_insn "avx2_<code>v8hiv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(any_extend:V8SI
	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14440
;; pmovsxwd / pmovzxwd, 128-bit result: sign- or zero-extends
;; (any_extend) words 0-3 of V8HI operand 1 to V4SI.  Only eight bytes
;; of the source are used, hence the %q1 operand in the Intel-syntax
;; template and the ssememalign value of 64.
(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*v")
	(any_extend:V4SI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "64")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14455
;; vpmovsxbq / vpmovzxbq, 512-bit result: sign- or zero-extends
;; (any_extend) bytes 0-7 of V16QI operand 1 to V8DI.
;; Eight bytes of the source are used, so the Intel-syntax template
;; prints the operand with %q1, the same modifier the other patterns
;; that select eight source bytes use; %k1 would describe only four
;; bytes.
(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (vec_select:V8QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14470
;; vpmovsxbq / vpmovzxbq, 256-bit result: sign- or zero-extends
;; (any_extend) bytes 0-3 of V16QI operand 1 to V4DI.  Only four bytes
;; of the source are used, hence the %k1 operand in the Intel-syntax
;; template.
(define_insn "avx2_<code>v4qiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14484
;; pmovsxbq / pmovzxbq, 128-bit result: sign- or zero-extends
;; (any_extend) bytes 0-1 of V16QI operand 1 to V2DI.  Only two bytes of
;; the source are used, hence the %w1 operand in the Intel-syntax
;; template and the ssememalign value of 16.
(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
	(any_extend:V2DI
	  (vec_select:V2QI
	    (match_operand:V16QI 1 "nonimmediate_operand" "Yrm,*vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %w1}"
  [(set_attr "type" "ssemov")
   (set_attr "ssememalign" "16")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
14498
;; vpmovsxwq / vpmovzxwq, 512-bit result: sign- or zero-extends
;; (any_extend) all eight words of V8HI operand 1 to V8DI.
;; The whole 128-bit source is consumed (there is no vec_select), so the
;; Intel-syntax template prints it with plain %1, as the other
;; full-width extends in this family do; a %q1 modifier would describe a
;; 64-bit source.
(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
  [(set (match_operand:V8DI 0 "register_operand" "=v")
	(any_extend:V8DI
	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512F"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
14508
;; vpmovsxwq / vpmovzxwq, 256-bit result: sign- or zero-extends
;; (any_extend) words 0-3 of V8HI operand 1 to V4DI.  Only eight bytes
;; of the source are used, hence the %q1 operand in the Intel-syntax
;; template.
(define_insn "avx2_<code>v4hiv4di2<mask_name>"
  [(set (match_operand:V4DI 0 "register_operand" "=v")
	(any_extend:V4DI
	  (vec_select:V4HI
	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))))]
  "TARGET_AVX2 && <mask_avx512vl_condition>"
  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "OI")])
14522
;; Extend the two lowest HImode elements of the V8HI operand 1 (vec_select
;; indices 0 and 1) to DImode, giving a V2DI in operand 0.
;; Two constraint alternatives are provided ("Yr"/"Yrm" and "*v"/"*vm").
;; Requires TARGET_SSE4_1 together with <mask_avx512vl_condition>.
14523(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
14524  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14525	(any_extend:V2DI
14526	  (vec_select:V2HI
14527	    (match_operand:V8HI 1 "nonimmediate_operand" "Yrm,*vm")
14528	    (parallel [(const_int 0) (const_int 1)]))))]
14529  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
;; Two 16-bit elements (four bytes) of operand 1 are read; the Intel-syntax
;; side prints it with the %k modifier.
14530  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
14531  [(set_attr "type" "ssemov")
14532   (set_attr "ssememalign" "32")
14533   (set_attr "prefix_extra" "1")
14534   (set_attr "prefix" "maybe_vex")
14535   (set_attr "mode" "TI")])
14536
;; Extend all eight SImode elements of the V8SI operand 1 to DImode, giving
;; a V8DI in operand 0.  Requires TARGET_AVX512F.
;; The whole source vector is used, so operand 1 is printed with plain %1 in
;; both assembler dialects.
14537(define_insn "avx512f_<code>v8siv8di2<mask_name>"
14538  [(set (match_operand:V8DI 0 "register_operand" "=v")
14539	(any_extend:V8DI
14540	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
14541  "TARGET_AVX512F"
14542  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14543  [(set_attr "type" "ssemov")
14544   (set_attr "prefix" "evex")
14545   (set_attr "mode" "XI")])
14546
;; Extend all four SImode elements of the V4SI operand 1 to DImode, giving
;; a V4DI in operand 0.  Requires TARGET_AVX2 together with
;; <mask_avx512vl_condition>.  The whole source vector is used, so operand 1
;; is printed with plain %1 in both assembler dialects.
14547(define_insn "avx2_<code>v4siv4di2<mask_name>"
14548  [(set (match_operand:V4DI 0 "register_operand" "=v")
14549	(any_extend:V4DI
14550	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
14551  "TARGET_AVX2 && <mask_avx512vl_condition>"
14552  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
14553  [(set_attr "type" "ssemov")
14554   (set_attr "prefix" "maybe_evex")
14555   (set_attr "prefix_extra" "1")
14556   (set_attr "mode" "OI")])
14557
;; Extend the two lowest SImode elements of the V4SI operand 1 (vec_select
;; indices 0 and 1) to DImode, giving a V2DI in operand 0.
;; Two constraint alternatives are provided ("Yr"/"Yrm" and "*v"/"*vm").
;; Requires TARGET_SSE4_1 together with <mask_avx512vl_condition>.
14558(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
14559  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*v")
14560	(any_extend:V2DI
14561	  (vec_select:V2SI
14562	    (match_operand:V4SI 1 "nonimmediate_operand" "Yrm,*vm")
14563	    (parallel [(const_int 0) (const_int 1)]))))]
14564  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
;; Two 32-bit elements (eight bytes) of operand 1 are read; the Intel-syntax
;; side prints it with the %q modifier.
14565  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
14566  [(set_attr "type" "ssemov")
14567   (set_attr "ssememalign" "64")
14568   (set_attr "prefix_extra" "1")
14569   (set_attr "prefix" "maybe_vex")
14570   (set_attr "mode" "TI")])
14571
14572;; vtestps/vtestpd are very similar to comiss and ucomiss when
14573;; setting FLAGS_REG, but they are not really compare instructions.
;; Set FLAGS_REG (in CC mode) from an UNSPEC_VTESTP of two VF_128_256
;; operands: operand 0 must be a register, operand 1 may be a register or
;; memory.  No other result is produced.  Requires TARGET_AVX and emits
;; vtest<ssemodesuffix>.
14574(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
14575  [(set (reg:CC FLAGS_REG)
14576	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
14577		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
14578		   UNSPEC_VTESTP))]
14579  "TARGET_AVX"
14580  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
14581  [(set_attr "type" "ssecomi")
14582   (set_attr "prefix_extra" "1")
14583   (set_attr "prefix" "vex")
14584   (set_attr "mode" "<MODE>")])
14585
14586;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG,
14587;; but it is not really a compare instruction.
;; 256-bit integer test: set FLAGS_REG (in CC mode) from an UNSPEC_PTEST of
;; two V4DI operands (operand 0 a register, operand 1 register or memory).
;; Requires TARGET_AVX and emits vptest.
14588(define_insn "avx_ptest256"
14589  [(set (reg:CC FLAGS_REG)
14590	(unspec:CC [(match_operand:V4DI 0 "register_operand" "x")
14591		    (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
14592		   UNSPEC_PTEST))]
14593  "TARGET_AVX"
14594  "vptest\t{%1, %0|%0, %1}"
14595  [(set_attr "type" "ssecomi")
14596   (set_attr "prefix_extra" "1")
14597   (set_attr "prefix" "vex")
14598   (set_attr "btver2_decode" "vector")
14599   (set_attr "mode" "OI")])
14600
;; 128-bit integer test: set FLAGS_REG (in CC mode) from an UNSPEC_PTEST of
;; two V2DI operands.  Two constraint alternatives are provided
;; ("Yr"/"Yrm" and "*x"/"*xm").  Requires TARGET_SSE4_1; the template is
;; written with the %v prefix and the "prefix" attribute is maybe_vex.
14601(define_insn "sse4_1_ptest"
14602  [(set (reg:CC FLAGS_REG)
14603	(unspec:CC [(match_operand:V2DI 0 "register_operand" "Yr,*x")
14604		    (match_operand:V2DI 1 "nonimmediate_operand" "Yrm,*xm")]
14605		   UNSPEC_PTEST))]
14606  "TARGET_SSE4_1"
14607  "%vptest\t{%1, %0|%0, %1}"
14608  [(set_attr "type" "ssecomi")
14609   (set_attr "prefix_extra" "1")
14610   (set_attr "prefix" "maybe_vex")
14611   (set_attr "mode" "TI")])
14612
;; Packed rounding: operand 0 receives UNSPEC_ROUND applied to the
;; VF_128_256 operand 1 with the immediate operand 2 (a constant in 0..15)
;; as the rounding control.  Requires TARGET_ROUND.
14613(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
14614  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x")
14615	(unspec:VF_128_256
14616	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "Yrm,*xm")
14617	   (match_operand:SI 2 "const_0_to_15_operand" "n,n")]
14618	  UNSPEC_ROUND))]
14619  "TARGET_ROUND"
14620  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14621  [(set_attr "type" "ssecvt")
;; prefix_data16 is "*" when TARGET_AVX holds and "1" otherwise.
14622   (set (attr "prefix_data16")
14623     (if_then_else
14624       (match_test "TARGET_AVX")
14625     (const_string "*")
14626     (const_string "1")))
14627   (set_attr "prefix_extra" "1")
14628   (set_attr "length_immediate" "1")
14629   (set_attr "prefix" "maybe_vex")
14630   (set_attr "mode" "<MODE>")])
14631
;; Round a VF1_128_256 vector (operand 1) with the rounding-control
;; immediate in operand 2, then convert the rounded value to the integer
;; vector mode <sseintvecmode> in operand 0.  Requires TARGET_ROUND.
14632(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
14633  [(match_operand:<sseintvecmode> 0 "register_operand")
14634   (match_operand:VF1_128_256 1 "nonimmediate_operand")
14635   (match_operand:SI 2 "const_0_to_15_operand")]
14636  "TARGET_ROUND"
14637{
  /* Fresh pseudo that holds the rounded floating-point vector.  */
14638  rtx tmp = gen_reg_rtx (<MODE>mode);
14639
  /* Step 1: round operand 1 into tmp.  */
14640  emit_insn
14641    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
14642						       operands[2]));
  /* Step 2: fix_trunc conversion of tmp into the integer result.  */
14643  emit_insn
14644    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14645  DONE;
14646})
14647
;; 512-bit double-precision rounding expander.  It takes the same operand
;; layout as the narrower round patterns (destination, source, immediate in
;; 0..15) and simply forwards all three operands to
;; gen_avx512f_rndscalev8df.  Requires TARGET_AVX512F.
14648(define_expand "avx512f_roundpd512"
14649  [(match_operand:V8DF 0 "register_operand")
14650   (match_operand:V8DF 1 "nonimmediate_operand")
14651   (match_operand:SI 2 "const_0_to_15_operand")]
14652  "TARGET_AVX512F"
14653{
14654  emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
14655  DONE;
14656})
14657
;; Round two VF2 vectors (operands 1 and 2) with the rounding-control
;; immediate in operand 3, convert them to integers and pack both results
;; into the single <ssepackfltmode> operand 0.  Requires TARGET_ROUND.
14658(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
14659  [(match_operand:<ssepackfltmode> 0 "register_operand")
14660   (match_operand:VF2 1 "nonimmediate_operand")
14661   (match_operand:VF2 2 "nonimmediate_operand")
14662   (match_operand:SI 3 "const_0_to_15_operand")]
14663  "TARGET_ROUND"
14664{
14665  rtx tmp0, tmp1;
14666
  /* V2DF inputs with AVX available, 128-bit AVX not preferred and the insn
     optimized for speed: concatenate both inputs into one V4DF, round it
     once and convert it once.  */
14667  if (<MODE>mode == V2DFmode
14668      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14669    {
14670      rtx tmp2 = gen_reg_rtx (V4DFmode);
14671
14672      tmp0 = gen_reg_rtx (V4DFmode);
      /* Operand 1 is forced into a register before the concatenation.  */
14673      tmp1 = force_reg (V2DFmode, operands[1]);
14674
14675      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14676      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
14677      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14678    }
14679  else
14680    {
      /* Generic path: round each input separately, then let
	 vec_pack_sfix_trunc_<mode> convert and pack the two halves.  */
14681      tmp0 = gen_reg_rtx (<MODE>mode);
14682      tmp1 = gen_reg_rtx (<MODE>mode);
14683
14684      emit_insn
14685       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
14686							  operands[3]));
14687      emit_insn
14688       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
14689							  operands[3]));
14690      emit_insn
14691       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14692    }
14693  DONE;
14694})
14695
;; Scalar rounding on a VF_128 vector.  The result is a vec_merge with mask
;; (const_int 1) of UNSPEC_ROUND applied to operand 2 (rounding control in
;; the immediate operand 3) and operand 1.
;; Three alternatives: the first two (isa "noavx") tie operand 1 to the
;; destination (constraint "0") and emit the two-register form; the third
;; (isa "avx") emits the three-register vround form with operand 1 printed
;; explicitly.  Requires TARGET_ROUND.
14696(define_insn "sse4_1_round<ssescalarmodesuffix>"
14697  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
14698	(vec_merge:VF_128
14699	  (unspec:VF_128
14700	    [(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
14701	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
14702	    UNSPEC_ROUND)
14703	  (match_operand:VF_128 1 "register_operand" "0,0,x")
14704	  (const_int 1)))]
14705  "TARGET_ROUND"
14706  "@
14707   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14708   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
14709   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
14710  [(set_attr "isa" "noavx,noavx,avx")
14711   (set_attr "type" "ssecvt")
14712   (set_attr "length_immediate" "1")
14713   (set_attr "prefix_data16" "1,1,*")
14714   (set_attr "prefix_extra" "1")
14715   (set_attr "prefix" "orig,orig,vex")
14716   (set_attr "mode" "<MODE>")])
14717
;; Vector round expander for the VF modes.  The emitted sequence is:
;;   operand 4 = operand 1 + operand 3
;;   operand 0 = UNSPEC_ROUND (operand 4, operand 5)
;; where the C body sets operand 3 to a vector holding the value just below
;; 0.5 with the sign of operand 1 copied onto it, and operand 5 to
;; ROUND_TRUNC.  Only enabled when TARGET_ROUND holds and
;; flag_trapping_math is off.
14718(define_expand "round<mode>2"
14719  [(set (match_dup 4)
14720	(plus:VF
14721	  (match_operand:VF 1 "register_operand")
14722	  (match_dup 3)))
14723   (set (match_operand:VF 0 "register_operand")
14724	(unspec:VF
14725	  [(match_dup 4) (match_dup 5)]
14726	  UNSPEC_ROUND))]
14727  "TARGET_ROUND && !flag_trapping_math"
14728{
14729  machine_mode scalar_mode;
14730  const struct real_format *fmt;
14731  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
14732  rtx half, vec_half;
14733
14734  scalar_mode = GET_MODE_INNER (<MODE>mode);
14735
14736  /* load nextafter (0.5, 0.0) */
14737  fmt = REAL_MODE_FORMAT (scalar_mode);
  /* half_minus_pred_half = 2**(-p - 1), with p taken from the element
     format; pred_half = 0.5 - half_minus_pred_half.  */
14738  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
14739  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
14740  half = const_double_from_real_value (pred_half, scalar_mode);
14741
  /* Build a vector constant from half and force it into a register.
     NOTE(review): the `true' argument presumably asks for the value to be
     replicated in every element -- confirm in ix86_build_const_vector.  */
14742  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
14743  vec_half = force_reg (<MODE>mode, vec_half);
14744
  /* operand 3 = copysign (vec_half, operand 1).  */
14745  operands[3] = gen_reg_rtx (<MODE>mode);
14746  emit_insn (gen_copysign<mode>3 (operands[3], vec_half, operands[1]));
14747
  /* operand 4 is the temporary that receives the sum; operand 5 is the
     rounding-control immediate for the final UNSPEC_ROUND.  */
14748  operands[4] = gen_reg_rtx (<MODE>mode);
14749  operands[5] = GEN_INT (ROUND_TRUNC);
14750})
14751
;; Round a VF1_128_256 vector (operand 1) with round<mode>2 and convert the
;; rounded value to the integer vector mode <sseintvecmode> in operand 0.
;; Only enabled when TARGET_ROUND holds and flag_trapping_math is off.
14752(define_expand "round<mode>2_sfix"
14753  [(match_operand:<sseintvecmode> 0 "register_operand")
14754   (match_operand:VF1_128_256 1 "register_operand")]
14755  "TARGET_ROUND && !flag_trapping_math"
14756{
  /* Fresh pseudo that holds the rounded floating-point vector.  */
14757  rtx tmp = gen_reg_rtx (<MODE>mode);
14758
14759  emit_insn (gen_round<mode>2 (tmp, operands[1]));
14760
  /* fix_trunc conversion of the rounded value into the integer result.  */
14761  emit_insn
14762    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
14763  DONE;
14764})
14765
;; Round two VF2 vectors (operands 1 and 2) with round<mode>2, convert them
;; to integers and pack both results into the single <ssepackfltmode>
;; operand 0.  Only enabled when TARGET_ROUND holds and flag_trapping_math
;; is off.
14766(define_expand "round<mode>2_vec_pack_sfix"
14767  [(match_operand:<ssepackfltmode> 0 "register_operand")
14768   (match_operand:VF2 1 "register_operand")
14769   (match_operand:VF2 2 "register_operand")]
14770  "TARGET_ROUND && !flag_trapping_math"
14771{
14772  rtx tmp0, tmp1;
14773
  /* V2DF inputs with AVX available, 128-bit AVX not preferred and the insn
     optimized for speed: concatenate both inputs into one V4DF, round it
     once with roundv4df2 and convert it once.  */
14774  if (<MODE>mode == V2DFmode
14775      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
14776    {
14777      rtx tmp2 = gen_reg_rtx (V4DFmode);
14778
14779      tmp0 = gen_reg_rtx (V4DFmode);
14780      tmp1 = force_reg (V2DFmode, operands[1]);
14781
14782      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
14783      emit_insn (gen_roundv4df2 (tmp2, tmp0));
14784      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
14785    }
14786  else
14787    {
      /* Generic path: round each input separately, then let
	 vec_pack_sfix_trunc_<mode> convert and pack the two halves.  */
14788      tmp0 = gen_reg_rtx (<MODE>mode);
14789      tmp1 = gen_reg_rtx (<MODE>mode);
14790
14791      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
14792      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
14793
14794      emit_insn
14795       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
14796    }
14797  DONE;
14798})
14799
14800;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14801;;
14802;; Intel SSE4.2 string/text processing instructions
14803;;
14804;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
14805
;; Combined explicit-length string compare.  The pattern describes all three
;; results of UNSPEC_PCMPESTR at once: an SImode result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and 4), their
;; SImode companions (operands 3 and 5, constraints "a" and "d") and the
;; immediate operand 6 (0..255).
;; The insn has no assembler output of its own ("#"); it only exists while
;; pseudos can still be created and is always split ("&& 1") into whichever
;; of pcmpestri / pcmpestrm / the flags-only form is actually needed.
14806(define_insn_and_split "sse4_2_pcmpestr"
14807  [(set (match_operand:SI 0 "register_operand" "=c,c")
14808	(unspec:SI
14809	  [(match_operand:V16QI 2 "register_operand" "x,x")
14810	   (match_operand:SI 3 "register_operand" "a,a")
14811	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
14812	   (match_operand:SI 5 "register_operand" "d,d")
14813	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
14814	  UNSPEC_PCMPESTR))
14815   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
14816	(unspec:V16QI
14817	  [(match_dup 2)
14818	   (match_dup 3)
14819	   (match_dup 4)
14820	   (match_dup 5)
14821	   (match_dup 6)]
14822	  UNSPEC_PCMPESTR))
14823   (set (reg:CC FLAGS_REG)
14824	(unspec:CC
14825	  [(match_dup 2)
14826	   (match_dup 3)
14827	   (match_dup 4)
14828	   (match_dup 5)
14829	   (match_dup 6)]
14830	  UNSPEC_PCMPESTR))]
14831  "TARGET_SSE4_2
14832   && can_create_pseudo_p ()"
14833  "#"
14834  "&& 1"
14835  [(const_int 0)]
14836{
  /* Each flag is nonzero when curr_insn carries no REG_UNUSED note for the
     corresponding result, i.e. when that result is still wanted.  */
14837  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14838  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14839  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14840
14841  if (ecx)
14842    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14843				     operands[3], operands[4],
14844				     operands[5], operands[6]));
14845  if (xmm0)
14846    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14847				     operands[3], operands[4],
14848				     operands[5], operands[6]));
  /* Only the flags are wanted: use the flags-only pattern; its two scratch
     operands are passed as NULL.  */
14849  if (flags && !(ecx || xmm0))
14850    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14851					   operands[2], operands[3],
14852					   operands[4], operands[5],
14853					   operands[6]));
  /* Nothing is wanted at all: leave just a deleted-insn note.  */
14854  if (!(flags || ecx || xmm0))
14855    emit_note (NOTE_INSN_DELETED);
14856
14857  DONE;
14858}
14859  [(set_attr "type" "sselog")
14860   (set_attr "prefix_data16" "1")
14861   (set_attr "prefix_extra" "1")
14862   (set_attr "ssememalign" "8")
14863   (set_attr "length_immediate" "1")
14864   (set_attr "memory" "none,load")
14865   (set_attr "mode" "TI")])
14866
;; Variant of the combined explicit-length string compare whose second
;; string (operand 4) is a memory operand wrapped in UNSPEC_LOADU.  The
;; split emits the same pcmpestri / pcmpestrm / flags-only insns as the
;; plain pattern, passing the bare memory operand 4 to them, so the
;; UNSPEC_LOADU wrapper does not appear in the emitted insns.
;; Like the plain pattern it has no assembler output of its own ("#") and
;; is only valid while pseudos can still be created.
14867(define_insn_and_split "*sse4_2_pcmpestr_unaligned"
14868  [(set (match_operand:SI 0 "register_operand" "=c")
14869	(unspec:SI
14870	  [(match_operand:V16QI 2 "register_operand" "x")
14871	   (match_operand:SI 3 "register_operand" "a")
14872	   (unspec:V16QI
14873	     [(match_operand:V16QI 4 "memory_operand" "m")]
14874	     UNSPEC_LOADU)
14875	   (match_operand:SI 5 "register_operand" "d")
14876	   (match_operand:SI 6 "const_0_to_255_operand" "n")]
14877	  UNSPEC_PCMPESTR))
14878   (set (match_operand:V16QI 1 "register_operand" "=Yz")
14879	(unspec:V16QI
14880	  [(match_dup 2)
14881	   (match_dup 3)
14882	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14883	   (match_dup 5)
14884	   (match_dup 6)]
14885	  UNSPEC_PCMPESTR))
14886   (set (reg:CC FLAGS_REG)
14887	(unspec:CC
14888	  [(match_dup 2)
14889	   (match_dup 3)
14890	   (unspec:V16QI [(match_dup 4)] UNSPEC_LOADU)
14891	   (match_dup 5)
14892	   (match_dup 6)]
14893	  UNSPEC_PCMPESTR))]
14894  "TARGET_SSE4_2
14895   && can_create_pseudo_p ()"
14896  "#"
14897  "&& 1"
14898  [(const_int 0)]
14899{
  /* Each flag is nonzero when curr_insn carries no REG_UNUSED note for the
     corresponding result, i.e. when that result is still wanted.  */
14900  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
14901  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
14902  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
14903
14904  if (ecx)
14905    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
14906				     operands[3], operands[4],
14907				     operands[5], operands[6]));
14908  if (xmm0)
14909    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
14910				     operands[3], operands[4],
14911				     operands[5], operands[6]));
  /* Only the flags are wanted: use the flags-only pattern; its two scratch
     operands are passed as NULL.  */
14912  if (flags && !(ecx || xmm0))
14913    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
14914					   operands[2], operands[3],
14915					   operands[4], operands[5],
14916					   operands[6]));
  /* Nothing is wanted at all: leave just a deleted-insn note.  */
14917  if (!(flags || ecx || xmm0))
14918    emit_note (NOTE_INSN_DELETED);
14919
14920  DONE;
14921}
14922  [(set_attr "type" "sselog")
14923   (set_attr "prefix_data16" "1")
14924   (set_attr "prefix_extra" "1")
14925   (set_attr "ssememalign" "8")
14926   (set_attr "length_immediate" "1")
14927   (set_attr "memory" "load")
14928   (set_attr "mode" "TI")])
14929
;; Explicit-length string compare producing the SImode result in operand 0
;; (constraint "c") and FLAGS_REG.  Operands 1 and 3 are the V16QI strings
;; (operand 3 may be memory in the second alternative), operands 2 and 4
;; are SImode registers with constraints "a" and "d", and operand 5 is the
;; 0..255 immediate.  Only operands 1, 3 and 5 appear in the assembler
;; template; the others are fixed by their constraints.
14930(define_insn "sse4_2_pcmpestri"
14931  [(set (match_operand:SI 0 "register_operand" "=c,c")
14932	(unspec:SI
14933	  [(match_operand:V16QI 1 "register_operand" "x,x")
14934	   (match_operand:SI 2 "register_operand" "a,a")
14935	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14936	   (match_operand:SI 4 "register_operand" "d,d")
14937	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14938	  UNSPEC_PCMPESTR))
14939   (set (reg:CC FLAGS_REG)
14940	(unspec:CC
14941	  [(match_dup 1)
14942	   (match_dup 2)
14943	   (match_dup 3)
14944	   (match_dup 4)
14945	   (match_dup 5)]
14946	  UNSPEC_PCMPESTR))]
14947  "TARGET_SSE4_2"
14948  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
14949  [(set_attr "type" "sselog")
14950   (set_attr "prefix_data16" "1")
14951   (set_attr "prefix_extra" "1")
14952   (set_attr "prefix" "maybe_vex")
14953   (set_attr "ssememalign" "8")
14954   (set_attr "length_immediate" "1")
14955   (set_attr "btver2_decode" "vector")
14956   (set_attr "memory" "none,load")
14957   (set_attr "mode" "TI")])
14958
;; Explicit-length string compare producing the V16QI result in operand 0
;; (constraint "Yz") and FLAGS_REG.  Operands 1 and 3 are the V16QI strings
;; (operand 3 may be memory in the second alternative), operands 2 and 4
;; are SImode registers with constraints "a" and "d", and operand 5 is the
;; 0..255 immediate.  Only operands 1, 3 and 5 appear in the assembler
;; template; the others are fixed by their constraints.
14959(define_insn "sse4_2_pcmpestrm"
14960  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
14961	(unspec:V16QI
14962	  [(match_operand:V16QI 1 "register_operand" "x,x")
14963	   (match_operand:SI 2 "register_operand" "a,a")
14964	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
14965	   (match_operand:SI 4 "register_operand" "d,d")
14966	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
14967	  UNSPEC_PCMPESTR))
14968   (set (reg:CC FLAGS_REG)
14969	(unspec:CC
14970	  [(match_dup 1)
14971	   (match_dup 2)
14972	   (match_dup 3)
14973	   (match_dup 4)
14974	   (match_dup 5)]
14975	  UNSPEC_PCMPESTR))]
14976  "TARGET_SSE4_2"
14977  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
14978  [(set_attr "type" "sselog")
14979   (set_attr "prefix_data16" "1")
14980   (set_attr "prefix_extra" "1")
14981   (set_attr "ssememalign" "8")
14982   (set_attr "length_immediate" "1")
14983   (set_attr "prefix" "maybe_vex")
14984   (set_attr "btver2_decode" "vector")
14985   (set_attr "memory" "none,load")
14986   (set_attr "mode" "TI")])
14987
;; Flags-only explicit-length string compare: the only value set is
;; FLAGS_REG; operands 0 and 1 are scratch clobbers.
;; Four alternatives: in the first two the V16QI scratch uses "Yz" and the
;; SImode scratch is "X", and pcmpestrm is emitted; in the last two the
;; V16QI scratch is "X" and the SImode scratch uses "c", and pcmpestri is
;; emitted.  Within each pair the second alternative takes operand 4 from
;; memory.
14988(define_insn "sse4_2_pcmpestr_cconly"
14989  [(set (reg:CC FLAGS_REG)
14990	(unspec:CC
14991	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
14992	   (match_operand:SI 3 "register_operand" "a,a,a,a")
14993	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
14994	   (match_operand:SI 5 "register_operand" "d,d,d,d")
14995	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
14996	  UNSPEC_PCMPESTR))
14997   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
14998   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
14999  "TARGET_SSE4_2"
15000  "@
15001   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15002   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
15003   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
15004   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
15005  [(set_attr "type" "sselog")
15006   (set_attr "prefix_data16" "1")
15007   (set_attr "prefix_extra" "1")
15008   (set_attr "ssememalign" "8")
15009   (set_attr "length_immediate" "1")
15010   (set_attr "memory" "none,load,none,load")
15011   (set_attr "btver2_decode" "vector,vector,vector,vector") 
15012   (set_attr "prefix" "maybe_vex")
15013   (set_attr "mode" "TI")])
15014
;; Combined implicit-length string compare.  The pattern describes all three
;; results of UNSPEC_PCMPISTR at once: an SImode result in operand 0
;; (constraint "c"), a V16QI result in operand 1 (constraint "Yz") and
;; FLAGS_REG.  Inputs are the two V16QI strings (operands 2 and 3) and the
;; immediate operand 4 (0..255).
;; The insn has no assembler output of its own ("#"); it only exists while
;; pseudos can still be created and is always split ("&& 1") into whichever
;; of pcmpistri / pcmpistrm / the flags-only form is actually needed.
15015(define_insn_and_split "sse4_2_pcmpistr"
15016  [(set (match_operand:SI 0 "register_operand" "=c,c")
15017	(unspec:SI
15018	  [(match_operand:V16QI 2 "register_operand" "x,x")
15019	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
15020	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
15021	  UNSPEC_PCMPISTR))
15022   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
15023	(unspec:V16QI
15024	  [(match_dup 2)
15025	   (match_dup 3)
15026	   (match_dup 4)]
15027	  UNSPEC_PCMPISTR))
15028   (set (reg:CC FLAGS_REG)
15029	(unspec:CC
15030	  [(match_dup 2)
15031	   (match_dup 3)
15032	   (match_dup 4)]
15033	  UNSPEC_PCMPISTR))]
15034  "TARGET_SSE4_2
15035   && can_create_pseudo_p ()"
15036  "#"
15037  "&& 1"
15038  [(const_int 0)]
15039{
  /* Each flag is nonzero when curr_insn carries no REG_UNUSED note for the
     corresponding result, i.e. when that result is still wanted.  */
15040  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15041  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15042  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15043
15044  if (ecx)
15045    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15046				     operands[3], operands[4]));
15047  if (xmm0)
15048    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15049				     operands[3], operands[4]));
  /* Only the flags are wanted: use the flags-only pattern; its two scratch
     operands are passed as NULL.  */
15050  if (flags && !(ecx || xmm0))
15051    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15052					   operands[2], operands[3],
15053					   operands[4]));
  /* Nothing is wanted at all: leave just a deleted-insn note.  */
15054  if (!(flags || ecx || xmm0))
15055    emit_note (NOTE_INSN_DELETED);
15056
15057  DONE;
15058}
15059  [(set_attr "type" "sselog")
15060   (set_attr "prefix_data16" "1")
15061   (set_attr "prefix_extra" "1")
15062   (set_attr "ssememalign" "8")
15063   (set_attr "length_immediate" "1")
15064   (set_attr "memory" "none,load")
15065   (set_attr "mode" "TI")])
15066
;; Variant of the combined implicit-length string compare whose second
;; string (operand 3) is a memory operand wrapped in UNSPEC_LOADU.  The
;; split emits the same pcmpistri / pcmpistrm / flags-only insns as the
;; plain pattern, passing the bare memory operand 3 to them, so the
;; UNSPEC_LOADU wrapper does not appear in the emitted insns.
;; Like the plain pattern it has no assembler output of its own ("#") and
;; is only valid while pseudos can still be created.
15067(define_insn_and_split "*sse4_2_pcmpistr_unaligned"
15068  [(set (match_operand:SI 0 "register_operand" "=c")
15069	(unspec:SI
15070	  [(match_operand:V16QI 2 "register_operand" "x")
15071	   (unspec:V16QI
15072	     [(match_operand:V16QI 3 "memory_operand" "m")]
15073	     UNSPEC_LOADU)
15074	   (match_operand:SI 4 "const_0_to_255_operand" "n")]
15075	  UNSPEC_PCMPISTR))
15076   (set (match_operand:V16QI 1 "register_operand" "=Yz")
15077	(unspec:V16QI
15078	  [(match_dup 2)
15079	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
15080	   (match_dup 4)]
15081	  UNSPEC_PCMPISTR))
15082   (set (reg:CC FLAGS_REG)
15083	(unspec:CC
15084	  [(match_dup 2)
15085	   (unspec:V16QI [(match_dup 3)] UNSPEC_LOADU)
15086	   (match_dup 4)]
15087	  UNSPEC_PCMPISTR))]
15088  "TARGET_SSE4_2
15089   && can_create_pseudo_p ()"
15090  "#"
15091  "&& 1"
15092  [(const_int 0)]
15093{
  /* Each flag is nonzero when curr_insn carries no REG_UNUSED note for the
     corresponding result, i.e. when that result is still wanted.  */
15094  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
15095  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
15096  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
15097
15098  if (ecx)
15099    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
15100				     operands[3], operands[4]));
15101  if (xmm0)
15102    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
15103				     operands[3], operands[4]));
  /* Only the flags are wanted: use the flags-only pattern; its two scratch
     operands are passed as NULL.  */
15104  if (flags && !(ecx || xmm0))
15105    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
15106					   operands[2], operands[3],
15107					   operands[4]));
  /* Nothing is wanted at all: leave just a deleted-insn note.  */
15108  if (!(flags || ecx || xmm0))
15109    emit_note (NOTE_INSN_DELETED);
15110
15111  DONE;
15112}
15113  [(set_attr "type" "sselog")
15114   (set_attr "prefix_data16" "1")
15115   (set_attr "prefix_extra" "1")
15116   (set_attr "ssememalign" "8")
15117   (set_attr "length_immediate" "1")
15118   (set_attr "memory" "load")
15119   (set_attr "mode" "TI")])
15120
;; Implicit-length string compare producing the SImode result in operand 0
;; (constraint "c") and FLAGS_REG.  Operands 1 and 2 are the V16QI strings
;; (operand 2 may be memory in the second alternative) and operand 3 is the
;; 0..255 immediate.
15121(define_insn "sse4_2_pcmpistri"
15122  [(set (match_operand:SI 0 "register_operand" "=c,c")
15123	(unspec:SI
15124	  [(match_operand:V16QI 1 "register_operand" "x,x")
15125	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15126	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15127	  UNSPEC_PCMPISTR))
15128   (set (reg:CC FLAGS_REG)
15129	(unspec:CC
15130	  [(match_dup 1)
15131	   (match_dup 2)
15132	   (match_dup 3)]
15133	  UNSPEC_PCMPISTR))]
15134  "TARGET_SSE4_2"
15135  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
15136  [(set_attr "type" "sselog")
15137   (set_attr "prefix_data16" "1")
15138   (set_attr "prefix_extra" "1")
15139   (set_attr "ssememalign" "8")
15140   (set_attr "length_immediate" "1")
15141   (set_attr "prefix" "maybe_vex")
15142   (set_attr "memory" "none,load")
15143   (set_attr "btver2_decode" "vector")
15144   (set_attr "mode" "TI")])
15145
;; Implicit-length string compare producing the V16QI result in operand 0
;; (constraint "Yz") and FLAGS_REG.  Operands 1 and 2 are the V16QI strings
;; (operand 2 may be memory in the second alternative) and operand 3 is the
;; 0..255 immediate.
15146(define_insn "sse4_2_pcmpistrm"
15147  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
15148	(unspec:V16QI
15149	  [(match_operand:V16QI 1 "register_operand" "x,x")
15150	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15151	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
15152	  UNSPEC_PCMPISTR))
15153   (set (reg:CC FLAGS_REG)
15154	(unspec:CC
15155	  [(match_dup 1)
15156	   (match_dup 2)
15157	   (match_dup 3)]
15158	  UNSPEC_PCMPISTR))]
15159  "TARGET_SSE4_2"
15160  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
15161  [(set_attr "type" "sselog")
15162   (set_attr "prefix_data16" "1")
15163   (set_attr "prefix_extra" "1")
15164   (set_attr "ssememalign" "8")
15165   (set_attr "length_immediate" "1")
15166   (set_attr "prefix" "maybe_vex")
15167   (set_attr "memory" "none,load")
15168   (set_attr "btver2_decode" "vector")
15169   (set_attr "mode" "TI")])
15170
;; Flags-only implicit-length string compare: the only value set is
;; FLAGS_REG; operands 0 and 1 are scratch clobbers.
;; Four alternatives: in the first two the V16QI scratch uses "Yz" and the
;; SImode scratch is "X", and pcmpistrm is emitted; in the last two the
;; V16QI scratch is "X" and the SImode scratch uses "c", and pcmpistri is
;; emitted.  Within each pair the second alternative takes operand 3 from
;; memory.
15171(define_insn "sse4_2_pcmpistr_cconly"
15172  [(set (reg:CC FLAGS_REG)
15173	(unspec:CC
15174	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
15175	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
15176	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
15177	  UNSPEC_PCMPISTR))
15178   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
15179   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
15180  "TARGET_SSE4_2"
15181  "@
15182   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15183   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
15184   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
15185   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
15186  [(set_attr "type" "sselog")
15187   (set_attr "prefix_data16" "1")
15188   (set_attr "prefix_extra" "1")
15189   (set_attr "ssememalign" "8")
15190   (set_attr "length_immediate" "1")
15191   (set_attr "memory" "none,load,none,load")
15192   (set_attr "prefix" "maybe_vex")
15193   (set_attr "btver2_decode" "vector,vector,vector,vector")
15194   (set_attr "mode" "TI")])
15195
15196;; Packed float variants
;; Mode of the memory reference used by the single-precision gather/scatter
;; prefetch patterns, keyed by the mode of the index vector: a V8DI index
;; gives V8SF and a V16SI index gives V16SF.
15197(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
15198		      [(V8DI "V8SF") (V16SI "V16SF")])
15199
;; Expander for the single-precision gather prefetch
;; (UNSPEC_GATHER_PREFETCH).  Operand 0 is the mask register, operand 1 the
;; VI48_512 index vector, operand 2 the address operand, operand 3 a
;; constant accepted by const1248_operand, and operand 4 the hint (2 or 3).
;; The C body builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR in Pmode from operands 2, 1 and 3 (in that order).
;; Requires TARGET_AVX512PF.
15200(define_expand "avx512pf_gatherpf<mode>sf"
15201  [(unspec
15202     [(match_operand:<avx512fmaskmode> 0 "register_operand")
15203      (mem:<GATHER_SCATTER_SF_MEM_MODE>
15204	(match_par_dup 5
15205	  [(match_operand 2 "vsib_address_operand")
15206	   (match_operand:VI48_512 1 "register_operand")
15207	   (match_operand:SI 3 "const1248_operand")]))
15208      (match_operand:SI 4 "const_2_to_3_operand")]
15209     UNSPEC_GATHER_PREFETCH)]
15210  "TARGET_AVX512PF"
15211{
15212  operands[5]
15213    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15214					operands[3]), UNSPEC_VSIBADDR);
15215})
15216
;; Insn matching the single-precision gather prefetch.  Operand 5 is the
;; memory reference (matched by vsib_mem_operator) whose address is the
;; UNSPEC_VSIBADDR of operands 2, 1 and 3; operand 0 is the mask register
;; (constraint "Yk"), printed inside braces after the memory operand.
;; The hint in operand 4 picks the mnemonic: 3 gives vgatherpf0...ps and
;; 2 gives vgatherpf1...ps.
15217(define_insn "*avx512pf_gatherpf<mode>sf_mask"
15218  [(unspec
15219     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15220      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15221	[(unspec:P
15222	   [(match_operand:P 2 "vsib_address_operand" "Tv")
15223	    (match_operand:VI48_512 1 "register_operand" "v")
15224	    (match_operand:SI 3 "const1248_operand" "n")]
15225	   UNSPEC_VSIBADDR)])
15226      (match_operand:SI 4 "const_2_to_3_operand" "n")]
15227     UNSPEC_GATHER_PREFETCH)]
15228  "TARGET_AVX512PF"
15229{
15230  switch (INTVAL (operands[4]))
15231    {
15232    case 3:
15233      return "vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15234    case 2:
15235      return "vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15236    default:
      /* The operand predicate only admits 2 and 3.  */
15237      gcc_unreachable ();
15238    }
15239}
15240  [(set_attr "type" "sse")
15241   (set_attr "prefix" "evex")
15242   (set_attr "mode" "XI")])
15243
15244;; Packed double variants
;; Expander for the double-precision gather prefetch
;; (UNSPEC_GATHER_PREFETCH).  The memory reference is always V8DF; operand 1
;; is the VI4_256_8_512 index vector, operand 0 the mask register, operand 2
;; the address operand, operand 3 a constant accepted by const1248_operand,
;; and operand 4 the hint (2 or 3).
;; The C body builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR in Pmode from operands 2, 1 and 3 (in that order).
;; Requires TARGET_AVX512PF.
15245(define_expand "avx512pf_gatherpf<mode>df"
15246  [(unspec
15247     [(match_operand:<avx512fmaskmode> 0 "register_operand")
15248      (mem:V8DF
15249	(match_par_dup 5
15250	  [(match_operand 2 "vsib_address_operand")
15251	   (match_operand:VI4_256_8_512 1 "register_operand")
15252	   (match_operand:SI 3 "const1248_operand")]))
15253      (match_operand:SI 4 "const_2_to_3_operand")]
15254     UNSPEC_GATHER_PREFETCH)]
15255  "TARGET_AVX512PF"
15256{
15257  operands[5]
15258    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15259					operands[3]), UNSPEC_VSIBADDR);
15260})
15261
;; Insn matching the double-precision gather prefetch.  Operand 5 is the
;; V8DF memory reference (matched by vsib_mem_operator) whose address is the
;; UNSPEC_VSIBADDR of operands 2, 1 and 3; operand 0 is the mask register
;; (constraint "Yk"), printed inside braces after the memory operand.
;; The hint in operand 4 picks the mnemonic: 3 gives vgatherpf0...pd and
;; 2 gives vgatherpf1...pd.
15262(define_insn "*avx512pf_gatherpf<mode>df_mask"
15263  [(unspec
15264     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15265      (match_operator:V8DF 5 "vsib_mem_operator"
15266	[(unspec:P
15267	   [(match_operand:P 2 "vsib_address_operand" "Tv")
15268	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
15269	    (match_operand:SI 3 "const1248_operand" "n")]
15270	   UNSPEC_VSIBADDR)])
15271      (match_operand:SI 4 "const_2_to_3_operand" "n")]
15272     UNSPEC_GATHER_PREFETCH)]
15273  "TARGET_AVX512PF"
15274{
15275  switch (INTVAL (operands[4]))
15276    {
15277    case 3:
15278      return "vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15279    case 2:
15280      return "vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15281    default:
      /* The operand predicate only admits 2 and 3.  */
15282      gcc_unreachable ();
15283    }
15284}
15285  [(set_attr "type" "sse")
15286   (set_attr "prefix" "evex")
15287   (set_attr "mode" "XI")])
15288
15289;; Packed float variants
;; Expander for the single-precision scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  Operand 0 is the mask register, operand 1 the
;; VI48_512 index vector, operand 2 the address operand, operand 3 a
;; constant accepted by const1248_operand, and operand 4 a hint accepted by
;; const2367_operand.
;; The C body builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR in Pmode from operands 2, 1 and 3 (in that order).
;; Requires TARGET_AVX512PF.
15290(define_expand "avx512pf_scatterpf<mode>sf"
15291  [(unspec
15292     [(match_operand:<avx512fmaskmode> 0 "register_operand")
15293      (mem:<GATHER_SCATTER_SF_MEM_MODE>
15294	(match_par_dup 5
15295	  [(match_operand 2 "vsib_address_operand")
15296	   (match_operand:VI48_512 1 "register_operand")
15297	   (match_operand:SI 3 "const1248_operand")]))
15298      (match_operand:SI 4 "const2367_operand")]
15299     UNSPEC_SCATTER_PREFETCH)]
15300  "TARGET_AVX512PF"
15301{
15302  operands[5]
15303    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15304					operands[3]), UNSPEC_VSIBADDR);
15305})
15306
;; Insn matching the single-precision scatter prefetch.  Operand 5 is the
;; memory reference (matched by vsib_mem_operator) whose address is the
;; UNSPEC_VSIBADDR of operands 2, 1 and 3; operand 0 is the mask register
;; (constraint "Yk"), printed inside braces after the memory operand.
;; The hint in operand 4 picks the mnemonic: 3 or 7 gives vscatterpf0...ps
;; and 2 or 6 gives vscatterpf1...ps.
15307(define_insn "*avx512pf_scatterpf<mode>sf_mask"
15308  [(unspec
15309     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15310      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
15311	[(unspec:P
15312	   [(match_operand:P 2 "vsib_address_operand" "Tv")
15313	    (match_operand:VI48_512 1 "register_operand" "v")
15314	    (match_operand:SI 3 "const1248_operand" "n")]
15315	   UNSPEC_VSIBADDR)])
15316      (match_operand:SI 4 "const2367_operand" "n")]
15317     UNSPEC_SCATTER_PREFETCH)]
15318  "TARGET_AVX512PF"
15319{
15320  switch (INTVAL (operands[4]))
15321    {
15322    case 3:
15323    case 7:
15324      return "vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15325    case 2:
15326    case 6:
15327      return "vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%5%{%0%}}";
15328    default:
      /* Any other hint value is not expected to reach this point.  */
15329      gcc_unreachable ();
15330    }
15331}
15332  [(set_attr "type" "sse")
15333   (set_attr "prefix" "evex")
15334   (set_attr "mode" "XI")])
15335
15336;; Packed double variants
;; Expander for the double-precision scatter prefetch
;; (UNSPEC_SCATTER_PREFETCH).  The memory reference is always V8DF;
;; operand 1 is the VI4_256_8_512 index vector, operand 0 the mask register,
;; operand 2 the address operand, operand 3 a constant accepted by
;; const1248_operand, and operand 4 a hint accepted by const2367_operand.
;; The C body builds operand 5, the address of the mem, as an
;; UNSPEC_VSIBADDR in Pmode from operands 2, 1 and 3 (in that order).
;; Requires TARGET_AVX512PF.
15337(define_expand "avx512pf_scatterpf<mode>df"
15338  [(unspec
15339     [(match_operand:<avx512fmaskmode> 0 "register_operand")
15340      (mem:V8DF
15341	(match_par_dup 5
15342	  [(match_operand 2 "vsib_address_operand")
15343	   (match_operand:VI4_256_8_512 1 "register_operand")
15344	   (match_operand:SI 3 "const1248_operand")]))
15345      (match_operand:SI 4 "const2367_operand")]
15346     UNSPEC_SCATTER_PREFETCH)]
15347  "TARGET_AVX512PF"
15348{
15349  operands[5]
15350    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
15351					operands[3]), UNSPEC_VSIBADDR);
15352})
15353
;; Insn matching the double-precision scatter prefetch.  Operand 5 is the
;; V8DF memory reference (matched by vsib_mem_operator) whose address is the
;; UNSPEC_VSIBADDR of operands 2, 1 and 3; operand 0 is the mask register
;; (constraint "Yk"), printed inside braces after the memory operand.
;; The hint in operand 4 picks the mnemonic: 3 or 7 gives vscatterpf0...pd
;; and 2 or 6 gives vscatterpf1...pd.
15354(define_insn "*avx512pf_scatterpf<mode>df_mask"
15355  [(unspec
15356     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
15357      (match_operator:V8DF 5 "vsib_mem_operator"
15358	[(unspec:P
15359	   [(match_operand:P 2 "vsib_address_operand" "Tv")
15360	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
15361	    (match_operand:SI 3 "const1248_operand" "n")]
15362	   UNSPEC_VSIBADDR)])
15363      (match_operand:SI 4 "const2367_operand" "n")]
15364     UNSPEC_SCATTER_PREFETCH)]
15365  "TARGET_AVX512PF"
15366{
15367  switch (INTVAL (operands[4]))
15368    {
15369    case 3:
15370    case 7:
15371      return "vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15372    case 2:
15373    case 6:
15374      return "vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%5%{%0%}}";
15375    default:
      /* Any other hint value is not expected to reach this point.  */
15376      gcc_unreachable ();
15377    }
15378}
15379  [(set_attr "type" "sse")
15380   (set_attr "prefix" "evex")
15381   (set_attr "mode" "XI")])
15382
;; Operand 0 receives UNSPEC_EXP2 applied to the VF_512 operand 1; emitted
;; as vexp2<ssemodesuffix>.  Requires TARGET_AVX512ER.
;; NOTE(review): the <mask_*> and <round_saeonly_*> substitutions are
;; defined outside this chunk; presumably they add the masked and
;; suppress-all-exceptions variants -- confirm there.
15383(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
15384  [(set (match_operand:VF_512 0 "register_operand" "=v")
15385	(unspec:VF_512
15386	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15387	  UNSPEC_EXP2))]
15388  "TARGET_AVX512ER"
15389  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15390  [(set_attr "prefix" "evex")
15391   (set_attr "type" "sse")
15392   (set_attr "mode" "<MODE>")])
15393
;; AVX512ER vrcp28 on the VF_512 modes: operand 0 is set to UNSPEC_RCP28 of
;; operand 1.  Masking and SAE-only rounding variants come from the
;; <mask_*>/<round_saeonly_*> placeholders, which are defined outside this
;; section.
15394(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
15395  [(set (match_operand:VF_512 0 "register_operand" "=v")
15396	(unspec:VF_512
15397	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15398	  UNSPEC_RCP28))]
15399  "TARGET_AVX512ER"
15400  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15401  [(set_attr "prefix" "evex")
15402   (set_attr "type" "sse")
15403   (set_attr "mode" "<MODE>")])
15404
;; Scalar form of vrcp28 on the VF_128 modes.  The vec_merge with mask
;; (const_int 1) takes element 0 from UNSPEC_RCP28 of operand 1 and the
;; remaining elements from operand 2.
;; NOTE(review): length_immediate is set to 1 although the template prints no
;; immediate operand -- confirm this is intended.
15405(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
15406  [(set (match_operand:VF_128 0 "register_operand" "=v")
15407	(vec_merge:VF_128
15408	  (unspec:VF_128
15409	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15410	    UNSPEC_RCP28)
15411	  (match_operand:VF_128 2 "register_operand" "v")
15412	  (const_int 1)))]
15413  "TARGET_AVX512ER"
15414  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15415  [(set_attr "length_immediate" "1")
15416   (set_attr "prefix" "evex")
15417   (set_attr "type" "sse")
15418   (set_attr "mode" "<MODE>")])
15419
;; AVX512ER vrsqrt28 on the VF_512 modes: operand 0 is set to UNSPEC_RSQRT28
;; of operand 1.  Masking and SAE-only rounding variants come from the
;; <mask_*>/<round_saeonly_*> placeholders, which are defined outside this
;; section.
15420(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
15421  [(set (match_operand:VF_512 0 "register_operand" "=v")
15422	(unspec:VF_512
15423	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15424	  UNSPEC_RSQRT28))]
15425  "TARGET_AVX512ER"
15426  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
15427  [(set_attr "prefix" "evex")
15428   (set_attr "type" "sse")
15429   (set_attr "mode" "<MODE>")])
15430
;; Scalar form of vrsqrt28 on the VF_128 modes.  The vec_merge with mask
;; (const_int 1) takes element 0 from UNSPEC_RSQRT28 of operand 1 and the
;; remaining elements from operand 2.
;; NOTE(review): length_immediate is set to 1 although the template prints no
;; immediate operand -- confirm this is intended.
15431(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
15432  [(set (match_operand:VF_128 0 "register_operand" "=v")
15433	(vec_merge:VF_128
15434	  (unspec:VF_128
15435	    [(match_operand:VF_128 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
15436	    UNSPEC_RSQRT28)
15437	  (match_operand:VF_128 2 "register_operand" "v")
15438	  (const_int 1)))]
15439  "TARGET_AVX512ER"
15440  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
15441  [(set_attr "length_immediate" "1")
15442   (set_attr "type" "sse")
15443   (set_attr "prefix" "evex")
15444   (set_attr "mode" "<MODE>")])
15445
15446;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15447;;
15448;; XOP instructions
15449;;
15450;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15451
;; xop_plus iterates the multiply/accumulate patterns below over the plain
;; (plus) and signed-saturating (ss_plus) addition codes.
15452(define_code_iterator xop_plus [plus ss_plus])
15453
;; Mnemonic fragments selected by the xop_plus code: the ss_plus variants get
;; the extra trailing "s" ("macss", "madcss").
15454(define_code_attr macs [(plus "macs") (ss_plus "macss")])
15455(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
15456
15457;; XOP parallel integer multiply/add instructions.
15458
;; Element-wise multiply/accumulate on the VI24_128 modes:
;; operand 0 = (operand 1 * operand 2) combined with operand 3 by xop_plus.
;; The mnemonic is vp<macs> followed by the mode suffix twice.  Operand 1
;; carries the "%" commutative marker; per the constraints only operand 2 may
;; be in memory.
15459(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
15460  [(set (match_operand:VI24_128 0 "register_operand" "=x")
15461	(xop_plus:VI24_128
15462	 (mult:VI24_128
15463	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
15464	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
15465	 (match_operand:VI24_128 3 "register_operand" "x")))]
15466  "TARGET_XOP"
15467  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15468  [(set_attr "type" "ssemuladd")
15469   (set_attr "mode" "TI")])
15470
;; Widening multiply/accumulate, "low" variant: elements 0 and 2 of the two
;; V4SI inputs are sign-extended to V2DI, multiplied, and combined with the
;; V2DI accumulator (operand 3) by xop_plus.
15471(define_insn "xop_p<macs>dql"
15472  [(set (match_operand:V2DI 0 "register_operand" "=x")
15473	(xop_plus:V2DI
15474	 (mult:V2DI
15475	  (sign_extend:V2DI
15476	   (vec_select:V2SI
15477	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15478	    (parallel [(const_int 0) (const_int 2)])))
15479	  (sign_extend:V2DI
15480	   (vec_select:V2SI
15481	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15482	    (parallel [(const_int 0) (const_int 2)]))))
15483	 (match_operand:V2DI 3 "register_operand" "x")))]
15484  "TARGET_XOP"
15485  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15486  [(set_attr "type" "ssemuladd")
15487   (set_attr "mode" "TI")])
15488
;; Widening multiply/accumulate, "high" variant: elements 1 and 3 of the two
;; V4SI inputs are sign-extended to V2DI, multiplied, and combined with the
;; V2DI accumulator (operand 3) by xop_plus.
15489(define_insn "xop_p<macs>dqh"
15490  [(set (match_operand:V2DI 0 "register_operand" "=x")
15491	(xop_plus:V2DI
15492	 (mult:V2DI
15493	  (sign_extend:V2DI
15494	   (vec_select:V2SI
15495	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
15496	    (parallel [(const_int 1) (const_int 3)])))
15497	  (sign_extend:V2DI
15498	   (vec_select:V2SI
15499	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
15500	    (parallel [(const_int 1) (const_int 3)]))))
15501	 (match_operand:V2DI 3 "register_operand" "x")))]
15502  "TARGET_XOP"
15503  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15504  [(set_attr "type" "ssemuladd")
15505   (set_attr "mode" "TI")])
15506
15507;; XOP parallel integer multiply/add instructions for the intrinsics
;; Widening word multiply/accumulate: elements 1, 3, 5 and 7 of the two V8HI
;; inputs are sign-extended to V4SI, multiplied, and combined with the V4SI
;; accumulator (operand 3) by xop_plus.
15508(define_insn "xop_p<macs>wd"
15509  [(set (match_operand:V4SI 0 "register_operand" "=x")
15510	(xop_plus:V4SI
15511	 (mult:V4SI
15512	  (sign_extend:V4SI
15513	   (vec_select:V4HI
15514	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15515	    (parallel [(const_int 1) (const_int 3)
15516		       (const_int 5) (const_int 7)])))
15517	  (sign_extend:V4SI
15518	   (vec_select:V4HI
15519	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15520	    (parallel [(const_int 1) (const_int 3)
15521		       (const_int 5) (const_int 7)]))))
15522	 (match_operand:V4SI 3 "register_operand" "x")))]
15523  "TARGET_XOP"
15524  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15525  [(set_attr "type" "ssemuladd")
15526   (set_attr "mode" "TI")])
15527
;; Multiply, add adjacent products, then accumulate.  The even-element
;; (0, 2, 4, 6) products and the odd-element (1, 3, 5, 7) products of the two
;; V8HI inputs, each sign-extended to V4SI, are summed with a plain plus; that
;; sum is then combined with the V4SI accumulator (operand 3) by xop_plus.
;; The match_dup references force both halves to read the same operands 1
;; and 2.
15528(define_insn "xop_p<madcs>wd"
15529  [(set (match_operand:V4SI 0 "register_operand" "=x")
15530	(xop_plus:V4SI
15531	 (plus:V4SI
15532	  (mult:V4SI
15533	   (sign_extend:V4SI
15534	    (vec_select:V4HI
15535	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
15536	     (parallel [(const_int 0) (const_int 2)
15537			(const_int 4) (const_int 6)])))
15538	   (sign_extend:V4SI
15539	    (vec_select:V4HI
15540	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
15541	     (parallel [(const_int 0) (const_int 2)
15542			(const_int 4) (const_int 6)]))))
15543	  (mult:V4SI
15544	   (sign_extend:V4SI
15545	    (vec_select:V4HI
15546	     (match_dup 1)
15547	     (parallel [(const_int 1) (const_int 3)
15548			(const_int 5) (const_int 7)])))
15549	   (sign_extend:V4SI
15550	    (vec_select:V4HI
15551	     (match_dup 2)
15552	     (parallel [(const_int 1) (const_int 3)
15553			(const_int 5) (const_int 7)])))))
15554	 (match_operand:V4SI 3 "register_operand" "x")))]
15555  "TARGET_XOP"
15556  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15557  [(set_attr "type" "ssemuladd")
15558   (set_attr "mode" "TI")])
15559
15560;; XOP parallel XMM conditional moves
;; vpcmov on every mode in the V iterator: an if_then_else whose condition is
;; operand 3 and whose arms are operand 1 and operand 2.  The two constraint
;; alternatives allow either operand 3 or operand 2 to be in memory, never
;; both.  No "mode" attribute is set on this pattern.
15561(define_insn "xop_pcmov_<mode><avxsizesuffix>"
15562  [(set (match_operand:V 0 "register_operand" "=x,x")
15563	(if_then_else:V
15564	  (match_operand:V 3 "nonimmediate_operand" "x,m")
15565	  (match_operand:V 1 "register_operand" "x,x")
15566	  (match_operand:V 2 "nonimmediate_operand" "xm,x")))]
15567  "TARGET_XOP"
15568  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15569  [(set_attr "type" "sse4arg")])
15570
15571;; XOP horizontal add/subtract instructions
;; Horizontal add, bytes to words: the even-indexed and odd-indexed bytes of
;; the V16QI operand 1 are each extended to V8HI (sign or zero, per
;; any_extend / <u>) and added, so each result word is the sum of one
;; adjacent byte pair.
15572(define_insn "xop_phadd<u>bw"
15573  [(set (match_operand:V8HI 0 "register_operand" "=x")
15574	(plus:V8HI
15575	 (any_extend:V8HI
15576	  (vec_select:V8QI
15577	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15578	   (parallel [(const_int 0) (const_int 2)
15579		      (const_int 4) (const_int 6)
15580		      (const_int 8) (const_int 10)
15581		      (const_int 12) (const_int 14)])))
15582	 (any_extend:V8HI
15583	  (vec_select:V8QI
15584	   (match_dup 1)
15585	   (parallel [(const_int 1) (const_int 3)
15586		      (const_int 5) (const_int 7)
15587		      (const_int 9) (const_int 11)
15588		      (const_int 13) (const_int 15)])))))]
15589  "TARGET_XOP"
15590  "vphadd<u>bw\t{%1, %0|%0, %1}"
15591  [(set_attr "type" "sseiadd1")])
15592
;; Horizontal add, bytes to doublewords: result element i is the sum of the
;; four extended bytes 4*i .. 4*i+3 of the V16QI operand 1, expressed as a
;; balanced tree of two-input additions over four strided vec_selects.
15593(define_insn "xop_phadd<u>bd"
15594  [(set (match_operand:V4SI 0 "register_operand" "=x")
15595	(plus:V4SI
15596	 (plus:V4SI
15597	  (any_extend:V4SI
15598	   (vec_select:V4QI
15599	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15600	    (parallel [(const_int 0) (const_int 4)
15601		       (const_int 8) (const_int 12)])))
15602	  (any_extend:V4SI
15603	   (vec_select:V4QI
15604	    (match_dup 1)
15605	    (parallel [(const_int 1) (const_int 5)
15606		       (const_int 9) (const_int 13)]))))
15607	 (plus:V4SI
15608	  (any_extend:V4SI
15609	   (vec_select:V4QI
15610	    (match_dup 1)
15611	    (parallel [(const_int 2) (const_int 6)
15612		       (const_int 10) (const_int 14)])))
15613	  (any_extend:V4SI
15614	   (vec_select:V4QI
15615	    (match_dup 1)
15616	    (parallel [(const_int 3) (const_int 7)
15617		       (const_int 11) (const_int 15)]))))))]
15618  "TARGET_XOP"
15619  "vphadd<u>bd\t{%1, %0|%0, %1}"
15620  [(set_attr "type" "sseiadd1")])
15621
;; Horizontal add, bytes to quadwords: result element 0 is the sum of the
;; extended bytes 0..7 and element 1 the sum of bytes 8..15 of the V16QI
;; operand 1.  The sum is written as a balanced tree of two-input additions
;; over eight vec_selects, each picking byte k and byte k+8.
15622(define_insn "xop_phadd<u>bq"
15623  [(set (match_operand:V2DI 0 "register_operand" "=x")
15624	(plus:V2DI
15625	 (plus:V2DI
15626	  (plus:V2DI
15627	   (any_extend:V2DI
15628	    (vec_select:V2QI
15629	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15630	     (parallel [(const_int 0) (const_int 8)])))
15631	   (any_extend:V2DI
15632	    (vec_select:V2QI
15633	     (match_dup 1)
15634	     (parallel [(const_int 1) (const_int 9)]))))
15635	  (plus:V2DI
15636	   (any_extend:V2DI
15637	    (vec_select:V2QI
15638	     (match_dup 1)
15639	     (parallel [(const_int 2) (const_int 10)])))
15640	   (any_extend:V2DI
15641	    (vec_select:V2QI
15642	     (match_dup 1)
15643	     (parallel [(const_int 3) (const_int 11)])))))
15644	 (plus:V2DI
15645	  (plus:V2DI
15646	   (any_extend:V2DI
15647	    (vec_select:V2QI
15648	     (match_dup 1)
15649	     (parallel [(const_int 4) (const_int 12)])))
15650	   (any_extend:V2DI
15651	    (vec_select:V2QI
15652	     (match_dup 1)
15653	     (parallel [(const_int 5) (const_int 13)]))))
15654	  (plus:V2DI
15655	   (any_extend:V2DI
15656	    (vec_select:V2QI
15657	     (match_dup 1)
15658	     (parallel [(const_int 6) (const_int 14)])))
15659	   (any_extend:V2DI
15660	    (vec_select:V2QI
15661	     (match_dup 1)
15662	     (parallel [(const_int 7) (const_int 15)])))))))]
15663  "TARGET_XOP"
15664  "vphadd<u>bq\t{%1, %0|%0, %1}"
15665  [(set_attr "type" "sseiadd1")])
15666
;; Horizontal add, words to doublewords: the even-indexed and odd-indexed
;; words of the V8HI operand 1 are each extended to V4SI and added, so each
;; result element is the sum of one adjacent word pair.
15667(define_insn "xop_phadd<u>wd"
15668  [(set (match_operand:V4SI 0 "register_operand" "=x")
15669	(plus:V4SI
15670	 (any_extend:V4SI
15671	  (vec_select:V4HI
15672	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15673	   (parallel [(const_int 0) (const_int 2)
15674		      (const_int 4) (const_int 6)])))
15675	 (any_extend:V4SI
15676	  (vec_select:V4HI
15677	   (match_dup 1)
15678	   (parallel [(const_int 1) (const_int 3)
15679		      (const_int 5) (const_int 7)])))))]
15680  "TARGET_XOP"
15681  "vphadd<u>wd\t{%1, %0|%0, %1}"
15682  [(set_attr "type" "sseiadd1")])
15683
;; Horizontal add, words to quadwords: result element 0 is the sum of the
;; extended words 0..3 and element 1 the sum of words 4..7 of the V8HI
;; operand 1, written as a tree of two-input additions over four vec_selects
;; that each pick word k and word k+4.
15684(define_insn "xop_phadd<u>wq"
15685  [(set (match_operand:V2DI 0 "register_operand" "=x")
15686	(plus:V2DI
15687	 (plus:V2DI
15688	  (any_extend:V2DI
15689	   (vec_select:V2HI
15690	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15691	    (parallel [(const_int 0) (const_int 4)])))
15692	  (any_extend:V2DI
15693	   (vec_select:V2HI
15694	    (match_dup 1)
15695	    (parallel [(const_int 1) (const_int 5)]))))
15696	 (plus:V2DI
15697	  (any_extend:V2DI
15698	   (vec_select:V2HI
15699	    (match_dup 1)
15700	    (parallel [(const_int 2) (const_int 6)])))
15701	  (any_extend:V2DI
15702	   (vec_select:V2HI
15703	    (match_dup 1)
15704	    (parallel [(const_int 3) (const_int 7)]))))))]
15705  "TARGET_XOP"
15706  "vphadd<u>wq\t{%1, %0|%0, %1}"
15707  [(set_attr "type" "sseiadd1")])
15708
;; Horizontal add, doublewords to quadwords: elements (0, 2) and (1, 3) of
;; the V4SI operand 1 are each extended to V2DI and added, so each result
;; element is the sum of one adjacent doubleword pair.
15709(define_insn "xop_phadd<u>dq"
15710  [(set (match_operand:V2DI 0 "register_operand" "=x")
15711	(plus:V2DI
15712	 (any_extend:V2DI
15713	  (vec_select:V2SI
15714	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15715	   (parallel [(const_int 0) (const_int 2)])))
15716	 (any_extend:V2DI
15717	  (vec_select:V2SI
15718	   (match_dup 1)
15719	   (parallel [(const_int 1) (const_int 3)])))))]
15720  "TARGET_XOP"
15721  "vphadd<u>dq\t{%1, %0|%0, %1}"
15722  [(set_attr "type" "sseiadd1")])
15723
;; Horizontal subtract, bytes to words (signed only): each result word is the
;; sign-extended even-indexed byte minus the sign-extended odd-indexed byte
;; that follows it in the V16QI operand 1.
15724(define_insn "xop_phsubbw"
15725  [(set (match_operand:V8HI 0 "register_operand" "=x")
15726	(minus:V8HI
15727	 (sign_extend:V8HI
15728	  (vec_select:V8QI
15729	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
15730	   (parallel [(const_int 0) (const_int 2)
15731		      (const_int 4) (const_int 6)
15732		      (const_int 8) (const_int 10)
15733		      (const_int 12) (const_int 14)])))
15734	 (sign_extend:V8HI
15735	  (vec_select:V8QI
15736	   (match_dup 1)
15737	   (parallel [(const_int 1) (const_int 3)
15738		      (const_int 5) (const_int 7)
15739		      (const_int 9) (const_int 11)
15740		      (const_int 13) (const_int 15)])))))]
15741  "TARGET_XOP"
15742  "vphsubbw\t{%1, %0|%0, %1}"
15743  [(set_attr "type" "sseiadd1")])
15744
;; Horizontal subtract, words to doublewords (signed only): each result
;; element is the sign-extended even-indexed word minus the sign-extended
;; odd-indexed word that follows it in the V8HI operand 1.
15745(define_insn "xop_phsubwd"
15746  [(set (match_operand:V4SI 0 "register_operand" "=x")
15747	(minus:V4SI
15748	 (sign_extend:V4SI
15749	  (vec_select:V4HI
15750	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
15751	   (parallel [(const_int 0) (const_int 2)
15752		      (const_int 4) (const_int 6)])))
15753	 (sign_extend:V4SI
15754	  (vec_select:V4HI
15755	   (match_dup 1)
15756	   (parallel [(const_int 1) (const_int 3)
15757		      (const_int 5) (const_int 7)])))))]
15758  "TARGET_XOP"
15759  "vphsubwd\t{%1, %0|%0, %1}"
15760  [(set_attr "type" "sseiadd1")])
15761
;; Horizontal subtract, doublewords to quadwords (signed only): each result
;; element is the sign-extended even-indexed doubleword minus the
;; sign-extended odd-indexed doubleword that follows it in the V4SI
;; operand 1.
15762(define_insn "xop_phsubdq"
15763  [(set (match_operand:V2DI 0 "register_operand" "=x")
15764	(minus:V2DI
15765	 (sign_extend:V2DI
15766	  (vec_select:V2SI
15767	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
15768	   (parallel [(const_int 0) (const_int 2)])))
15769	 (sign_extend:V2DI
15770	  (vec_select:V2SI
15771	   (match_dup 1)
15772	   (parallel [(const_int 1) (const_int 3)])))))]
15773  "TARGET_XOP"
15774  "vphsubdq\t{%1, %0|%0, %1}"
15775  [(set_attr "type" "sseiadd1")])
15776
15777;; XOP permute instructions
;; vpperm, modelled as an opaque UNSPEC_XOP_PERMUTE over three V16QI
;; operands.  Operand 1 must be a register; the two constraint alternatives
;; allow either operand 2 or operand 3 to be in memory, and the insn
;; condition additionally rejects the case where both are MEMs.
15778(define_insn "xop_pperm"
15779  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15780	(unspec:V16QI
15781	  [(match_operand:V16QI 1 "register_operand" "x,x")
15782	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
15783	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
15784	  UNSPEC_XOP_PERMUTE))]
15785  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15786  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15787  [(set_attr "type" "sse4arg")
15788   (set_attr "mode" "TI")])
15789
15790;; XOP pack instructions that combine two vectors into a smaller vector
;; Pack two V2DI vectors into one V4SI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V2SI.  The instruction emitted
;; is vpperm; operand 3 appears only in a (use ...) and is passed through to
;; the template -- presumably the byte selector that performs the
;; truncation; confirm against the code that builds it.  Operands 2 and 3
;; may not both be in memory.
15791(define_insn "xop_pperm_pack_v2di_v4si"
15792  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
15793	(vec_concat:V4SI
15794	 (truncate:V2SI
15795	  (match_operand:V2DI 1 "register_operand" "x,x"))
15796	 (truncate:V2SI
15797	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
15798   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15799  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15800  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15801  [(set_attr "type" "sse4arg")
15802   (set_attr "mode" "TI")])
15803
;; Pack two V4SI vectors into one V8HI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V4HI.  The instruction emitted
;; is vpperm; operand 3 appears only in a (use ...) and is passed through to
;; the template -- presumably the byte selector that performs the
;; truncation; confirm against the code that builds it.  Operands 2 and 3
;; may not both be in memory.
15804(define_insn "xop_pperm_pack_v4si_v8hi"
15805  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
15806	(vec_concat:V8HI
15807	 (truncate:V4HI
15808	  (match_operand:V4SI 1 "register_operand" "x,x"))
15809	 (truncate:V4HI
15810	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
15811   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15812  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15813  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15814  [(set_attr "type" "sse4arg")
15815   (set_attr "mode" "TI")])
15816
;; Pack two V8HI vectors into one V16QI: the result is the concatenation of
;; operand 1 and operand 2, each truncated to V8QI.  The instruction emitted
;; is vpperm; operand 3 appears only in a (use ...) and is passed through to
;; the template -- presumably the byte selector that performs the
;; truncation; confirm against the code that builds it.  Operands 2 and 3
;; may not both be in memory.
15817(define_insn "xop_pperm_pack_v8hi_v16qi"
15818  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
15819	(vec_concat:V16QI
15820	 (truncate:V8QI
15821	  (match_operand:V8HI 1 "register_operand" "x,x"))
15822	 (truncate:V8QI
15823	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
15824   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
15825  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
15826  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
15827  [(set_attr "type" "sse4arg")
15828   (set_attr "mode" "TI")])
15829
15830;; XOP packed rotate instructions
;; Rotate-left expander for the VI_128 modes with an SImode count.
;; When operand 2 satisfies const_0_to_<sserotatemax>_operand the preparation
;; code does nothing and the (rotate ...) template itself is emitted.
;; Otherwise the count is converted to <ssescalarmode> if its mode differs,
;; replicated <ssescalarnum> times into a fresh vector register through
;; gen_vec_init<mode>, and the per-element rotate gen_xop_vrotl<mode>3 is
;; emitted instead of the template.
15831(define_expand "rotl<mode>3"
15832  [(set (match_operand:VI_128 0 "register_operand")
15833	(rotate:VI_128
15834	 (match_operand:VI_128 1 "nonimmediate_operand")
15835	 (match_operand:SI 2 "general_operand")))]
15836  "TARGET_XOP"
15837{
15838  /* If we were given a scalar, convert it to parallel */
15839  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15840    {
15841      rtvec vs = rtvec_alloc (<ssescalarnum>);
15842      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15843      rtx reg = gen_reg_rtx (<MODE>mode);
15844      rtx op2 = operands[2];
15845      int i;
15846
15847      if (GET_MODE (op2) != <ssescalarmode>mode)
15848	{
15849	  op2 = gen_reg_rtx (<ssescalarmode>mode);
15850	  convert_move (op2, operands[2], false);
15851	}
15852
15853      for (i = 0; i < <ssescalarnum>; i++)
15854	RTVEC_ELT (vs, i) = op2;
15855
15856      emit_insn (gen_vec_init<mode> (reg, par));
15857      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15858      DONE;
15859    }
15860})
15861
;; Rotate-right expander for the VI_128 modes with an SImode count.
;; When operand 2 satisfies const_0_to_<sserotatemax>_operand the preparation
;; code does nothing and the (rotatert ...) template itself is emitted.
;; Otherwise the count is converted to <ssescalarmode> if its mode differs,
;; replicated into a vector through gen_vec_init<mode>, negated with
;; gen_neg<mode>2, and passed to gen_xop_vrotl<mode>3 -- a rotate right is
;; expressed as a per-element rotate left by the negated count.
15862(define_expand "rotr<mode>3"
15863  [(set (match_operand:VI_128 0 "register_operand")
15864	(rotatert:VI_128
15865	 (match_operand:VI_128 1 "nonimmediate_operand")
15866	 (match_operand:SI 2 "general_operand")))]
15867  "TARGET_XOP"
15868{
15869  /* If we were given a scalar, convert it to parallel */
15870  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
15871    {
15872      rtvec vs = rtvec_alloc (<ssescalarnum>);
15873      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
15874      rtx neg = gen_reg_rtx (<MODE>mode);
15875      rtx reg = gen_reg_rtx (<MODE>mode);
15876      rtx op2 = operands[2];
15877      int i;
15878
15879      if (GET_MODE (op2) != <ssescalarmode>mode)
15880	{
15881	  op2 = gen_reg_rtx (<ssescalarmode>mode);
15882	  convert_move (op2, operands[2], false);
15883	}
15884
15885      for (i = 0; i < <ssescalarnum>; i++)
15886	RTVEC_ELT (vs, i) = op2;
15887
15888      emit_insn (gen_vec_init<mode> (reg, par));
15889      emit_insn (gen_neg<mode>2 (neg, reg));
15890      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
15891      DONE;
15892    }
15893})
15894
;; Rotate left by an immediate: every element of operand 1 is rotated left
;; by the constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  Emitted as vprot<ssemodesuffix> with
;; the count as an immediate.
15895(define_insn "xop_rotl<mode>3"
15896  [(set (match_operand:VI_128 0 "register_operand" "=x")
15897	(rotate:VI_128
15898	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15899	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15900  "TARGET_XOP"
15901  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15902  [(set_attr "type" "sseishft")
15903   (set_attr "length_immediate" "1")
15904   (set_attr "mode" "TI")])
15905
;; Rotate right by an immediate: every element of operand 1 is rotated right
;; by the constant operand 2, which must satisfy
;; const_0_to_<sserotatemax>_operand.  The output code computes
;; operands[3] = element bit size - operand 2 and prints vprot with that
;; value, i.e. the rotate right is emitted as a rotate left by the
;; complementary count.  The return string uses plain double quotes, as the
;; other brace-delimited output blocks in this file do; the generated C is
;; the same as with the old escaped-quote spelling.
15906(define_insn "xop_rotr<mode>3"
15907  [(set (match_operand:VI_128 0 "register_operand" "=x")
15908	(rotatert:VI_128
15909	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
15910	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
15911  "TARGET_XOP"
15912{
15913  operands[3]
15914    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
15915  return "vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}";
15916}
15917  [(set_attr "type" "sseishft")
15918   (set_attr "length_immediate" "1")
15919   (set_attr "mode" "TI")])
15920
;; Per-element rotate right with a vector of counts (operand 2).  The counts
;; are negated into a fresh register with gen_neg<mode>2 and the result is
;; passed to gen_xop_vrotl<mode>3, so the rotate right is emitted as a
;; rotate left by the negated counts.
15921(define_expand "vrotr<mode>3"
15922  [(match_operand:VI_128 0 "register_operand")
15923   (match_operand:VI_128 1 "register_operand")
15924   (match_operand:VI_128 2 "register_operand")]
15925  "TARGET_XOP"
15926{
15927  rtx reg = gen_reg_rtx (<MODE>mode);
15928  emit_insn (gen_neg<mode>2 (reg, operands[2]));
15929  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
15930  DONE;
15931})
15932
;; Per-element rotate left with a vector of counts (operand 2): forwards the
;; three operands unchanged to gen_xop_vrotl<mode>3.
15933(define_expand "vrotl<mode>3"
15934  [(match_operand:VI_128 0 "register_operand")
15935   (match_operand:VI_128 1 "register_operand")
15936   (match_operand:VI_128 2 "register_operand")]
15937  "TARGET_XOP"
15938{
15939  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
15940  DONE;
15941})
15942
;; Variable per-element rotate (vprot with a vector count).  The RTL models
;; the count sign: where operand 2 is >= 0, operand 1 is rotated left by
;; operand 2; otherwise it is rotated right by the negated operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being MEMs.
15943(define_insn "xop_vrotl<mode>3"
15944  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
15945	(if_then_else:VI_128
15946	 (ge:VI_128
15947	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
15948	  (const_int 0))
15949	 (rotate:VI_128
15950	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
15951	  (match_dup 2))
15952	 (rotatert:VI_128
15953	  (match_dup 1)
15954	  (neg:VI_128 (match_dup 2)))))]
15955  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15956  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
15957  [(set_attr "type" "sseishft")
15958   (set_attr "prefix_data16" "0")
15959   (set_attr "prefix_extra" "2")
15960   (set_attr "mode" "TI")])
15961
15962;; XOP packed shift instructions.
;; Per-element logical shift right for the VI12_128 modes (XOP only).  The
;; template is never emitted: the counts in operand 2 are negated into a
;; fresh register and handed to gen_xop_shl<mode>3, whose pattern treats a
;; negative count as a logical right shift.
15963(define_expand "vlshr<mode>3"
15964  [(set (match_operand:VI12_128 0 "register_operand")
15965	(lshiftrt:VI12_128
15966	  (match_operand:VI12_128 1 "register_operand")
15967	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
15968  "TARGET_XOP"
15969{
15970  rtx neg = gen_reg_rtx (<MODE>mode);
15971  emit_insn (gen_neg<mode>2 (neg, operands[2]));
15972  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
15973  DONE;
15974})
15975
;; Per-element logical shift right for the VI48_128 modes.  With TARGET_AVX2
;; the preparation code does nothing and the (lshiftrt ...) template is
;; emitted.  Without AVX2 (so XOP only) the counts are negated and passed to
;; gen_xop_shl<mode>3 instead.
15976(define_expand "vlshr<mode>3"
15977  [(set (match_operand:VI48_128 0 "register_operand")
15978	(lshiftrt:VI48_128
15979	  (match_operand:VI48_128 1 "register_operand")
15980	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
15981  "TARGET_AVX2 || TARGET_XOP"
15982{
15983  if (!TARGET_AVX2)
15984    {
15985      rtx neg = gen_reg_rtx (<MODE>mode);
15986      emit_insn (gen_neg<mode>2 (neg, operands[2]));
15987      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
15988      DONE;
15989    }
15990})
15991
;; Per-element logical shift right for the VI48_512 modes under AVX512F.
;; There is no preparation code; the (lshiftrt ...) template is emitted
;; as is.
15992(define_expand "vlshr<mode>3"
15993  [(set (match_operand:VI48_512 0 "register_operand")
15994	(lshiftrt:VI48_512
15995	  (match_operand:VI48_512 1 "register_operand")
15996	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
15997  "TARGET_AVX512F")
15998
;; Per-element logical shift right for the VI48_256 modes under AVX2.
;; There is no preparation code; the (lshiftrt ...) template is emitted
;; as is.
15999(define_expand "vlshr<mode>3"
16000  [(set (match_operand:VI48_256 0 "register_operand")
16001	(lshiftrt:VI48_256
16002	  (match_operand:VI48_256 1 "register_operand")
16003	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
16004  "TARGET_AVX2")
16005
;; Per-element arithmetic shift right for V8HI.  With TARGET_XOP the counts
;; are negated and passed to gen_xop_shav8hi3; otherwise (AVX512BW plus
;; AVX512VL) the (ashiftrt ...) template is emitted.
;; NOTE(review): the XOP path is taken whenever TARGET_XOP is set and passes
;; only operands 0-2, so any extra operands introduced through <mask_name>
;; would be ignored there -- confirm this combination cannot occur or is
;; acceptable.
16006(define_expand "vashrv8hi3<mask_name>"
16007  [(set (match_operand:V8HI 0 "register_operand")
16008	(ashiftrt:V8HI
16009	  (match_operand:V8HI 1 "register_operand")
16010	  (match_operand:V8HI 2 "nonimmediate_operand")))]
16011  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
16012{
16013  if (TARGET_XOP)
16014    {
16015      rtx neg = gen_reg_rtx (V8HImode);
16016      emit_insn (gen_negv8hi2 (neg, operands[2]));
16017      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
16018      DONE;
16019    }
16020})
16021
;; Per-element arithmetic shift right for V16QI (XOP only).  The template is
;; never emitted: the counts are negated into a fresh register and passed to
;; gen_xop_shav16qi3, whose pattern treats a negative count as an arithmetic
;; right shift.
16022(define_expand "vashrv16qi3"
16023  [(set (match_operand:V16QI 0 "register_operand")
16024	(ashiftrt:V16QI
16025	  (match_operand:V16QI 1 "register_operand")
16026	  (match_operand:V16QI 2 "nonimmediate_operand")))]
16027  "TARGET_XOP"
16028{
16029   rtx neg = gen_reg_rtx (V16QImode);
16030   emit_insn (gen_negv16qi2 (neg, operands[2]));
16031   emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
16032   DONE;
16033})
16034
;; Per-element arithmetic shift right for V2DI.  With TARGET_XOP the counts
;; are negated and passed to gen_xop_shav2di3; otherwise (AVX512VL) the
;; (ashiftrt ...) template is emitted.
;; NOTE(review): the XOP path is taken whenever TARGET_XOP is set and passes
;; only operands 0-2, so any extra operands introduced through <mask_name>
;; would be ignored there -- confirm this combination cannot occur or is
;; acceptable.
16035(define_expand "vashrv2di3<mask_name>"
16036  [(set (match_operand:V2DI 0 "register_operand")
16037	(ashiftrt:V2DI
16038	  (match_operand:V2DI 1 "register_operand")
16039	  (match_operand:V2DI 2 "nonimmediate_operand")))]
16040  "TARGET_XOP || TARGET_AVX512VL"
16041{
16042  if (TARGET_XOP)
16043    {
16044      rtx neg = gen_reg_rtx (V2DImode);
16045      emit_insn (gen_negv2di2 (neg, operands[2]));
16046      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
16047      DONE;
16048    }
16049})
16050
;; Per-element arithmetic shift right for V4SI.  With TARGET_AVX2 the
;; (ashiftrt ...) template is emitted; without it (so XOP only) the counts
;; are negated and passed to gen_xop_shav4si3.
16051(define_expand "vashrv4si3"
16052  [(set (match_operand:V4SI 0 "register_operand")
16053	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
16054		       (match_operand:V4SI 2 "nonimmediate_operand")))]
16055  "TARGET_AVX2 || TARGET_XOP"
16056{
16057  if (!TARGET_AVX2)
16058    {
16059      rtx neg = gen_reg_rtx (V4SImode);
16060      emit_insn (gen_negv4si2 (neg, operands[2]));
16061      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
16062      DONE;
16063    }
16064})
16065
;; Per-element arithmetic shift right for V16SI under AVX512F.  There is no
;; preparation code; the (ashiftrt ...) template is emitted as is.
16066(define_expand "vashrv16si3"
16067  [(set (match_operand:V16SI 0 "register_operand")
16068	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
16069		        (match_operand:V16SI 2 "nonimmediate_operand")))]
16070  "TARGET_AVX512F")
16071
;; Per-element arithmetic shift right for V8SI under AVX2.  There is no
;; preparation code; the (ashiftrt ...) template is emitted as is.
16072(define_expand "vashrv8si3"
16073  [(set (match_operand:V8SI 0 "register_operand")
16074	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
16075		       (match_operand:V8SI 2 "nonimmediate_operand")))]
16076  "TARGET_AVX2")
16077
;; Per-element shift left for the VI12_128 modes (XOP only).  The template is
;; never emitted: the operands are forwarded unchanged to
;; gen_xop_sha<mode>3, whose pattern shifts left for non-negative counts.
16078(define_expand "vashl<mode>3"
16079  [(set (match_operand:VI12_128 0 "register_operand")
16080	(ashift:VI12_128
16081	  (match_operand:VI12_128 1 "register_operand")
16082	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
16083  "TARGET_XOP"
16084{
16085  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16086  DONE;
16087})
16088
;; Per-element shift left for the VI48_128 modes.  With TARGET_AVX2 the
;; (ashift ...) template is emitted.  Without it (so XOP only) operand 2 is
;; first forced into a register and then passed to gen_xop_sha<mode>3.
16089(define_expand "vashl<mode>3"
16090  [(set (match_operand:VI48_128 0 "register_operand")
16091	(ashift:VI48_128
16092	  (match_operand:VI48_128 1 "register_operand")
16093	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
16094  "TARGET_AVX2 || TARGET_XOP"
16095{
16096  if (!TARGET_AVX2)
16097    {
16098      operands[2] = force_reg (<MODE>mode, operands[2]);
16099      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
16100      DONE;
16101    }
16102})
16103
;; Per-element shift left for the VI48_512 modes under AVX512F.  There is no
;; preparation code; the (ashift ...) template is emitted as is.
16104(define_expand "vashl<mode>3"
16105  [(set (match_operand:VI48_512 0 "register_operand")
16106	(ashift:VI48_512
16107	  (match_operand:VI48_512 1 "register_operand")
16108	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
16109  "TARGET_AVX512F")
16110
;; Per-element shift left for the VI48_256 modes under AVX2.  There is no
;; preparation code; the (ashift ...) template is emitted as is.
16111(define_expand "vashl<mode>3"
16112  [(set (match_operand:VI48_256 0 "register_operand")
16113	(ashift:VI48_256
16114	  (match_operand:VI48_256 1 "register_operand")
16115	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
16116  "TARGET_AVX2")
16117
;; Variable per-element arithmetic shift (vpsha).  The RTL models the count
;; sign: where operand 2 is >= 0, operand 1 is shifted left by operand 2;
;; otherwise it is shifted right arithmetically by the negated operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being MEMs.
16118(define_insn "xop_sha<mode>3"
16119  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16120	(if_then_else:VI_128
16121	 (ge:VI_128
16122	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16123	  (const_int 0))
16124	 (ashift:VI_128
16125	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16126	  (match_dup 2))
16127	 (ashiftrt:VI_128
16128	  (match_dup 1)
16129	  (neg:VI_128 (match_dup 2)))))]
16130  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16131  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16132  [(set_attr "type" "sseishft")
16133   (set_attr "prefix_data16" "0")
16134   (set_attr "prefix_extra" "2")
16135   (set_attr "mode" "TI")])
16136
;; Variable per-element logical shift (vpshl).  The RTL models the count
;; sign: where operand 2 is >= 0, operand 1 is shifted left by operand 2;
;; otherwise it is shifted right logically by the negated operand 2.
;; Either operand 1 or operand 2 may be in memory, but the insn condition
;; rejects both being MEMs.
16137(define_insn "xop_shl<mode>3"
16138  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
16139	(if_then_else:VI_128
16140	 (ge:VI_128
16141	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
16142	  (const_int 0))
16143	 (ashift:VI_128
16144	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
16145	  (match_dup 2))
16146	 (lshiftrt:VI_128
16147	  (match_dup 1)
16148	  (neg:VI_128 (match_dup 2)))))]
16149  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16150  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16151  [(set_attr "type" "sseishft")
16152   (set_attr "prefix_data16" "0")
16153   (set_attr "prefix_extra" "2")
16154   (set_attr "mode" "TI")])
16155
;; Byte-vector shift by a scalar SImode count, for every code in any_shift
;; and every mode in VI1_AVX512.  The template is never emitted (DONE is
;; always reached).
;;
;; XOP path (V16QImode only): the count is replicated into a V16QI vector
;; through gen_vec_initv16qi.  For codes other than ASHIFT the count must be
;; negated: a CONST_INT is negated up front, a non-constant count is negated
;; after the broadcast with gen_negv16qi2.  LSHIFTRT then uses
;; gen_xop_shlv16qi3; the remaining codes use gen_xop_shav16qi3.
;;
;; Every other case is delegated to ix86_expand_vecop_qihi.
16156(define_expand "<shift_insn><mode>3"
16157  [(set (match_operand:VI1_AVX512 0 "register_operand")
16158	(any_shift:VI1_AVX512
16159	  (match_operand:VI1_AVX512 1 "register_operand")
16160	  (match_operand:SI 2 "nonmemory_operand")))]
16161  "TARGET_SSE2"
16162{
16163  if (TARGET_XOP && <MODE>mode == V16QImode)
16164    {
16165      bool negate = false;
16166      rtx (*gen) (rtx, rtx, rtx);
16167      rtx tmp, par;
16168      int i;
16169
16170      if (<CODE> != ASHIFT)
16171	{
16172	  if (CONST_INT_P (operands[2]))
16173	    operands[2] = GEN_INT (-INTVAL (operands[2]));
16174	  else
16175	    negate = true;
16176	}
16177      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
16178      for (i = 0; i < 16; i++)
16179        XVECEXP (par, 0, i) = operands[2];
16180
16181      tmp = gen_reg_rtx (V16QImode);
16182      emit_insn (gen_vec_initv16qi (tmp, par));
16183
16184      if (negate)
16185	emit_insn (gen_negv16qi2 (tmp, tmp));
16186
16187      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
16188      emit_insn (gen (operands[0], operands[1], tmp));
16189    }
16190  else
16191    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
16192  DONE;
16193})
16194
;; V2DI arithmetic shift right by a scalar DImode count.  With
;; TARGET_AVX512VL the preparation code does nothing and the (ashiftrt ...)
;; template is emitted.  Otherwise (XOP only) the count is replicated into
;; both elements of a V2DI register through gen_vec_initv2di and negated: a
;; CONST_INT is negated before the broadcast, a non-constant count after it
;; with gen_negv2di2.  The result is passed to gen_xop_shav2di3.
16195(define_expand "ashrv2di3"
16196  [(set (match_operand:V2DI 0 "register_operand")
16197	(ashiftrt:V2DI
16198	  (match_operand:V2DI 1 "register_operand")
16199	  (match_operand:DI 2 "nonmemory_operand")))]
16200  "TARGET_XOP || TARGET_AVX512VL"
16201{
16202  if (!TARGET_AVX512VL)
16203    {
16204      rtx reg = gen_reg_rtx (V2DImode);
16205      rtx par;
16206      bool negate = false;
16207      int i;
16208
16209      if (CONST_INT_P (operands[2]))
16210	operands[2] = GEN_INT (-INTVAL (operands[2]));
16211      else
16212	negate = true;
16213
16214      par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
16215      for (i = 0; i < 2; i++)
16216	XVECEXP (par, 0, i) = operands[2];
16217
16218      emit_insn (gen_vec_initv2di (reg, par));
16219
16220      if (negate)
16221	emit_insn (gen_negv2di2 (reg, reg));
16222
16223      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
16224      DONE;
16225    }
16226})
16227
16228;; XOP FRCZ support
;; vfrcz on every mode in the FMAMODE iterator, modelled as an opaque
;; UNSPEC_FRCZ of operand 1, which may be a register or memory.
16229(define_insn "xop_frcz<mode>2"
16230  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
16231	(unspec:FMAMODE
16232	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
16233	 UNSPEC_FRCZ))]
16234  "TARGET_XOP"
16235  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
16236  [(set_attr "type" "ssecvt1")
16237   (set_attr "mode" "<MODE>")])
16238
;; Expander for the scalar vfrcz on the VF_128 modes.  The preparation
;; statement sets operand 2 to the zero constant of the vector mode, so the
;; vec_merge with mask (const_int 1) takes element 0 from UNSPEC_FRCZ of
;; operand 1 and zero for the remaining elements.
16239(define_expand "xop_vmfrcz<mode>2"
16240  [(set (match_operand:VF_128 0 "register_operand")
16241	(vec_merge:VF_128
16242	  (unspec:VF_128
16243	   [(match_operand:VF_128 1 "nonimmediate_operand")]
16244	   UNSPEC_FRCZ)
16245	  (match_dup 2)
16246	  (const_int 1)))]
16247  "TARGET_XOP"
16248  "operands[2] = CONST0_RTX (<MODE>mode);")
16249
;; Insn for the scalar vfrcz: element 0 of the result is UNSPEC_FRCZ of
;; operand 1 and the remaining elements come from operand 2, which must
;; satisfy const0_operand.  Operand 2 has no constraint and is not printed;
;; the Intel-syntax side of the template prints operand 1 with the <iptr>
;; modifier.
16250(define_insn "*xop_vmfrcz<mode>2"
16251  [(set (match_operand:VF_128 0 "register_operand" "=x")
16252	(vec_merge:VF_128
16253	  (unspec:VF_128
16254	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
16255	   UNSPEC_FRCZ)
16256	  (match_operand:VF_128 2 "const0_operand")
16257	  (const_int 1)))]
16258  "TARGET_XOP"
16259  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
16260  [(set_attr "type" "ssecvt1")
16261   (set_attr "mode" "<MODE>")])
16262
;; vpcom integer compare on the VI_128 modes.  Operand 1 is the comparison
;; operator itself (any code accepted by ix86_comparison_int_operator)
;; applied to operands 2 and 3; the template prints it with the %Y modifier
;; as part of the mnemonic.  Only operand 3 may be in memory.
16263(define_insn "xop_maskcmp<mode>3"
16264  [(set (match_operand:VI_128 0 "register_operand" "=x")
16265	(match_operator:VI_128 1 "ix86_comparison_int_operator"
16266	 [(match_operand:VI_128 2 "register_operand" "x")
16267	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
16268  "TARGET_XOP"
16269  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
16270  [(set_attr "type" "sse4arg")
16271   (set_attr "prefix_data16" "0")
16272   (set_attr "prefix_rep" "0")
16273   (set_attr "prefix_extra" "2")
16274   (set_attr "length_immediate" "1")
16275   (set_attr "mode" "TI")])
16276
;; Unsigned counterpart of xop_maskcmp<mode>3: the comparison must
;; satisfy ix86_comparison_uns_operator and the mnemonic gets a "u"
;; between the condition and the element-size suffix.
(define_insn "xop_maskcmp_uns<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
	 [(match_operand:VI_128 2 "register_operand" "x")
	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_rep" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
16290
16291;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
16292;; and pcomneu* not to be converted to the signed ones in case somebody needs
16293;; the exact instruction generated for the intrinsic.
;; Same assembler output as xop_maskcmp_uns<mode>3, but the comparison is
;; wrapped in UNSPEC_XOP_UNSIGNED_CMP so that it is not visible to the
;; optimizers as an ordinary comparison.
(define_insn "xop_maskcmp_uns2<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(unspec:VI_128
	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
	  [(match_operand:VI_128 2 "register_operand" "x")
	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
	 UNSPEC_XOP_UNSIGNED_CMP))]
  "TARGET_XOP"
  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
16308
16309;; Pcomtrue and pcomfalse support.  These are useless instructions, but are
16310;; being added here to be complete.
;; vpcomtrue / vpcomfalse.  Operand 3 is a compile-time integer that only
;; chooses the mnemonic: zero selects vpcomfalse, any other value selects
;; vpcomtrue.  Operands 1 and 2 are printed as the sources in either case.
(define_insn "xop_pcom_tf<mode>3"
  [(set (match_operand:VI_128 0 "register_operand" "=x")
	(unspec:VI_128
	  [(match_operand:VI_128 1 "register_operand" "x")
	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_int_operand" "n")]
	  UNSPEC_XOP_TRUEFALSE))]
  "TARGET_XOP"
{
  /* A zero selector means "always false".  */
  if (INTVAL (operands[3]) == 0)
    return "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";

  return "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}";
}
  [(set_attr "type" "ssecmp")
   (set_attr "prefix_data16" "0")
   (set_attr "prefix_extra" "2")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
16329
;; XOP vpermil2ps / vpermil2pd.  Operands 1 and 2 are the two
;; floating-point data sources, operand 3 is the integer selector vector
;; (the only operand that may live in memory) and operand 4 is the 2-bit
;; immediate.
;;
;; Operand 2 deliberately carries no "%" constraint modifier.  "%" would
;; declare operands 2 and 3 commutative, but they have different modes
;; (<MODE> versus <sseintvecmode>) and different roles in the
;; instruction, so allowing the register allocator to exchange them
;; would be incorrect.
(define_insn "xop_vpermil2<mode>3"
  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "register_operand" "x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "x")
	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm")
	   (match_operand:SI 4 "const_0_to_3_operand" "n")]
	  UNSPEC_VPERMIL2))]
  "TARGET_XOP"
  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
  [(set_attr "type" "sse4arg")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "<MODE>")])
16343
16344;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16345
;; AESENC, modeled as UNSPEC_AESENC on V2DI.  Alternative 0 is the legacy
;; two-operand encoding (operand 1 tied to the destination, isa noavx);
;; alternative 1 is the three-operand VEX encoding (isa avx).
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
		      UNSPEC_AESENC))]
  "TARGET_AES"
  "@
   aesenc\t{%2, %0|%0, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "btver2_decode" "double,double")
   (set_attr "mode" "TI")])
16361
;; AESENCLAST, modeled as UNSPEC_AESENCLAST on V2DI.  Same two
;; alternatives as "aesenc": legacy encoding with operand 1 tied to the
;; destination, and the three-operand VEX encoding.
(define_insn "aesenclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
		      UNSPEC_AESENCLAST))]
  "TARGET_AES"
  "@
   aesenclast\t{%2, %0|%0, %2}
   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "btver2_decode" "double,double") 
   (set_attr "mode" "TI")])
16377
;; AESDEC, modeled as UNSPEC_AESDEC on V2DI.  Alternative 0 is the legacy
;; encoding with operand 1 tied to the destination; alternative 1 is the
;; three-operand VEX encoding.
(define_insn "aesdec"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
		      UNSPEC_AESDEC))]
  "TARGET_AES"
  "@
   aesdec\t{%2, %0|%0, %2}
   vaesdec\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "btver2_decode" "double,double") 
   (set_attr "mode" "TI")])
16393
;; AESDECLAST, modeled as UNSPEC_AESDECLAST on V2DI.  Alternative 0 is
;; the legacy encoding with operand 1 tied to the destination;
;; alternative 1 is the three-operand VEX encoding.
(define_insn "aesdeclast"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
		       (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")]
		      UNSPEC_AESDECLAST))]
  "TARGET_AES"
  "@
   aesdeclast\t{%2, %0|%0, %2}
   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "btver2_decode" "double,double")
   (set_attr "mode" "TI")])
16409
;; AESIMC, modeled as UNSPEC_AESIMC on V2DI.  The instruction has a
;; single source, so one alternative suffices; the "%v" at the start of
;; the template together with prefix "maybe_vex" covers both the legacy
;; and the VEX spelling.
(define_insn "aesimc"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")]
		      UNSPEC_AESIMC))]
  "TARGET_AES"
  "%vaesimc\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
16420
;; AESKEYGENASSIST, modeled as UNSPEC_AESKEYGENASSIST on V2DI.  Operand 2
;; is an 8-bit immediate (const_0_to_255_operand), hence
;; length_immediate 1.
(define_insn "aeskeygenassist"
  [(set (match_operand:V2DI 0 "register_operand" "=x")
	(unspec:V2DI [(match_operand:V2DI 1 "nonimmediate_operand" "xm")
		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
		     UNSPEC_AESKEYGENASSIST))]
  "TARGET_AES"
  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "TI")])
16433
;; PCLMULQDQ, modeled as UNSPEC_PCLMUL on V2DI with an 8-bit immediate in
;; operand 3.  Alternative 0 is the legacy encoding with operand 1 tied
;; to the destination; alternative 1 is the VEX encoding with a separate
;; first source.
(define_insn "pclmulqdq"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
		      (match_operand:V2DI 2 "nonimmediate_operand" "xm,xm")
		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
		     UNSPEC_PCLMUL))]
  "TARGET_PCLMUL"
  "@
   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")
   (set_attr "mode" "TI")])
16450
;; Expander for vzeroall.  The emitted PARALLEL has the
;; UNSPECV_VZEROALL unspec_volatile in slot 0, followed by one explicit
;; (set (reg:V8SI) 0) per SSE register: 16 registers when TARGET_64BIT,
;; 8 otherwise.
(define_expand "avx_vzeroall"
  [(match_par_dup 0 [(const_int 0)])]
  "TARGET_AVX"
{
  const int sse_reg_count = TARGET_64BIT ? 16 : 8;
  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (sse_reg_count + 1));
  int slot;

  /* Slot 0 holds the volatile marker.  */
  XVECEXP (par, 0, 0)
    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
			       UNSPECV_VZEROALL);

  /* Slot N (N >= 1) zeroes SSE register N - 1.  */
  for (slot = 1; slot <= sse_reg_count; slot++)
    {
      rtx dest = gen_rtx_REG (V8SImode, SSE_REGNO (slot - 1));
      XVECEXP (par, 0, slot)
	= gen_rtx_SET (VOIDmode, dest, CONST0_RTX (V8SImode));
    }

  operands[0] = par;
})
16470
;; Matches the PARALLEL built by the avx_vzeroall expander.  The
;; vzeroall_operation predicate checks the parallel as a whole; only its
;; first element, the UNSPECV_VZEROALL marker, is spelled out here.
(define_insn "*avx_vzeroall"
  [(match_parallel 0 "vzeroall_operation"
    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
  "TARGET_AVX"
  "vzeroall"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
16482
16483;; Clear the upper 128bits of AVX registers, equivalent to a NOP
16484;; if the upper 128bits are unused.
;; The pattern is a bare unspec_volatile (UNSPECV_VZEROUPPER): no
;; register sets or clobbers are expressed in the RTL.
(define_insn "avx_vzeroupper"
  [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)]
  "TARGET_AVX"
  "vzeroupper"
  [(set_attr "type" "sse")
   (set_attr "modrm" "0")
   (set_attr "memory" "none")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "OI")])
16495
;; AVX2 vpbroadcast{b,w,d,q}: duplicate element 0 of a 128-bit
;; (<ssexmmmode>) source, in a register or memory, into every element of
;; the destination.
(define_insn "avx2_pbroadcast<mode>"
  [(set (match_operand:VI 0 "register_operand" "=x")
	(vec_duplicate:VI
	  (vec_select:<ssescalarmode>
	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
16508
;; Variant of avx2_pbroadcast<mode> whose source is a full 256-bit
;; vector.  Only element 0 is read: the memory alternative prints the
;; operand with the <iptr> size prefix, the register alternative prints
;; it with the %x modifier.
(define_insn "avx2_pbroadcast<mode>_1"
  [(set (match_operand:VI_256 0 "register_operand" "=x,x")
	(vec_duplicate:VI_256
	  (vec_select:<ssescalarmode>
	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "@
   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
16523
;; Variable permute (UNSPEC_VPERMVAR) for the VI48F_256_512 modes.
;; Operand 1 is the data (register or memory); operand 2 is the index
;; vector and must be a register.  Note that operand 2 is printed before
;; operand 1 in the Intel-syntax operand list.
(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
	(unspec:VI48F_256_512
	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
16535
;; Variable permute (UNSPEC_VPERMVAR) for the byte-element modes of
;; VI1_AVX512VL; requires TARGET_AVX512VBMI.  Operand layout is the same
;; as in the VI48F_256_512 permvar pattern.
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
16547
;; Variable permute (UNSPEC_VPERMVAR) for the modes of VI2_AVX512VL;
;; requires TARGET_AVX512BW.  Operand layout is the same as in the
;; VI48F_256_512 permvar pattern.
(define_insn "<avx512>_permvar<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
	  UNSPEC_VPERMVAR))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
16559
;; Immediate-controlled permute for the VI8F_256_512 modes.  The 8-bit
;; immediate in operand 2 is unpacked into four 2-bit selectors, which
;; are handed to the <avx2_avx512>_perm<mode>_1 insn as separate
;; operands.
(define_expand "<avx2_avx512>_perm<mode>"
  [(match_operand:VI8F_256_512 0 "register_operand")
   (match_operand:VI8F_256_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")]
  "TARGET_AVX2"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int k;

  /* Selector K occupies the two bits starting at bit 2 * K.  */
  for (k = 0; k < 4; k++)
    sel[k] = GEN_INT ((imm >> (2 * k)) & 3);

  emit_insn (gen_<avx2_avx512>_perm<mode>_1 (operands[0], operands[1],
					      sel[0], sel[1],
					      sel[2], sel[3]));
  DONE;
})
16574
;; Masked form of the immediate-controlled permute.  The 8-bit immediate
;; in operand 2 is unpacked into four 2-bit selectors; operand 3 (the
;; merge source) and operand 4 (the mask register) are passed through
;; unchanged to the <avx2_avx512>_perm<mode>_1_mask insn.
(define_expand "<avx512>_perm<mode>_mask"
  [(match_operand:VI8F_256_512 0 "register_operand")
   (match_operand:VI8F_256_512 1 "nonimmediate_operand")
   (match_operand:SI 2 "const_0_to_255_operand")
   (match_operand:VI8F_256_512 3 "vector_move_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[4];
  int k;

  /* Selector K occupies the two bits starting at bit 2 * K.  */
  for (k = 0; k < 4; k++)
    sel[k] = GEN_INT ((imm >> (2 * k)) & 3);

  emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
						   sel[0], sel[1],
						   sel[2], sel[3],
						   operands[3], operands[4]));
  DONE;
})
16592
;; Insn behind the immediate-controlled permute expanders.  The RTL
;; carries four separate selectors (operands 2..5, each 0..3); the output
;; code packs them back into one immediate, stores it in operands[2] and
;; prints it as the instruction's immediate.
(define_insn "<avx2_avx512>_perm<mode>_1<mask_name>"
  [(set (match_operand:VI8F_256_512 0 "register_operand" "=v")
	(vec_select:VI8F_256_512
	  (match_operand:VI8F_256_512 1 "nonimmediate_operand" "vm")
	  (parallel [(match_operand 2 "const_0_to_3_operand")
		     (match_operand 3 "const_0_to_3_operand")
		     (match_operand 4 "const_0_to_3_operand")
		     (match_operand 5 "const_0_to_3_operand")])))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
{
  /* Selector K goes into the two bits starting at bit 2 * K.  */
  const int imm = ((INTVAL (operands[2]) << 0)
		   | (INTVAL (operands[3]) << 2)
		   | (INTVAL (operands[4]) << 4)
		   | (INTVAL (operands[5]) << 6));

  operands[2] = GEN_INT (imm);
  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "<sseinsnmode>")])
16614
;; vperm2i128 on V4DI, modeled as UNSPEC_VPERMTI.  Operand 1 must be a
;; register, operand 2 may be memory, and operand 3 is the 8-bit
;; immediate.
(define_insn "avx2_permv2ti"
  [(set (match_operand:V4DI 0 "register_operand" "=x")
	(unspec:V4DI
	  [(match_operand:V4DI 1 "register_operand" "x")
	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
	  UNSPEC_VPERMTI))]
  "TARGET_AVX2"
  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
16627
;; vbroadcastsd from a register: duplicate element 0 of a V2DF register
;; into all four elements of a V4DF destination.  Only the register
;; source form is matched here.
(define_insn "avx2_vec_dupv4df"
  [(set (match_operand:V4DF 0 "register_operand" "=x")
	(vec_duplicate:V4DF
	  (vec_select:DF
	    (match_operand:V2DF 1 "register_operand" "x")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX2"
  "vbroadcastsd\t{%1, %0|%0, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "V4DF")])
16639
;; Broadcast element 0 of a full-width integer vector (register or
;; memory) into every element of the destination.
;;
;; The inner vec_select extracts a single element, so its mode is the
;; element mode <ssescalarmode>, not the vector mode; this also keeps the
;; pattern consistent with avx2_pbroadcast<mode>_1.
;;
;; Alternative 0 (register source) prints the source with the %x
;; modifier; alternative 1 (memory source) uses the <iptr> size prefix in
;; Intel syntax.
(define_insn "<avx512>_vec_dup<mode>_1"
  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
	(vec_duplicate:VI_AVX512BW
	  (vec_select:<ssescalarmode>
	    (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "@
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16653
;; Broadcast element 0 of a 128-bit source into a V48_AVX512VL
;; destination.  The output code chooses the template: V2DF is emitted as
;; vpbroadcastq, and the other modes use the mode's own broadcast
;; mnemonic, with the Intel-syntax source printed via %k for 4-byte
;; elements and via %q otherwise.
(define_insn "<avx512>_vec_dup<mode><mask_name>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
	(vec_duplicate:V48_AVX512VL
	  (vec_select:<ssescalarmode>
	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
{
  const bool dword_elt_p
    = GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4;

  /* AVX-512* has no DF broadcast into a 128-bit register, so the
     integer form stands in for it.  */
  if (<MODE>mode == V2DFmode)
    return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";

  return (dword_elt_p
	  ? "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
	  : "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}");
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16675
;; Counterpart of the V48_AVX512VL vec_dup pattern for the
;; VI12_AVX512VL modes; requires TARGET_AVX512BW.  A single template
;; covers both the register and the memory source.
(define_insn "<avx512>_vec_dup<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
	(vec_duplicate:VI12_AVX512VL
	  (vec_select:<ssescalarmode>
	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512BW"
  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16687
;; Replicate a whole 128-bit vector (<ssexmmmode>) across a V16FI
;; destination.  A register source is handled with vshuf*32x4 and
;; immediate 0, with the source printed via the %g modifier; a memory
;; source uses vbroadcast*32x4.
(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
	(vec_duplicate:V16FI
	  (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512F"
  "@
   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16699
;; Replicate a half-width vector (<ssehalfvecmode>) across a V8FI
;; destination.  A register source is handled with vshuf*64x2 and
;; immediate 0x44, with the source printed via the %g modifier; a memory
;; source uses vbroadcast*64x4.
(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
	(vec_duplicate:V8FI
	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512F"
  "@
   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
   vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16711
;; Broadcast a scalar into a VI12_AVX512VL vector; requires
;; TARGET_AVX512BW.  Alternative 0 takes the scalar from a vector
;; register or memory; alternative 1 takes it from a general register,
;; which is printed with the %k modifier.
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
	(vec_duplicate:VI12_AVX512VL
	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
  "TARGET_AVX512BW"
  "@
   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16723
;; Broadcast a scalar into a V48_AVX512VL vector.  Alternative 0 takes
;; the scalar from a vector register or memory.  Alternative 1 takes it
;; from a general register; the "enabled" attribute turns that
;; alternative on only when the scalar mode is an integer mode, and for
;; DImode only when TARGET_64BIT.
(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
	(vec_duplicate:V48_AVX512VL
	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
  "TARGET_AVX512F"
  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")
   (set (attr "enabled")
     (if_then_else (eq_attr "alternative" "1")
	(symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
		     && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
	(const_int 1)))])
16738
;; Broadcast an SF scalar into V4SF.  Alternatives:
;;   0 (avx)   register source, vshufps with immediate 0;
;;   1 (avx)   memory source, vbroadcastss;
;;   2 (noavx) source tied to the destination, shufps with immediate 0.
(define_insn "vec_dupv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_duplicate:V4SF
	  (match_operand:SF 1 "nonimmediate_operand" "x,m,0")))]
  "TARGET_SSE"
  "@
   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "avx,avx,noavx")
   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "vex,vex,orig")
   (set_attr "mode" "V4SF")])
16754
;; Broadcast an SI scalar into V4SI.  Alternatives:
;;   0 (sse2)  register source, pshufd/vpshufd with immediate 0;
;;   1 (avx)   memory source, vbroadcastss;
;;   2 (noavx) source tied to the destination, shufps with immediate 0.
(define_insn "*vec_dupv4si"
  [(set (match_operand:V4SI 0 "register_operand"     "=x,x,x")
	(vec_duplicate:V4SI
	  (match_operand:SI 1 "nonimmediate_operand" " x,m,0")))]
  "TARGET_SSE"
  "@
   %vpshufd\t{$0, %1, %0|%0, %1, 0}
   vbroadcastss\t{%1, %0|%0, %1}
   shufps\t{$0, %0, %0|%0, %0, 0}"
  [(set_attr "isa" "sse2,avx,noavx")
   (set_attr "type" "sselog1,ssemov,sselog1")
   (set_attr "length_immediate" "1,0,1")
   (set_attr "prefix_extra" "0,1,*")
   (set_attr "prefix" "maybe_vex,vex,orig")
   (set_attr "mode" "TI,V4SF,V4SF")])
16770
;; Broadcast a DI scalar into V2DI.  Alternatives:
;;   0 (sse2_noavx) source tied to the destination, punpcklqdq;
;;   1 (avx)        register source, vpunpcklqdq;
;;   2 (sse3)       memory source, movddup/vmovddup;
;;   3 (noavx)      source tied to the destination, movlhps.
(define_insn "*vec_dupv2di"
  [(set (match_operand:V2DI 0 "register_operand"     "=x,x,x,x")
	(vec_duplicate:V2DI
	  (match_operand:DI 1 "nonimmediate_operand" " 0,x,m,0")))]
  "TARGET_SSE"
  "@
   punpcklqdq\t%0, %0
   vpunpcklqdq\t{%d1, %0|%0, %d1}
   %vmovddup\t{%1, %0|%0, %1}
   movlhps\t%0, %0"
  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
   (set_attr "prefix" "orig,vex,maybe_vex,orig")
   (set_attr "mode" "TI,TI,DF,V4SF")])
16785
;; vbroadcasti128: load a half-width vector from memory and place it in
;; both halves of a 256-bit integer vector (vec_concat of operand 1 with
;; itself).  Only a memory source is accepted.
(define_insn "avx2_vbroadcasti128_<mode>"
  [(set (match_operand:VI_256 0 "register_operand" "=x")
	(vec_concat:VI_256
	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m")
	  (match_dup 1)))]
  "TARGET_AVX2"
  "vbroadcasti128\t{%1, %0|%0, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
16797
;; Modes handled by the AVX vec_dup patterns: the 256-bit vectors with
;; 32-bit or 64-bit elements, integer and floating point.
(define_mode_iterator AVX_VEC_DUP_MODE
  [V8SI V8SF V4DI V4DF])
;; Modes handled by the AVX2 vec_dup patterns: 128-bit and 256-bit
;; integer vectors with 8-bit, 16-bit or 32-bit elements.
(define_mode_iterator AVX2_VEC_DUP_MODE
  [V32QI V16QI V16HI V8HI V8SI V4SI])
16804
;; AVX2 scalar broadcast for AVX2_VEC_DUP_MODE.  Alternatives 0 and 1
;; broadcast from memory and from an SSE register.  Alternative 2 accepts
;; a general register source, emits "#" and therefore relies on a
;; splitter; it is restricted to isa noavx512vl.
(define_insn "*vec_dup<mode>"
  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,Yi")
	(vec_duplicate:AVX2_VEC_DUP_MODE
	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
  "TARGET_AVX2"
  "@
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
   #"
  [(set_attr "isa" "*,*,noavx512vl")
   (set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
16819
;; Scalar broadcast for AVX_VEC_DUP_MODE.  Alternatives, by "isa":
;;   0 (avx2)    memory source;
;;   1 (noavx2)  memory source, vbroadcastss/sd;
;;   2 (avx2)    SSE register source, printed with %x;
;;   3 (avx512f) "v" register source, destination printed with %g;
;;   4 (noavx2)  register source, emitted as "#" and left to a splitter.
(define_insn "vec_dup<mode>"
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
	(vec_duplicate:AVX_VEC_DUP_MODE
	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
  "TARGET_AVX"
  "@
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
   vbroadcast<ssescalarmodesuffix>\t{%1, %0|%0, %1}
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
   #"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "maybe_evex")
   (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
   (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
16836
;; Post-reload split of an AVX2 vec_duplicate whose source is a general
;; register.  The scalar is first inserted into element 0 of the low
;; 128 bits of the destination (vec_setv4si_0 with a zero vector), and
;; the destination's low 128 bits are then broadcast with
;; avx2_pbroadcast<mode>.
(define_split
  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
	(vec_duplicate:AVX2_VEC_DUP_MODE
	  (match_operand:<ssescalarmode> 1 "register_operand")))]
  "TARGET_AVX2
   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
      available, because then we can broadcast from GPRs directly.
      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
      for V*SI mode it requires just -mavx512vl.  */
   && !(TARGET_AVX512VL
	&& (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
   && reload_completed && GENERAL_REG_P (operands[1])"
  [(const_int 0)]
{
  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
				CONST0_RTX (V4SImode),
				gen_lowpart (SImode, operands[1])));
  emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
					gen_lowpart (<ssexmmmode>mode,
						     operands[0])));
  DONE;
})
16859
;; Post-reload split of a 256-bit vec_duplicate from a register when AVX2
;; is not available.  The scalar is first duplicated into a half-width
;; vector, then that half is concatenated with itself.  Operand 2 is the
;; half-width view of the destination: same hard register number as
;; operand 0, in <ssehalfvecmode>.
(define_split
  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
	(vec_duplicate:AVX_VEC_DUP_MODE
	  (match_operand:<ssescalarmode> 1 "register_operand")))]
  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
  [(set (match_dup 2)
	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
   (set (match_dup 0)
	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
  "operands[2] = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0]));")
16870
;; Place a half-width vector in both halves of a 256-bit vector.
;; Alternatives:
;;   0  memory source, vbroadcast<i128>;
;;   1  source already in the low half of the destination (constraint
;;      "0"), vinsert<i128> with immediate 1;
;;   2  source in another register, vperm2<i128> with immediate 0 and the
;;      source printed via the %t modifier.
(define_insn "avx_vbroadcastf128_<mode>"
  [(set (match_operand:V_256 0 "register_operand" "=x,x,x")
	(vec_concat:V_256
	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
	  (match_dup 1)))]
  "TARGET_AVX"
  "@
   vbroadcast<i128>\t{%1, %0|%0, %1}
   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
  [(set_attr "type" "ssemov,sselog1,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,1,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
16886
;; For broadcast[i|f]32x2.  Yes there is no v4sf version, only v4si.
;; The 512-bit modes are unconditional; the 256-bit and 128-bit modes
;; additionally require TARGET_AVX512VL.
(define_mode_iterator VI4F_BRCST32x2
  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
   V16SF (V8SF "TARGET_AVX512VL")])
16891
;; Mode of a two-element group of 64-bit elements for each vector mode
;; listed.
(define_mode_attr 64x2mode
  [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])

;; Mode of a two-element group of 32-bit elements for each vector mode
;; listed.
(define_mode_attr 32x2mode
  [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
  (V8SF "V2SF") (V4SI "V2SI")])
16898
;; vbroadcast[i|f]32x2: select elements 0 and 1 of a 128-bit source
;; (register or memory) as a <32x2mode> pair and duplicate that pair
;; across the destination.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
  [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
	(vec_duplicate:VI4F_BRCST32x2
	  (vec_select:<32x2mode>
	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
	    (parallel [(const_int 0) (const_int 1)]))))]
  "TARGET_AVX512DQ"
  "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16911
;; Replicate a 128-bit vector across a 256-bit VI4F_256 destination;
;; requires TARGET_AVX512VL.  A register source is handled with
;; vshuf*32x4 and immediate 0 (source printed via %t); a memory source
;; uses vbroadcast*32x4.
(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
        (vec_duplicate:VI4F_256
         (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512VL"
  "@
   vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16924
;; Replicate a half-width vector across a V16FI destination; requires
;; TARGET_AVX512DQ.  A register source is handled with vshuf*32x4 and
;; immediate 0x44 (source printed via %g); a memory source uses
;; vbroadcast*32x8.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
       (vec_duplicate:V16FI
         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512DQ"
  "@
   vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
   vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16937
;; For broadcast[i|f]64x2.  The 512-bit modes are unconditional; the
;; 256-bit modes additionally require TARGET_AVX512VL.
(define_mode_iterator VI8F_BRCST64x2
  [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
16941
;; Replicate a <64x2mode> vector across a VI8F_BRCST64x2 destination;
;; requires TARGET_AVX512DQ.  A register source is handled with
;; vshuf*64x2 and immediate 0, with the source printed through the
;; <concat_tg_mode> modifier; a memory source uses vbroadcast*64x2.
(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
  [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
       (vec_duplicate:VI8F_BRCST64x2
         (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
  "TARGET_AVX512DQ"
  "@
   vshuf<shuffletype>64x2\t{$0x0, %<concat_tg_mode>1, %<concat_tg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<concat_tg_mode>1, %<concat_tg_mode>1, 0x0}
   vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
16954
;; vpbroadcastmb2q: zero-extend a QImode value held in a mask register
;; (constraint "Yk") to DImode and duplicate it into every element of the
;; destination.
(define_insn "avx512cd_maskb_vec_dup<mode>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(vec_duplicate:VI8_AVX512VL
	  (zero_extend:DI
	    (match_operand:QI 1 "register_operand" "Yk"))))]
  "TARGET_AVX512CD"
  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
  [(set_attr "type" "mskmov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
16965
;; vpbroadcastmw2d: zero-extend an HImode value held in a mask register
;; (constraint "Yk") to SImode and duplicate it into every element of the
;; destination.
(define_insn "avx512cd_maskw_vec_dup<mode>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(vec_duplicate:VI4_AVX512VL
	  (zero_extend:SI
	    (match_operand:HI 1 "register_operand" "Yk"))))]
  "TARGET_AVX512CD"
  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
  [(set_attr "type" "mskmov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
16976
16977;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
16978;; If it so happens that the input is in memory, use vbroadcast.
16979;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF broadcast written as a vec_select.  Operand 3 is the first index
;; of the selection parallel, i.e. the element being broadcast.
;;   Alternatives 0 and 1 (memory source): the address is advanced to the
;;   selected element and vbroadcastss loads it.
;;   Alternative 2 (register source): vpermilps with the element index
;;   replicated into each 2-bit selector field.
(define_insn "*avx_vperm_broadcast_v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
	(vec_select:V4SF
	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
{
  const int elt = INTVAL (operands[3]);

  if (which_alternative == 2)
    {
      /* For an index in 0..3, multiplying by 0x55 copies it into all
	 four 2-bit fields of the immediate.  */
      operands[2] = GEN_INT (elt * 0x55);
      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
    }

  if (which_alternative != 0 && which_alternative != 1)
    gcc_unreachable ();

  /* Memory source: point at the selected 4-byte element.  */
  operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
  return "vbroadcastss\t{%1, %0|%0, %k1}";
}
  [(set_attr "type" "ssemov,ssemov,sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "0,0,1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "SF,SF,V4SF")])
17007
;; 256-bit float broadcast written as a vec_select.  The insn is always
;; emitted as "#" and split after reload; the split condition excludes
;; V4DF when AVX2 is enabled.
;;   Memory source: the preparation code narrows operand 1 to the
;;   selected scalar and the replacement pattern (a vec_duplicate of
;;   operand 1) is used.
;;   Register source: the preparation code emits its own sequence and
;;   ends with DONE, so the replacement pattern is not used.
(define_insn_and_split "*avx_vperm_broadcast_<mode>"
  [(set (match_operand:VF_256 0 "register_operand" "=x,x,x")
	(vec_select:VF_256
	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?x")
	  (match_parallel 2 "avx_vbroadcast_operand"
	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
  "TARGET_AVX"
  "#"
  "&& reload_completed && (<MODE>mode != V4DFmode || !TARGET_AVX2)"
  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
{
  rtx op0 = operands[0], op1 = operands[1];
  /* Index of the element being broadcast.  */
  int elt = INTVAL (operands[3]);

  if (REG_P (op1))
    {
      int mask;

      /* With AVX2, element 0 of a register can be broadcast directly
	 through the vec_dup<mode> pattern.  */
      if (TARGET_AVX2 && elt == 0)
	{
	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
							  op1)));
	  DONE;
	}

      /* Shuffle element we care about into all elements of the 128-bit lane.
	 The other lane gets shuffled too, but we don't care.  */
      if (<MODE>mode == V4DFmode)
	mask = (elt & 1 ? 15 : 0);
      else
	mask = (elt & 3) * 0x55;
      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));

      /* Shuffle the lane we care about into both lanes of the dest.  */
      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
      DONE;
    }

  /* Memory source: re-address operand 1 as the selected scalar and fall
     through to the vec_duplicate replacement pattern.  */
  operands[1] = adjust_address (op1, <ssescalarmode>mode,
				elt * GET_MODE_SIZE (<ssescalarmode>mode));
})
17050
;; Immediate vpermilpd expander (VF2 modes).  The immediate in operand 2
;; is rewritten into an explicit vec_select PARALLEL.  Elements are
;; handled in pairs: element J takes bit J of the immediate as its
;; selector, added to the index of the first element of its pair.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF2 0 "register_operand")
	(vec_select:VF2
	  (match_operand:VF2 1 "nonimmediate_operand")
	  (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int j;

  for (j = 0; j < <ssescalarnum>; j++)
    {
      /* Index of the first element of the pair containing J.  */
      const int pair_base = j & ~1;
      sel[j] = GEN_INT (((imm >> j) & 1) + pair_base);
    }

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17071
;; Expander for the immediate form of vpermil<ssemodesuffix> on VF1
;; modes.  The 8-bit immediate in operand 2 is rewritten into the
;; explicit element-selection PARALLEL used by the vec_select template.
(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
  [(set (match_operand:VF1 0 "register_operand")
	(vec_select:VF1
	  (match_operand:VF1 1 "nonimmediate_operand")
	  (match_operand:SI 2 "const_0_to_255_operand")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
{
  const int imm = INTVAL (operands[2]);
  rtx sel[<ssescalarnum>];
  int elt;

  /* The immediate holds four 2-bit selectors.  Position ELT & 3 within
     each aligned group of four elements uses selector number ELT & 3,
     and the selected index is relative to the start of that group.  */
  for (elt = 0; elt < <ssescalarnum>; ++elt)
    sel[elt] = GEN_INT ((elt & ~3) + ((imm >> (2 * (elt & 3))) & 3));

  operands[2]
    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, sel));
})
17094
;; Immediate form of vpermil<ssemodesuffix>, matched as a vec_select
;; with an explicit PARALLEL.  The pattern only matches when
;; avx_vpermilp_parallel accepts the PARALLEL for the mode; the output
;; code converts the PARALLEL back into the instruction immediate.
(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
	(vec_select:VF
	  (match_operand:VF 1 "nonimmediate_operand" "vm")
	  (match_parallel 2 ""
	    [(match_operand 3 "const_int_operand")])))]
  "TARGET_AVX && <mask_mode512bit_condition>
   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
{
  /* The helper's result is used as a nonzero "matched" flag in the insn
     condition; the immediate is that result minus one.  */
  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
  operands[2] = GEN_INT (mask);
  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "<sseinsnmode>")])
17113
;; Variable form of vpermil<ssemodesuffix>: operand 1 is the data
;; register and operand 2 is an integer vector (register or memory) of
;; the matching <sseintvecmode>.  Represented as UNSPEC_VPERMIL.
(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
  [(set (match_operand:VF 0 "register_operand" "=v")
	(unspec:VF
	  [(match_operand:VF 1 "register_operand" "v")
	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMIL))]
  "TARGET_AVX && <mask_mode512bit_condition>"
  "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "btver2_decode" "vector")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "<sseinsnmode>")])
17127
;; Zero-masking expander for vpermi2 on VI48F modes (TARGET_AVX512F).
;; It emits nothing itself: it forwards operands 0-3 to the _maskz_1
;; generator, followed by a zero vector of the operation mode and the
;; mask register (operand 4).
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI48F 0 "register_operand" "=v")
   (match_operand:VI48F 1 "register_operand" "v")
   (match_operand:<sseintvecmode> 2 "register_operand" "0")
   (match_operand:VI48F 3 "nonimmediate_operand" "vm")
   (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
  "TARGET_AVX512F"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17141
;; Zero-masking expander for vpermi2 on VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Forwards operands 0-3 to the _maskz_1
;; generator, followed by a zero vector of the operation mode and the
;; mask register (operand 4).
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI1_AVX512VL 0 "register_operand")
   (match_operand:VI1_AVX512VL 1 "register_operand")
   (match_operand:<sseintvecmode> 2 "register_operand")
   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512VBMI"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17155
;; Zero-masking expander for vpermi2 on VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Forwards operands 0-3 to the _maskz_1 generator,
;; followed by a zero vector of the operation mode and the mask
;; register (operand 4).
(define_expand "<avx512>_vpermi2var<mode>3_maskz"
  [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
   (match_operand:VI2_AVX512VL 1 "register_operand" "v")
   (match_operand:<sseintvecmode> 2 "register_operand" "0")
   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
   (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
  "TARGET_AVX512BW"
{
  emit_insn (gen_<avx512>_vpermi2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17169
;; vpermi2 for VI48F modes (TARGET_AVX512F), represented as
;; UNSPEC_VPERMI2.  Operand 2 (integer vector of <sseintvecmode>) is
;; tied to the destination by the "0" constraint, so only operands 0, 1
;; and 3 appear in the assembler template.
(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_operand:<sseintvecmode> 2 "register_operand" "0")
	   (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMI2))]
  "TARGET_AVX512F"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17182
;; vpermi2 for VI1_AVX512VL modes (TARGET_AVX512VBMI), represented as
;; UNSPEC_VPERMI2.  Operand 2 (integer vector of <sseintvecmode>) is
;; tied to the destination by the "0" constraint, so only operands 0, 1
;; and 3 appear in the assembler template.
(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	   (match_operand:<sseintvecmode> 2 "register_operand" "0")
	   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMI2))]
  "TARGET_AVX512VBMI"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17195
;; vpermi2 for VI2_AVX512VL modes (TARGET_AVX512BW), represented as
;; UNSPEC_VPERMI2.  Operand 2 (integer vector of <sseintvecmode>) is
;; tied to the destination by the "0" constraint, so only operands 0, 1
;; and 3 appear in the assembler template.
(define_insn "<avx512>_vpermi2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
	   (match_operand:<sseintvecmode> 2 "register_operand" "0")
	   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMI2))]
  "TARGET_AVX512BW"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17208
;; Merge-masked vpermi2 for VI48F modes (TARGET_AVX512F).  The
;; UNSPEC_VPERMI2_MASK result is vec_merged with operand 0 itself
;; (match_dup 0) under the mask register in operand 4; operand 2 is
;; tied to the destination by the "0" constraint.
(define_insn "<avx512>_vpermi2var<mode>3_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(vec_merge:VI48F
	  (unspec:VI48F
	    [(match_operand:VI48F 1 "register_operand" "v")
	    (match_operand:<sseintvecmode> 2 "register_operand" "0")
	    (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMI2_MASK)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17224
;; Merge-masked vpermi2 for VI1_AVX512VL modes (TARGET_AVX512VBMI).
;; The UNSPEC_VPERMI2_MASK result is vec_merged with operand 0 itself
;; (match_dup 0) under the mask register in operand 4; operand 2 is
;; tied to the destination by the "0" constraint.
(define_insn "<avx512>_vpermi2var<mode>3_mask"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI1_AVX512VL
	  (unspec:VI1_AVX512VL
	    [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	    (match_operand:<sseintvecmode> 2 "register_operand" "0")
	    (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMI2_MASK)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17240
;; Merge-masked vpermi2 for VI2_AVX512VL modes (TARGET_AVX512BW).  The
;; UNSPEC_VPERMI2_MASK result is vec_merged with operand 0 itself
;; (match_dup 0) under the mask register in operand 4; operand 2 is
;; tied to the destination by the "0" constraint.
(define_insn "<avx512>_vpermi2var<mode>3_mask"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI2_AVX512VL
	  (unspec:VI2_AVX512VL
	    [(match_operand:VI2_AVX512VL 1 "register_operand" "v")
	    (match_operand:<sseintvecmode> 2 "register_operand" "0")
	    (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMI2_MASK)
	  (match_dup 0)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17256
;; Zero-masking expander for vpermt2 on VI48F modes (TARGET_AVX512F).
;; Here operand 1 is the integer vector of <sseintvecmode> and operand 2
;; is a data vector.  Forwards operands 0-3 to the _maskz_1 generator,
;; followed by a zero vector of the operation mode and the mask
;; register (operand 4).
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI48F 0 "register_operand" "=v")
   (match_operand:<sseintvecmode> 1 "register_operand" "v")
   (match_operand:VI48F 2 "register_operand" "0")
   (match_operand:VI48F 3 "nonimmediate_operand" "vm")
   (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
  "TARGET_AVX512F"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17270
;; Zero-masking expander for vpermt2 on VI1_AVX512VL modes
;; (TARGET_AVX512VBMI).  Forwards operands 0-3 to the _maskz_1
;; generator, followed by a zero vector of the operation mode and the
;; mask register (operand 4).
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI1_AVX512VL 0 "register_operand" "=v")
   (match_operand:<sseintvecmode> 1 "register_operand" "v")
   (match_operand:VI1_AVX512VL 2 "register_operand" "0")
   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")
   (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
  "TARGET_AVX512VBMI"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17284
;; Zero-masking expander for vpermt2 on VI2_AVX512VL modes
;; (TARGET_AVX512BW).  Forwards operands 0-3 to the _maskz_1 generator,
;; followed by a zero vector of the operation mode and the mask
;; register (operand 4).
(define_expand "<avx512>_vpermt2var<mode>3_maskz"
  [(match_operand:VI2_AVX512VL 0 "register_operand" "=v")
   (match_operand:<sseintvecmode> 1 "register_operand" "v")
   (match_operand:VI2_AVX512VL 2 "register_operand" "0")
   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")
   (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")]
  "TARGET_AVX512BW"
{
  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
	operands[0], operands[1], operands[2], operands[3],
	CONST0_RTX (<MODE>mode), operands[4]));
  DONE;
})
17298
;; vpermt2 for VI48F modes (TARGET_AVX512F), represented as
;; UNSPEC_VPERMT2.  Operand 1 is the integer vector of <sseintvecmode>;
;; the data vector in operand 2 is tied to the destination by the "0"
;; constraint, so only operands 0, 1 and 3 appear in the template.
(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	   (match_operand:VI48F 2 "register_operand" "0")
	   (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMT2))]
  "TARGET_AVX512F"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17311
;; vpermt2 for VI1_AVX512VL modes (TARGET_AVX512VBMI), represented as
;; UNSPEC_VPERMT2.  Operand 1 is the integer vector of <sseintvecmode>;
;; the data vector in operand 2 is tied to the destination by the "0"
;; constraint, so only operands 0, 1 and 3 appear in the template.
(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL 2 "register_operand" "0")
	   (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMT2))]
  "TARGET_AVX512VBMI"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17324
;; vpermt2 for VI2_AVX512VL modes (TARGET_AVX512BW), represented as
;; UNSPEC_VPERMT2.  Operand 1 is the integer vector of <sseintvecmode>;
;; the data vector in operand 2 is tied to the destination by the "0"
;; constraint, so only operands 0, 1 and 3 appear in the template.
(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	   (match_operand:VI2_AVX512VL 2 "register_operand" "0")
	   (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPERMT2))]
  "TARGET_AVX512BW"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17337
;; Merge-masked vpermt2 for VI48F modes (TARGET_AVX512F).  The
;; UNSPEC_VPERMT2 result is vec_merged with operand 2 (match_dup 2),
;; which is tied to the destination by the "0" constraint, under the
;; mask register in operand 4.
(define_insn "<avx512>_vpermt2var<mode>3_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(vec_merge:VI48F
	  (unspec:VI48F
	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	    (match_operand:VI48F 2 "register_operand" "0")
	    (match_operand:VI48F 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMT2)
	  (match_dup 2)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17353
;; Merge-masked vpermt2 for VI1_AVX512VL modes (TARGET_AVX512VBMI).
;; The UNSPEC_VPERMT2 result is vec_merged with operand 2 (match_dup 2),
;; which is tied to the destination by the "0" constraint, under the
;; mask register in operand 4.
(define_insn "<avx512>_vpermt2var<mode>3_mask"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI1_AVX512VL
	  (unspec:VI1_AVX512VL
	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	    (match_operand:VI1_AVX512VL 2 "register_operand" "0")
	    (match_operand:VI1_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMT2)
	  (match_dup 2)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VBMI"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17369
;; Merge-masked vpermt2 for VI2_AVX512VL modes (TARGET_AVX512BW).  The
;; UNSPEC_VPERMT2 result is vec_merged with operand 2 (match_dup 2),
;; which is tied to the destination by the "0" constraint, under the
;; mask register in operand 4.
(define_insn "<avx512>_vpermt2var<mode>3_mask"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI2_AVX512VL
	  (unspec:VI2_AVX512VL
	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
	    (match_operand:VI2_AVX512VL 2 "register_operand" "0")
	    (match_operand:VI2_AVX512VL 3 "nonimmediate_operand" "vm")]
	    UNSPEC_VPERMT2)
	  (match_dup 2)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512BW"
  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
17385
;; Expander for vperm2f128 with an 8-bit immediate in operand 3.
;; When neither bit 3 nor bit 7 of the immediate is set, the operation
;; is emitted as a vec_select over the vec_concat of operands 1 and 2;
;; otherwise the UNSPEC_VPERMIL2F128 template below is emitted as is.
(define_expand "avx_vperm2f128<mode>3"
  [(set (match_operand:AVX256MODE2P 0 "register_operand")
	(unspec:AVX256MODE2P
	  [(match_operand:AVX256MODE2P 1 "register_operand")
	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
{
  const int imm = INTVAL (operands[3]);

  if ((imm & 0x88) == 0)
    {
      const int nelt = <ssescalarnum>;
      const int half = nelt / 2;
      /* First index, within the concatenation of operands 1 and 2, of
	 the half-vector chosen for each half of the result.  */
      const int lo_base = (imm & 3) * half;
      const int hi_base = ((imm >> 4) & 3) * half;
      rtx sel[<ssescalarnum>];
      rtx both, par, pick;
      int k;

      for (k = 0; k < half; ++k)
	{
	  sel[k] = GEN_INT (lo_base + k);
	  sel[half + k] = GEN_INT (hi_base + k);
	}

      both = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
				 operands[1], operands[2]);
      par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, sel));
      pick = gen_rtx_VEC_SELECT (<MODE>mode, both, par);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], pick));
      DONE;
    }
})
17418
;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
;; means that in order to represent this properly in rtl we'd have to
;; nest *another* vec_concat with a zero operand and do the select from
;; a 4x wide vector.  That doesn't seem very nice.
;; This pattern therefore keeps the operation opaque as
;; UNSPEC_VPERMIL2F128 and passes the immediate (operand 3) through to
;; the instruction unchanged.
(define_insn "*avx_vperm2f128<mode>_full"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
	(unspec:AVX256MODE2P
	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
	  UNSPEC_VPERMIL2F128))]
  "TARGET_AVX"
  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17437
;; vperm2f128 expressed as a vec_select over the vec_concat of operands
;; 1 and 2.  It matches only when avx_vperm2f128_parallel accepts the
;; PARALLEL for the mode.  Two specific immediates are output as
;; vinsert<i128> instead of vperm2<i128>.
(define_insn "*avx_vperm2f128<mode>_nozero"
  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
	(vec_select:AVX256MODE2P
	  (vec_concat:<ssedoublevecmode>
	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
	  (match_parallel 3 ""
	    [(match_operand 4 "const_int_operand")])))]
  "TARGET_AVX
   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
{
  /* The helper's result is used as a nonzero "matched" flag in the insn
     condition; the immediate is that result minus one.  */
  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
  /* 0x12 and 0x20 are emitted as an insert of operand 2 (printed with
     the %x modifier) into operand 1 at position 0 or 1.  */
  if (mask == 0x12)
    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
  if (mask == 0x20)
    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
  operands[3] = GEN_INT (mask);
  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17462
;; A vec_select of a single 128-bit register whose PARALLEL is accepted
;; by palignr_operand, output as palignr (non-AVX alternative, source
;; tied to the destination) or vpalignr with operand 1 used as both
;; sources (AVX alternative).
(define_insn "*ssse3_palignr<mode>_perm"
  [(set (match_operand:V_128 0 "register_operand" "=x,x")
      (vec_select:V_128
	(match_operand:V_128 1 "register_operand" "0,x")
	(match_parallel 2 "palignr_operand"
	  [(match_operand 3 "const_int_operand" "n, n")])))]
  "TARGET_SSSE3"
{
  /* The immediate is the first selected element index (operand 3)
     scaled by the element size in bytes.  */
  machine_mode imode = GET_MODE_INNER (GET_MODE (operands[0]));
  operands[2] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (imode));

  switch (which_alternative)
    {
    case 0:
      return "palignr\t{%2, %1, %0|%0, %1, %2}";
    case 1:
      return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "sseishft")
   (set_attr "atom_unit" "sishuf")
   (set_attr "prefix_data16" "1,*")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "orig,vex")])
17491
;; Masked insertion of a half-width vector (operand 2) into a 256-bit
;; vector (operand 1).  Operand 3 (0 or 1) chooses between the masked
;; vec_set_lo and vec_set_hi generators; operands 4 and 5 are forwarded
;; to the chosen generator as its last two arguments.
(define_expand "avx512vl_vinsert<mode>"
  [(match_operand:VI48F_256 0 "register_operand")
   (match_operand:VI48F_256 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")
   (match_operand:VI48F_256 4 "register_operand")
   (match_operand:<avx512fmaskmode> 5 "register_operand")]
  "TARGET_AVX512VL"
{
  rtx (*gen_set_half) (rtx, rtx, rtx, rtx, rtx);
  const HOST_WIDE_INT which_half = INTVAL (operands[3]);

  if (which_half == 0)
    gen_set_half = gen_vec_set_lo_<mode>_mask;
  else if (which_half == 1)
    gen_set_half = gen_vec_set_hi_<mode>_mask;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2],
			   operands[4], operands[5]));
  DONE;
})
17519
;; Insertion of a half-width vector (operand 2) into a 256-bit vector
;; (operand 1).  Operand 3 (0 or 1) chooses between the vec_set_lo and
;; vec_set_hi generators for the mode.
(define_expand "avx_vinsertf128<mode>"
  [(match_operand:V_256 0 "register_operand")
   (match_operand:V_256 1 "register_operand")
   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
   (match_operand:SI 3 "const_0_to_1_operand")]
  "TARGET_AVX"
{
  rtx (*gen_set_half) (rtx, rtx, rtx);
  const HOST_WIDE_INT which_half = INTVAL (operands[3]);

  if (which_half == 0)
    gen_set_half = gen_vec_set_lo_<mode>;
  else if (which_half == 1)
    gen_set_half = gen_vec_set_hi_<mode>;
  else
    gcc_unreachable ();

  emit_insn (gen_set_half (operands[0], operands[1], operands[2]));
  DONE;
})
17544
;; Replace the low half of a VI8F_256 vector: the result is the
;; concatenation of operand 2 with elements 2 and 3 of operand 1.
;; Output as vinsert<shuffletype>64x2 under TARGET_AVX512VL and as
;; vinsert<i128> otherwise.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
	(vec_concat:VI8F_256
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (parallel [(const_int 2) (const_int 3)]))))]
  "TARGET_AVX"
{
  /* NOTE(review): the 64x2 form is selected on TARGET_AVX512VL alone;
     confirm whether it additionally requires TARGET_AVX512DQ.  */
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
  else
    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17564
;; Replace the high half of a VI8F_256 vector: the result is the
;; concatenation of elements 0 and 1 of operand 1 with operand 2.
;; Output as vinsert<shuffletype>64x2 under TARGET_AVX512VL and as
;; vinsert<i128> otherwise.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
	(vec_concat:VI8F_256
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI8F_256 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX"
{
  /* NOTE(review): the 64x2 form is selected on TARGET_AVX512VL alone;
     confirm whether it additionally requires TARGET_AVX512DQ.  */
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
  else
    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17584
;; Replace the low half of a VI4F_256 vector: the result is the
;; concatenation of operand 2 with elements 4-7 of operand 1.
;; Output as vinsert<shuffletype>32x4 under TARGET_AVX512VL and as
;; vinsert<i128> otherwise.
(define_insn "vec_set_lo_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
	(vec_concat:VI4F_256
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (parallel [(const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))))]
  "TARGET_AVX"
{
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
  else
    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17605
;; Replace the high half of a VI4F_256 vector: the result is the
;; concatenation of elements 0-3 of operand 1 with operand 2.
;; Output as vinsert<shuffletype>32x4 under TARGET_AVX512VL and as
;; vinsert<i128> otherwise.
(define_insn "vec_set_hi_<mode><mask_name>"
  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
	(vec_concat:VI4F_256
	  (vec_select:<ssehalfvecmode>
	    (match_operand:VI4F_256 1 "register_operand" "v")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)]))
	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX"
{
  if (TARGET_AVX512VL)
    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
  else
    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
}
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
17626
;; Replace the low half of a V16HI vector: the result is the
;; concatenation of the V8HI operand 2 with elements 8-15 of operand 1.
;; Output as a 128-bit vinsert with immediate 0.
(define_insn "vec_set_lo_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_concat:V16HI
	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")
	  (vec_select:V8HI
	    (match_operand:V16HI 1 "register_operand" "x")
	    (parallel [(const_int 8) (const_int 9)
		       (const_int 10) (const_int 11)
		       (const_int 12) (const_int 13)
		       (const_int 14) (const_int 15)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
17644
;; Replace the high half of a V16HI vector: the result is the
;; concatenation of elements 0-7 of operand 1 with the V8HI operand 2.
;; Output as a 128-bit vinsert with immediate 1.
(define_insn "vec_set_hi_v16hi"
  [(set (match_operand:V16HI 0 "register_operand" "=x")
	(vec_concat:V16HI
	  (vec_select:V8HI
	    (match_operand:V16HI 1 "register_operand" "x")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)]))
	  (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
17662
;; Replace the low half of a V32QI vector: the result is the
;; concatenation of the V16QI operand 2 with elements 16-31 of
;; operand 1.  Output as a 128-bit vinsert with immediate 0.
(define_insn "vec_set_lo_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
	(vec_concat:V32QI
	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")
	  (vec_select:V16QI
	    (match_operand:V32QI 1 "register_operand" "x")
	    (parallel [(const_int 16) (const_int 17)
		       (const_int 18) (const_int 19)
		       (const_int 20) (const_int 21)
		       (const_int 22) (const_int 23)
		       (const_int 24) (const_int 25)
		       (const_int 26) (const_int 27)
		       (const_int 28) (const_int 29)
		       (const_int 30) (const_int 31)]))))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
17684
;; Replace the high half of a V32QI vector: the result is the
;; concatenation of elements 0-15 of operand 1 with the V16QI
;; operand 2.  Output as a 128-bit vinsert with immediate 1.
(define_insn "vec_set_hi_v32qi"
  [(set (match_operand:V32QI 0 "register_operand" "=x")
	(vec_concat:V32QI
	  (vec_select:V16QI
	    (match_operand:V32QI 1 "register_operand" "x")
	    (parallel [(const_int 0) (const_int 1)
		       (const_int 2) (const_int 3)
		       (const_int 4) (const_int 5)
		       (const_int 6) (const_int 7)
		       (const_int 8) (const_int 9)
		       (const_int 10) (const_int 11)
		       (const_int 12) (const_int 13)
		       (const_int 14) (const_int 15)]))
	  (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
  "TARGET_AVX"
  "vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
  [(set_attr "type" "sselog")
   (set_attr "prefix_extra" "1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "vex")
   (set_attr "mode" "OI")])
17706
;; Masked load, represented as UNSPEC_MASKMOV: operand 1 is the memory
;; source and operand 2 is the mask, an integer vector register of the
;; matching <sseintvecmode>.
(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
	(unspec:V48_AVX2
	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
	  UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "<sseinsnmode>")])
17720
;; Masked store, represented as UNSPEC_MASKMOV: operand 1 is the mask
;; (integer vector register) and operand 2 the data register.  The
;; memory destination is also an input of the unspec (match_dup 0) and
;; carries the "+m" constraint.
(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
	(unspec:V48_AVX2
	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
	   (match_operand:V48_AVX2 2 "register_operand" "x")
	   (match_dup 0)]
	  UNSPEC_MASKMOV))]
  "TARGET_AVX"
  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "vex")
   (set_attr "btver2_decode" "vector") 
   (set_attr "mode" "<sseinsnmode>")])
17735
;; Named expander "maskload<mode>" for V48_AVX2 modes.  It has no
;; preparation code and produces the same UNSPEC_MASKMOV form as the
;; maskload insn (operand 1 memory source, operand 2 mask).
(define_expand "maskload<mode>"
  [(set (match_operand:V48_AVX2 0 "register_operand")
	(unspec:V48_AVX2
	  [(match_operand:<sseintvecmode> 2 "register_operand")
	   (match_operand:V48_AVX2 1 "memory_operand")]
	  UNSPEC_MASKMOV))]
  "TARGET_AVX")
17743
;; Named expander "maskstore<mode>" for V48_AVX2 modes.  It has no
;; preparation code.  Note the operand numbering: operand 1 is the data
;; register and operand 2 the mask, placed so that the mask comes first
;; inside the UNSPEC_MASKMOV vector.
(define_expand "maskstore<mode>"
  [(set (match_operand:V48_AVX2 0 "memory_operand")
	(unspec:V48_AVX2
	  [(match_operand:<sseintvecmode> 2 "register_operand")
	   (match_operand:V48_AVX2 1 "register_operand")
	   (match_dup 0)]
	  UNSPEC_MASKMOV))]
  "TARGET_AVX")
17752
;; Cast of a half-width vector to the 256-bit mode (UNSPEC_CAST).
;; Always output as "#" and split after reload into one plain move,
;; with the register side re-expressed in the mode of the other side.
(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX256MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  if (REG_P (operands[0]))
    /* Register destination: move into its half-width view.  */
    emit_move_insn (gen_rtx_REG (<ssehalfvecmode>mode, REGNO (operands[0])),
		    operands[1]);
  else
    /* Otherwise: move the full-width view of the source register.  */
    emit_move_insn (operands[0],
		    gen_rtx_REG (<MODE>mode, REGNO (operands[1])));
  DONE;
})
17772
;; Vector initialization expander for V_256 modes (TARGET_AVX).  All
;; work is delegated to ix86_expand_vector_init, called with false as
;; its first argument, the destination and operand 1.
(define_expand "vec_init<mode>"
  [(match_operand:V_256 0 "register_operand")
   (match_operand 1)]
  "TARGET_AVX"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
17781
;; Vector initialization expander for VF48_I1248 modes
;; (TARGET_AVX512F).  All work is delegated to ix86_expand_vector_init,
;; called with false as its first argument, the destination and
;; operand 1.
(define_expand "vec_init<mode>"
  [(match_operand:VF48_I1248 0 "register_operand")
   (match_operand 1)]
  "TARGET_AVX512F"
{
  ix86_expand_vector_init (false, operands[0], operands[1]);
  DONE;
})
17790
;; Arithmetic right shift with a vector of shift counts (operand 2,
;; register or memory) for VI48_AVX512F_AVX512VL modes, output as
;; vpsrav<ssemodesuffix>.
(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
  [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
	(ashiftrt:VI48_AVX512F_AVX512VL
	  (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
	  (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
17801
;; Arithmetic right shift with a vector of shift counts (operand 2,
;; register or memory) for VI2_AVX512VL modes (TARGET_AVX512BW), output
;; as vpsravw.
(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(ashiftrt:VI2_AVX512VL
	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
17812
;; Shift with a vector of shift counts (operand 2, register or memory)
;; for VI48_AVX512F modes, iterated over the any_lshift codes and
;; output as vp<vshift>v<ssemodesuffix>.
(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
	(any_lshift:VI48_AVX512F
	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX2 && <mask_mode512bit_condition>"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
17823
;; Shift with a vector of shift counts (operand 2, register or memory)
;; for VI2_AVX512VL modes (TARGET_AVX512BW), iterated over the
;; any_lshift codes and output as vp<vshift>v<ssemodesuffix>.
(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(any_lshift:VI2_AVX512VL
	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512BW"
  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseishft")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
17834
;; Concatenation of two half-width vectors into a V_256_512 vector.
;; Alternative 0 (operand 2 in a register or memory) is output as a
;; vinsert<i128> of operand 2 at position 1.  Alternative 1 (operand 2
;; matching constraint "C") is output as a single move of operand 1,
;; with the destination printed through the %t0 or %x0 modifier chosen
;; by the insn's mode attribute.
(define_insn "avx_vec_concat<mode>"
  [(set (match_operand:V_256_512 0 "register_operand" "=x,x")
	(vec_concat:V_256_512
	  (match_operand:<ssehalfvecmode> 1 "register_operand" "x,x")
	  (match_operand:<ssehalfvecmode> 2 "vector_move_operand" "xm,C")))]
  "TARGET_AVX"
{
  switch (which_alternative)
    {
    case 0:
      return "vinsert<i128>\t{$0x1, %2, %<concat_tg_mode>1, %0|%0, %<concat_tg_mode>1, %2, 0x1}";
    case 1:
      /* Only operand 1 is moved; operand 2 does not appear in any of
	 these templates.  */
      switch (get_attr_mode (insn))
	{
	case MODE_V16SF:
	  return "vmovaps\t{%1, %t0|%t0, %1}";
	case MODE_V8DF:
	  return "vmovapd\t{%1, %t0|%t0, %1}";
	case MODE_V8SF:
	  return "vmovaps\t{%1, %x0|%x0, %1}";
	case MODE_V4DF:
	  return "vmovapd\t{%1, %x0|%x0, %1}";
	case MODE_XI:
	  return "vmovdqa\t{%1, %t0|%t0, %1}";
	case MODE_OI:
	  return "vmovdqa\t{%1, %x0|%x0, %1}";
	default:
	  gcc_unreachable ();
	}
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "sselog,ssemov")
   (set_attr "prefix_extra" "1,*")
   (set_attr "length_immediate" "1,*")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "<sseinsnmode>")])
17873
;; vcvtph2ps with a register source.  The RTL models the result as
;; elements 0-3 of a V8SF UNSPEC_VCVTPH2PS taken over the V8HI operand 1.
;; Available with TARGET_F16C or TARGET_AVX512VL.
(define_insn "vcvtph2ps<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(vec_select:V4SF
	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
		       UNSPEC_VCVTPH2PS)
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
17886
;; vcvtph2ps with a V4HI memory source producing V4SF.
;; The "prefix" attribute is "maybe_evex", matching the register form of
;; this insn: the destination uses constraint "v" and the template can
;; carry a mask suffix, so the encoding is not guaranteed to be plain VEX.
(define_insn "*vcvtph2ps_load<mask_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=v")
	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
		     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V8SF")])
17896
;; vcvtph2ps from a V8HI register or memory operand to V8SF.
;; The "prefix" attribute is "maybe_evex" rather than "vex": both operands
;; accept constraint "v" and the template can carry a mask suffix, so the
;; encoding is not guaranteed to be plain VEX.
(define_insn "vcvtph2ps256<mask_name>"
  [(set (match_operand:V8SF 0 "register_operand" "=v")
	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
		     UNSPEC_VCVTPH2PS))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "double")
   (set_attr "mode" "V8SF")])
17907
;; 512-bit vcvtph2ps: V16HI source to V16SF result, TARGET_AVX512F only.
;; The source predicate and constraint come from the <round_saeonly_*>
;; substitution attributes, and <round_saeonly_mask_op2> is spliced into
;; the template next to operand 1.
(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
  [(set (match_operand:V16SF 0 "register_operand" "=v")
	(unspec:V16SF
	  [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
	  UNSPEC_VCVTPH2PS))]
  "TARGET_AVX512F"
  "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
17918
;; Masked expander for the 128-bit vcvtps2ph.  The V4HI conversion result
;; (operand 1 converted under immediate operand 2) is concatenated with
;; operand 5 and then merged with operand 3 under the QImode mask in
;; operand 4.  Operand 5 is not supplied by the caller; it is created in
;; the preparation code.
(define_expand "vcvtps2ph_mask"
  [(set (match_operand:V8HI 0 "register_operand")
	(vec_merge:V8HI
	  (vec_concat:V8HI
	    (unspec:V4HI
	      [(match_operand:V4SF 1 "register_operand")
	       (match_operand:SI 2 "const_0_to_255_operand")]
	      UNSPEC_VCVTPS2PH)
	    (match_dup 5))
	  (match_operand:V8HI 3 "vector_move_operand")
	  (match_operand:QI 4 "register_operand")))]
  "TARGET_AVX512VL"
{
  /* Second half of the concatenation is a zero V4HI vector.  */
  operands[5] = CONST0_RTX (V4HImode);
})
17931
;; Unmasked expander for the 128-bit vcvtps2ph (TARGET_F16C).  The V4HI
;; conversion result is concatenated with operand 3, which is created in
;; the preparation code rather than passed in.
(define_expand "vcvtps2ph"
  [(set (match_operand:V8HI 0 "register_operand")
	(vec_concat:V8HI
	  (unspec:V4HI
	    [(match_operand:V4SF 1 "register_operand")
	     (match_operand:SI 2 "const_0_to_255_operand")]
	    UNSPEC_VCVTPS2PH)
	  (match_dup 3)))]
  "TARGET_F16C"
{
  /* Second half of the concatenation is a zero V4HI vector.  */
  operands[3] = CONST0_RTX (V4HImode);
})
17941
;; Register-destination vcvtps2ph: matches the V4HI conversion of operand 1
;; (control immediate in operand 2) concatenated with a zero V4HI vector
;; (operand 3, const0_operand).  Needs TARGET_F16C or TARGET_AVX512VL and
;; additionally <mask_avx512vl_condition>.
(define_insn "*vcvtps2ph<mask_name>"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
	(vec_concat:V8HI
	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
			(match_operand:SI 2 "const_0_to_255_operand" "N")]
		       UNSPEC_VCVTPS2PH)
	  (match_operand:V4HI 3 "const0_operand")))]
  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
17954
;; Memory-destination vcvtps2ph: stores the V4HI conversion of the V4SF
;; register operand 1 directly, so no zero upper half is modelled.
;; Operand 2 is the control immediate (0..255).
(define_insn "*vcvtps2ph_store<mask_name>"
  [(set (match_operand:V4HI 0 "memory_operand" "=m")
	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")
		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
		     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "mode" "V4SF")])
17965
;; 256-bit vcvtps2ph: V8SF register source to a V8HI destination that may
;; be a register or memory.  Operand 2 is the control immediate (0..255).
(define_insn "vcvtps2ph256<mask_name>"
  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm")
	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "x")
		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
		     UNSPEC_VCVTPS2PH))]
  "TARGET_F16C || TARGET_AVX512VL"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_evex")
   (set_attr "btver2_decode" "vector")
   (set_attr "mode" "V8SF")])
17977
;; 512-bit vcvtps2ph: V16SF register source to a V16HI register or memory
;; destination, TARGET_AVX512F only.  Operand 2 is the control immediate.
(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
	(unspec:V16HI
	  [(match_operand:V16SF 1 "register_operand" "v")
	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
	  UNSPEC_VCVTPS2PH))]
  "TARGET_AVX512F"
  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "V16SF")])
17989
;; For gather* insn patterns
;; VEC_GATHER_MODE lists the 128- and 256-bit data vector modes that the
;; avx2_gather* patterns are instantiated for.
(define_mode_iterator VEC_GATHER_MODE
		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
;; Index-vector mode used with a given data mode by the patterns that take
;; an SImode-element index (the *gathersi* / *scattersi* patterns).
(define_mode_attr VEC_GATHER_IDXSI
		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
17998
;; Index-vector mode used with a given data mode by the patterns that take
;; a DImode-element index (the *gatherdi* / *scatterdi* patterns).
(define_mode_attr VEC_GATHER_IDXDI
		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
18004
;; Mode of the source/mask (and scatter data) operands in the DImode-index
;; patterns.  It equals the data mode except for V8SI/V8SF and V16SI/V16SF,
;; which map to the half-width vector mode.
(define_mode_attr VEC_GATHER_SRCDI
		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
18010
;; Expander for the AVX2 gathers with an SImode-element index vector.
;; Operands: 0 result, 1 pass-through source, 2 base address, 3 index
;; vector, 4 mask, 5 scale.  Operand 6 is a scratch that gets clobbered and
;; operand 7 is the UNSPEC_VSIBADDR address built in the preparation code
;; from operands 2, 3 and 5.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank, the same spelling the matching define_insn uses.
(define_expand "avx2_gathersi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
		   (unspec:VEC_GATHER_MODE
		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 7
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXSI>
			      3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))
		      (mem:BLK (scratch))
		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  /* Wrap base, index and scale into the VSIB address unspec.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})
18031
;; Matcher for the SImode-index AVX2 gather.  Operand 7 is the VSIB memory
;; reference; its address is an UNSPEC_VSIBADDR of operand 3 (address),
;; operand 4 (index vector) and operand 6 (scale).  Operand 2 is tied to
;; the result (constraint "0"); operand 5 is tied to scratch operand 1,
;; which the insn clobbers.  Both outputs are early-clobber.
(define_insn "*avx2_gathersi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
		 (match_operand:SI 6 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18051
;; Variant of the SImode-index AVX2 gather whose first unspec element is
;; (pc) instead of a source register, so nothing is tied to the result.
;; NOTE(review): presumably matched when the previous destination value is
;; irrelevant -- confirm against the code that generates it.
;; Operand 6 is the VSIB memory reference (address 2, index 3, scale 5);
;; operand 4 is tied to the clobbered scratch operand 1.
(define_insn "*avx2_gathersi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(pc)
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 2 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18071
;; Expander for the AVX2 gathers with a DImode-element index vector.
;; Operands: 0 result, 1 pass-through source, 2 base address, 3 index
;; vector, 4 mask, 5 scale.  Source and mask use <VEC_GATHER_SRCDI>, which
;; can be narrower than the result mode.  Operand 6 is a clobbered scratch
;; and operand 7 is the UNSPEC_VSIBADDR address built in the preparation
;; code from operands 2, 3 and 5.
;; The scale predicate is spelled "const1248_operand" with no trailing
;; blank, the same spelling the matching define_insn uses.
(define_expand "avx2_gatherdi<mode>"
  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
		   (unspec:VEC_GATHER_MODE
		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 7
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXDI>
			      3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))
		      (mem:BLK (scratch))
		      (match_operand:<VEC_GATHER_SRCDI>
			4 "register_operand")]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:VEC_GATHER_MODE 6))])]
  "TARGET_AVX2"
{
  /* Wrap base, index and scale into the VSIB address unspec.  */
  operands[7]
    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
					operands[5]), UNSPEC_VSIBADDR);
})
18093
;; Matcher for the DImode-index AVX2 gather.  Operand 7 is the VSIB memory
;; reference (address 3, index 4, scale 6).  Operand 2 is tied to the
;; result and operand 5 to the clobbered scratch operand 1.  The template
;; prints operands 2 and 5 rather than 0 and 1: they name the same
;; registers but have mode <VEC_GATHER_SRCDI>.
;; NOTE(review): presumably so the register is printed at the source
;; vector's width -- confirm.
(define_insn "*avx2_gatherdi<mode>"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
		 (match_operand:SI 6 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18113
;; DImode-index AVX2 gather with (pc) in place of the pass-through source.
;; Operand 6 is the VSIB memory reference (address 2, index 3, scale 5);
;; operand 4 is tied to the clobbered scratch operand 1.  When the result
;; mode differs from <VEC_GATHER_SRCDI> the destination is printed with the
;; %x modifier, otherwise it is printed unmodified.
(define_insn "*avx2_gatherdi<mode>_2"
  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
	(unspec:VEC_GATHER_MODE
	  [(pc)
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 2 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])
	   (mem:BLK (scratch))
	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
	  UNSPEC_GATHER))
   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
  "TARGET_AVX2"
{
  /* Same mode on both sides: print the destination as it is.  */
  if (<MODE>mode == <VEC_GATHER_SRCDI>mode)
    return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";

  /* Otherwise print the destination through the %x modifier.  */
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18137
;; DImode-index AVX2 gather for the VI4F_256 modes where only elements 0-3
;; of the gather result are used: the destination has the narrower
;; <VEC_GATHER_SRCDI> mode and the RTL is a vec_select of the unspec.
;; Operand 2 is tied to the result, operand 5 to the clobbered scratch 1.
(define_insn "*avx2_gatherdi<mode>_3"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
	(vec_select:<VEC_GATHER_SRCDI>
	  (unspec:VI4F_256
	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
	       [(unspec:P
		  [(match_operand:P 3 "vsib_address_operand" "Tv")
		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
		   (match_operand:SI 6 "const1248_operand" "n")]
		  UNSPEC_VSIBADDR)])
	     (mem:BLK (scratch))
	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
	     UNSPEC_GATHER)
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18160
;; Like the _3 pattern (elements 0-3 selected from a VI4F_256 gather into a
;; <VEC_GATHER_SRCDI> destination) but with (pc) in place of the
;; pass-through source, so no input is tied to the result.  Operand 4 is
;; tied to the clobbered scratch operand 1.
(define_insn "*avx2_gatherdi<mode>_4"
  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
	(vec_select:<VEC_GATHER_SRCDI>
	  (unspec:VI4F_256
	    [(pc)
	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	       [(unspec:P
		  [(match_operand:P 2 "vsib_address_operand" "Tv")
		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
		   (match_operand:SI 5 "const1248_operand" "n")]
		  UNSPEC_VSIBADDR)])
	     (mem:BLK (scratch))
	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
	    UNSPEC_GATHER)
	  (parallel [(const_int 0) (const_int 1)
		     (const_int 2) (const_int 3)])))
   (clobber (match_scratch:VI4F_256 1 "=&x"))]
  "TARGET_AVX2"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "vex")
   (set_attr "mode" "<sseinsnmode>")])
18183
;; Expander for the AVX-512 gathers with an SImode-element index vector.
;; Operands: 0 result, 1 pass-through source, 2 base address, 3 index
;; vector, 4 mask (mode <avx512fmaskmode>), 5 scale.  Operand 7 is a mask
;; scratch that gets clobbered; operand 6 is the UNSPEC_VSIBADDR address
;; assembled below.
(define_expand "<avx512>_gathersi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:VI48F 1 "register_operand")
		      (match_operand:<avx512fmaskmode> 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
  "TARGET_AVX512F"
{
  /* Base, index and scale, in that order.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[3], operands[5]);

  operands[6] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
18202
;; Matcher for the SImode-index AVX-512 gather.  Operand 6 is the VSIB
;; memory reference (address 4, index 3, scale 5).  Operand 1 is tied to
;; the result; the mask input, operand 7, is tied to the clobbered "Yk"
;; scratch operand 2, which the template prints inside {...} after the
;; destination.
(define_insn "*avx512f_gathersi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "0")
	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %g6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18221
;; SImode-index AVX-512 gather with (pc) in place of the pass-through
;; source, so no input is tied to the result.  Operand 5 is the VSIB memory
;; reference (address 3, index 2, scale 4); the mask input, operand 6, is
;; tied to the clobbered "Yk" scratch operand 1.
(define_insn "*avx512f_gathersi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18240
18241
;; Expander for the AVX-512 gathers with a DImode-element index vector.
;; Operands: 0 result, 1 pass-through source (<VEC_GATHER_SRCDI>), 2 base
;; address, 3 index vector, 4 QImode mask, 5 scale.  Operand 7 is a QImode
;; scratch that gets clobbered; operand 6 is the UNSPEC_VSIBADDR address
;; assembled below.
(define_expand "<avx512>_gatherdi<mode>"
  [(parallel [(set (match_operand:VI48F 0 "register_operand")
		   (unspec:VI48F
		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
		      (match_operand:QI 4 "register_operand")
		      (mem:<ssescalarmode>
			(match_par_dup 6
			  [(match_operand 2 "vsib_address_operand")
			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
			   (match_operand:SI 5 "const1248_operand")]))]
		     UNSPEC_GATHER))
	      (clobber (match_scratch:QI 7))])]
  "TARGET_AVX512F"
{
  /* Base, index and scale, in that order.  */
  rtvec vsib_parts = gen_rtvec (3, operands[2], operands[3], operands[5]);

  operands[6] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
18260
;; Matcher for the DImode-index AVX-512 gather.  Operand 6 is the VSIB
;; memory reference (address 4, index 3, scale 5).  Operand 1 (mode
;; <VEC_GATHER_SRCDI>) is tied to the result and is what the template
;; prints as the destination; the QImode mask, operand 7, is tied to the
;; clobbered "Yk" scratch operand 2.
(define_insn "*avx512f_gatherdi<mode>"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
	   (match_operand:QI 7 "register_operand" "2")
	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 4 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
		 (match_operand:SI 5 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 2 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %g6}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18279
;; DImode-index AVX-512 gather with (pc) in place of the pass-through
;; source.  Operand 5 is the VSIB memory reference (address 3, index 2,
;; scale 4); the QImode mask, operand 6, is tied to the clobbered "Yk"
;; scratch operand 1.
;; When the result mode differs from <VEC_GATHER_SRCDI> the destination is
;; printed through a width modifier: %t for 64-byte modes, %x for the
;; narrower ones.  Both assembler dialects use the same modifier in each
;; branch, so they name the same register.
(define_insn "*avx512f_gatherdi<mode>_2"
  [(set (match_operand:VI48F 0 "register_operand" "=&v")
	(unspec:VI48F
	  [(pc)
	   (match_operand:QI 6 "register_operand" "1")
	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
	     [(unspec:P
		[(match_operand:P 3 "vsib_address_operand" "Tv")
		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
		 (match_operand:SI 4 "const1248_operand" "n")]
		UNSPEC_VSIBADDR)])]
	  UNSPEC_GATHER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
{
  if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
    {
      /* The Intel-syntax half must use the same width modifier as the
	 AT&T half; %t is only for the 64-byte case below.  */
      if (<MODE_SIZE> != 64)
	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %g5}";
      else
	return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %g5}";
    }
  return "v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %g5}";
}
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18307
;; Expander for the AVX-512 scatters with an SImode-element index vector.
;; Operands: 0 base address, 1 mask (<avx512fmaskmode>), 2 index vector,
;; 3 data to store, 4 scale.  Operand 6 is a mask scratch that gets
;; clobbered; operand 5 is the UNSPEC_VSIBADDR address assembled below.
(define_expand "<avx512>_scattersi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
		      (match_operand:VI48F 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
  "TARGET_AVX512F"
{
  /* Base, index and scale, in that order.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);

  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
18325
;; Matcher for the SImode-index AVX-512 scatter.  The destination is the
;; VSIB memory reference operand 5 (address 0, index 2, scale 4); operand 3
;; is the data register.  The mask input, operand 6, is tied to the
;; clobbered "Yk" scratch operand 1.
(define_insn "*avx512f_scattersi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:<avx512fmaskmode> 6 "register_operand" "1")
	   (match_operand:VI48F 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18343
;; Expander for the AVX-512 scatters with a DImode-element index vector.
;; Operands: 0 base address, 1 QImode mask, 2 index vector, 3 data to store
;; (<VEC_GATHER_SRCDI>), 4 scale.  Operand 6 is a QImode scratch that gets
;; clobbered; operand 5 is the UNSPEC_VSIBADDR address assembled below.
(define_expand "<avx512>_scatterdi<mode>"
  [(parallel [(set (mem:VI48F
		     (match_par_dup 5
		       [(match_operand 0 "vsib_address_operand")
			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
			(match_operand:SI 4 "const1248_operand")]))
		   (unspec:VI48F
		     [(match_operand:QI 1 "register_operand")
		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
		     UNSPEC_SCATTER))
	      (clobber (match_scratch:QI 6))])]
  "TARGET_AVX512F"
{
  /* Base, index and scale, in that order.  */
  rtvec vsib_parts = gen_rtvec (3, operands[0], operands[2], operands[4]);

  operands[5] = gen_rtx_UNSPEC (Pmode, vsib_parts, UNSPEC_VSIBADDR);
})
18361
;; Matcher for the DImode-index AVX-512 scatter.  The destination is the
;; VSIB memory reference operand 5 (address 0, index 2, scale 4); operand 3
;; is the data register in mode <VEC_GATHER_SRCDI>.  The QImode mask,
;; operand 6, is tied to the clobbered "Yk" scratch operand 1.
(define_insn "*avx512f_scatterdi<mode>"
  [(set (match_operator:VI48F 5 "vsib_mem_operator"
	  [(unspec:P
	     [(match_operand:P 0 "vsib_address_operand" "Tv")
	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
	      (match_operand:SI 4 "const1248_operand" "n")]
	     UNSPEC_VSIBADDR)])
	(unspec:VI48F
	  [(match_operand:QI 6 "register_operand" "1")
	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
	  UNSPEC_SCATTER))
   (clobber (match_scratch:QI 1 "=&Yk"))]
  "TARGET_AVX512F"
  "v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%5%{%1%}, %3}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18379
;; Masked compress into a register.  Operand 1 is the source, operand 3
;; the mask register ("Yk").  Operand 2 is either tied to the result
;; (constraint "0") or a constant accepted by constraint "C"; the template
;; passes it through the %N modifier.
;; NOTE(review): %N2 presumably emits the zero-masking marker when operand
;; 2 is the constant -- confirm in print_operand.
(define_insn "<avx512>_compress<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "v")
	   (match_operand:VI48F 2 "vector_move_operand" "0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
	  UNSPEC_COMPRESS))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18392
;; Masked compress straight to memory.  The (match_dup 0) makes the
;; previous contents of the destination memory an input of the unspec.
;; Operand 1 is the source register, operand 2 the mask register; the
;; "memory" attribute is set to "store".
(define_insn "<avx512>_compressstore<mode>_mask"
  [(set (match_operand:VI48F 0 "memory_operand" "=m")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "register_operand" "x")
	   (match_dup 0)
	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
	  UNSPEC_COMPRESS_STORE))]
  "TARGET_AVX512F"
  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "store")
   (set_attr "mode" "<sseinsnmode>")])
18406
;; Zero-masking expander for the masked expand operation.  It emits the
;; same UNSPEC_EXPAND shape as the _mask insn, but whatever the caller
;; passes as operand 2 is replaced by a zero vector in the preparation
;; code.
(define_expand "<avx512>_expand<mode>_maskz"
  [(set (match_operand:VI48F 0 "register_operand")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand")
	   (match_operand:VI48F 2 "vector_move_operand")
	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
{
  /* Force the merge source to all-zero.  */
  operands[2] = CONST0_RTX (<MODE>mode);
})
18416
;; Masked expand.  Two alternatives differ only in operand 1: register
;; ("v") or memory ("m"), reflected in the "memory" attribute
;; ("none,load").  Operand 2 is tied to the result or a "C" constant and is
;; printed through %N; operand 3 is the mask register.
(define_insn "<avx512>_expand<mode>_mask"
  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
	(unspec:VI48F
	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
	   (match_operand:VI48F 2 "vector_move_operand" "0C,0C")
	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
	  UNSPEC_EXPAND))]
  "TARGET_AVX512F"
  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
  [(set_attr "type" "ssemov")
   (set_attr "prefix" "evex")
   (set_attr "memory" "none,load")
   (set_attr "mode" "<sseinsnmode>")])
18430
;; Packed vrange: UNSPEC_RANGE of operand 1, operand 2 and a 0..15
;; immediate (operand 3).  Operand 2's predicate and constraint come from
;; the <round_saeonly_*> attributes.  Requires TARGET_AVX512DQ together
;; with <round_saeonly_mode512bit_condition>.
(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	   (match_operand:SI 3 "const_0_to_15_operand")]
	  UNSPEC_RANGE))]
  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18443
;; Scalar vrange on the VF_128 modes.  The vec_merge with (const_int 1)
;; takes element 0 from the UNSPEC_RANGE result and all other elements
;; from operand 1 (match_dup 1).
(define_insn "avx512dq_ranges<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_RANGE)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18459
;; Packed vfpclass: writes a mask register (<avx512fmaskmode>) computed as
;; UNSPEC_FPCLASS of vector operand 1 and the 0..255 immediate operand 2.
;; <mask_scalar_merge_name>/<mask_scalar_merge_operand3> add the variant
;; that combines the result with an input mask.
(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(unspec:<avx512fmaskmode>
	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	   (match_operand:QI 2 "const_0_to_255_operand" "n")]
	  UNSPEC_FPCLASS))]
  "TARGET_AVX512DQ"
  "vfpclass<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18472
;; Scalar vfpclass on the VF_128 modes.  The UNSPEC_FPCLASS mask result is
;; ANDed with (const_int 1), so only bit 0 of the mask is kept.
(define_insn "avx512dq_vmfpclass<mode>"
  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=Yk")
	(and:<avx512fmaskmode>
	  (unspec:<avx512fmaskmode>
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:QI 2 "const_0_to_255_operand" "n")]
	    UNSPEC_FPCLASS)
	  (const_int 1)))]
  "TARGET_AVX512DQ"
  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "type" "sse")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18487
;; Packed vgetmant: UNSPEC_GETMANT of operand 1 and a 0..15 immediate
;; (operand 2).  Operand 1's constraint comes from
;; <round_saeonly_constraint>, and <round_saeonly_mask_op3> is spliced into
;; the template next to it.
(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(unspec:VF_AVX512VL
	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
	   (match_operand:SI 2 "const_0_to_15_operand")]
	  UNSPEC_GETMANT))]
  "TARGET_AVX512F"
  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])
18498
;; Scalar vgetmant on the VF_128 modes.  The vec_merge with (const_int 1)
;; takes element 0 from the UNSPEC_GETMANT result and all other elements
;; from operand 1 (match_dup 1).  Operand 3 is a 0..15 immediate.
(define_insn "avx512f_vgetmant<mode><round_saeonly_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (unspec:VF_128
	    [(match_operand:VF_128 1 "register_operand" "v")
	     (match_operand:VF_128 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
	     (match_operand:SI 3 "const_0_to_15_operand")]
	    UNSPEC_GETMANT)
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
  [(set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])
18513
;; The correct representation for this is absolutely enormous, and
;; surely not generally useful.
;; Hence the operation is kept opaque as UNSPEC_DBPSADBW: two
;; <dbpsadbwmode> inputs (the second may be memory) plus a 0..255 immediate
;; produce a VI2_AVX512VL result.  Requires TARGET_AVX512BW.
(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
	   (match_operand:SI 3 "const_0_to_255_operand")]
	  UNSPEC_DBPSADBW))]
   "TARGET_AVX512BW"
  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
  [(set_attr "isa" "avx")
   (set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18530
;; Vector count-leading-zeros for the VI48_AVX512VL modes, expressed with
;; the generic clz RTL code and emitted as vplzcnt.  Operand 1 may be a
;; register or memory.  Requires TARGET_AVX512CD.
(define_insn "clz<mode>2<mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(clz:VI48_AVX512VL
	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX512CD"
  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18540
;; vpconflict for the VI48_AVX512VL modes, kept opaque as UNSPEC_CONFLICT
;; of the single source operand 1 (register or memory).  Requires
;; TARGET_AVX512CD.
(define_insn "<mask_codefor>conflict<mode><mask_name>"
  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
	(unspec:VI48_AVX512VL
	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
	  UNSPEC_CONFLICT))]
  "TARGET_AVX512CD"
  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "sse")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18551
;; sha1msg1 on V4SI.  Operand 1 is tied to the destination (constraint
;; "0"), so the two-operand template prints only operands 0 and 2; operand
;; 2 may be a register or memory.  Requires TARGET_SHA.
(define_insn "sha1msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1MSG1))]
  "TARGET_SHA"
  "sha1msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18562
;; sha1msg2 on V4SI.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  Requires TARGET_SHA.
(define_insn "sha1msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1MSG2))]
  "TARGET_SHA"
  "sha1msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18573
;; sha1nexte on V4SI.  Operand 1 is tied to the destination; operand 2 may
;; be a register or memory.  Requires TARGET_SHA.
(define_insn "sha1nexte"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA1NEXTE))]
  "TARGET_SHA"
  "sha1nexte\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18584
;; sha1rnds4 on V4SI.  Operand 1 is tied to the destination, operand 2 may
;; be a register or memory, and operand 3 is an immediate in 0..3 -- hence
;; the "length_immediate" of 1.  Requires TARGET_SHA.
(define_insn "sha1rnds4"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
	  UNSPEC_SHA1RNDS4))]
  "TARGET_SHA"
  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "length_immediate" "1")
   (set_attr "mode" "TI")])
18597
;; sha256msg1 on V4SI.  Operand 1 is tied to the destination; operand 2
;; may be a register or memory.  Requires TARGET_SHA.
(define_insn "sha256msg1"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA256MSG1))]
  "TARGET_SHA"
  "sha256msg1\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18608
;; SHA256MSG2 instruction pattern, available when TARGET_SHA holds.
;; Operand 1 is tied to the destination register (constraint "0");
;; operand 2 may be an SSE register or memory.  The result is
;; represented as unspec UNSPEC_SHA256MSG2 of the two inputs.
(define_insn "sha256msg2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")]
	  UNSPEC_SHA256MSG2))]
  "TARGET_SHA"
  ;; Template is written as {AT&T|Intel} operand order.
  "sha256msg2\t{%2, %0|%0, %2}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18619
;; SHA256RNDS2 instruction pattern, available when TARGET_SHA holds.
;; Operand 1 is tied to the destination register (constraint "0") and
;; operand 2 may be an SSE register or memory.  Operand 3 is a third
;; vector source that the "Yz" constraint pins to one specific SSE
;; register (the instruction's implicit xmm0 operand); it is still
;; printed explicitly in the assembler template.
;;
;; All three sources are registers or memory: there is no immediate
;; operand, hence no "length_immediate" attribute here (contrast with
;; sha1rnds4, whose operand 3 is a constant).
(define_insn "sha256rnds2"
  [(set (match_operand:V4SI 0 "register_operand" "=x")
	(unspec:V4SI
	  [(match_operand:V4SI 1 "register_operand" "0")
	   (match_operand:V4SI 2 "nonimmediate_operand" "xm")
	   (match_operand:V4SI 3 "register_operand" "Yz")]
	  UNSPEC_SHA256RNDS2))]
  "TARGET_SHA"
  ;; Template is written as {AT&T|Intel} operand order.
  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "type" "sselog1")
   (set_attr "mode" "TI")])
18632
;; Cast from the quarter-width vector mode <ssequartermode> to an
;; AVX512MODE2P mode.  The insn is emitted as "#" and, once reload has
;; completed, is split into a single move from operand 1 to operand 0.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];

  /* Both sides of the emitted move have to be expressed in one mode.  */
  if (!REG_P (dest))
    /* Non-register destination (the "m" alternative, whose source is a
       register): re-express the source register in the full-width mode.  */
    src = gen_rtx_REG (<MODE>mode, REGNO (src));
  else
    /* Register destination: re-express it in the narrow source mode.  */
    dest = gen_rtx_REG (<ssequartermode>mode, REGNO (dest));

  emit_move_insn (dest, src);
  DONE;
})
18652
;; Cast from the half-width vector mode <ssehalfvecmode> to an
;; AVX512MODE2P mode.  The insn is emitted as "#" and, once reload has
;; completed, is split into a single move from operand 1 to operand 0.
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
	(unspec:AVX512MODE2P
	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
	  UNSPEC_CAST))]
  "TARGET_AVX512F"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx dest = operands[0];
  rtx src = operands[1];

  /* Both sides of the emitted move have to be expressed in one mode.  */
  if (!REG_P (dest))
    /* Non-register destination (the "m" alternative, whose source is a
       register): re-express the source register in the full-width mode.  */
    src = gen_rtx_REG (<MODE>mode, REGNO (src));
  else
    /* Register destination: re-express it in the narrow source mode.  */
    dest = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (dest));

  emit_move_insn (dest, src);
  DONE;
})
18672
;; Int iterator over the two VPMADD52 unspecs, so one pattern written
;; with VPMADD52 yields both the "luq" and the "huq" variant.
(define_int_iterator VPMADD52
	[UNSPEC_VPMADD52LUQ
	 UNSPEC_VPMADD52HUQ])
18676
;; Mnemonic/pattern-name suffix selected by the current VPMADD52 unspec.
(define_int_attr vpmadd52type
  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
18679
;; Expander for the "huq" maskz form.  It only forwards its operands to
;; the vpamdd52huq<mode>_maskz_1 pattern, inserting an all-zero vector
;; of the operand mode in front of the mask register (operand 4).
;; Note the pattern name is spelled "vpamdd52"; it is kept as is
;; because it determines the generated gen_* function names.
(define_expand "vpamdd52huq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  /* All-zero vector passed to the _maskz_1 pattern ahead of the mask.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx pat = gen_vpamdd52huq<mode>_maskz_1 (operands[0], operands[1],
					    operands[2], operands[3],
					    zero, operands[4]);

  emit_insn (pat);
  DONE;
})
18693
;; Expander for the "luq" maskz form.  It only forwards its operands to
;; the vpamdd52luq<mode>_maskz_1 pattern, inserting an all-zero vector
;; of the operand mode in front of the mask register (operand 4).
;; Note the pattern name is spelled "vpamdd52"; it is kept as is
;; because it determines the generated gen_* function names.
(define_expand "vpamdd52luq<mode>_maskz"
  [(match_operand:VI8_AVX512VL 0 "register_operand")
   (match_operand:VI8_AVX512VL 1 "register_operand")
   (match_operand:VI8_AVX512VL 2 "register_operand")
   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512IFMA"
{
  /* All-zero vector passed to the _maskz_1 pattern ahead of the mask.  */
  rtx zero = CONST0_RTX (<MODE>mode);
  rtx pat = gen_vpamdd52luq<mode>_maskz_1 (operands[0], operands[1],
					    operands[2], operands[3],
					    zero, operands[4]);

  emit_insn (pat);
  DONE;
})
18707
;; Unmasked VPMADD52{LUQ,HUQ} pattern, instantiated once per VPMADD52
;; unspec and per VI8_AVX512VL mode.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 is a register and operand 3
;; is register-or-memory.
;; <sd_maskz_name> and <sd_mask_op4> are subst attributes defined
;; outside this chunk -- presumably they supply the "_maskz_1" name
;; suffix and mask operand text used by the maskz expanders; verify
;; against their definitions.
;; The pattern name is spelled "vpamdd52" while the emitted mnemonic is
;; "vpmadd52"; the maskz expanders in this file call the gen_vpamdd52*
;; functions by that spelling.
(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(unspec:VI8_AVX512VL
	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	  VPMADD52))]
  "TARGET_AVX512IFMA"
  ;; Template is written as {AT&T|Intel} operand order.
  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18720
;; Merge-masked VPMADD52{LUQ,HUQ} pattern.  The unspec result is
;; combined by vec_merge with operand 1 (match_dup 1) under the mask in
;; operand 4, so elements not selected by the mask take their value
;; from operand 1, which is also tied to the destination register
;; (constraint "0").  The mask must satisfy the "Yk" constraint.
;; The pattern name is spelled "vpamdd52" while the emitted mnemonic is
;; "vpmadd52".
(define_insn "vpamdd52<vpmadd52type><mode>_mask"
  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VI8_AVX512VL
	  (unspec:VI8_AVX512VL
	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
	    VPMADD52)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512IFMA"
  ;; %{ and %} print literal braces, giving "%0{%4}" with the mask
  ;; register; the outer {..|..} selects AT&T vs. Intel operand order.
  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18736
;; VPMULTISHIFTQB pattern for the VI1_AVX512VL modes, available when
;; TARGET_AVX512VBMI holds.  Non-destructive three-operand form:
;; operand 1 is a register, operand 2 is register-or-memory, and the
;; result is represented as unspec UNSPEC_VPMULTISHIFT.
;; <mask_name> and <mask_operand3> are subst attributes defined outside
;; this chunk -- presumably they add the masked variants of this
;; pattern; verify against their definitions.
(define_insn "vpmultishiftqb<mode><mask_name>"
  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
	(unspec:VI1_AVX512VL
	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMULTISHIFT))]
  "TARGET_AVX512VBMI"
  ;; Template is written as {AT&T|Intel} operand order.
  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sselog")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])
18748