1;; GCC machine description for SSE instructions
2;; Copyright (C) 2005-2020 Free Software Foundation, Inc.
3;;
4;; This file is part of GCC.
5;;
6;; GCC is free software; you can redistribute it and/or modify
7;; it under the terms of the GNU General Public License as published by
8;; the Free Software Foundation; either version 3, or (at your option)
9;; any later version.
10;;
11;; GCC is distributed in the hope that it will be useful,
12;; but WITHOUT ANY WARRANTY; without even the implied warranty of
13;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14;; GNU General Public License for more details.
15;;
16;; You should have received a copy of the GNU General Public License
17;; along with GCC; see the file COPYING3.  If not see
18;; <http://www.gnu.org/licenses/>.
19
20(define_c_enum "unspec" [
21  ;; SSE
22  UNSPEC_MOVNT
23
24  ;; SSE2
25  UNSPEC_MOVDI_TO_SSE
26
27  ;; SSE3
28  UNSPEC_LDDQU
29
30  ;; SSSE3
31  UNSPEC_PSHUFB
32  UNSPEC_PSIGN
33  UNSPEC_PALIGNR
34
35  ;; For SSE4A support
36  UNSPEC_EXTRQI
37  UNSPEC_EXTRQ
38  UNSPEC_INSERTQI
39  UNSPEC_INSERTQ
40
41  ;; For SSE4.1 support
42  UNSPEC_BLENDV
43  UNSPEC_INSERTPS
44  UNSPEC_DP
45  UNSPEC_MOVNTDQA
46  UNSPEC_MPSADBW
47  UNSPEC_PHMINPOSUW
48  UNSPEC_PTEST
49
50  ;; For SSE4.2 support
51  UNSPEC_PCMPESTR
52  UNSPEC_PCMPISTR
53
54  ;; For FMA4 support
55  UNSPEC_FMADDSUB
56  UNSPEC_XOP_UNSIGNED_CMP
57  UNSPEC_XOP_TRUEFALSE
58  UNSPEC_XOP_PERMUTE
59  UNSPEC_FRCZ
60
61  ;; For AES support
62  UNSPEC_AESENC
63  UNSPEC_AESENCLAST
64  UNSPEC_AESDEC
65  UNSPEC_AESDECLAST
66  UNSPEC_AESIMC
67  UNSPEC_AESKEYGENASSIST
68
69  ;; For PCLMUL support
70  UNSPEC_PCLMUL
71
72  ;; For AVX support
73  UNSPEC_PCMP
74  UNSPEC_VPERMIL
75  UNSPEC_VPERMIL2
76  UNSPEC_VPERMIL2F128
77  UNSPEC_CAST
78  UNSPEC_VTESTP
79  UNSPEC_VCVTPH2PS
80  UNSPEC_VCVTPS2PH
81
82  ;; For AVX2 support
83  UNSPEC_VPERMVAR
84  UNSPEC_VPERMTI
85  UNSPEC_GATHER
86  UNSPEC_VSIBADDR
87
88  ;; For AVX512F support
89  UNSPEC_VPERMT2
90  UNSPEC_UNSIGNED_FIX_NOTRUNC
91  UNSPEC_UNSIGNED_PCMP
92  UNSPEC_TESTM
93  UNSPEC_TESTNM
94  UNSPEC_SCATTER
95  UNSPEC_RCP14
96  UNSPEC_RSQRT14
97  UNSPEC_FIXUPIMM
98  UNSPEC_SCALEF
99  UNSPEC_VTERNLOG
100  UNSPEC_GETEXP
101  UNSPEC_GETMANT
102  UNSPEC_ALIGN
103  UNSPEC_CONFLICT
104  UNSPEC_COMPRESS
105  UNSPEC_COMPRESS_STORE
106  UNSPEC_EXPAND
107  UNSPEC_MASKED_EQ
108  UNSPEC_MASKED_GT
109
110  ;; Mask operations
111  UNSPEC_MASKOP
112  UNSPEC_KORTEST
113  UNSPEC_KTEST
114
115  ;; For embed. rounding feature
116  UNSPEC_EMBEDDED_ROUNDING
117
118  ;; For AVX512PF support
119  UNSPEC_GATHER_PREFETCH
120  UNSPEC_SCATTER_PREFETCH
121
122  ;; For AVX512ER support
123  UNSPEC_EXP2
124  UNSPEC_RCP28
125  UNSPEC_RSQRT28
126
127  ;; For SHA support
128  UNSPEC_SHA1MSG1
129  UNSPEC_SHA1MSG2
130  UNSPEC_SHA1NEXTE
131  UNSPEC_SHA1RNDS4
132  UNSPEC_SHA256MSG1
133  UNSPEC_SHA256MSG2
134  UNSPEC_SHA256RNDS2
135
136  ;; For AVX512BW support
137  UNSPEC_DBPSADBW
138  UNSPEC_PMADDUBSW512
139  UNSPEC_PMADDWD512
140  UNSPEC_PSHUFHW
141  UNSPEC_PSHUFLW
142  UNSPEC_CVTINT2MASK
143
144  ;; For AVX512DQ support
145  UNSPEC_REDUCE
146  UNSPEC_FPCLASS
147  UNSPEC_RANGE
148
149  ;; For AVX512IFMA support
150  UNSPEC_VPMADD52LUQ
151  UNSPEC_VPMADD52HUQ
152
153  ;; For AVX512VBMI support
154  UNSPEC_VPMULTISHIFT
155
156  ;; For AVX5124FMAPS/AVX5124VNNIW support
157  UNSPEC_VP4FMADD
158  UNSPEC_VP4FNMADD
159  UNSPEC_VP4DPWSSD
160  UNSPEC_VP4DPWSSDS
161
162  ;; For GFNI support
163  UNSPEC_GF2P8AFFINEINV
164  UNSPEC_GF2P8AFFINE
165  UNSPEC_GF2P8MUL
166
167  ;; For AVX512VBMI2 support
168  UNSPEC_VPSHLD
169  UNSPEC_VPSHRD
170  UNSPEC_VPSHRDV
171  UNSPEC_VPSHLDV
172
173  ;; For AVX512VNNI support
174  UNSPEC_VPMADDUBSWACCD
175  UNSPEC_VPMADDUBSWACCSSD
176  UNSPEC_VPMADDWDACCD
177  UNSPEC_VPMADDWDACCSSD
178
179  ;; For VAES support
180  UNSPEC_VAESDEC
181  UNSPEC_VAESDECLAST
182  UNSPEC_VAESENC
183  UNSPEC_VAESENCLAST
184
185  ;; For VPCLMULQDQ support
186  UNSPEC_VPCLMULQDQ
187
188  ;; For AVX512BITALG support
189  UNSPEC_VPSHUFBIT
190
191  ;; For VP2INTERSECT support
192  UNSPEC_VP2INTERSECT
193
194  ;; For AVX512BF16 support
195  UNSPEC_VCVTNE2PS2BF16
196  UNSPEC_VCVTNEPS2BF16
197  UNSPEC_VDPBF16PS
198])
199
200(define_c_enum "unspecv" [
201  UNSPECV_LDMXCSR
202  UNSPECV_STMXCSR
203  UNSPECV_CLFLUSH
204  UNSPECV_MONITOR
205  UNSPECV_MWAIT
206  UNSPECV_VZEROALL
207  UNSPECV_VZEROUPPER
208])
209
210;; All vector modes including V?TImode, used in move patterns.
211(define_mode_iterator VMOVE
212  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
213   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
214   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
215   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
216   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX") V1TI
217   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
218   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") V2DF])
219
220;; All AVX-512{F,VL} vector modes. Supposed TARGET_AVX512F baseline.
221(define_mode_iterator V48_AVX512VL
222  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
223   V8DI  (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
224   V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
225   V8DF  (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
226
227;; 1,2 byte AVX-512{BW,VL} vector modes. Supposed TARGET_AVX512BW baseline.
228(define_mode_iterator VI12_AVX512VL
229  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")
230   V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
231
232;; Same iterator, but without supposed TARGET_AVX512BW
233(define_mode_iterator VI12_AVX512VLBW
234  [(V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
235   (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW")
236   (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
237
238(define_mode_iterator VI1_AVX512VL
239  [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")])
240
241;; All vector modes
242(define_mode_iterator V
243  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
244   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
245   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
246   (V8DI "TARGET_AVX512F")  (V4DI "TARGET_AVX") V2DI
247   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
248   (V8DF "TARGET_AVX512F")  (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
249
250;; All 128bit vector modes
251(define_mode_iterator V_128
252  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
253
254;; All 256bit vector modes
255(define_mode_iterator V_256
256  [V32QI V16HI V8SI V4DI V8SF V4DF])
257
258;; All 128bit and 256bit vector modes
259(define_mode_iterator V_128_256
260  [V32QI V16QI V16HI V8HI V8SI V4SI V4DI V2DI V8SF V4SF V4DF V2DF])
261
262;; All 512bit vector modes
263(define_mode_iterator V_512 [V64QI V32HI V16SI V8DI V16SF V8DF])
264
265;; All 256bit and 512bit vector modes
266(define_mode_iterator V_256_512
267  [V32QI V16HI V8SI V4DI V8SF V4DF
268   (V64QI "TARGET_AVX512F") (V32HI "TARGET_AVX512F") (V16SI "TARGET_AVX512F")
269   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")])
270
271;; All vector float modes
272(define_mode_iterator VF
273  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
274   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
275
276;; 128- and 256-bit float vector modes
277(define_mode_iterator VF_128_256
278  [(V8SF "TARGET_AVX") V4SF
279   (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
280
281;; All SFmode vector float modes
282(define_mode_iterator VF1
283  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF])
284
285(define_mode_iterator VF1_AVX2
286  [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX2") V4SF])
287
288;; 128- and 256-bit SF vector modes
289(define_mode_iterator VF1_128_256
290  [(V8SF "TARGET_AVX") V4SF])
291
292(define_mode_iterator VF1_128_256VL
293  [V8SF (V4SF "TARGET_AVX512VL")])
294
295;; All DFmode vector float modes
296(define_mode_iterator VF2
297  [(V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF])
298
299;; 128- and 256-bit DF vector modes
300(define_mode_iterator VF2_128_256
301  [(V4DF "TARGET_AVX") V2DF])
302
303(define_mode_iterator VF2_512_256
304  [(V8DF "TARGET_AVX512F") V4DF])
305
306(define_mode_iterator VF2_512_256VL
307  [V8DF (V4DF "TARGET_AVX512VL")])
308
309;; All 128bit vector float modes
310(define_mode_iterator VF_128
311  [V4SF (V2DF "TARGET_SSE2")])
312
313;; All 256bit vector float modes
314(define_mode_iterator VF_256
315  [V8SF V4DF])
316
317;; All 512bit vector float modes
318(define_mode_iterator VF_512
319  [V16SF V8DF])
320
321(define_mode_iterator VI48_AVX512VL
322  [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
323   V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
324
325(define_mode_iterator VF_AVX512VL
326  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
327   V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
328
329(define_mode_iterator VF2_AVX512VL
330  [V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
331
332(define_mode_iterator VF1_AVX512VL
333  [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")])
334
335;; All vector integer modes
336(define_mode_iterator VI
337  [(V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
338   (V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
339   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
340   (V8SI "TARGET_AVX") V4SI
341   (V4DI "TARGET_AVX") V2DI])
342
343(define_mode_iterator VI_AVX2
344  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
345   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
346   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
347   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
348
349;; All QImode vector integer modes
350(define_mode_iterator VI1
351  [(V32QI "TARGET_AVX") V16QI])
352
353;; All DImode vector integer modes
354(define_mode_iterator V_AVX
355  [V16QI V8HI V4SI V2DI V4SF V2DF
356   (V32QI "TARGET_AVX") (V16HI "TARGET_AVX")
357   (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")
358   (V8SF "TARGET_AVX") (V4DF"TARGET_AVX")])
359
360(define_mode_iterator VI48_AVX
361 [V4SI V2DI
362  (V8SI "TARGET_AVX") (V4DI "TARGET_AVX")])
363
364(define_mode_iterator VI8
365  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
366
367(define_mode_iterator VI8_FVL
368  [(V8DI "TARGET_AVX512F") V4DI (V2DI "TARGET_AVX512VL")])
369
370(define_mode_iterator VI8_AVX512VL
371  [V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
372
373(define_mode_iterator VI8_256_512
374  [V8DI (V4DI "TARGET_AVX512VL")])
375
376(define_mode_iterator VI1_AVX2
377  [(V32QI "TARGET_AVX2") V16QI])
378
379(define_mode_iterator VI1_AVX512
380  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI])
381
382(define_mode_iterator VI1_AVX512F
383  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI])
384
385(define_mode_iterator VI2_AVX2
386  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
387
388(define_mode_iterator VI2_AVX512F
389  [(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI])
390
391(define_mode_iterator VI4_AVX
392  [(V8SI "TARGET_AVX") V4SI])
393
394(define_mode_iterator VI4_AVX2
395  [(V8SI "TARGET_AVX2") V4SI])
396
397(define_mode_iterator VI4_AVX512F
398  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
399
400(define_mode_iterator VI4_AVX512VL
401  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
402
403(define_mode_iterator VI48_AVX512F_AVX512VL
404  [V4SI V8SI (V16SI "TARGET_AVX512F")
405   (V2DI "TARGET_AVX512VL") (V4DI "TARGET_AVX512VL") (V8DI "TARGET_AVX512F")])
406
407(define_mode_iterator VI2_AVX512VL
408  [(V8HI "TARGET_AVX512VL") (V16HI "TARGET_AVX512VL") V32HI])
409
410(define_mode_iterator VI1_AVX512VL_F
411  [V32QI (V16QI "TARGET_AVX512VL") (V64QI "TARGET_AVX512F")])
412
413(define_mode_iterator VI8_AVX2_AVX512BW
414  [(V8DI "TARGET_AVX512BW") (V4DI "TARGET_AVX2") V2DI])
415
416(define_mode_iterator VI8_AVX2
417  [(V4DI "TARGET_AVX2") V2DI])
418
419(define_mode_iterator VI8_AVX2_AVX512F
420  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
421
422(define_mode_iterator VI8_AVX_AVX512F
423  [(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")])
424
425(define_mode_iterator VI4_128_8_256
426  [V4SI V4DI])
427
428;; All V8D* modes
429(define_mode_iterator V8FI
430  [V8DF V8DI])
431
432;; All V16S* modes
433(define_mode_iterator V16FI
434  [V16SF V16SI])
435
436;; ??? We should probably use TImode instead.
437(define_mode_iterator VIMAX_AVX2_AVX512BW
438  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") V1TI])
439
440;; Suppose TARGET_AVX512BW as baseline
441(define_mode_iterator VIMAX_AVX512VL
442  [V4TI (V2TI "TARGET_AVX512VL") (V1TI "TARGET_AVX512VL")])
443
444(define_mode_iterator VIMAX_AVX2
445  [(V2TI "TARGET_AVX2") V1TI])
446
447;; ??? This should probably be dropped in favor of VIMAX_AVX2_AVX512BW.
448(define_mode_iterator SSESCALARMODE
449  [(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
450
451(define_mode_iterator VI12_AVX2
452  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
453   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
454
455(define_mode_iterator VI24_AVX2
456  [(V16HI "TARGET_AVX2") V8HI
457   (V8SI "TARGET_AVX2") V4SI])
458
459(define_mode_iterator VI124_AVX2_24_AVX512F_1_AVX512BW
460  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
461   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
462   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI])
463
464(define_mode_iterator VI124_AVX2
465  [(V32QI "TARGET_AVX2") V16QI
466   (V16HI "TARGET_AVX2") V8HI
467   (V8SI "TARGET_AVX2") V4SI])
468
469(define_mode_iterator VI2_AVX2_AVX512BW
470  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
471
472(define_mode_iterator VI248_AVX512VL
473  [V32HI V16SI V8DI
474   (V16HI "TARGET_AVX512VL") (V8SI "TARGET_AVX512VL")
475   (V4DI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")
476   (V4SI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
477
478(define_mode_iterator VI48_AVX2
479  [(V8SI "TARGET_AVX2") V4SI
480   (V4DI "TARGET_AVX2") V2DI])
481
482(define_mode_iterator VI248_AVX2
483  [(V16HI "TARGET_AVX2") V8HI
484   (V8SI "TARGET_AVX2") V4SI
485   (V4DI "TARGET_AVX2") V2DI])
486
487(define_mode_iterator VI248_AVX2_8_AVX512F_24_AVX512BW
488  [(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
489   (V16SI "TARGET_AVX512BW") (V8SI "TARGET_AVX2") V4SI
490   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
491
492(define_mode_iterator VI248_AVX512BW
493  [(V32HI "TARGET_AVX512BW") V16SI V8DI])
494
495(define_mode_iterator VI248_AVX512BW_AVX512VL
496  [(V32HI "TARGET_AVX512BW") 
497   (V4DI "TARGET_AVX512VL") V16SI V8DI])
498
499;; Suppose TARGET_AVX512VL as baseline
500(define_mode_iterator VI248_AVX512BW_1
501 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
502  V8SI V4SI
503  V2DI])
504   
505(define_mode_iterator VI248_AVX512BW_2
506 [(V16HI "TARGET_AVX512BW") (V8HI "TARGET_AVX512BW")
507  V8SI V4SI
508  V4DI V2DI])
509   
510(define_mode_iterator VI48_AVX512F
511  [(V16SI "TARGET_AVX512F") V8SI V4SI
512   (V8DI "TARGET_AVX512F") V4DI V2DI])
513
514(define_mode_iterator VI48_AVX_AVX512F
515  [(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
516   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI])
517
518(define_mode_iterator VI12_AVX_AVX512F
519  [ (V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
520    (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI])
521
522(define_mode_iterator V48_AVX2
523  [V4SF V2DF
524   V8SF V4DF
525   (V4SI "TARGET_AVX2") (V2DI "TARGET_AVX2")
526   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")])
527
528(define_mode_iterator VI1_AVX512VLBW
529  [(V64QI "TARGET_AVX512BW") (V32QI  "TARGET_AVX512VL")
530	(V16QI  "TARGET_AVX512VL")])
531
532(define_mode_attr avx512
533  [(V16QI "avx512vl") (V32QI "avx512vl") (V64QI "avx512bw")
534   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
535   (V4SI  "avx512vl") (V8SI  "avx512vl") (V16SI "avx512f")
536   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
537   (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
538   (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
539
540(define_mode_attr sse2_avx_avx512f
541  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
542   (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
543   (V4SI  "sse2") (V8SI  "avx") (V16SI "avx512f")
544   (V2DI  "avx512vl") (V4DI  "avx512vl") (V8DI "avx512f")
545   (V16SF "avx512f") (V8SF "avx") (V4SF "avx")
546   (V8DF "avx512f") (V4DF "avx") (V2DF "avx")])
547
548(define_mode_attr sse2_avx2
549  [(V16QI "sse2") (V32QI "avx2") (V64QI "avx512bw")
550   (V8HI "sse2") (V16HI "avx2") (V32HI "avx512bw")
551   (V4SI "sse2") (V8SI "avx2") (V16SI "avx512f")
552   (V2DI "sse2") (V4DI "avx2") (V8DI "avx512f")
553   (V1TI "sse2") (V2TI "avx2") (V4TI "avx512bw")])
554
555(define_mode_attr ssse3_avx2
556   [(V16QI "ssse3") (V32QI "avx2") (V64QI "avx512bw")
557    (V4HI "ssse3") (V8HI "ssse3") (V16HI "avx2") (V32HI "avx512bw")
558    (V4SI "ssse3") (V8SI "avx2")
559    (V2DI "ssse3") (V4DI "avx2")
560    (TI "ssse3") (V2TI "avx2") (V4TI "avx512bw")])
561
562(define_mode_attr sse4_1_avx2
563   [(V16QI "sse4_1") (V32QI "avx2") (V64QI "avx512bw")
564    (V8HI "sse4_1") (V16HI "avx2") (V32HI "avx512bw")
565    (V4SI "sse4_1") (V8SI "avx2") (V16SI "avx512f")
566    (V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512dq")])
567
568(define_mode_attr avx_avx2
569  [(V4SF "avx") (V2DF "avx")
570   (V8SF "avx") (V4DF "avx")
571   (V4SI "avx2") (V2DI "avx2")
572   (V8SI "avx2") (V4DI "avx2")])
573
574(define_mode_attr vec_avx2
575  [(V16QI "vec") (V32QI "avx2")
576   (V8HI "vec") (V16HI "avx2")
577   (V4SI "vec") (V8SI "avx2")
578   (V2DI "vec") (V4DI "avx2")])
579
580(define_mode_attr avx2_avx512
581  [(V4SI "avx2") (V8SI "avx2") (V16SI "avx512f")
582   (V2DI "avx2") (V4DI "avx2") (V8DI "avx512f")
583   (V4SF "avx2") (V8SF "avx2") (V16SF "avx512f")
584   (V2DF "avx2") (V4DF "avx2") (V8DF "avx512f")
585   (V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")])
586
587(define_mode_attr shuffletype
588  [(V16SF "f") (V16SI "i") (V8DF "f") (V8DI "i")
589  (V8SF "f") (V8SI "i") (V4DF "f") (V4DI "i")
590  (V4SF "f") (V4SI "i") (V2DF "f") (V2DI "i")
591  (V32HI "i") (V16HI "i") (V8HI "i")
592  (V64QI "i") (V32QI "i") (V16QI "i")
593  (V4TI "i") (V2TI "i") (V1TI "i")])
594
595(define_mode_attr ssequartermode
596  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "V4SI") (V8DI "V2DI")])
597
598(define_mode_attr ssequarterinsnmode
599  [(V16SF "V4SF") (V8DF "V2DF") (V16SI "TI") (V8DI "TI")])
600
601(define_mode_attr vecmemsuffix
602  [(V16SF "{z}") (V8SF "{y}") (V4SF "{x}")
603   (V8DF "{z}") (V4DF "{y}") (V2DF "{x}")])
604
605(define_mode_attr ssedoublemodelower
606  [(V16QI "v16hi") (V32QI "v32hi") (V64QI "v64hi")
607   (V8HI "v8si")   (V16HI "v16si") (V32HI "v32si")
608   (V4SI "v4di")   (V8SI "v8di")   (V16SI "v16di")])
609
610(define_mode_attr ssedoublemode
611  [(V4SF "V8SF") (V8SF "V16SF") (V16SF "V32SF")
612   (V2DF "V4DF") (V4DF "V8DF") (V8DF "V16DF")
613   (V16QI "V16HI") (V32QI "V32HI") (V64QI "V64HI")
614   (V8HI "V8SI") (V16HI "V16SI") (V32HI "V32SI")
615   (V4SI "V4DI") (V8SI "V16SI") (V16SI "V32SI")
616   (V4DI "V8DI") (V8DI "V16DI")])
617
618(define_mode_attr ssebytemode
619  [(V8DI "V64QI") (V4DI "V32QI") (V2DI "V16QI")
620   (V16SI "V64QI") (V8SI "V32QI") (V4SI "V16QI")])
621
622;; All 128bit vector integer modes
623(define_mode_iterator VI_128 [V16QI V8HI V4SI V2DI])
624
625;; All 256bit vector integer modes
626(define_mode_iterator VI_256 [V32QI V16HI V8SI V4DI])
627
628;; Various 128bit vector integer mode combinations
629(define_mode_iterator VI12_128 [V16QI V8HI])
630(define_mode_iterator VI14_128 [V16QI V4SI])
631(define_mode_iterator VI124_128 [V16QI V8HI V4SI])
632(define_mode_iterator VI24_128 [V8HI V4SI])
633(define_mode_iterator VI248_128 [V8HI V4SI V2DI])
634(define_mode_iterator VI48_128 [V4SI V2DI])
635
636;; Various 256bit and 512 vector integer mode combinations
637(define_mode_iterator VI124_256 [V32QI V16HI V8SI])
638(define_mode_iterator VI124_256_AVX512F_AVX512BW
639  [V32QI V16HI V8SI
640   (V64QI "TARGET_AVX512BW")
641   (V32HI "TARGET_AVX512BW")
642   (V16SI "TARGET_AVX512F")])
643(define_mode_iterator VI48_256 [V8SI V4DI])
644(define_mode_iterator VI48_512 [V16SI V8DI])
645(define_mode_iterator VI4_256_8_512 [V8SI V8DI])
646(define_mode_iterator VI_AVX512BW
647  [V16SI V8DI (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512BW")])
648
649;; Int-float size matches
650(define_mode_iterator VI4F_128 [V4SI V4SF])
651(define_mode_iterator VI8F_128 [V2DI V2DF])
652(define_mode_iterator VI4F_256 [V8SI V8SF])
653(define_mode_iterator VI8F_256 [V4DI V4DF])
654(define_mode_iterator VI4F_256_512
655  [V8SI V8SF
656   (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")])
657(define_mode_iterator VI48F_256_512
658  [V8SI V8SF
659  (V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
660  (V8DI  "TARGET_AVX512F") (V8DF  "TARGET_AVX512F")
661  (V4DI  "TARGET_AVX512VL") (V4DF  "TARGET_AVX512VL")])
662(define_mode_iterator VF48_I1248
663  [V16SI V16SF V8DI V8DF V32HI V64QI])
664(define_mode_iterator VI48F
665  [V16SI V16SF V8DI V8DF
666   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
667   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
668   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
669   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
670(define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
671
672(define_mode_iterator VF_AVX512
673  [(V4SF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
674   (V8SF "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
675   V16SF V8DF])
676
677(define_mode_attr avx512bcst
678  [(V4SI "%{1to4%}") (V2DI "%{1to2%}")
679   (V8SI "%{1to8%}") (V4DI "%{1to4%}")
680   (V16SI "%{1to16%}") (V8DI "%{1to8%}")
681   (V4SF "%{1to4%}") (V2DF "%{1to2%}")
682   (V8SF "%{1to8%}") (V4DF "%{1to4%}")
683   (V16SF "%{1to16%}") (V8DF "%{1to8%}")])
684
685;; Mapping from float mode to required SSE level
686(define_mode_attr sse
687  [(SF "sse") (DF "sse2")
688   (V4SF "sse") (V2DF "sse2")
689   (V16SF "avx512f") (V8SF "avx")
690   (V8DF "avx512f") (V4DF "avx")])
691
692(define_mode_attr sse2
693  [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
694   (V2DI "sse2") (V4DI "avx") (V8DI "avx512f")])
695
696(define_mode_attr sse3
697  [(V16QI "sse3") (V32QI "avx")])
698
699(define_mode_attr sse4_1
700  [(V4SF "sse4_1") (V2DF "sse4_1")
701   (V8SF "avx") (V4DF "avx")
702   (V8DF "avx512f")
703   (V4DI "avx") (V2DI "sse4_1")
704   (V8SI "avx") (V4SI "sse4_1")
705   (V16QI "sse4_1") (V32QI "avx")
706   (V8HI "sse4_1") (V16HI "avx")])
707
708(define_mode_attr avxsizesuffix
709  [(V64QI "512") (V32HI "512") (V16SI "512") (V8DI "512")
710   (V32QI "256") (V16HI "256") (V8SI "256") (V4DI "256")
711   (V16QI "") (V8HI "") (V4SI "") (V2DI "")
712   (V16SF "512") (V8DF "512")
713   (V8SF "256") (V4DF "256")
714   (V4SF "") (V2DF "")])
715
716;; SSE instruction mode
717(define_mode_attr sseinsnmode
718  [(V64QI "XI") (V32HI "XI") (V16SI "XI") (V8DI "XI") (V4TI "XI")
719   (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") (V2TI "OI")
720   (V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
721   (V16SF "V16SF") (V8DF "V8DF")
722   (V8SF "V8SF") (V4DF "V4DF")
723   (V4SF "V4SF") (V2DF "V2DF")
724   (TI "TI")])
725
726;; Mapping of vector modes to corresponding mask size
727(define_mode_attr avx512fmaskmode
728  [(V64QI "DI") (V32QI "SI") (V16QI "HI")
729   (V32HI "SI") (V16HI "HI") (V8HI  "QI") (V4HI "QI")
730   (V16SI "HI") (V8SI  "QI") (V4SI  "QI")
731   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
732   (V16SF "HI") (V8SF  "QI") (V4SF  "QI")
733   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
734
735;; Mapping of vector modes to corresponding mask size
736(define_mode_attr avx512fmaskmodelower
737  [(V64QI "di") (V32QI "si") (V16QI "hi")
738   (V32HI "si") (V16HI "hi") (V8HI  "qi") (V4HI "qi")
739   (V16SI "hi") (V8SI  "qi") (V4SI  "qi")
740   (V8DI  "qi") (V4DI  "qi") (V2DI  "qi")
741   (V16SF "hi") (V8SF  "qi") (V4SF  "qi")
742   (V8DF  "qi") (V4DF  "qi") (V2DF  "qi")])
743
744;; Mapping of vector modes to corresponding mask half size
745(define_mode_attr avx512fmaskhalfmode
746  [(V64QI "SI") (V32QI "HI") (V16QI "QI")
747   (V32HI "HI") (V16HI "QI") (V8HI  "QI") (V4HI "QI")
748   (V16SI "QI") (V8SI  "QI") (V4SI  "QI")
749   (V8DI  "QI") (V4DI  "QI") (V2DI  "QI")
750   (V16SF "QI") (V8SF  "QI") (V4SF  "QI")
751   (V8DF  "QI") (V4DF  "QI") (V2DF  "QI")])
752
753;; Mapping of vector float modes to an integer mode of the same size
754(define_mode_attr sseintvecmode
755  [(V16SF "V16SI") (V8DF  "V8DI")
756   (V8SF  "V8SI")  (V4DF  "V4DI")
757   (V4SF  "V4SI")  (V2DF  "V2DI")
758   (V16SI "V16SI") (V8DI  "V8DI")
759   (V8SI  "V8SI")  (V4DI  "V4DI")
760   (V4SI  "V4SI")  (V2DI  "V2DI")
761   (V16HI "V16HI") (V8HI  "V8HI")
762   (V32HI "V32HI") (V64QI "V64QI")
763   (V32QI "V32QI") (V16QI "V16QI")])
764
765(define_mode_attr sseintvecmode2
766  [(V8DF "XI") (V4DF "OI") (V2DF "TI")
767   (V8SF "OI") (V4SF "TI")])
768
769(define_mode_attr sseintvecmodelower
770  [(V16SF "v16si") (V8DF "v8di")
771   (V8SF "v8si") (V4DF "v4di")
772   (V4SF "v4si") (V2DF "v2di")
773   (V8SI "v8si") (V4DI "v4di")
774   (V4SI "v4si") (V2DI "v2di")
775   (V16HI "v16hi") (V8HI "v8hi")
776   (V32QI "v32qi") (V16QI "v16qi")])
777
778;; Mapping of vector modes to a vector mode of double size
779(define_mode_attr ssedoublevecmode
780  [(V32QI "V64QI") (V16HI "V32HI") (V8SI "V16SI") (V4DI "V8DI")
781   (V16QI "V32QI") (V8HI "V16HI") (V4SI "V8SI") (V2DI "V4DI")
782   (V8SF "V16SF") (V4DF "V8DF")
783   (V4SF "V8SF") (V2DF "V4DF")])
784
785;; Mapping of vector modes to a vector mode of half size
786(define_mode_attr ssehalfvecmode
787  [(V64QI "V32QI") (V32HI "V16HI") (V16SI "V8SI") (V8DI "V4DI") (V4TI "V2TI")
788   (V32QI "V16QI") (V16HI  "V8HI") (V8SI  "V4SI") (V4DI "V2DI")
789   (V16QI  "V8QI") (V8HI   "V4HI") (V4SI  "V2SI")
790   (V16SF "V8SF") (V8DF "V4DF")
791   (V8SF  "V4SF") (V4DF "V2DF")
792   (V4SF  "V2SF")])
793
794(define_mode_attr ssehalfvecmodelower
795  [(V64QI "v32qi") (V32HI "v16hi") (V16SI "v8si") (V8DI "v4di") (V4TI "v2ti")
796   (V32QI "v16qi") (V16HI  "v8hi") (V8SI  "v4si") (V4DI "v2di")
797   (V16QI  "v8qi") (V8HI   "v4hi") (V4SI  "v2si")
798   (V16SF "v8sf") (V8DF "v4df")
799   (V8SF  "v4sf") (V4DF "v2df")
800   (V4SF  "v2sf")])
801
802;; Mapping of vector modes ti packed single mode of the same size
803(define_mode_attr ssePSmode
804  [(V16SI "V16SF") (V8DF "V16SF")
805   (V16SF "V16SF") (V8DI "V16SF")
806   (V64QI "V16SF") (V32QI "V8SF") (V16QI "V4SF")
807   (V32HI "V16SF") (V16HI "V8SF") (V8HI "V4SF")
808   (V8SI "V8SF") (V4SI "V4SF")
809   (V4DI "V8SF") (V2DI "V4SF")
810   (V4TI "V16SF") (V2TI "V8SF") (V1TI "V4SF")
811   (V8SF "V8SF") (V4SF "V4SF")
812   (V4DF "V8SF") (V2DF "V4SF")])
813
814(define_mode_attr ssePSmode2
815  [(V8DI "V8SF") (V4DI "V4SF")])
816
817;; Mapping of vector modes back to the scalar modes
818(define_mode_attr ssescalarmode
819  [(V64QI "QI") (V32QI "QI") (V16QI "QI")
820   (V32HI "HI") (V16HI "HI") (V8HI "HI")
821   (V16SI "SI") (V8SI "SI")  (V4SI "SI")
822   (V8DI "DI")  (V4DI "DI")  (V2DI "DI")
823   (V16SF "SF") (V8SF "SF")  (V4SF "SF")
824   (V8DF "DF")  (V4DF "DF")  (V2DF "DF")
825   (V4TI "TI")  (V2TI "TI")])
826
827;; Mapping of vector modes back to the scalar modes
828(define_mode_attr ssescalarmodelower
829  [(V64QI "qi") (V32QI "qi") (V16QI "qi")
830   (V32HI "hi") (V16HI "hi") (V8HI "hi")
831   (V16SI "si") (V8SI "si")  (V4SI "si")
832   (V8DI "di")  (V4DI "di")  (V2DI "di")
833   (V16SF "sf") (V8SF "sf")  (V4SF "sf")
834   (V8DF "df")  (V4DF "df")  (V2DF "df")
835   (V4TI "ti")  (V2TI "ti")])
836
837;; Mapping of vector modes to the 128bit modes
838(define_mode_attr ssexmmmode
839  [(V64QI "V16QI") (V32QI "V16QI") (V16QI "V16QI")
840   (V32HI "V8HI")  (V16HI "V8HI") (V8HI "V8HI")
841   (V16SI "V4SI")  (V8SI "V4SI")  (V4SI "V4SI")
842   (V8DI "V2DI")   (V4DI "V2DI")  (V2DI "V2DI")
843   (V16SF "V4SF")  (V8SF "V4SF")  (V4SF "V4SF")
844   (V8DF "V2DF")   (V4DF "V2DF")  (V2DF "V2DF")])
845
846;; Pointer size override for scalar modes (Intel asm dialect)
847(define_mode_attr iptr
848  [(V64QI "b") (V32HI "w") (V16SI "k") (V8DI "q")
849   (V32QI "b") (V16HI "w") (V8SI "k") (V4DI "q")
850   (V16QI "b") (V8HI "w") (V4SI "k") (V2DI "q")
851   (V16SF "k") (V8DF "q")
852   (V8SF "k") (V4DF "q")
853   (V4SF "k") (V2DF "q")
854   (SF "k") (DF "q")])
855
856;; Mapping of vector modes to VPTERNLOG suffix
857(define_mode_attr ternlogsuffix
858  [(V8DI "q") (V4DI "q") (V2DI "q")
859   (V16SI "d") (V8SI "d") (V4SI "d")
860   (V32HI "d") (V16HI "d") (V8HI "d")
861   (V64QI "d") (V32QI "d") (V16QI "d")])
862
863;; Number of scalar elements in each vector type
864(define_mode_attr ssescalarnum
865  [(V64QI "64") (V16SI "16") (V8DI "8")
866   (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")
867   (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
868   (V16SF "16") (V8DF "8")
869   (V8SF "8") (V4DF "4")
870   (V4SF "4") (V2DF "2")])
871
872;; Mask of scalar elements in each vector type
873(define_mode_attr ssescalarnummask
874  [(V32QI "31") (V16HI "15") (V8SI "7") (V4DI "3")
875   (V16QI "15") (V8HI "7") (V4SI "3") (V2DI "1")
876   (V8SF "7") (V4DF "3")
877   (V4SF "3") (V2DF "1")])
878
879(define_mode_attr ssescalarsize
880  [(V4TI  "64") (V2TI  "64") (V1TI  "64")
881   (V8DI  "64") (V4DI  "64") (V2DI  "64")
882   (V64QI "8") (V32QI "8") (V16QI "8")
883   (V32HI "16") (V16HI "16") (V8HI "16")
884   (V16SI "32") (V8SI "32") (V4SI "32")
885   (V16SF "32") (V8SF "32") (V4SF "32")
886   (V8DF "64") (V4DF "64") (V2DF "64")])
887
888;; SSE prefix for integer vector modes
889(define_mode_attr sseintprefix
890  [(V2DI  "p") (V2DF  "")
891   (V4DI  "p") (V4DF  "")
892   (V8DI  "p") (V8DF  "")
893   (V4SI  "p") (V4SF  "")
894   (V8SI  "p") (V8SF  "")
895   (V16SI "p") (V16SF "")
896   (V16QI "p") (V8HI "p")
897   (V32QI "p") (V16HI "p")
898   (V64QI "p") (V32HI "p")])
899
900;; SSE scalar suffix for vector modes
901(define_mode_attr ssescalarmodesuffix
902  [(SF "ss") (DF "sd")
903   (V16SF "ss") (V8DF "sd")
904   (V8SF "ss") (V4DF "sd")
905   (V4SF "ss") (V2DF "sd")
906   (V16SI "d") (V8DI "q")
907   (V8SI "d") (V4DI "q")
908   (V4SI "d") (V2DI "q")])
909
910;; Pack/unpack vector modes
911(define_mode_attr sseunpackmode
912  [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")
913   (V32QI "V16HI") (V16HI "V8SI") (V8SI "V4DI")
914   (V32HI "V16SI") (V64QI "V32HI") (V16SI "V8DI")])
915
916(define_mode_attr ssepackmode
917  [(V8HI "V16QI") (V4SI "V8HI") (V2DI "V4SI")
918   (V16HI "V32QI") (V8SI "V16HI") (V4DI "V8SI")
919   (V32HI "V64QI") (V16SI "V32HI") (V8DI "V16SI")])
920
921;; Mapping of the max integer size for xop rotate immediate constraint
922(define_mode_attr sserotatemax
923  [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
924
925;; Mapping of mode to cast intrinsic name
926(define_mode_attr castmode
927 [(V8SI "si") (V8SF "ps") (V4DF "pd")
928  (V16SI "si") (V16SF "ps") (V8DF "pd")])
929
930;; Instruction suffix for sign and zero extensions.
931(define_code_attr extsuffix [(sign_extend "sx") (zero_extend "zx")])
932
933;; i128 for integer vectors and TARGET_AVX2, f128 otherwise.
934;; i64x4 or f64x4 for 512bit modes.
935(define_mode_attr i128
936  [(V16SF "f64x4") (V8SF "f128") (V8DF "f64x4") (V4DF "f128")
937   (V64QI "i64x4") (V32QI "%~128") (V32HI "i64x4") (V16HI "%~128")
938   (V16SI "i64x4") (V8SI "%~128") (V8DI "i64x4") (V4DI "%~128")])
939
940;; For 256-bit modes for TARGET_AVX512VL && TARGET_AVX512DQ
941;; i32x4, f32x4, i64x2 or f64x2 suffixes.
942(define_mode_attr i128vldq
943  [(V8SF "f32x4") (V4DF "f64x2")
944   (V32QI "i32x4") (V16HI "i32x4") (V8SI "i32x4") (V4DI "i64x2")])
945
946;; Mix-n-match
947(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
948(define_mode_iterator AVX512MODE2P [V16SI V16SF V8DF])
949
950;; Mapping for dbpsabbw modes
951(define_mode_attr dbpsadbwmode
952  [(V32HI "V64QI") (V16HI "V32QI") (V8HI "V16QI")])
953
954;; Mapping suffixes for broadcast
955(define_mode_attr bcstscalarsuff
956  [(V64QI "b")  (V32QI "b") (V16QI "b")
957   (V32HI "w")  (V16HI "w") (V8HI "w")
958   (V16SI "d")  (V8SI "d")  (V4SI "d")
959   (V8DI "q")   (V4DI "q")  (V2DI "q")
960   (V16SF "ss") (V8SF "ss") (V4SF "ss")
961   (V8DF "sd")  (V4DF "sd") (V2DF "sd")])
962
963;; Tie mode of assembler operand to mode iterator
964(define_mode_attr xtg_mode
965  [(V16QI "x") (V8HI "x") (V4SI "x") (V2DI "x") (V4SF "x") (V2DF "x")
966   (V32QI "t") (V16HI "t") (V8SI "t") (V4DI "t") (V8SF "t") (V4DF "t")
967   (V64QI "g") (V32HI "g") (V16SI "g") (V8DI "g") (V16SF "g") (V8DF "g")])
968
969;; Half mask mode for unpacks
970(define_mode_attr HALFMASKMODE
971  [(DI "SI") (SI "HI")])
972
973;; Double mask mode for packs
974(define_mode_attr DOUBLEMASKMODE
975  [(HI "SI") (SI "DI")])
976
977
978;; Include define_subst patterns for instructions with mask
979(include "subst.md")
980
981;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
982
983;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
984;;
985;; Move patterns
986;;
987;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
988
989;; All of these patterns are enabled for SSE1 as well as SSE2.
990;; This is essential for maintaining stable calling conventions.
991
992(define_expand "mov<mode>"
993  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
994	(match_operand:VMOVE 1 "nonimmediate_operand"))]
995  "TARGET_SSE"
996{
997  ix86_expand_vector_move (<MODE>mode, operands);
998  DONE;
999})
1000
1001(define_insn "mov<mode>_internal"
1002  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
1003	 "=v,v ,v ,m")
1004	(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
1005	 " C,BC,vm,v"))]
1006  "TARGET_SSE
1007   && (register_operand (operands[0], <MODE>mode)
1008       || register_operand (operands[1], <MODE>mode))"
1009{
1010  switch (get_attr_type (insn))
1011    {
1012    case TYPE_SSELOG1:
1013      return standard_sse_constant_opcode (insn, operands);
1014
1015    case TYPE_SSEMOV:
1016      return ix86_output_ssemov (insn, operands);
1017
1018    default:
1019      gcc_unreachable ();
1020    }
1021}
1022  [(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
1023   (set_attr "prefix" "maybe_vex")
1024   (set (attr "mode")
1025	(cond [(match_test "TARGET_AVX")
1026		 (const_string "<sseinsnmode>")
1027	       (ior (not (match_test "TARGET_SSE2"))
1028		    (match_test "optimize_function_for_size_p (cfun)"))
1029		 (const_string "V4SF")
1030	       (and (match_test "<MODE>mode == V2DFmode")
1031		    (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
1032		 (const_string "V4SF")
1033	       (and (eq_attr "alternative" "3")
1034		    (match_test "TARGET_SSE_TYPELESS_STORES"))
1035		 (const_string "V4SF")
1036	       (and (eq_attr "alternative" "0")
1037		    (match_test "TARGET_SSE_LOAD0_BY_PXOR"))
1038		 (const_string "TI")
1039	      ]
1040	      (const_string "<sseinsnmode>")))
1041   (set (attr "enabled")
1042        (cond [(and (match_test "<MODE_SIZE> == 16")
1043		    (eq_attr "alternative" "1"))
1044		 (symbol_ref "TARGET_SSE2")
1045	       (and (match_test "<MODE_SIZE> == 32")
1046		    (eq_attr "alternative" "1"))
1047		 (symbol_ref "TARGET_AVX2")
1048	      ]
1049	      (symbol_ref "true")))])
1050
1051(define_insn "<avx512>_load<mode>_mask"
1052  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
1053	(vec_merge:V48_AVX512VL
1054	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand" "v,m")
1055	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1056	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1057  "TARGET_AVX512F"
1058{
1059  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1060    {
1061      if (misaligned_operand (operands[1], <MODE>mode))
1062	return "vmovu<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1063      else
1064	return "vmova<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1065    }
1066  else
1067    {
1068      if (misaligned_operand (operands[1], <MODE>mode))
1069	return "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1070      else
1071	return "vmovdqa<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}";
1072    }
1073}
1074  [(set_attr "type" "ssemov")
1075   (set_attr "prefix" "evex")
1076   (set_attr "memory" "none,load")
1077   (set_attr "mode" "<sseinsnmode>")])
1078
1079(define_insn "<avx512>_load<mode>_mask"
1080  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
1081	(vec_merge:VI12_AVX512VL
1082	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v,m")
1083	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C,0C")
1084	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
1085  "TARGET_AVX512BW"
1086  "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
1087  [(set_attr "type" "ssemov")
1088   (set_attr "prefix" "evex")
1089   (set_attr "memory" "none,load")
1090   (set_attr "mode" "<sseinsnmode>")])
1091
1092(define_insn "avx512f_mov<ssescalarmodelower>_mask"
1093  [(set (match_operand:VF_128 0 "register_operand" "=v")
1094	(vec_merge:VF_128
1095	  (vec_merge:VF_128
1096	    (match_operand:VF_128 2 "register_operand" "v")
1097	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
1098	    (match_operand:QI 4 "register_operand" "Yk"))
1099	  (match_operand:VF_128 1 "register_operand" "v")
1100	  (const_int 1)))]
1101  "TARGET_AVX512F"
1102  "vmov<ssescalarmodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
1103  [(set_attr "type" "ssemov")
1104   (set_attr "prefix" "evex")
1105   (set_attr "mode" "<ssescalarmode>")])
1106
1107(define_expand "avx512f_load<mode>_mask"
1108  [(set (match_operand:<ssevecmode> 0 "register_operand")
1109	(vec_merge:<ssevecmode>
1110	  (vec_merge:<ssevecmode>
1111	    (vec_duplicate:<ssevecmode>
1112	      (match_operand:MODEF 1 "memory_operand"))
1113	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand")
1114	    (match_operand:QI 3 "register_operand"))
1115	  (match_dup 4)
1116	  (const_int 1)))]
1117  "TARGET_AVX512F"
1118  "operands[4] = CONST0_RTX (<ssevecmode>mode);")
1119
1120(define_insn "*avx512f_load<mode>_mask"
1121  [(set (match_operand:<ssevecmode> 0 "register_operand" "=v")
1122	(vec_merge:<ssevecmode>
1123	  (vec_merge:<ssevecmode>
1124	    (vec_duplicate:<ssevecmode>
1125	      (match_operand:MODEF 1 "memory_operand" "m"))
1126	    (match_operand:<ssevecmode> 2 "nonimm_or_0_operand" "0C")
1127	    (match_operand:QI 3 "register_operand" "Yk"))
1128	  (match_operand:<ssevecmode> 4 "const0_operand" "C")
1129	  (const_int 1)))]
1130  "TARGET_AVX512F"
1131  "vmov<ssescalarmodesuffix>\t{%1, %0%{%3%}%N2|%0%{3%}%N2, %1}"
1132  [(set_attr "type" "ssemov")
1133   (set_attr "prefix" "evex")
1134   (set_attr "memory" "load")
1135   (set_attr "mode" "<MODE>")])
1136
1137(define_insn "avx512f_store<mode>_mask"
1138  [(set (match_operand:MODEF 0 "memory_operand" "=m")
1139	(if_then_else:MODEF
1140	  (and:QI (match_operand:QI 2 "register_operand" "Yk")
1141		 (const_int 1))
1142	  (vec_select:MODEF
1143	    (match_operand:<ssevecmode> 1 "register_operand" "v")
1144	    (parallel [(const_int 0)]))
1145	  (match_dup 0)))]
1146  "TARGET_AVX512F"
1147  "vmov<ssescalarmodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1148  [(set_attr "type" "ssemov")
1149   (set_attr "prefix" "evex")
1150   (set_attr "memory" "store")
1151   (set_attr "mode" "<MODE>")])
1152
1153(define_insn "<avx512>_blendm<mode>"
1154  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
1155	(vec_merge:V48_AVX512VL
1156	  (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "vm")
1157	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1158	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1159  "TARGET_AVX512F"
1160  "v<sseintprefix>blendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1161  [(set_attr "type" "ssemov")
1162   (set_attr "prefix" "evex")
1163   (set_attr "mode" "<sseinsnmode>")])
1164
1165(define_insn "<avx512>_blendm<mode>"
1166  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
1167	(vec_merge:VI12_AVX512VL
1168	  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
1169	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1170	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
1171  "TARGET_AVX512BW"
1172  "vpblendm<ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
1173  [(set_attr "type" "ssemov")
1174   (set_attr "prefix" "evex")
1175   (set_attr "mode" "<sseinsnmode>")])
1176
1177(define_insn "<avx512>_store<mode>_mask"
1178  [(set (match_operand:V48_AVX512VL 0 "memory_operand" "=m")
1179	(vec_merge:V48_AVX512VL
1180	  (match_operand:V48_AVX512VL 1 "register_operand" "v")
1181	  (match_dup 0)
1182	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1183  "TARGET_AVX512F"
1184{
1185  if (FLOAT_MODE_P (GET_MODE_INNER (<MODE>mode)))
1186    {
1187      if (misaligned_operand (operands[0], <MODE>mode))
1188	return "vmovu<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1189      else
1190	return "vmova<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1191    }
1192  else
1193    {
1194      if (misaligned_operand (operands[0], <MODE>mode))
1195	return "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1196      else
1197	return "vmovdqa<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}";
1198    }
1199}
1200  [(set_attr "type" "ssemov")
1201   (set_attr "prefix" "evex")
1202   (set_attr "memory" "store")
1203   (set_attr "mode" "<sseinsnmode>")])
1204
1205(define_insn "<avx512>_store<mode>_mask"
1206  [(set (match_operand:VI12_AVX512VL 0 "memory_operand" "=m")
1207	(vec_merge:VI12_AVX512VL
1208	  (match_operand:VI12_AVX512VL 1 "register_operand" "v")
1209	  (match_dup 0)
1210	  (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")))]
1211  "TARGET_AVX512BW"
1212  "vmovdqu<ssescalarsize>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
1213  [(set_attr "type" "ssemov")
1214   (set_attr "prefix" "evex")
1215   (set_attr "memory" "store")
1216   (set_attr "mode" "<sseinsnmode>")])
1217
1218(define_insn "sse2_movq128"
1219  [(set (match_operand:V2DI 0 "register_operand" "=v")
1220	(vec_concat:V2DI
1221	  (vec_select:DI
1222	    (match_operand:V2DI 1 "nonimmediate_operand" "vm")
1223	    (parallel [(const_int 0)]))
1224	  (const_int 0)))]
1225  "TARGET_SSE2"
1226  "%vmovq\t{%1, %0|%0, %q1}"
1227  [(set_attr "type" "ssemov")
1228   (set_attr "prefix" "maybe_vex")
1229   (set_attr "mode" "TI")])
1230
1231;; Move a DI from a 32-bit register pair (e.g. %edx:%eax) to an xmm.
1232;; We'd rather avoid this entirely; if the 32-bit reg pair was loaded
1233;; from memory, we'd prefer to load the memory directly into the %xmm
1234;; register.  To facilitate this happy circumstance, this pattern won't
1235;; split until after register allocation.  If the 64-bit value didn't
1236;; come from memory, this is the best we can do.  This is much better
1237;; than storing %edx:%eax into a stack temporary and loading an %xmm
1238;; from there.
1239
1240(define_insn_and_split "movdi_to_sse"
1241  [(set (match_operand:V4SI 0 "register_operand" "=x,x,?x")
1242	(unspec:V4SI [(match_operand:DI 1 "nonimmediate_operand" "r,m,r")]
1243		     UNSPEC_MOVDI_TO_SSE))
1244     (clobber (match_scratch:V4SI 2 "=X,X,&x"))]
1245  "!TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
1246  "#"
1247  "&& reload_completed"
1248  [(const_int 0)]
1249{
1250 if (register_operand (operands[1], DImode))
1251   {
1252      /* The DImode arrived in a pair of integral registers (e.g. %edx:%eax).
1253	 Assemble the 64-bit DImode value in an xmm register.  */
1254      emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
1255				  gen_lowpart (SImode, operands[1])));
1256      if (TARGET_SSE4_1)
1257        emit_insn (gen_sse4_1_pinsrd (operands[0], operands[0],
1258				      gen_highpart (SImode, operands[1]),
1259				      GEN_INT (2)));
1260      else
1261	{
1262	  emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
1263				      gen_highpart (SImode, operands[1])));
1264	  emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
1265						 operands[2]));
1266	}
1267  }
1268 else if (memory_operand (operands[1], DImode))
1269   emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
1270				  operands[1], const0_rtx));
1271 else
1272   gcc_unreachable ();
1273 DONE;
1274}
1275  [(set_attr "isa" "sse4,*,*")])
1276
1277(define_split
1278  [(set (match_operand:V4SF 0 "register_operand")
1279	(match_operand:V4SF 1 "zero_extended_scalar_load_operand"))]
1280  "TARGET_SSE && reload_completed"
1281  [(set (match_dup 0)
1282	(vec_merge:V4SF
1283	  (vec_duplicate:V4SF (match_dup 1))
1284	  (match_dup 2)
1285	  (const_int 1)))]
1286{
1287  operands[1] = gen_lowpart (SFmode, operands[1]);
1288  operands[2] = CONST0_RTX (V4SFmode);
1289})
1290
1291(define_split
1292  [(set (match_operand:V2DF 0 "register_operand")
1293	(match_operand:V2DF 1 "zero_extended_scalar_load_operand"))]
1294  "TARGET_SSE2 && reload_completed"
1295  [(set (match_dup 0) (vec_concat:V2DF (match_dup 1) (match_dup 2)))]
1296{
1297  operands[1] = gen_lowpart (DFmode, operands[1]);
1298  operands[2] = CONST0_RTX (DFmode);
1299})
1300
1301(define_expand "movmisalign<mode>"
1302  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
1303	(match_operand:VMOVE 1 "nonimmediate_operand"))]
1304  "TARGET_SSE"
1305{
1306  ix86_expand_vector_move_misalign (<MODE>mode, operands);
1307  DONE;
1308})
1309
1310;; Merge movsd/movhpd to movupd for TARGET_SSE_UNALIGNED_LOAD_OPTIMAL targets.
1311(define_peephole2
1312  [(set (match_operand:V2DF 0 "sse_reg_operand")
1313	(vec_concat:V2DF (match_operand:DF 1 "memory_operand")
1314			 (match_operand:DF 4 "const0_operand")))
1315   (set (match_operand:V2DF 2 "sse_reg_operand")
1316	(vec_concat:V2DF (vec_select:DF (match_dup 2)
1317					(parallel [(const_int 0)]))
1318			 (match_operand:DF 3 "memory_operand")))]
1319  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1320   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1321  [(set (match_dup 2) (match_dup 5))]
1322  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1323
1324(define_peephole2
1325  [(set (match_operand:DF 0 "sse_reg_operand")
1326	(match_operand:DF 1 "memory_operand"))
1327   (set (match_operand:V2DF 2 "sse_reg_operand")
1328	(vec_concat:V2DF (match_operand:DF 4 "sse_reg_operand")
1329			 (match_operand:DF 3 "memory_operand")))]
1330  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
1331   && REGNO (operands[4]) == REGNO (operands[2])
1332   && ix86_operands_ok_for_move_multiple (operands, true, DFmode)"
1333  [(set (match_dup 2) (match_dup 5))]
1334  "operands[5] = adjust_address (operands[1], V2DFmode, 0);")
1335
1336;; Merge movlpd/movhpd to movupd for TARGET_SSE_UNALIGNED_STORE_OPTIMAL targets.
1337(define_peephole2
1338  [(set (match_operand:DF 0 "memory_operand")
1339	(vec_select:DF (match_operand:V2DF 1 "sse_reg_operand")
1340		       (parallel [(const_int 0)])))
1341   (set (match_operand:DF 2 "memory_operand")
1342	(vec_select:DF (match_operand:V2DF 3 "sse_reg_operand")
1343		       (parallel [(const_int 1)])))]
1344  "TARGET_SSE2 && TARGET_SSE_UNALIGNED_STORE_OPTIMAL
1345   && ix86_operands_ok_for_move_multiple (operands, false, DFmode)"
1346  [(set (match_dup 4) (match_dup 1))]
1347  "operands[4] = adjust_address (operands[0], V2DFmode, 0);")
1348
1349(define_insn "<sse3>_lddqu<avxsizesuffix>"
1350  [(set (match_operand:VI1 0 "register_operand" "=x")
1351	(unspec:VI1 [(match_operand:VI1 1 "memory_operand" "m")]
1352		    UNSPEC_LDDQU))]
1353  "TARGET_SSE3"
1354  "%vlddqu\t{%1, %0|%0, %1}"
1355  [(set_attr "type" "ssemov")
1356   (set_attr "movu" "1")
1357   (set (attr "prefix_data16")
1358     (if_then_else
1359       (match_test "TARGET_AVX")
1360     (const_string "*")
1361     (const_string "0")))
1362   (set (attr "prefix_rep")
1363     (if_then_else
1364       (match_test "TARGET_AVX")
1365     (const_string "*")
1366     (const_string "1")))
1367   (set_attr "prefix" "maybe_vex")
1368   (set_attr "mode" "<sseinsnmode>")])
1369
1370(define_insn "sse2_movnti<mode>"
1371  [(set (match_operand:SWI48 0 "memory_operand" "=m")
1372	(unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")]
1373		      UNSPEC_MOVNT))]
1374  "TARGET_SSE2"
1375  "movnti\t{%1, %0|%0, %1}"
1376  [(set_attr "type" "ssemov")
1377   (set_attr "prefix_data16" "0")
1378   (set_attr "mode" "<MODE>")])
1379
1380(define_insn "<sse>_movnt<mode>"
1381  [(set (match_operand:VF 0 "memory_operand" "=m")
1382	(unspec:VF
1383	  [(match_operand:VF 1 "register_operand" "v")]
1384	  UNSPEC_MOVNT))]
1385  "TARGET_SSE"
1386  "%vmovnt<ssemodesuffix>\t{%1, %0|%0, %1}"
1387  [(set_attr "type" "ssemov")
1388   (set_attr "prefix" "maybe_vex")
1389   (set_attr "mode" "<MODE>")])
1390
1391(define_insn "<sse2>_movnt<mode>"
1392  [(set (match_operand:VI8 0 "memory_operand" "=m")
1393	(unspec:VI8 [(match_operand:VI8 1 "register_operand" "v")]
1394		    UNSPEC_MOVNT))]
1395  "TARGET_SSE2"
1396  "%vmovntdq\t{%1, %0|%0, %1}"
1397  [(set_attr "type" "ssecvt")
1398   (set (attr "prefix_data16")
1399     (if_then_else
1400       (match_test "TARGET_AVX")
1401     (const_string "*")
1402     (const_string "1")))
1403   (set_attr "prefix" "maybe_vex")
1404   (set_attr "mode" "<sseinsnmode>")])
1405
1406; Expand patterns for non-temporal stores.  At the moment, only those
1407; that directly map to insns are defined; it would be possible to
1408; define patterns for other modes that would expand to several insns.
1409
1410;; Modes handled by storent patterns.
1411(define_mode_iterator STORENT_MODE
1412  [(DI "TARGET_SSE2 && TARGET_64BIT") (SI "TARGET_SSE2")
1413   (SF "TARGET_SSE4A") (DF "TARGET_SSE4A")
1414   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") (V2DI "TARGET_SSE2")
1415   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
1416   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")])
1417
1418(define_expand "storent<mode>"
1419  [(set (match_operand:STORENT_MODE 0 "memory_operand")
1420	(unspec:STORENT_MODE
1421	  [(match_operand:STORENT_MODE 1 "register_operand")]
1422	  UNSPEC_MOVNT))]
1423  "TARGET_SSE")
1424
1425;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1426;;
1427;; Mask operations
1428;;
1429;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1430
1431;; All integer modes with AVX512BW/DQ.
1432(define_mode_iterator SWI1248_AVX512BWDQ
1433  [(QI "TARGET_AVX512DQ") HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1434
1435;; All integer modes with AVX512BW, where HImode operation
1436;; can be used instead of QImode.
1437(define_mode_iterator SWI1248_AVX512BW
1438  [QI HI (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1439
1440;; All integer modes with AVX512BW/DQ, even HImode requires DQ.
1441(define_mode_iterator SWI1248_AVX512BWDQ2
1442  [(QI "TARGET_AVX512DQ") (HI "TARGET_AVX512DQ")
1443   (SI "TARGET_AVX512BW") (DI "TARGET_AVX512BW")])
1444
1445(define_expand "kmov<mskmodesuffix>"
1446  [(set (match_operand:SWI1248_AVX512BWDQ 0 "nonimmediate_operand")
1447	(match_operand:SWI1248_AVX512BWDQ 1 "nonimmediate_operand"))]
1448  "TARGET_AVX512F
1449   && !(MEM_P (operands[0]) && MEM_P (operands[1]))")
1450
1451(define_insn "k<code><mode>"
1452  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1453	(any_logic:SWI1248_AVX512BW
1454	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1455	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1456   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1457  "TARGET_AVX512F"
1458{
1459  if (get_attr_mode (insn) == MODE_HI)
1460    return "k<logic>w\t{%2, %1, %0|%0, %1, %2}";
1461  else
1462    return "k<logic><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1463}
1464  [(set_attr "type" "msklog")
1465   (set_attr "prefix" "vex")
1466   (set (attr "mode")
1467     (cond [(and (match_test "<MODE>mode == QImode")
1468		 (not (match_test "TARGET_AVX512DQ")))
1469	       (const_string "HI")
1470	   ]
1471	   (const_string "<MODE>")))])
1472
1473(define_insn "kandn<mode>"
1474  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1475	(and:SWI1248_AVX512BW
1476	  (not:SWI1248_AVX512BW
1477	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k"))
1478	  (match_operand:SWI1248_AVX512BW 2 "register_operand" "k")))
1479   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1480  "TARGET_AVX512F"
1481{
1482  if (get_attr_mode (insn) == MODE_HI)
1483    return "kandnw\t{%2, %1, %0|%0, %1, %2}";
1484  else
1485    return "kandn<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1486}
1487  [(set_attr "type" "msklog")
1488   (set_attr "prefix" "vex")
1489   (set (attr "mode")
1490     (cond [(and (match_test "<MODE>mode == QImode")
1491		 (not (match_test "TARGET_AVX512DQ")))
1492	      (const_string "HI")
1493	   ]
1494	   (const_string "<MODE>")))])
1495
1496(define_insn "kxnor<mode>"
1497  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1498	(not:SWI1248_AVX512BW
1499	  (xor:SWI1248_AVX512BW
1500	    (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")
1501	    (match_operand:SWI1248_AVX512BW 2 "register_operand" "k"))))
1502   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1503  "TARGET_AVX512F"
1504{
1505  if (get_attr_mode (insn) == MODE_HI)
1506    return "kxnorw\t{%2, %1, %0|%0, %1, %2}";
1507  else
1508    return "kxnor<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}";
1509}
1510  [(set_attr "type" "msklog")
1511   (set_attr "prefix" "vex")
1512   (set (attr "mode")
1513     (cond [(and (match_test "<MODE>mode == QImode")
1514		 (not (match_test "TARGET_AVX512DQ")))
1515	      (const_string "HI")
1516	   ]
1517	   (const_string "<MODE>")))])
1518
1519(define_insn "knot<mode>"
1520  [(set (match_operand:SWI1248_AVX512BW 0 "register_operand" "=k")
1521	(not:SWI1248_AVX512BW
1522	  (match_operand:SWI1248_AVX512BW 1 "register_operand" "k")))
1523   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1524  "TARGET_AVX512F"
1525{
1526  if (get_attr_mode (insn) == MODE_HI)
1527    return "knotw\t{%1, %0|%0, %1}";
1528  else
1529    return "knot<mskmodesuffix>\t{%1, %0|%0, %1}";
1530}
1531  [(set_attr "type" "msklog")
1532   (set_attr "prefix" "vex")
1533   (set (attr "mode")
1534     (cond [(and (match_test "<MODE>mode == QImode")
1535		 (not (match_test "TARGET_AVX512DQ")))
1536	       (const_string "HI")
1537	   ]
1538	   (const_string "<MODE>")))])
1539
1540(define_insn "kadd<mode>"
1541  [(set (match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "=k")
1542	(plus:SWI1248_AVX512BWDQ2
1543	  (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")
1544	  (match_operand:SWI1248_AVX512BWDQ2 2 "register_operand" "k")))
1545   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1546  "TARGET_AVX512F"
1547  "kadd<mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1548  [(set_attr "type" "msklog")
1549   (set_attr "prefix" "vex")
1550   (set_attr "mode" "<MODE>")])
1551
1552;; Mask variant shift mnemonics
1553(define_code_attr mshift [(ashift "shiftl") (lshiftrt "shiftr")])
1554
1555(define_insn "k<code><mode>"
1556  [(set (match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "=k")
1557	(any_lshift:SWI1248_AVX512BWDQ
1558	  (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")
1559	  (match_operand 2 "const_0_to_255_operand" "n")))
1560   (unspec [(const_int 0)] UNSPEC_MASKOP)]
1561  "TARGET_AVX512F"
1562  "k<mshift><mskmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1563  [(set_attr "type" "msklog")
1564   (set_attr "prefix" "vex")
1565   (set_attr "mode" "<MODE>")])
1566
1567(define_insn "ktest<mode>"
1568  [(set (reg:CC FLAGS_REG)
1569	(unspec:CC
1570	  [(match_operand:SWI1248_AVX512BWDQ2 0 "register_operand" "k")
1571	   (match_operand:SWI1248_AVX512BWDQ2 1 "register_operand" "k")]
1572	  UNSPEC_KTEST))]
1573  "TARGET_AVX512F"
1574  "ktest<mskmodesuffix>\t{%1, %0|%0, %1}"
1575  [(set_attr "mode" "<MODE>")
1576   (set_attr "type" "msklog")
1577   (set_attr "prefix" "vex")])
1578
1579(define_insn "kortest<mode>"
1580  [(set (reg:CC FLAGS_REG)
1581	(unspec:CC
1582	  [(match_operand:SWI1248_AVX512BWDQ 0 "register_operand" "k")
1583	   (match_operand:SWI1248_AVX512BWDQ 1 "register_operand" "k")]
1584	  UNSPEC_KORTEST))]
1585  "TARGET_AVX512F"
1586  "kortest<mskmodesuffix>\t{%1, %0|%0, %1}"
1587  [(set_attr "mode" "<MODE>")
1588   (set_attr "type" "msklog")
1589   (set_attr "prefix" "vex")])
1590
1591(define_insn "kunpckhi"
1592  [(set (match_operand:HI 0 "register_operand" "=k")
1593	(ior:HI
1594	  (ashift:HI
1595	    (zero_extend:HI (match_operand:QI 1 "register_operand" "k"))
1596	    (const_int 8))
1597	  (zero_extend:HI (match_operand:QI 2 "register_operand" "k"))))]
1598  "TARGET_AVX512F"
1599  "kunpckbw\t{%2, %1, %0|%0, %1, %2}"
1600  [(set_attr "mode" "HI")
1601   (set_attr "type" "msklog")
1602   (set_attr "prefix" "vex")])
1603
1604(define_insn "kunpcksi"
1605  [(set (match_operand:SI 0 "register_operand" "=k")
1606	(ior:SI
1607	  (ashift:SI
1608	    (zero_extend:SI (match_operand:HI 1 "register_operand" "k"))
1609	    (const_int 16))
1610	  (zero_extend:SI (match_operand:HI 2 "register_operand" "k"))))]
1611  "TARGET_AVX512BW"
1612  "kunpckwd\t{%2, %1, %0|%0, %1, %2}"
1613  [(set_attr "mode" "SI")])
1614
1615(define_insn "kunpckdi"
1616  [(set (match_operand:DI 0 "register_operand" "=k")
1617	(ior:DI
1618	  (ashift:DI
1619	    (zero_extend:DI (match_operand:SI 1 "register_operand" "k"))
1620	    (const_int 32))
1621	  (zero_extend:DI (match_operand:SI 2 "register_operand" "k"))))]
1622  "TARGET_AVX512BW"
1623  "kunpckdq\t{%2, %1, %0|%0, %1, %2}"
1624  [(set_attr "mode" "DI")])
1625
1626
1627;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1628;;
1629;; Parallel floating point arithmetic
1630;;
1631;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1632
1633(define_expand "<code><mode>2"
1634  [(set (match_operand:VF 0 "register_operand")
1635	(absneg:VF
1636	  (match_operand:VF 1 "register_operand")))]
1637  "TARGET_SSE"
1638  "ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
1639
1640(define_insn_and_split "*<code><mode>2"
1641  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1642	(absneg:VF
1643	  (match_operand:VF 1 "vector_operand" "0,  xBm,v, m")))
1644   (use (match_operand:VF 2 "vector_operand"    "xBm,0,  vm,v"))]
1645  "TARGET_SSE"
1646  "#"
1647  "&& reload_completed"
1648  [(set (match_dup 0) (match_dup 3))]
1649{
1650  enum rtx_code absneg_op = <CODE> == ABS ? AND : XOR;
1651
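  /* AND (for abs) and XOR (for neg) are commutative, so canonicalize the
     operand order: with AVX put a memory input second, without AVX put the
     input that already matches the destination first.  */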
1652  if (TARGET_AVX)
1653    {
1654      if (MEM_P (operands[1]))
1655        std::swap (operands[1], operands[2]);
1656    }
1657  else
1658   {
1659     if (operands_match_p (operands[0], operands[2]))
1660       std::swap (operands[1], operands[2]);
1661   }
1662
1663  operands[3]
1664    = gen_rtx_fmt_ee (absneg_op, <MODE>mode, operands[1], operands[2]);
1665}
1666  [(set_attr "isa" "noavx,noavx,avx,avx")])
1667
1668(define_insn_and_split "*nabs<mode>2"
1669  [(set (match_operand:VF 0 "register_operand" "=x,x,v,v")
1670	(neg:VF
1671	  (abs:VF
1672	    (match_operand:VF 1 "vector_operand" "0,xBm,v,m"))))
1673   (use (match_operand:VF 2 "vector_operand"    "xBm,0,vm,v"))]
1674  "TARGET_SSE"
1675  "#"
1676  "&& reload_completed"
1677  [(set (match_dup 0) (match_dup 3))]
1678{
1679  if (TARGET_AVX)
1680    {
1681      if (MEM_P (operands[1]))
1682        std::swap (operands[1], operands[2]);
1683    }
1684  else
1685   {
1686     if (operands_match_p (operands[0], operands[2]))
1687       std::swap (operands[1], operands[2]);
1688   }
1689
1690  operands[3]
1691    = gen_rtx_fmt_ee (IOR, <MODE>mode, operands[1], operands[2]);
1692}
1693  [(set_attr "isa" "noavx,noavx,avx,avx")])
1694
1695(define_expand "<plusminus_insn><mode>3<mask_name><round_name>"
1696  [(set (match_operand:VF 0 "register_operand")
1697	(plusminus:VF
1698	  (match_operand:VF 1 "<round_nimm_predicate>")
1699	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1700  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1701  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1702
1703(define_insn "*<plusminus_insn><mode>3<mask_name><round_name>"
1704  [(set (match_operand:VF 0 "register_operand" "=x,v")
1705	(plusminus:VF
1706	  (match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
1707	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1708  "TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
1709   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1710  "@
1711   <plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
1712   v<plusminus_mnemonic><ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1713  [(set_attr "isa" "noavx,avx")
1714   (set_attr "type" "sseadd")
1715   (set_attr "prefix" "<mask_prefix3>")
1716   (set_attr "mode" "<MODE>")])
1717
1718(define_insn "*sub<mode>3<mask_name>_bcst"
1719  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1720	(minus:VF_AVX512
1721	  (match_operand:VF_AVX512 1 "register_operand" "v")
1722	  (vec_duplicate:VF_AVX512
1723	    (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1724  "TARGET_AVX512F
1725   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
1726   && <mask_mode512bit_condition>"
1727  "vsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1728  [(set_attr "prefix" "evex")
1729   (set_attr "type" "sseadd")
1730   (set_attr "mode" "<MODE>")])
1731
1732(define_insn "*add<mode>3<mask_name>_bcst"
1733  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1734	(plus:VF_AVX512
1735	  (vec_duplicate:VF_AVX512
1736	    (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1737	  (match_operand:VF_AVX512 2 "register_operand" "v")))]
1738  "TARGET_AVX512F
1739   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
1740   && <mask_mode512bit_condition>"
1741  "vadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1742  [(set_attr "prefix" "evex")
1743   (set_attr "type" "sseadd")
1744   (set_attr "mode" "<MODE>")])
1745
1746;; Standard scalar operation patterns which preserve the rest of the
1747;; vector for combiner.
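;; For example, the combiner builds this shape from scalar element-0 code
;; such as (illustrative GNU C; the type and function names are arbitrary):
;;
;;   typedef float v4sf __attribute__ ((vector_size (16)));
;;   v4sf add_low (v4sf x, float y) { x[0] += y; return x; }
;;
;; which can then be emitted as a single addss/vaddss that leaves elements
;; 1-3 of the vector untouched.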
1748(define_insn "*<sse>_vm<plusminus_insn><mode>3"
1749  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1750	(vec_merge:VF_128
1751	  (vec_duplicate:VF_128
1752	    (plusminus:<ssescalarmode>
1753	      (vec_select:<ssescalarmode>
1754	        (match_operand:VF_128 1 "register_operand" "0,v")
1755		(parallel [(const_int 0)]))
1756	      (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1757	  (match_dup 1)
1758	  (const_int 1)))]
1759  "TARGET_SSE"
1760  "@
1761   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1762   v<plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1763  [(set_attr "isa" "noavx,avx")
1764   (set_attr "type" "sseadd")
1765   (set_attr "prefix" "orig,vex")
1766   (set_attr "mode" "<ssescalarmode>")])
1767
1768(define_insn "<sse>_vm<plusminus_insn><mode>3<mask_scalar_name><round_scalar_name>"
1769  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1770	(vec_merge:VF_128
1771	  (plusminus:VF_128
1772	    (match_operand:VF_128 1 "register_operand" "0,v")
1773	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1774	  (match_dup 1)
1775	  (const_int 1)))]
1776  "TARGET_SSE"
1777  "@
1778   <plusminus_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1779   v<plusminus_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1780  [(set_attr "isa" "noavx,avx")
1781   (set_attr "type" "sseadd")
1782   (set_attr "prefix" "<round_scalar_prefix>")
1783   (set_attr "mode" "<ssescalarmode>")])
1784
1785(define_expand "mul<mode>3<mask_name><round_name>"
1786  [(set (match_operand:VF 0 "register_operand")
1787	(mult:VF
1788	  (match_operand:VF 1 "<round_nimm_predicate>")
1789	  (match_operand:VF 2 "<round_nimm_predicate>")))]
1790  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1791  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
1792
1793(define_insn "*mul<mode>3<mask_name><round_name>"
1794  [(set (match_operand:VF 0 "register_operand" "=x,v")
1795	(mult:VF
1796	  (match_operand:VF 1 "<round_nimm_predicate>" "%0,v")
1797	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1798  "TARGET_SSE
1799   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
1800   && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1801  "@
1802   mul<ssemodesuffix>\t{%2, %0|%0, %2}
1803   vmul<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1804  [(set_attr "isa" "noavx,avx")
1805   (set_attr "type" "ssemul")
1806   (set_attr "prefix" "<mask_prefix3>")
1807   (set_attr "btver2_decode" "direct,double")
1808   (set_attr "mode" "<MODE>")])
1809
1810(define_insn "*mul<mode>3<mask_name>_bcst"
1811  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1812	(mult:VF_AVX512
1813	  (vec_duplicate:VF_AVX512
1814	     (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
1815	  (match_operand:VF_AVX512 2 "register_operand" "v")))]
1816  "TARGET_AVX512F && <mask_mode512bit_condition>"
1817  "vmul<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
1818  [(set_attr "prefix" "evex")
1819   (set_attr "type" "ssemul")
1820   (set_attr "mode" "<MODE>")])
1821
1822;; Standard scalar operation patterns which preserve the rest of the
1823;; vector for combiner.
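;; As with the add/sub patterns above, e.g. x[0] /= y on a v4sf value
;; (illustrative) becomes a single divss/vdivss with elements 1-3 untouched.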
1824(define_insn "*<sse>_vm<multdiv_mnemonic><mode>3"
1825  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1826	(vec_merge:VF_128
1827	  (vec_duplicate:VF_128
1828	    (multdiv:<ssescalarmode>
1829	      (vec_select:<ssescalarmode>
1830	        (match_operand:VF_128 1 "register_operand" "0,v")
1831		(parallel [(const_int 0)]))
1832	      (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")))
1833	  (match_dup 1)
1834	  (const_int 1)))]
1835  "TARGET_SSE"
1836  "@
1837   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %2}
1838   v<multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
1839  [(set_attr "isa" "noavx,avx")
1840   (set_attr "type" "sse<multdiv_mnemonic>")
1841   (set_attr "prefix" "orig,vex")
1842   (set_attr "btver2_decode" "direct,double")
1843   (set_attr "mode" "<ssescalarmode>")])
1844
1845(define_insn "<sse>_vm<multdiv_mnemonic><mode>3<mask_scalar_name><round_scalar_name>"
1846  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
1847	(vec_merge:VF_128
1848	  (multdiv:VF_128
1849	    (match_operand:VF_128 1 "register_operand" "0,v")
1850	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
1851	  (match_dup 1)
1852	  (const_int 1)))]
1853  "TARGET_SSE"
1854  "@
1855   <multdiv_mnemonic><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
1856   v<multdiv_mnemonic><ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_scalar_mask_op3>}"
1857  [(set_attr "isa" "noavx,avx")
1858   (set_attr "type" "sse<multdiv_mnemonic>")
1859   (set_attr "prefix" "<round_scalar_prefix>")
1860   (set_attr "btver2_decode" "direct,double")
1861   (set_attr "mode" "<ssescalarmode>")])
1862
1863(define_expand "div<mode>3"
1864  [(set (match_operand:VF2 0 "register_operand")
1865	(div:VF2 (match_operand:VF2 1 "register_operand")
1866		 (match_operand:VF2 2 "vector_operand")))]
1867  "TARGET_SSE2"
1868  "ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);")
1869
1870(define_expand "div<mode>3"
1871  [(set (match_operand:VF1 0 "register_operand")
1872	(div:VF1 (match_operand:VF1 1 "register_operand")
1873		 (match_operand:VF1 2 "vector_operand")))]
1874  "TARGET_SSE"
1875{
1876  ix86_fixup_binary_operands_no_copy (DIV, <MODE>mode, operands);
1877
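  /* When TARGET_RECIP_VEC_DIV and the unsafe-math flags allow it, emit a
     reciprocal estimate refined by Newton-Raphson instead of a real
     division.  */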
1878  if (TARGET_SSE_MATH
1879      && TARGET_RECIP_VEC_DIV
1880      && !optimize_insn_for_size_p ()
1881      && flag_finite_math_only && !flag_trapping_math
1882      && flag_unsafe_math_optimizations)
1883    {
1884      ix86_emit_swdivsf (operands[0], operands[1], operands[2], <MODE>mode);
1885      DONE;
1886    }
1887})
1888
1889(define_insn "<sse>_div<mode>3<mask_name><round_name>"
1890  [(set (match_operand:VF 0 "register_operand" "=x,v")
1891	(div:VF
1892	  (match_operand:VF 1 "register_operand" "0,v")
1893	  (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
1894  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
1895  "@
1896   div<ssemodesuffix>\t{%2, %0|%0, %2}
1897   vdiv<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
1898  [(set_attr "isa" "noavx,avx")
1899   (set_attr "type" "ssediv")
1900   (set_attr "prefix" "<mask_prefix3>")
1901   (set_attr "mode" "<MODE>")])
1902
1903(define_insn "*<avx512>_div<mode>3<mask_name>_bcst"
1904  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
1905	(div:VF_AVX512
1906	  (match_operand:VF_AVX512 1 "register_operand" "v")
1907	  (vec_duplicate:VF_AVX512
1908	     (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
1909  "TARGET_AVX512F && <mask_mode512bit_condition>"
1910  "vdiv<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<avx512bcst>}"
1911  [(set_attr "prefix" "evex")
   (set_attr "type" "ssediv")
1913   (set_attr "mode" "<MODE>")])
1914
1915(define_insn "<sse>_rcp<mode>2"
1916  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
1917	(unspec:VF1_128_256
1918	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RCP))]
1919  "TARGET_SSE"
1920  "%vrcpps\t{%1, %0|%0, %1}"
1921  [(set_attr "type" "sse")
1922   (set_attr "atom_sse_attr" "rcp")
1923   (set_attr "btver2_sse_attr" "rcp")
1924   (set_attr "prefix" "maybe_vex")
1925   (set_attr "mode" "<MODE>")])
1926
1927(define_insn "sse_vmrcpv4sf2"
1928  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1929	(vec_merge:V4SF
1930	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
1931		       UNSPEC_RCP)
1932	  (match_operand:V4SF 2 "register_operand" "0,x")
1933	  (const_int 1)))]
1934  "TARGET_SSE"
1935  "@
1936   rcpss\t{%1, %0|%0, %k1}
1937   vrcpss\t{%1, %2, %0|%0, %2, %k1}"
1938  [(set_attr "isa" "noavx,avx")
1939   (set_attr "type" "sse")
1940   (set_attr "atom_sse_attr" "rcp")
1941   (set_attr "btver2_sse_attr" "rcp")
1942   (set_attr "prefix" "orig,vex")
1943   (set_attr "mode" "SF")])
1944
1945(define_insn "*sse_vmrcpv4sf2"
1946  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
1947	(vec_merge:V4SF
1948	  (vec_duplicate:V4SF
1949	    (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
1950		         UNSPEC_RCP))
1951	  (match_operand:V4SF 2 "register_operand" "0,x")
1952	  (const_int 1)))]
1953  "TARGET_SSE"
1954  "@
1955   rcpss\t{%1, %0|%0, %1}
1956   vrcpss\t{%1, %2, %0|%0, %2, %1}"
1957  [(set_attr "isa" "noavx,avx")
1958   (set_attr "type" "sse")
1959   (set_attr "atom_sse_attr" "rcp")
1960   (set_attr "btver2_sse_attr" "rcp")
1961   (set_attr "prefix" "orig,vex")
1962   (set_attr "mode" "SF")])
1963
1964(define_insn "<mask_codefor>rcp14<mode><mask_name>"
1965  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
1966	(unspec:VF_AVX512VL
1967	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
1968	  UNSPEC_RCP14))]
1969  "TARGET_AVX512F"
1970  "vrcp14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
1971  [(set_attr "type" "sse")
1972   (set_attr "prefix" "evex")
1973   (set_attr "mode" "<MODE>")])
1974
1975(define_insn "srcp14<mode>"
1976  [(set (match_operand:VF_128 0 "register_operand" "=v")
1977	(vec_merge:VF_128
1978	  (unspec:VF_128
1979	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
1980	    UNSPEC_RCP14)
1981	  (match_operand:VF_128 2 "register_operand" "v")
1982	  (const_int 1)))]
1983  "TARGET_AVX512F"
1984  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
1985  [(set_attr "type" "sse")
1986   (set_attr "prefix" "evex")
1987   (set_attr "mode" "<MODE>")])
1988
1989(define_insn "srcp14<mode>_mask"
1990  [(set (match_operand:VF_128 0 "register_operand" "=v")
1991	(vec_merge:VF_128
1992	  (vec_merge:VF_128
1993	    (unspec:VF_128
1994	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
	      UNSPEC_RCP14)
	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
1998	  (match_operand:VF_128 2 "register_operand" "v")
1999	  (const_int 1)))]
2000  "TARGET_AVX512F"
2001  "vrcp14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2002  [(set_attr "type" "sse")
2003   (set_attr "prefix" "evex")
2004   (set_attr "mode" "<MODE>")])
2005
2006(define_expand "sqrt<mode>2"
2007  [(set (match_operand:VF2 0 "register_operand")
2008	(sqrt:VF2 (match_operand:VF2 1 "vector_operand")))]
2009  "TARGET_SSE2")
2010
2011(define_expand "sqrt<mode>2"
2012  [(set (match_operand:VF1 0 "register_operand")
2013	(sqrt:VF1 (match_operand:VF1 1 "vector_operand")))]
2014  "TARGET_SSE"
2015{
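  /* When TARGET_RECIP_VEC_SQRT and the unsafe-math flags allow it, expand
     through the rsqrt estimate plus a Newton-Raphson step instead of a
     sqrt instruction.  */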
2016  if (TARGET_SSE_MATH
2017      && TARGET_RECIP_VEC_SQRT
2018      && !optimize_insn_for_size_p ()
2019      && flag_finite_math_only && !flag_trapping_math
2020      && flag_unsafe_math_optimizations)
2021    {
2022      ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, false);
2023      DONE;
2024    }
2025})
2026
2027(define_insn "<sse>_sqrt<mode>2<mask_name><round_name>"
2028  [(set (match_operand:VF 0 "register_operand" "=x,v")
2029	(sqrt:VF (match_operand:VF 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
2030  "TARGET_SSE && <mask_mode512bit_condition> && <round_mode512bit_condition>"
2031  "@
2032   sqrt<ssemodesuffix>\t{%1, %0|%0, %1}
2033   vsqrt<ssemodesuffix>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
2034  [(set_attr "isa" "noavx,avx")
2035   (set_attr "type" "sse")
2036   (set_attr "atom_sse_attr" "sqrt")
2037   (set_attr "btver2_sse_attr" "sqrt")
2038   (set_attr "prefix" "maybe_vex")
2039   (set_attr "mode" "<MODE>")])
2040
2041(define_insn "<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2042  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2043	(vec_merge:VF_128
2044	  (sqrt:VF_128
2045	    (match_operand:VF_128 1 "nonimmediate_operand" "xm,<round_scalar_constraint>"))
2046	  (match_operand:VF_128 2 "register_operand" "0,v")
2047	  (const_int 1)))]
2048  "TARGET_SSE"
2049  "@
2050   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}
2051   vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %<iptr>1<round_scalar_mask_op3>}"
2052  [(set_attr "isa" "noavx,avx")
2053   (set_attr "type" "sse")
2054   (set_attr "atom_sse_attr" "sqrt")
2055   (set_attr "prefix" "<round_scalar_prefix>")
2056   (set_attr "btver2_sse_attr" "sqrt")
2057   (set_attr "mode" "<ssescalarmode>")])
2058
2059(define_insn "*<sse>_vmsqrt<mode>2<mask_scalar_name><round_scalar_name>"
2060  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2061	(vec_merge:VF_128
2062	  (vec_duplicate:VF_128
2063	    (sqrt:<ssescalarmode>
2064	      (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "xm,<round_scalar_constraint>")))
2065	  (match_operand:VF_128 2 "register_operand" "0,v")
2066	  (const_int 1)))]
2067  "TARGET_SSE"
2068  "@
2069   sqrt<ssescalarmodesuffix>\t{%1, %0|%0, %1}
2070   vsqrt<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%1, %2, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %2, %1<round_scalar_mask_op3>}"
2071  [(set_attr "isa" "noavx,avx")
2072   (set_attr "type" "sse")
2073   (set_attr "atom_sse_attr" "sqrt")
2074   (set_attr "prefix" "<round_scalar_prefix>")
2075   (set_attr "btver2_sse_attr" "sqrt")
2076   (set_attr "mode" "<ssescalarmode>")])
2077
2078(define_expand "rsqrt<mode>2"
2079  [(set (match_operand:VF1_128_256 0 "register_operand")
2080	(unspec:VF1_128_256
2081	  [(match_operand:VF1_128_256 1 "vector_operand")] UNSPEC_RSQRT))]
2082  "TARGET_SSE && TARGET_SSE_MATH"
2083{
2084  ix86_emit_swsqrtsf (operands[0], operands[1], <MODE>mode, true);
2085  DONE;
2086})
2087
2088(define_expand "rsqrtv16sf2"
2089  [(set (match_operand:V16SF 0 "register_operand")
2090	(unspec:V16SF
2091	  [(match_operand:V16SF 1 "vector_operand")]
2092	  UNSPEC_RSQRT28))]
2093  "TARGET_AVX512ER && TARGET_SSE_MATH"
2094{
2095  ix86_emit_swsqrtsf (operands[0], operands[1], V16SFmode, true);
2096  DONE;
2097})
2098
2099(define_insn "<sse>_rsqrt<mode>2"
2100  [(set (match_operand:VF1_128_256 0 "register_operand" "=x")
2101	(unspec:VF1_128_256
2102	  [(match_operand:VF1_128_256 1 "vector_operand" "xBm")] UNSPEC_RSQRT))]
2103  "TARGET_SSE"
2104  "%vrsqrtps\t{%1, %0|%0, %1}"
2105  [(set_attr "type" "sse")
2106   (set_attr "prefix" "maybe_vex")
2107   (set_attr "mode" "<MODE>")])
2108
2109(define_insn "<mask_codefor>rsqrt14<mode><mask_name>"
2110  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2111	(unspec:VF_AVX512VL
2112	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")]
2113	  UNSPEC_RSQRT14))]
2114  "TARGET_AVX512F"
2115  "vrsqrt14<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
2116  [(set_attr "type" "sse")
2117   (set_attr "prefix" "evex")
2118   (set_attr "mode" "<MODE>")])
2119
2120(define_insn "rsqrt14<mode>"
2121  [(set (match_operand:VF_128 0 "register_operand" "=v")
2122	(vec_merge:VF_128
2123	  (unspec:VF_128
2124	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2125	    UNSPEC_RSQRT14)
2126	  (match_operand:VF_128 2 "register_operand" "v")
2127	  (const_int 1)))]
2128  "TARGET_AVX512F"
2129  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %<iptr>1}"
2130  [(set_attr "type" "sse")
2131   (set_attr "prefix" "evex")
2132   (set_attr "mode" "<MODE>")])
2133
2134(define_insn "rsqrt14_<mode>_mask"
2135  [(set (match_operand:VF_128 0 "register_operand" "=v")
2136	(vec_merge:VF_128
2137	  (vec_merge:VF_128
2138	    (unspec:VF_128
2139	      [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
2140	      UNSPEC_RSQRT14)
	    (match_operand:VF_128 3 "nonimm_or_0_operand" "0C")
	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk"))
2143	  (match_operand:VF_128 2 "register_operand" "v")
2144	  (const_int 1)))]
2145  "TARGET_AVX512F"
2146  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0%{%4%}%N3|%0%{%4%}%N3, %2, %<iptr>1}"
2147  [(set_attr "type" "sse")
2148   (set_attr "prefix" "evex")
2149   (set_attr "mode" "<MODE>")])
2150
2151(define_insn "sse_vmrsqrtv4sf2"
2152  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2153	(vec_merge:V4SF
2154	  (unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,xm")]
2155		       UNSPEC_RSQRT)
2156	  (match_operand:V4SF 2 "register_operand" "0,x")
2157	  (const_int 1)))]
2158  "TARGET_SSE"
2159  "@
2160   rsqrtss\t{%1, %0|%0, %k1}
2161   vrsqrtss\t{%1, %2, %0|%0, %2, %k1}"
2162  [(set_attr "isa" "noavx,avx")
2163   (set_attr "type" "sse")
2164   (set_attr "prefix" "orig,vex")
2165   (set_attr "mode" "SF")])
2166
2167(define_insn "*sse_vmrsqrtv4sf2"
2168  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2169	(vec_merge:V4SF
2170	  (vec_duplicate:V4SF
2171	    (unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm,xm")]
2172		         UNSPEC_RSQRT))
2173	  (match_operand:V4SF 2 "register_operand" "0,x")
2174	  (const_int 1)))]
2175  "TARGET_SSE"
2176  "@
2177   rsqrtss\t{%1, %0|%0, %1}
2178   vrsqrtss\t{%1, %2, %0|%0, %2, %1}"
2179  [(set_attr "isa" "noavx,avx")
2180   (set_attr "type" "sse")
2181   (set_attr "prefix" "orig,vex")
2182   (set_attr "mode" "SF")])
2183
2184(define_expand "<code><mode>3<mask_name><round_saeonly_name>"
2185  [(set (match_operand:VF 0 "register_operand")
2186	(smaxmin:VF
2187	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>")
2188	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>")))]
2189  "TARGET_SSE && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2190{
2191  if (!flag_finite_math_only || flag_signed_zeros)
2192    {
2193      operands[1] = force_reg (<MODE>mode, operands[1]);
2194      emit_insn (gen_ieee_<maxmin_float><mode>3<mask_name><round_saeonly_name>
2195		 (operands[0], operands[1], operands[2]
2196		  <mask_operand_arg34>
2197		  <round_saeonly_mask_arg3>));
2198      DONE;
2199    }
2200  else
2201    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
2202})
2203
;; These versions of the min/max patterns are intentionally ignorant of
;; their behavior with respect to -0.0 and NaN: operand 1 is marked
;; commutative, so the operands may be swapped.  Since both the tree-level
;; MAX_EXPR and the rtl-level SMAX operator are undefined for such inputs,
;; this is correct.
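;; For instance, (v)maxps returns its second source operand whenever either
;; input is a NaN, so commuting the inputs can change the result; that is
;; acceptable here precisely because SMAX leaves the NaN case undefined.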
2208
2209(define_insn "*<code><mode>3<mask_name><round_saeonly_name>"
2210  [(set (match_operand:VF 0 "register_operand" "=x,v")
2211	(smaxmin:VF
2212	  (match_operand:VF 1 "<round_saeonly_nimm_predicate>" "%0,v")
2213	  (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")))]
2214  "TARGET_SSE
2215   && !(MEM_P (operands[1]) && MEM_P (operands[2]))
2216   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2217  "@
2218   <maxmin_float><ssemodesuffix>\t{%2, %0|%0, %2}
2219   v<maxmin_float><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2220  [(set_attr "isa" "noavx,avx")
2221   (set_attr "type" "sseadd")
2222   (set_attr "btver2_sse_attr" "maxmin")
2223   (set_attr "prefix" "<mask_prefix3>")
2224   (set_attr "mode" "<MODE>")])
2225
;; These versions of the min/max patterns implement exactly the operations
;;   min = (op1 < op2 ? op1 : op2)
;;   max = (op1 > op2 ? op1 : op2)
;; with an unordered comparison yielding false.  Their operands are not
;; commutative, and thus they may be used in the presence of -0.0 and NaN.
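;; In particular, the result for a NaN input or for (-0.0, +0.0) is always
;; op2, so swapping the operands would change it; hence operand 1 is not
;; marked commutative here.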
2231
2232(define_insn "ieee_<ieee_maxmin><mode>3<mask_name><round_saeonly_name>"
2233  [(set (match_operand:VF 0 "register_operand" "=x,v")
2234	(unspec:VF
2235	  [(match_operand:VF 1 "register_operand" "0,v")
2236	   (match_operand:VF 2 "<round_saeonly_nimm_predicate>" "xBm,<round_saeonly_constraint>")]
2237	  IEEE_MAXMIN))]
2238  "TARGET_SSE
2239   && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
2240  "@
2241   <ieee_maxmin><ssemodesuffix>\t{%2, %0|%0, %2}
2242   v<ieee_maxmin><ssemodesuffix>\t{<round_saeonly_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_saeonly_mask_op3>}"
2243  [(set_attr "isa" "noavx,avx")
2244   (set_attr "type" "sseadd")
2245   (set_attr "btver2_sse_attr" "maxmin")
2246   (set_attr "prefix" "<mask_prefix3>")
2247   (set_attr "mode" "<MODE>")])
2248
2249;; Standard scalar operation patterns which preserve the rest of the
2250;; vector for combiner.
2251(define_insn "*ieee_<ieee_maxmin><mode>3"
2252  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2253	(vec_merge:VF_128
2254	  (vec_duplicate:VF_128
2255	    (unspec:<ssescalarmode>
2256	      [(vec_select:<ssescalarmode>
2257	         (match_operand:VF_128 1 "register_operand" "0,v")
2258		 (parallel [(const_int 0)]))
2259	       (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm,vm")]
2260	       IEEE_MAXMIN))
2261	  (match_dup 1)
2262	  (const_int 1)))]
2263  "TARGET_SSE"
2264  "@
2265   <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %2}
2266   v<ieee_maxmin><ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2267  [(set_attr "isa" "noavx,avx")
2268   (set_attr "type" "sseadd")
2269   (set_attr "btver2_sse_attr" "maxmin")
2270   (set_attr "prefix" "orig,vex")
2271   (set_attr "mode" "<ssescalarmode>")])
2272
2273(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
2274  [(set (match_operand:VF_128 0 "register_operand" "=x,v")
2275	(vec_merge:VF_128
2276	  (smaxmin:VF_128
2277	    (match_operand:VF_128 1 "register_operand" "0,v")
2278	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>"))
2279	 (match_dup 1)
2280	 (const_int 1)))]
2281  "TARGET_SSE"
2282  "@
2283   <maxmin_float><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2284   v<maxmin_float><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
2285  [(set_attr "isa" "noavx,avx")
2286   (set_attr "type" "sse")
2287   (set_attr "btver2_sse_attr" "maxmin")
2288   (set_attr "prefix" "<round_saeonly_scalar_prefix>")
2289   (set_attr "mode" "<ssescalarmode>")])
2290
2291(define_insn "avx_addsubv4df3"
2292  [(set (match_operand:V4DF 0 "register_operand" "=x")
2293	(vec_merge:V4DF
2294	  (minus:V4DF
2295	    (match_operand:V4DF 1 "register_operand" "x")
2296	    (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
2297	  (plus:V4DF (match_dup 1) (match_dup 2))
2298	  (const_int 5)))]
2299  "TARGET_AVX"
2300  "vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2301  [(set_attr "type" "sseadd")
2302   (set_attr "prefix" "vex")
2303   (set_attr "mode" "V4DF")])
2304
2305(define_insn "sse3_addsubv2df3"
2306  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2307	(vec_merge:V2DF
2308	  (minus:V2DF
2309	    (match_operand:V2DF 1 "register_operand" "0,x")
2310	    (match_operand:V2DF 2 "vector_operand" "xBm,xm"))
2311	  (plus:V2DF (match_dup 1) (match_dup 2))
2312	  (const_int 1)))]
2313  "TARGET_SSE3"
2314  "@
2315   addsubpd\t{%2, %0|%0, %2}
2316   vaddsubpd\t{%2, %1, %0|%0, %1, %2}"
2317  [(set_attr "isa" "noavx,avx")
2318   (set_attr "type" "sseadd")
2319   (set_attr "atom_unit" "complex")
2320   (set_attr "prefix" "orig,vex")
2321   (set_attr "mode" "V2DF")])
2322
2323(define_insn "avx_addsubv8sf3"
2324  [(set (match_operand:V8SF 0 "register_operand" "=x")
2325	(vec_merge:V8SF
2326	  (minus:V8SF
2327	    (match_operand:V8SF 1 "register_operand" "x")
2328	    (match_operand:V8SF 2 "nonimmediate_operand" "xm"))
2329	  (plus:V8SF (match_dup 1) (match_dup 2))
2330	  (const_int 85)))]
2331  "TARGET_AVX"
2332  "vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2333  [(set_attr "type" "sseadd")
2334   (set_attr "prefix" "vex")
2335   (set_attr "mode" "V8SF")])
2336
2337(define_insn "sse3_addsubv4sf3"
2338  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2339	(vec_merge:V4SF
2340	  (minus:V4SF
2341	    (match_operand:V4SF 1 "register_operand" "0,x")
2342	    (match_operand:V4SF 2 "vector_operand" "xBm,xm"))
2343	  (plus:V4SF (match_dup 1) (match_dup 2))
2344	  (const_int 5)))]
2345  "TARGET_SSE3"
2346  "@
2347   addsubps\t{%2, %0|%0, %2}
2348   vaddsubps\t{%2, %1, %0|%0, %1, %2}"
2349  [(set_attr "isa" "noavx,avx")
2350   (set_attr "type" "sseadd")
2351   (set_attr "prefix" "orig,vex")
2352   (set_attr "prefix_rep" "1,*")
2353   (set_attr "mode" "V4SF")])
2354
2355(define_split
2356  [(set (match_operand:VF_128_256 0 "register_operand")
2357	(match_operator:VF_128_256 6 "addsub_vm_operator"
2358	  [(minus:VF_128_256
2359	     (match_operand:VF_128_256 1 "register_operand")
2360	     (match_operand:VF_128_256 2 "vector_operand"))
2361	   (plus:VF_128_256
2362	     (match_operand:VF_128_256 3 "vector_operand")
2363	     (match_operand:VF_128_256 4 "vector_operand"))
2364	   (match_operand 5 "const_int_operand")]))]
2365  "TARGET_SSE3
2366   && can_create_pseudo_p ()
2367   && ((rtx_equal_p (operands[1], operands[3])
2368	&& rtx_equal_p (operands[2], operands[4]))
2369       || (rtx_equal_p (operands[1], operands[4])
2370	   && rtx_equal_p (operands[2], operands[3])))"
2371  [(set (match_dup 0)
2372	(vec_merge:VF_128_256
2373	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2374	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2375	  (match_dup 5)))])
2376
2377(define_split
2378  [(set (match_operand:VF_128_256 0 "register_operand")
2379	(match_operator:VF_128_256 6 "addsub_vm_operator"
2380	  [(plus:VF_128_256
2381	     (match_operand:VF_128_256 1 "vector_operand")
2382	     (match_operand:VF_128_256 2 "vector_operand"))
2383	   (minus:VF_128_256
2384	     (match_operand:VF_128_256 3 "register_operand")
2385	     (match_operand:VF_128_256 4 "vector_operand"))
2386	   (match_operand 5 "const_int_operand")]))]
2387  "TARGET_SSE3
2388   && can_create_pseudo_p ()
2389   && ((rtx_equal_p (operands[1], operands[3])
2390	&& rtx_equal_p (operands[2], operands[4]))
2391       || (rtx_equal_p (operands[1], operands[4])
2392	   && rtx_equal_p (operands[2], operands[3])))"
2393  [(set (match_dup 0)
2394	(vec_merge:VF_128_256
2395	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2396	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2397	  (match_dup 5)))]
2398{
2399  /* Negate mask bits to compensate for swapped PLUS and MINUS RTXes.  */
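  /* For example, for V4SF a mask of 0b1010 (PLUS selected for elements 1
     and 3) becomes ~0b1010 & 0xf = 0b0101 once MINUS is the first vec_merge
     arm, matching the constant in sse3_addsubv4sf3.  */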
2400  operands[5]
2401    = GEN_INT (~INTVAL (operands[5])
2402	       & ((HOST_WIDE_INT_1U << GET_MODE_NUNITS (<MODE>mode)) - 1));
2403})
2404
2405(define_split
2406  [(set (match_operand:VF_128_256 0 "register_operand")
2407	(match_operator:VF_128_256 7 "addsub_vs_operator"
2408	  [(vec_concat:<ssedoublemode>
2409	     (minus:VF_128_256
2410	       (match_operand:VF_128_256 1 "register_operand")
2411	       (match_operand:VF_128_256 2 "vector_operand"))
2412	     (plus:VF_128_256
2413	       (match_operand:VF_128_256 3 "vector_operand")
2414	       (match_operand:VF_128_256 4 "vector_operand")))
2415	   (match_parallel 5 "addsub_vs_parallel"
2416	     [(match_operand 6 "const_int_operand")])]))]
2417  "TARGET_SSE3
2418   && can_create_pseudo_p ()
2419   && ((rtx_equal_p (operands[1], operands[3])
2420	&& rtx_equal_p (operands[2], operands[4]))
2421       || (rtx_equal_p (operands[1], operands[4])
2422	   && rtx_equal_p (operands[2], operands[3])))"
2423  [(set (match_dup 0)
2424	(vec_merge:VF_128_256
2425	  (minus:VF_128_256 (match_dup 1) (match_dup 2))
2426	  (plus:VF_128_256 (match_dup 1) (match_dup 2))
2427	  (match_dup 5)))]
2428{
2429  int i, nelt = XVECLEN (operands[5], 0);
2430  HOST_WIDE_INT ival = 0;
2431
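  /* Selector constants below GET_MODE_NUNITS refer to the MINUS half of the
     vec_concat; those lanes get their bit set in the vec_merge mask.  */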
2432  for (i = 0; i < nelt; i++)
2433    if (INTVAL (XVECEXP (operands[5], 0, i)) < GET_MODE_NUNITS (<MODE>mode))
2434      ival |= HOST_WIDE_INT_1 << i;
2435
2436  operands[5] = GEN_INT (ival);
2437})
2438
2439(define_split
2440  [(set (match_operand:VF_128_256 0 "register_operand")
2441	(match_operator:VF_128_256 7 "addsub_vs_operator"
2442	  [(vec_concat:<ssedoublemode>
2443	     (plus:VF_128_256
2444	       (match_operand:VF_128_256 1 "vector_operand")
2445	       (match_operand:VF_128_256 2 "vector_operand"))
2446	     (minus:VF_128_256
2447	       (match_operand:VF_128_256 3 "register_operand")
2448	       (match_operand:VF_128_256 4 "vector_operand")))
2449	   (match_parallel 5 "addsub_vs_parallel"
2450	     [(match_operand 6 "const_int_operand")])]))]
2451  "TARGET_SSE3
2452   && can_create_pseudo_p ()
2453   && ((rtx_equal_p (operands[1], operands[3])
2454	&& rtx_equal_p (operands[2], operands[4]))
2455       || (rtx_equal_p (operands[1], operands[4])
2456	   && rtx_equal_p (operands[2], operands[3])))"
2457  [(set (match_dup 0)
2458	(vec_merge:VF_128_256
2459	  (minus:VF_128_256 (match_dup 3) (match_dup 4))
2460	  (plus:VF_128_256 (match_dup 3) (match_dup 4))
2461	  (match_dup 5)))]
2462{
2463  int i, nelt = XVECLEN (operands[5], 0);
2464  HOST_WIDE_INT ival = 0;
2465
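  /* Here the vec_concat is (PLUS, MINUS), so selectors of GET_MODE_NUNITS
     and above refer to the MINUS half and get their bit set in the mask.  */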
2466  for (i = 0; i < nelt; i++)
2467    if (INTVAL (XVECEXP (operands[5], 0, i)) >= GET_MODE_NUNITS (<MODE>mode))
2468      ival |= HOST_WIDE_INT_1 << i;
2469
2470  operands[5] = GEN_INT (ival);
2471})
2472
2473(define_insn "avx_h<plusminus_insn>v4df3"
2474  [(set (match_operand:V4DF 0 "register_operand" "=x")
2475	(vec_concat:V4DF
2476	  (vec_concat:V2DF
2477	    (plusminus:DF
2478	      (vec_select:DF
2479		(match_operand:V4DF 1 "register_operand" "x")
2480		(parallel [(const_int 0)]))
2481	      (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2482	    (plusminus:DF
2483	      (vec_select:DF
2484		(match_operand:V4DF 2 "nonimmediate_operand" "xm")
2485		(parallel [(const_int 0)]))
2486	      (vec_select:DF (match_dup 2) (parallel [(const_int 1)]))))
2487	  (vec_concat:V2DF
2488	    (plusminus:DF
2489	      (vec_select:DF (match_dup 1) (parallel [(const_int 2)]))
2490	      (vec_select:DF (match_dup 1) (parallel [(const_int 3)])))
2491	    (plusminus:DF
2492	      (vec_select:DF (match_dup 2) (parallel [(const_int 2)]))
2493	      (vec_select:DF (match_dup 2) (parallel [(const_int 3)]))))))]
2494  "TARGET_AVX"
2495  "vh<plusminus_mnemonic>pd\t{%2, %1, %0|%0, %1, %2}"
2496  [(set_attr "type" "sseadd")
2497   (set_attr "prefix" "vex")
2498   (set_attr "mode" "V4DF")])
2499
2500(define_expand "sse3_haddv2df3"
2501  [(set (match_operand:V2DF 0 "register_operand")
2502	(vec_concat:V2DF
2503	  (plus:DF
2504	    (vec_select:DF
2505	      (match_operand:V2DF 1 "register_operand")
2506	      (parallel [(const_int 0)]))
2507	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2508	  (plus:DF
2509	    (vec_select:DF
2510	      (match_operand:V2DF 2 "vector_operand")
2511	      (parallel [(const_int 0)]))
2512	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2513  "TARGET_SSE3")
2514
2515(define_insn "*sse3_haddv2df3"
2516  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2517	(vec_concat:V2DF
2518	  (plus:DF
2519	    (vec_select:DF
2520	      (match_operand:V2DF 1 "register_operand" "0,x")
2521	      (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))
2522	    (vec_select:DF
2523	      (match_dup 1)
2524	      (parallel [(match_operand:SI 4 "const_0_to_1_operand")])))
2525	  (plus:DF
2526	    (vec_select:DF
2527	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2528	      (parallel [(match_operand:SI 5 "const_0_to_1_operand")]))
2529	    (vec_select:DF
2530	      (match_dup 2)
2531	      (parallel [(match_operand:SI 6 "const_0_to_1_operand")])))))]
2532  "TARGET_SSE3
2533   && INTVAL (operands[3]) != INTVAL (operands[4])
2534   && INTVAL (operands[5]) != INTVAL (operands[6])"
2535  "@
2536   haddpd\t{%2, %0|%0, %2}
2537   vhaddpd\t{%2, %1, %0|%0, %1, %2}"
2538  [(set_attr "isa" "noavx,avx")
2539   (set_attr "type" "sseadd")
2540   (set_attr "prefix" "orig,vex")
2541   (set_attr "mode" "V2DF")])
2542
2543(define_insn "sse3_hsubv2df3"
2544  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2545	(vec_concat:V2DF
2546	  (minus:DF
2547	    (vec_select:DF
2548	      (match_operand:V2DF 1 "register_operand" "0,x")
2549	      (parallel [(const_int 0)]))
2550	    (vec_select:DF (match_dup 1) (parallel [(const_int 1)])))
2551	  (minus:DF
2552	    (vec_select:DF
2553	      (match_operand:V2DF 2 "vector_operand" "xBm,xm")
2554	      (parallel [(const_int 0)]))
2555	    (vec_select:DF (match_dup 2) (parallel [(const_int 1)])))))]
2556  "TARGET_SSE3"
2557  "@
2558   hsubpd\t{%2, %0|%0, %2}
2559   vhsubpd\t{%2, %1, %0|%0, %1, %2}"
2560  [(set_attr "isa" "noavx,avx")
2561   (set_attr "type" "sseadd")
2562   (set_attr "prefix" "orig,vex")
2563   (set_attr "mode" "V2DF")])
2564
2565(define_insn "*sse3_haddv2df3_low"
2566  [(set (match_operand:DF 0 "register_operand" "=x,x")
2567	(plus:DF
2568	  (vec_select:DF
2569	    (match_operand:V2DF 1 "register_operand" "0,x")
2570	    (parallel [(match_operand:SI 2 "const_0_to_1_operand")]))
2571	  (vec_select:DF
2572	    (match_dup 1)
2573	    (parallel [(match_operand:SI 3 "const_0_to_1_operand")]))))]
2574  "TARGET_SSE3
2575   && INTVAL (operands[2]) != INTVAL (operands[3])"
2576  "@
2577   haddpd\t{%0, %0|%0, %0}
2578   vhaddpd\t{%1, %1, %0|%0, %1, %1}"
2579  [(set_attr "isa" "noavx,avx")
2580   (set_attr "type" "sseadd1")
2581   (set_attr "prefix" "orig,vex")
2582   (set_attr "mode" "V2DF")])
2583
2584(define_insn "*sse3_hsubv2df3_low"
2585  [(set (match_operand:DF 0 "register_operand" "=x,x")
2586	(minus:DF
2587	  (vec_select:DF
2588	    (match_operand:V2DF 1 "register_operand" "0,x")
2589	    (parallel [(const_int 0)]))
2590	  (vec_select:DF
2591	    (match_dup 1)
2592	    (parallel [(const_int 1)]))))]
2593  "TARGET_SSE3"
2594  "@
2595   hsubpd\t{%0, %0|%0, %0}
2596   vhsubpd\t{%1, %1, %0|%0, %1, %1}"
2597  [(set_attr "isa" "noavx,avx")
2598   (set_attr "type" "sseadd1")
2599   (set_attr "prefix" "orig,vex")
2600   (set_attr "mode" "V2DF")])
2601
2602(define_insn "avx_h<plusminus_insn>v8sf3"
2603  [(set (match_operand:V8SF 0 "register_operand" "=x")
2604	(vec_concat:V8SF
2605	  (vec_concat:V4SF
2606	    (vec_concat:V2SF
2607	      (plusminus:SF
2608		(vec_select:SF
2609		  (match_operand:V8SF 1 "register_operand" "x")
2610		  (parallel [(const_int 0)]))
2611		(vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2612	      (plusminus:SF
2613		(vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2614		(vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2615	    (vec_concat:V2SF
2616	      (plusminus:SF
2617		(vec_select:SF
2618		  (match_operand:V8SF 2 "nonimmediate_operand" "xm")
2619		  (parallel [(const_int 0)]))
2620		(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2621	      (plusminus:SF
2622		(vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2623		(vec_select:SF (match_dup 2) (parallel [(const_int 3)])))))
2624	  (vec_concat:V4SF
2625	    (vec_concat:V2SF
2626	      (plusminus:SF
2627		(vec_select:SF (match_dup 1) (parallel [(const_int 4)]))
2628		(vec_select:SF (match_dup 1) (parallel [(const_int 5)])))
2629	      (plusminus:SF
2630		(vec_select:SF (match_dup 1) (parallel [(const_int 6)]))
2631		(vec_select:SF (match_dup 1) (parallel [(const_int 7)]))))
2632	    (vec_concat:V2SF
2633	      (plusminus:SF
2634		(vec_select:SF (match_dup 2) (parallel [(const_int 4)]))
2635		(vec_select:SF (match_dup 2) (parallel [(const_int 5)])))
2636	      (plusminus:SF
2637		(vec_select:SF (match_dup 2) (parallel [(const_int 6)]))
2638		(vec_select:SF (match_dup 2) (parallel [(const_int 7)])))))))]
2639  "TARGET_AVX"
2640  "vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2641  [(set_attr "type" "sseadd")
2642   (set_attr "prefix" "vex")
2643   (set_attr "mode" "V8SF")])
2644
2645(define_insn "sse3_h<plusminus_insn>v4sf3"
2646  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2647	(vec_concat:V4SF
2648	  (vec_concat:V2SF
2649	    (plusminus:SF
2650	      (vec_select:SF
2651		(match_operand:V4SF 1 "register_operand" "0,x")
2652		(parallel [(const_int 0)]))
2653	      (vec_select:SF (match_dup 1) (parallel [(const_int 1)])))
2654	    (plusminus:SF
2655	      (vec_select:SF (match_dup 1) (parallel [(const_int 2)]))
2656	      (vec_select:SF (match_dup 1) (parallel [(const_int 3)]))))
2657	  (vec_concat:V2SF
2658	    (plusminus:SF
2659	      (vec_select:SF
2660		(match_operand:V4SF 2 "vector_operand" "xBm,xm")
2661		(parallel [(const_int 0)]))
2662	      (vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
2663	    (plusminus:SF
2664	      (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
2665	      (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
2666  "TARGET_SSE3"
2667  "@
2668   h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}
2669   vh<plusminus_mnemonic>ps\t{%2, %1, %0|%0, %1, %2}"
2670  [(set_attr "isa" "noavx,avx")
2671   (set_attr "type" "sseadd")
2672   (set_attr "atom_unit" "complex")
2673   (set_attr "prefix" "orig,vex")
2674   (set_attr "prefix_rep" "1,*")
2675   (set_attr "mode" "V4SF")])
2676
2677(define_mode_iterator REDUC_SSE_PLUS_MODE
2678 [(V2DF "TARGET_SSE") (V4SF "TARGET_SSE")])
2679
2680(define_expand "reduc_plus_scal_<mode>"
2681 [(plus:REDUC_SSE_PLUS_MODE
2682   (match_operand:<ssescalarmode> 0 "register_operand")
2683   (match_operand:REDUC_SSE_PLUS_MODE 1 "register_operand"))]
2684 ""
2685{
2686  rtx tmp = gen_reg_rtx (<MODE>mode);
2687  ix86_expand_reduc (gen_add<mode>3, tmp, operands[1]);
2688  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2689                                                        const0_rtx));
2690  DONE;
2691})
2692
2693(define_expand "reduc_plus_scal_v16qi"
2694 [(plus:V16QI
2695    (match_operand:QI 0 "register_operand")
2696    (match_operand:V16QI 1 "register_operand"))]
2697 "TARGET_SSE2"
2698{
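  /* Shift the upper 64 bits of the vector down and add them byte-wise to
     the lower 64 bits, so the low 8 bytes hold partial sums of all 16
     elements.  */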
2699  rtx tmp = gen_reg_rtx (V1TImode);
2700  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, operands[1]),
2701				 GEN_INT (64)));
2702  rtx tmp2 = gen_reg_rtx (V16QImode);
2703  emit_insn (gen_addv16qi3 (tmp2, operands[1], gen_lowpart (V16QImode, tmp)));
2704  rtx tmp3 = gen_reg_rtx (V16QImode);
2705  emit_move_insn (tmp3, CONST0_RTX (V16QImode));
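  /* PSADBW against zero adds up the eight bytes in each 64-bit half, so
     element 0 of the result holds the full reduction (modulo 256) and is
     extracted below.  */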
2706  rtx tmp4 = gen_reg_rtx (V2DImode);
2707  emit_insn (gen_sse2_psadbw (tmp4, tmp2, tmp3));
2708  tmp4 = gen_lowpart (V16QImode, tmp4);
2709  emit_insn (gen_vec_extractv16qiqi (operands[0], tmp4, const0_rtx));
2710  DONE;
2711})
2712
2713(define_mode_iterator REDUC_PLUS_MODE
2714 [(V4DF "TARGET_AVX") (V8SF "TARGET_AVX")
2715  (V8DF "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2716  (V32QI "TARGET_AVX") (V64QI "TARGET_AVX512F")])
2717
2718(define_expand "reduc_plus_scal_<mode>"
2719 [(plus:REDUC_PLUS_MODE
2720   (match_operand:<ssescalarmode> 0 "register_operand")
2721   (match_operand:REDUC_PLUS_MODE 1 "register_operand"))]
2722 ""
2723{
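  /* Reduce by halving: extract the high half, add it to the low half, and
     recurse on the half-width vector until one of the 128-bit reductions
     above applies.  */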
2724  rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2725  emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2726  rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2727  rtx tmp3 = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
2728  emit_insn (gen_add<ssehalfvecmodelower>3 (tmp2, tmp, tmp3));
2729  emit_insn (gen_reduc_plus_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2730  DONE;
2731})
2732
2733;; Modes handled by reduc_sm{in,ax}* patterns.
2734(define_mode_iterator REDUC_SSE_SMINMAX_MODE
2735  [(V4SF "TARGET_SSE") (V2DF "TARGET_SSE")
2736   (V4SI "TARGET_SSE2") (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
2737   (V2DI "TARGET_SSE4_2")])
2738
2739(define_expand "reduc_<code>_scal_<mode>"
2740  [(smaxmin:REDUC_SSE_SMINMAX_MODE
2741     (match_operand:<ssescalarmode> 0 "register_operand")
2742     (match_operand:REDUC_SSE_SMINMAX_MODE 1 "register_operand"))]
2743  ""
2744{
2745  rtx tmp = gen_reg_rtx (<MODE>mode);
2746  ix86_expand_reduc (gen_<code><mode>3, tmp, operands[1]);
2747  emit_insn (gen_vec_extract<mode><ssescalarmodelower> (operands[0], tmp,
2748							const0_rtx));
2749  DONE;
2750})
2751
2752(define_mode_iterator REDUC_SMINMAX_MODE
2753  [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
2754   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
2755   (V8SF "TARGET_AVX") (V4DF "TARGET_AVX")
2756   (V64QI "TARGET_AVX512BW")
2757   (V32HI "TARGET_AVX512BW") (V16SI "TARGET_AVX512F")
2758   (V8DI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
2759   (V8DF "TARGET_AVX512F")])
2760
2761(define_expand "reduc_<code>_scal_<mode>"
2762  [(smaxmin:REDUC_SMINMAX_MODE
2763     (match_operand:<ssescalarmode> 0 "register_operand")
2764     (match_operand:REDUC_SMINMAX_MODE 1 "register_operand"))]
2765  ""
2766{
2767  rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2768  emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2769  rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2770  emit_insn (gen_<code><ssehalfvecmodelower>3
2771    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2772  emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2773  DONE;
2774})
2775
2776(define_expand "reduc_<code>_scal_<mode>"
2777  [(umaxmin:VI_AVX512BW
2778     (match_operand:<ssescalarmode> 0 "register_operand")
2779     (match_operand:VI_AVX512BW 1 "register_operand"))]
2780  "TARGET_AVX512F"
2781{
2782  rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2783  emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2784  rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2785  emit_insn (gen_<code><ssehalfvecmodelower>3
2786    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2787  emit_insn (gen_reduc_<code>_scal_<ssehalfvecmodelower> (operands[0], tmp2));
2788  DONE;
2789})
2790
2791(define_expand "reduc_<code>_scal_<mode>"
2792  [(umaxmin:VI_256
2793     (match_operand:<ssescalarmode> 0 "register_operand")
2794     (match_operand:VI_256 1 "register_operand"))]
2795  "TARGET_AVX2"
2796{
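  /* Narrow from 256 bits to 128 with a single vector min/max of the two
     halves, then finish the reduction on the 128-bit half and extract
     element 0.  */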
2797  rtx tmp = gen_reg_rtx (<ssehalfvecmode>mode);
2798  emit_insn (gen_vec_extract_hi_<mode> (tmp, operands[1]));
2799  rtx tmp2 = gen_reg_rtx (<ssehalfvecmode>mode);
2800  emit_insn (gen_<code><ssehalfvecmodelower>3
2801    (tmp2, tmp, gen_lowpart (<ssehalfvecmode>mode, operands[1])));
2802  rtx tmp3 = gen_reg_rtx (<ssehalfvecmode>mode);
2803  ix86_expand_reduc (gen_<code><ssehalfvecmodelower>3, tmp3, tmp2);
2804  emit_insn (gen_vec_extract<ssehalfvecmodelower><ssescalarmodelower>
2805		(operands[0], tmp3, const0_rtx));
2806  DONE;
2807})
2808
2809(define_expand "reduc_umin_scal_v8hi"
2810  [(umin:V8HI
2811     (match_operand:HI 0 "register_operand")
2812     (match_operand:V8HI 1 "register_operand"))]
2813  "TARGET_SSE4_1"
2814{
2815  rtx tmp = gen_reg_rtx (V8HImode);
2816  ix86_expand_reduc (gen_uminv8hi3, tmp, operands[1]);
2817  emit_insn (gen_vec_extractv8hihi (operands[0], tmp, const0_rtx));
2818  DONE;
2819})
2820
2821(define_insn "<mask_codefor>reducep<mode><mask_name>"
2822  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
2823	(unspec:VF_AVX512VL
2824	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "vm")
2825	   (match_operand:SI 2 "const_0_to_255_operand")]
2826	  UNSPEC_REDUCE))]
2827  "TARGET_AVX512DQ"
2828  "vreduce<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
2829  [(set_attr "type" "sse")
2830   (set_attr "prefix" "evex")
2831   (set_attr "mode" "<MODE>")])
2832
2833(define_insn "reduces<mode><mask_scalar_name>"
2834  [(set (match_operand:VF_128 0 "register_operand" "=v")
2835	(vec_merge:VF_128
2836	  (unspec:VF_128
2837	    [(match_operand:VF_128 1 "register_operand" "v")
2838	     (match_operand:VF_128 2 "nonimmediate_operand" "vm")
2839	     (match_operand:SI 3 "const_0_to_255_operand")]
2840	    UNSPEC_REDUCE)
2841	  (match_dup 1)
2842	  (const_int 1)))]
2843  "TARGET_AVX512DQ"
2844  "vreduce<ssescalarmodesuffix>\t{%3, %2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2, %3}"
2845  [(set_attr "type" "sse")
2846   (set_attr "prefix" "evex")
2847   (set_attr "mode" "<MODE>")])
2848
2849;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2850;;
2851;; Parallel floating point comparisons
2852;;
2853;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2854
2855(define_insn "avx_cmp<mode>3"
2856  [(set (match_operand:VF_128_256 0 "register_operand" "=x")
2857	(unspec:VF_128_256
2858	  [(match_operand:VF_128_256 1 "register_operand" "x")
2859	   (match_operand:VF_128_256 2 "nonimmediate_operand" "xm")
2860	   (match_operand:SI 3 "const_0_to_31_operand" "n")]
2861	  UNSPEC_PCMP))]
2862  "TARGET_AVX"
2863  "vcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2864  [(set_attr "type" "ssecmp")
2865   (set_attr "length_immediate" "1")
2866   (set_attr "prefix" "vex")
2867   (set_attr "mode" "<MODE>")])
2868
2869(define_insn "avx_vmcmp<mode>3"
2870  [(set (match_operand:VF_128 0 "register_operand" "=x")
2871	(vec_merge:VF_128
2872	  (unspec:VF_128
2873	    [(match_operand:VF_128 1 "register_operand" "x")
2874	     (match_operand:VF_128 2 "nonimmediate_operand" "xm")
2875	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
2876	    UNSPEC_PCMP)
2877	 (match_dup 1)
2878	 (const_int 1)))]
2879  "TARGET_AVX"
2880  "vcmp<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
2881  [(set_attr "type" "ssecmp")
2882   (set_attr "length_immediate" "1")
2883   (set_attr "prefix" "vex")
2884   (set_attr "mode" "<ssescalarmode>")])
2885
2886(define_insn "*<sse>_maskcmp<mode>3_comm"
2887  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2888	(match_operator:VF_128_256 3 "sse_comparison_operator"
2889	  [(match_operand:VF_128_256 1 "register_operand" "%0,x")
2890	   (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2891  "TARGET_SSE
2892   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
2893  "@
2894   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2895   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2896  [(set_attr "isa" "noavx,avx")
2897   (set_attr "type" "ssecmp")
2898   (set_attr "length_immediate" "1")
2899   (set_attr "prefix" "orig,vex")
2900   (set_attr "mode" "<MODE>")])
2901
2902(define_insn "<sse>_maskcmp<mode>3"
2903  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
2904	(match_operator:VF_128_256 3 "sse_comparison_operator"
2905	  [(match_operand:VF_128_256 1 "register_operand" "0,x")
2906	   (match_operand:VF_128_256 2 "vector_operand" "xBm,xm")]))]
2907  "TARGET_SSE"
2908  "@
2909   cmp%D3<ssemodesuffix>\t{%2, %0|%0, %2}
2910   vcmp%D3<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
2911  [(set_attr "isa" "noavx,avx")
2912   (set_attr "type" "ssecmp")
2913   (set_attr "length_immediate" "1")
2914   (set_attr "prefix" "orig,vex")
2915   (set_attr "mode" "<MODE>")])
2916
2917(define_insn "<sse>_vmmaskcmp<mode>3"
2918  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
2919	(vec_merge:VF_128
2920	 (match_operator:VF_128 3 "sse_comparison_operator"
2921	   [(match_operand:VF_128 1 "register_operand" "0,x")
2922	    (match_operand:VF_128 2 "nonimmediate_operand" "xm,xm")])
2923	 (match_dup 1)
2924	 (const_int 1)))]
2925  "TARGET_SSE"
2926  "@
2927   cmp%D3<ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
2928   vcmp%D3<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %<iptr>2}"
2929  [(set_attr "isa" "noavx,avx")
2930   (set_attr "type" "ssecmp")
2931   (set_attr "length_immediate" "1,*")
2932   (set_attr "prefix" "orig,vex")
2933   (set_attr "mode" "<ssescalarmode>")])
2934
2935(define_mode_attr cmp_imm_predicate
2936  [(V16SF "const_0_to_31_operand")  (V8DF "const_0_to_31_operand")
2937   (V16SI "const_0_to_7_operand")   (V8DI "const_0_to_7_operand")
2938   (V8SF "const_0_to_31_operand")   (V4DF "const_0_to_31_operand")
2939   (V8SI "const_0_to_7_operand")    (V4DI "const_0_to_7_operand")
2940   (V4SF "const_0_to_31_operand")   (V2DF "const_0_to_31_operand")
2941   (V4SI "const_0_to_7_operand")    (V2DI "const_0_to_7_operand")
2942   (V32HI "const_0_to_7_operand")   (V64QI "const_0_to_7_operand")
2943   (V16HI "const_0_to_7_operand")   (V32QI "const_0_to_7_operand")
2944   (V8HI "const_0_to_7_operand")    (V16QI "const_0_to_7_operand")])
2945
2946(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name><round_saeonly_name>"
2947  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2948	(unspec:<avx512fmaskmode>
2949	  [(match_operand:V48_AVX512VL 1 "register_operand" "v")
2950	   (match_operand:V48_AVX512VL 2 "nonimmediate_operand" "<round_saeonly_constraint>")
2951	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2952	  UNSPEC_PCMP))]
2953  "TARGET_AVX512F && <round_saeonly_mode512bit_condition>"
2954  "v<sseintprefix>cmp<ssemodesuffix>\t{%3, <round_saeonly_mask_scalar_merge_op4>%2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2<round_saeonly_mask_scalar_merge_op4>, %3}"
2955  [(set_attr "type" "ssecmp")
2956   (set_attr "length_immediate" "1")
2957   (set_attr "prefix" "evex")
2958   (set_attr "mode" "<sseinsnmode>")])
2959
2960(define_insn "<avx512>_cmp<mode>3<mask_scalar_merge_name>"
2961  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2962	(unspec:<avx512fmaskmode>
2963	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2964	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2965	   (match_operand:SI 3 "<cmp_imm_predicate>" "n")]
2966	  UNSPEC_PCMP))]
2967  "TARGET_AVX512BW"
2968  "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2969  [(set_attr "type" "ssecmp")
2970   (set_attr "length_immediate" "1")
2971   (set_attr "prefix" "evex")
2972   (set_attr "mode" "<sseinsnmode>")])
2973
2974(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2975  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2976	(unspec:<avx512fmaskmode>
2977	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
2978	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
2979	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2980	  UNSPEC_UNSIGNED_PCMP))]
2981  "TARGET_AVX512BW"
2982  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2983  [(set_attr "type" "ssecmp")
2984   (set_attr "length_immediate" "1")
2985   (set_attr "prefix" "evex")
2986   (set_attr "mode" "<sseinsnmode>")])
2987
2988(define_insn "<avx512>_ucmp<mode>3<mask_scalar_merge_name>"
2989  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
2990	(unspec:<avx512fmaskmode>
2991	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
2992	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
2993	   (match_operand:SI 3 "const_0_to_7_operand" "n")]
2994	  UNSPEC_UNSIGNED_PCMP))]
2995  "TARGET_AVX512F"
2996  "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0<mask_scalar_merge_operand4>|%0<mask_scalar_merge_operand4>, %1, %2, %3}"
2997  [(set_attr "type" "ssecmp")
2998   (set_attr "length_immediate" "1")
2999   (set_attr "prefix" "evex")
3000   (set_attr "mode" "<sseinsnmode>")])
3001
3002(define_insn "avx512f_vmcmp<mode>3<round_saeonly_name>"
3003  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3004	(and:<avx512fmaskmode>
3005	  (unspec:<avx512fmaskmode>
3006	    [(match_operand:VF_128 1 "register_operand" "v")
3007	     (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3008	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
3009	    UNSPEC_PCMP)
3010	  (const_int 1)))]
3011  "TARGET_AVX512F"
3012  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %<iptr>2<round_saeonly_op4>, %3}"
3013  [(set_attr "type" "ssecmp")
3014   (set_attr "length_immediate" "1")
3015   (set_attr "prefix" "evex")
3016   (set_attr "mode" "<ssescalarmode>")])
3017
3018(define_insn "avx512f_vmcmp<mode>3_mask<round_saeonly_name>"
3019  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
3020	(and:<avx512fmaskmode>
3021	  (unspec:<avx512fmaskmode>
3022	    [(match_operand:VF_128 1 "register_operand" "v")
3023	     (match_operand:VF_128 2 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3024	     (match_operand:SI 3 "const_0_to_31_operand" "n")]
3025	    UNSPEC_PCMP)
3026	  (and:<avx512fmaskmode>
3027	    (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")
3028	    (const_int 1))))]
3029  "TARGET_AVX512F"
3030  "vcmp<ssescalarmodesuffix>\t{%3, <round_saeonly_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_saeonly_op5>, %3}"
3031  [(set_attr "type" "ssecmp")
3032   (set_attr "length_immediate" "1")
3033   (set_attr "prefix" "evex")
3034   (set_attr "mode" "<ssescalarmode>")])
3035
3036(define_insn "<sse>_<unord>comi<round_saeonly_name>"
3037  [(set (reg:CCFP FLAGS_REG)
3038	(compare:CCFP
3039	  (vec_select:MODEF
3040	    (match_operand:<ssevecmode> 0 "register_operand" "v")
3041	    (parallel [(const_int 0)]))
3042	  (vec_select:MODEF
3043	    (match_operand:<ssevecmode> 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
3044	    (parallel [(const_int 0)]))))]
3045  "SSE_FLOAT_MODE_P (<MODE>mode)"
3046  "%v<unord>comi<ssemodesuffix>\t{<round_saeonly_op2>%1, %0|%0, %<iptr>1<round_saeonly_op2>}"
3047  [(set_attr "type" "ssecomi")
3048   (set_attr "prefix" "maybe_vex")
3049   (set_attr "prefix_rep" "0")
3050   (set (attr "prefix_data16")
3051	(if_then_else (eq_attr "mode" "DF")
3052		      (const_string "1")
3053		      (const_string "0")))
3054   (set_attr "mode" "<MODE>")])
3055
3056(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3057  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3058	(match_operator:<avx512fmaskmode> 1 ""
3059	  [(match_operand:V48_AVX512VL 2 "register_operand")
3060	   (match_operand:V48_AVX512VL 3 "nonimmediate_operand")]))]
3061  "TARGET_AVX512F"
3062{
3063  bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3064				      operands[2], operands[3]);
3065  gcc_assert (ok);
3066  DONE;
3067})
3068
3069(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
3070  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3071	(match_operator:<avx512fmaskmode> 1 ""
3072	  [(match_operand:VI12_AVX512VL 2 "register_operand")
3073	   (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3074  "TARGET_AVX512BW"
3075{
3076  bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3077				      operands[2], operands[3]);
3078  gcc_assert (ok);
3079  DONE;
3080})
3081
3082(define_expand "vec_cmp<mode><sseintvecmodelower>"
3083  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3084	(match_operator:<sseintvecmode> 1 ""
3085	  [(match_operand:VI_256 2 "register_operand")
3086	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
3087  "TARGET_AVX2"
3088{
3089  bool ok = ix86_expand_int_vec_cmp (operands);
3090  gcc_assert (ok);
3091  DONE;
3092})
3093
3094(define_expand "vec_cmp<mode><sseintvecmodelower>"
3095  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3096	(match_operator:<sseintvecmode> 1 ""
3097	  [(match_operand:VI124_128 2 "register_operand")
3098	   (match_operand:VI124_128 3 "vector_operand")]))]
3099  "TARGET_SSE2"
3100{
3101  bool ok = ix86_expand_int_vec_cmp (operands);
3102  gcc_assert (ok);
3103  DONE;
3104})
3105
3106(define_expand "vec_cmpv2div2di"
3107  [(set (match_operand:V2DI 0 "register_operand")
3108	(match_operator:V2DI 1 ""
3109	  [(match_operand:V2DI 2 "register_operand")
3110	   (match_operand:V2DI 3 "vector_operand")]))]
3111  "TARGET_SSE4_2"
3112{
3113  bool ok = ix86_expand_int_vec_cmp (operands);
3114  gcc_assert (ok);
3115  DONE;
3116})
3117
3118(define_expand "vec_cmp<mode><sseintvecmodelower>"
3119  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3120	(match_operator:<sseintvecmode> 1 ""
3121	  [(match_operand:VF_256 2 "register_operand")
3122	   (match_operand:VF_256 3 "nonimmediate_operand")]))]
3123  "TARGET_AVX"
3124{
3125  bool ok = ix86_expand_fp_vec_cmp (operands);
3126  gcc_assert (ok);
3127  DONE;
3128})
3129
3130(define_expand "vec_cmp<mode><sseintvecmodelower>"
3131  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3132	(match_operator:<sseintvecmode> 1 ""
3133	  [(match_operand:VF_128 2 "register_operand")
3134	   (match_operand:VF_128 3 "vector_operand")]))]
3135  "TARGET_SSE"
3136{
3137  bool ok = ix86_expand_fp_vec_cmp (operands);
3138  gcc_assert (ok);
3139  DONE;
3140})
3141
3142(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3143  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3144	(match_operator:<avx512fmaskmode> 1 ""
3145	  [(match_operand:VI48_AVX512VL 2 "register_operand")
3146	   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")]))]
3147  "TARGET_AVX512F"
3148{
3149  bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3150				      operands[2], operands[3]);
3151  gcc_assert (ok);
3152  DONE;
3153})
3154
3155(define_expand "vec_cmpu<mode><avx512fmaskmodelower>"
3156  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
3157	(match_operator:<avx512fmaskmode> 1 ""
3158	  [(match_operand:VI12_AVX512VL 2 "register_operand")
3159	   (match_operand:VI12_AVX512VL 3 "nonimmediate_operand")]))]
3160  "TARGET_AVX512BW"
3161{
3162  bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
3163				      operands[2], operands[3]);
3164  gcc_assert (ok);
3165  DONE;
3166})
3167
3168(define_expand "vec_cmpu<mode><sseintvecmodelower>"
3169  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3170	(match_operator:<sseintvecmode> 1 ""
3171	  [(match_operand:VI_256 2 "register_operand")
3172	   (match_operand:VI_256 3 "nonimmediate_operand")]))]
3173  "TARGET_AVX2"
3174{
3175  bool ok = ix86_expand_int_vec_cmp (operands);
3176  gcc_assert (ok);
3177  DONE;
3178})
3179
3180(define_expand "vec_cmpu<mode><sseintvecmodelower>"
3181  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3182	(match_operator:<sseintvecmode> 1 ""
3183	  [(match_operand:VI124_128 2 "register_operand")
3184	   (match_operand:VI124_128 3 "vector_operand")]))]
3185  "TARGET_SSE2"
3186{
3187  bool ok = ix86_expand_int_vec_cmp (operands);
3188  gcc_assert (ok);
3189  DONE;
3190})
3191
3192(define_expand "vec_cmpuv2div2di"
3193  [(set (match_operand:V2DI 0 "register_operand")
3194	(match_operator:V2DI 1 ""
3195	  [(match_operand:V2DI 2 "register_operand")
3196	   (match_operand:V2DI 3 "vector_operand")]))]
3197  "TARGET_SSE4_2"
3198{
3199  bool ok = ix86_expand_int_vec_cmp (operands);
3200  gcc_assert (ok);
3201  DONE;
3202})
3203
3204(define_expand "vec_cmpeqv2div2di"
3205  [(set (match_operand:V2DI 0 "register_operand")
3206	(match_operator:V2DI 1 ""
3207	  [(match_operand:V2DI 2 "register_operand")
3208	   (match_operand:V2DI 3 "vector_operand")]))]
3209  "TARGET_SSE4_1"
3210{
3211  bool ok = ix86_expand_int_vec_cmp (operands);
3212  gcc_assert (ok);
3213  DONE;
3214})
3215
3216(define_expand "vcond<V_512:mode><VF_512:mode>"
3217  [(set (match_operand:V_512 0 "register_operand")
3218	(if_then_else:V_512
3219	  (match_operator 3 ""
3220	    [(match_operand:VF_512 4 "nonimmediate_operand")
3221	     (match_operand:VF_512 5 "nonimmediate_operand")])
3222	  (match_operand:V_512 1 "general_operand")
3223	  (match_operand:V_512 2 "general_operand")))]
3224  "TARGET_AVX512F
3225   && (GET_MODE_NUNITS (<V_512:MODE>mode)
3226       == GET_MODE_NUNITS (<VF_512:MODE>mode))"
3227{
3228  bool ok = ix86_expand_fp_vcond (operands);
3229  gcc_assert (ok);
3230  DONE;
3231})
3232
3233(define_expand "vcond<V_256:mode><VF_256:mode>"
3234  [(set (match_operand:V_256 0 "register_operand")
3235	(if_then_else:V_256
3236	  (match_operator 3 ""
3237	    [(match_operand:VF_256 4 "nonimmediate_operand")
3238	     (match_operand:VF_256 5 "nonimmediate_operand")])
3239	  (match_operand:V_256 1 "general_operand")
3240	  (match_operand:V_256 2 "general_operand")))]
3241  "TARGET_AVX
3242   && (GET_MODE_NUNITS (<V_256:MODE>mode)
3243       == GET_MODE_NUNITS (<VF_256:MODE>mode))"
3244{
3245  bool ok = ix86_expand_fp_vcond (operands);
3246  gcc_assert (ok);
3247  DONE;
3248})
3249
3250(define_expand "vcond<V_128:mode><VF_128:mode>"
3251  [(set (match_operand:V_128 0 "register_operand")
3252	(if_then_else:V_128
3253	  (match_operator 3 ""
3254	    [(match_operand:VF_128 4 "vector_operand")
3255	     (match_operand:VF_128 5 "vector_operand")])
3256	  (match_operand:V_128 1 "general_operand")
3257	  (match_operand:V_128 2 "general_operand")))]
3258  "TARGET_SSE
3259   && (GET_MODE_NUNITS (<V_128:MODE>mode)
3260       == GET_MODE_NUNITS (<VF_128:MODE>mode))"
3261{
3262  bool ok = ix86_expand_fp_vcond (operands);
3263  gcc_assert (ok);
3264  DONE;
3265})
3266
3267(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3268  [(set (match_operand:V48_AVX512VL 0 "register_operand")
3269	(vec_merge:V48_AVX512VL
3270	  (match_operand:V48_AVX512VL 1 "nonimmediate_operand")
3271	  (match_operand:V48_AVX512VL 2 "nonimm_or_0_operand")
3272	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3273  "TARGET_AVX512F")
3274
3275(define_expand "vcond_mask_<mode><avx512fmaskmodelower>"
3276  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
3277	(vec_merge:VI12_AVX512VL
3278	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
3279	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand")
3280	  (match_operand:<avx512fmaskmode> 3 "register_operand")))]
3281  "TARGET_AVX512BW")
3282
3283;; As vcondv4div4df and vcondv8siv8sf are enabled already with TARGET_AVX,
3284;; and their condition can be folded late into a constant, we need to
3285;; support vcond_mask_v4div4di and vcond_mask_v8siv8si for TARGET_AVX.
3286(define_mode_iterator VI_256_AVX2 [(V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
3287				   V8SI V4DI])
3288
3289(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3290  [(set (match_operand:VI_256_AVX2 0 "register_operand")
3291	(vec_merge:VI_256_AVX2
3292	  (match_operand:VI_256_AVX2 1 "nonimmediate_operand")
3293	  (match_operand:VI_256_AVX2 2 "nonimm_or_0_operand")
3294	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3295  "TARGET_AVX"
3296{
3297  ix86_expand_sse_movcc (operands[0], operands[3],
3298			 operands[1], operands[2]);
3299  DONE;
3300})
3301
3302(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3303  [(set (match_operand:VI124_128 0 "register_operand")
3304	(vec_merge:VI124_128
3305	  (match_operand:VI124_128 1 "vector_operand")
3306	  (match_operand:VI124_128 2 "nonimm_or_0_operand")
3307	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3308  "TARGET_SSE2"
3309{
3310  ix86_expand_sse_movcc (operands[0], operands[3],
3311			 operands[1], operands[2]);
3312  DONE;
3313})
3314
3315(define_expand "vcond_mask_v2div2di"
3316  [(set (match_operand:V2DI 0 "register_operand")
3317	(vec_merge:V2DI
3318	  (match_operand:V2DI 1 "vector_operand")
3319	  (match_operand:V2DI 2 "nonimm_or_0_operand")
3320	  (match_operand:V2DI 3 "register_operand")))]
3321  "TARGET_SSE4_2"
3322{
3323  ix86_expand_sse_movcc (operands[0], operands[3],
3324			 operands[1], operands[2]);
3325  DONE;
3326})
3327
3328(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3329  [(set (match_operand:VF_256 0 "register_operand")
3330	(vec_merge:VF_256
3331	  (match_operand:VF_256 1 "nonimmediate_operand")
3332	  (match_operand:VF_256 2 "nonimm_or_0_operand")
3333	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3334  "TARGET_AVX"
3335{
3336  ix86_expand_sse_movcc (operands[0], operands[3],
3337			 operands[1], operands[2]);
3338  DONE;
3339})
3340
3341(define_expand "vcond_mask_<mode><sseintvecmodelower>"
3342  [(set (match_operand:VF_128 0 "register_operand")
3343	(vec_merge:VF_128
3344	  (match_operand:VF_128 1 "vector_operand")
3345	  (match_operand:VF_128 2 "nonimm_or_0_operand")
3346	  (match_operand:<sseintvecmode> 3 "register_operand")))]
3347  "TARGET_SSE"
3348{
3349  ix86_expand_sse_movcc (operands[0], operands[3],
3350			 operands[1], operands[2]);
3351  DONE;
3352})
3353
3354;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3355;;
3356;; Parallel floating point logical operations
3357;;
3358;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3359
3360(define_insn "<sse>_andnot<mode>3<mask_name>"
3361  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3362	(and:VF_128_256
3363	  (not:VF_128_256
3364	    (match_operand:VF_128_256 1 "register_operand" "0,x,v,v"))
3365	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3366  "TARGET_SSE && <mask_avx512vl_condition>"
3367{
3368  char buf[128];
3369  const char *ops;
3370  const char *suffix;
3371
3372  switch (which_alternative)
3373    {
3374    case 0:
3375      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3376      break;
3377    case 1:
3378    case 2:
3379    case 3:
3380      ops = "vandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3381      break;
3382    default:
3383      gcc_unreachable ();
3384    }
3385
3386  switch (get_attr_mode (insn))
3387    {
3388    case MODE_V8SF:
3389    case MODE_V4SF:
3390      suffix = "ps";
3391      break;
3392    case MODE_OI:
3393    case MODE_TI:
3394      /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
3395      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3396      ops = "vpandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3397      break;
3398    default:
3399      suffix = "<ssemodesuffix>";
3400    }
3401
3402  snprintf (buf, sizeof (buf), ops, suffix);
3403  output_asm_insn (buf, operands);
3404  return "";
3405}
3406  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3407   (set_attr "type" "sselog")
3408   (set_attr "prefix" "orig,maybe_vex,evex,evex")
3409   (set (attr "mode")
3410	(cond [(and (match_test "<mask_applied>")
3411		    (and (eq_attr "alternative" "1")
3412			 (match_test "!TARGET_AVX512DQ")))
3413		 (const_string "<sseintvecmode2>")
3414	       (eq_attr "alternative" "3")
3415		 (const_string "<sseintvecmode2>")
3416	       (match_test "TARGET_AVX")
3417		 (const_string "<MODE>")
3418	       (match_test "optimize_function_for_size_p (cfun)")
3419		 (const_string "V4SF")
3420	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3421		 (const_string "V4SF")
3422	      ]
3423	      (const_string "<MODE>")))])
3424
3425(define_insn "<sse>_andnot<mode>3<mask_name>"
3426  [(set (match_operand:VF_512 0 "register_operand" "=v")
3427	(and:VF_512
3428	  (not:VF_512
3429	    (match_operand:VF_512 1 "register_operand" "v"))
3430	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3431  "TARGET_AVX512F"
3432{
3433  char buf[128];
3434  const char *ops;
3435  const char *suffix;
3436
3437  suffix = "<ssemodesuffix>";
3438  ops = "";
3439
3440  /* There is no vandnp[sd] in avx512f.  Use vpandn[qd].  */
3441  if (!TARGET_AVX512DQ)
3442    {
3443      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3444      ops = "p";
3445    }
3446
3447  snprintf (buf, sizeof (buf),
3448	    "v%sandn%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3449	    ops, suffix);
3450  output_asm_insn (buf, operands);
3451  return "";
3452}
3453  [(set_attr "type" "sselog")
3454   (set_attr "prefix" "evex")
3455   (set (attr "mode")
3456        (if_then_else (match_test "TARGET_AVX512DQ")
3457		      (const_string "<sseinsnmode>")
3458		      (const_string "XI")))])
3459
3460(define_expand "<code><mode>3<mask_name>"
3461  [(set (match_operand:VF_128_256 0 "register_operand")
3462       (any_logic:VF_128_256
3463         (match_operand:VF_128_256 1 "vector_operand")
3464         (match_operand:VF_128_256 2 "vector_operand")))]
3465  "TARGET_SSE && <mask_avx512vl_condition>"
3466  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3467
3468(define_expand "<code><mode>3<mask_name>"
3469  [(set (match_operand:VF_512 0 "register_operand")
3470       (any_logic:VF_512
3471         (match_operand:VF_512 1 "nonimmediate_operand")
3472         (match_operand:VF_512 2 "nonimmediate_operand")))]
3473  "TARGET_AVX512F"
3474  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
3475
3476(define_insn "*<code><mode>3<mask_name>"
3477  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x,v,v")
3478	(any_logic:VF_128_256
3479	  (match_operand:VF_128_256 1 "vector_operand" "%0,x,v,v")
3480	  (match_operand:VF_128_256 2 "vector_operand" "xBm,xm,vm,vm")))]
3481  "TARGET_SSE && <mask_avx512vl_condition>
3482   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3483{
3484  char buf[128];
3485  const char *ops;
3486  const char *suffix;
3487
3488  switch (which_alternative)
3489    {
3490    case 0:
3491      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3492      break;
3493    case 1:
3494    case 2:
3495    case 3:
3496      ops = "v<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3497      break;
3498    default:
3499      gcc_unreachable ();
3500    }
3501
3502  switch (get_attr_mode (insn))
3503    {
3504    case MODE_V8SF:
3505    case MODE_V4SF:
3506      suffix = "ps";
3507      break;
3508    case MODE_OI:
3509    case MODE_TI:
3510      /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[qd].  */
3511      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3512      ops = "vp<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
3513      break;
3514    default:
3515      suffix = "<ssemodesuffix>";
3516    }
3517
3518  snprintf (buf, sizeof (buf), ops, suffix);
3519  output_asm_insn (buf, operands);
3520  return "";
3521}
3522  [(set_attr "isa" "noavx,avx,avx512dq,avx512f")
3523   (set_attr "type" "sselog")
3524   (set_attr "prefix" "orig,maybe_evex,evex,evex")
3525   (set (attr "mode")
3526	(cond [(and (match_test "<mask_applied>")
3527		    (and (eq_attr "alternative" "1")
3528			 (match_test "!TARGET_AVX512DQ")))
3529		 (const_string "<sseintvecmode2>")
3530	       (eq_attr "alternative" "3")
3531		 (const_string "<sseintvecmode2>")
3532	       (match_test "TARGET_AVX")
3533		 (const_string "<MODE>")
3534	       (match_test "optimize_function_for_size_p (cfun)")
3535		 (const_string "V4SF")
3536	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3537		 (const_string "V4SF")
3538	      ]
3539	      (const_string "<MODE>")))])
3540
3541(define_insn "*<code><mode>3<mask_name>"
3542  [(set (match_operand:VF_512 0 "register_operand" "=v")
3543	(any_logic:VF_512
3544	  (match_operand:VF_512 1 "nonimmediate_operand" "%v")
3545	  (match_operand:VF_512 2 "nonimmediate_operand" "vm")))]
3546  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3547{
3548  char buf[128];
3549  const char *ops;
3550  const char *suffix;
3551
3552  suffix = "<ssemodesuffix>";
3553  ops = "";
3554
3555  /* There is no v<logic>p[sd] in avx512f.  Use vp<logic>[dq].  */
3556  if (!TARGET_AVX512DQ)
3557    {
3558      suffix = GET_MODE_INNER (<MODE>mode) == DFmode ? "q" : "d";
3559      ops = "p";
3560    }
3561
3562  snprintf (buf, sizeof (buf),
3563	   "v%s<logic>%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}",
3564	   ops, suffix);
3565  output_asm_insn (buf, operands);
3566  return "";
3567}
3568  [(set_attr "type" "sselog")
3569   (set_attr "prefix" "evex")
3570   (set (attr "mode")
3571        (if_then_else (match_test "TARGET_AVX512DQ")
3572		      (const_string "<sseinsnmode>")
3573		      (const_string "XI")))])
3574
3575(define_expand "copysign<mode>3"
3576  [(set (match_dup 4)
3577	(and:VF
3578	  (not:VF (match_dup 3))
3579	  (match_operand:VF 1 "vector_operand")))
3580   (set (match_dup 5)
3581	(and:VF (match_dup 3)
3582		(match_operand:VF 2 "vector_operand")))
3583   (set (match_operand:VF 0 "register_operand")
3584	(ior:VF (match_dup 4) (match_dup 5)))]
3585  "TARGET_SSE"
3586{
3587  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3588
3589  operands[4] = gen_reg_rtx (<MODE>mode);
3590  operands[5] = gen_reg_rtx (<MODE>mode);
3591})
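
;; A minimal illustrative sketch (not part of the machine description) of the
;; bit manipulation generated by the copysign expander above, written with
;; SSE2 intrinsics for V2DF; the helper name is made up:
;;
;;   #include <immintrin.h>
;;
;;   static __m128d copysign_v2df (__m128d mag, __m128d sgn)
;;   {
;;     /* Sign-bit mask: -0.0 has only the sign bit set in each lane.  */
;;     __m128d signmask = _mm_set1_pd (-0.0);
;;     /* (mag & ~signmask) | (sgn & signmask), as in the expander above.  */
;;     return _mm_or_pd (_mm_andnot_pd (signmask, mag),
;;                       _mm_and_pd (signmask, sgn));
;;   }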
3592
3593(define_expand "xorsign<mode>3"
3594  [(set (match_dup 4)
3595	(and:VF (match_dup 3)
3596		(match_operand:VF 2 "vector_operand")))
3597   (set (match_operand:VF 0 "register_operand")
3598	(xor:VF (match_dup 4)
3599		(match_operand:VF 1 "vector_operand")))]
3600  "TARGET_SSE"
3601{
3602  operands[3] = ix86_build_signbit_mask (<MODE>mode, 1, 0);
3603
3604  operands[4] = gen_reg_rtx (<MODE>mode);
3605})
3606
3607(define_expand "signbit<mode>2"
3608  [(set (match_operand:<sseintvecmode> 0 "register_operand")
3609	(lshiftrt:<sseintvecmode>
3610	  (subreg:<sseintvecmode>
3611	    (match_operand:VF1_AVX2 1 "register_operand") 0)
3612	  (match_dup 2)))]
3613  "TARGET_SSE2"
3614  "operands[2] = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)-1);")
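
;; Illustrative sketch only (helper name invented, SSE2 intrinsics): for V4SF
;; the signbit expander above amounts to a logical right shift of the float
;; bit pattern by 31, leaving 1 in each lane whose input had the sign bit set:
;;
;;   #include <immintrin.h>
;;
;;   static __m128i signbit_v4sf (__m128 x)
;;   {
;;     /* Reinterpret the floats as 32-bit lanes and shift the sign bit down.  */
;;     return _mm_srli_epi32 (_mm_castps_si128 (x), 31);
;;   }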
3615
3616;; Also define scalar versions.  These are used for abs, neg, and
3617;; conditional move.  Using subregs into vector modes causes register
3618;; allocation lossage.  These patterns do not allow memory operands
3619;; because the native instructions read the full 128 bits.
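
;; For instance, scalar fabs can be carried out as a full-width bitwise AND
;; that clears the sign bit; a rough SSE2-intrinsics equivalent (illustrative
;; only, the function name is made up):
;;
;;   #include <immintrin.h>
;;
;;   static double fabs_sse2 (double x)
;;   {
;;     /* andnpd with a sign-bit mask: ~(-0.0) & x clears bit 63 only.  */
;;     __m128d v = _mm_set_sd (x);
;;     v = _mm_andnot_pd (_mm_set_sd (-0.0), v);
;;     return _mm_cvtsd_f64 (v);
;;   }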
3620
3621(define_insn "*andnot<mode>3"
3622  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3623	(and:MODEF
3624	  (not:MODEF
3625	    (match_operand:MODEF 1 "register_operand" "0,x,v,v"))
3626	    (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3627  "SSE_FLOAT_MODE_P (<MODE>mode)"
3628{
3629  char buf[128];
3630  const char *ops;
3631  const char *suffix
3632    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3633
3634  switch (which_alternative)
3635    {
3636    case 0:
3637      ops = "andn%s\t{%%2, %%0|%%0, %%2}";
3638      break;
3639    case 1:
3640      ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3641      break;
3642    case 2:
3643      if (TARGET_AVX512DQ)
3644	ops = "vandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3645      else
3646	{
3647	  suffix = <MODE>mode == DFmode ? "q" : "d";
3648	  ops = "vpandn%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3649	}
3650      break;
3651    case 3:
3652      if (TARGET_AVX512DQ)
3653	ops = "vandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3654      else
3655	{
3656	  suffix = <MODE>mode == DFmode ? "q" : "d";
3657	  ops = "vpandn%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3658	}
3659      break;
3660    default:
3661      gcc_unreachable ();
3662    }
3663
3664  snprintf (buf, sizeof (buf), ops, suffix);
3665  output_asm_insn (buf, operands);
3666  return "";
3667}
3668  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3669   (set_attr "type" "sselog")
3670   (set_attr "prefix" "orig,vex,evex,evex")
3671   (set (attr "mode")
3672	(cond [(eq_attr "alternative" "2")
3673		 (if_then_else (match_test "TARGET_AVX512DQ")
3674			       (const_string "<ssevecmode>")
3675			       (const_string "TI"))
3676	       (eq_attr "alternative" "3")
3677		 (if_then_else (match_test "TARGET_AVX512DQ")
3678			       (const_string "<avx512fvecmode>")
3679			       (const_string "XI"))
3680	       (match_test "TARGET_AVX")
3681		 (const_string "<ssevecmode>")
3682	       (match_test "optimize_function_for_size_p (cfun)")
3683		 (const_string "V4SF")
3684	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3685		 (const_string "V4SF")
3686	      ]
3687	      (const_string "<ssevecmode>")))])
3688
3689(define_insn "*andnottf3"
3690  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3691	(and:TF
3692	  (not:TF (match_operand:TF 1 "register_operand" "0,x,v,v"))
3693	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3694  "TARGET_SSE"
3695{
3696  char buf[128];
3697  const char *ops;
3698  const char *tmp
3699    = (which_alternative >= 2 ? "pandnq"
3700       : get_attr_mode (insn) == MODE_V4SF ? "andnps" : "pandn");
3701
3702  switch (which_alternative)
3703    {
3704    case 0:
3705      ops = "%s\t{%%2, %%0|%%0, %%2}";
3706      break;
3707    case 1:
3708    case 2:
3709      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3710      break;
3711    case 3:
3712      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3713      break;
3714    default:
3715      gcc_unreachable ();
3716    }
3717
3718  snprintf (buf, sizeof (buf), ops, tmp);
3719  output_asm_insn (buf, operands);
3720  return "";
3721}
3722  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3723   (set_attr "type" "sselog")
3724   (set (attr "prefix_data16")
3725     (if_then_else
3726       (and (eq_attr "alternative" "0")
3727	    (eq_attr "mode" "TI"))
3728       (const_string "1")
3729       (const_string "*")))
3730   (set_attr "prefix" "orig,vex,evex,evex")
3731   (set (attr "mode")
3732	(cond [(eq_attr "alternative" "2")
3733		 (const_string "TI")
3734	       (eq_attr "alternative" "3")
3735		 (const_string "XI")
3736	       (match_test "TARGET_AVX")
3737		 (const_string "TI")
3738	       (ior (not (match_test "TARGET_SSE2"))
3739		    (match_test "optimize_function_for_size_p (cfun)"))
3740		 (const_string "V4SF")
3741	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3742		 (const_string "V4SF")
3743	      ]
3744	      (const_string "TI")))])
3745
3746(define_insn "*<code><mode>3"
3747  [(set (match_operand:MODEF 0 "register_operand" "=x,x,v,v")
3748	(any_logic:MODEF
3749	  (match_operand:MODEF 1 "register_operand" "%0,x,v,v")
3750	  (match_operand:MODEF 2 "register_operand" "x,x,v,v")))]
3751  "SSE_FLOAT_MODE_P (<MODE>mode)"
3752{
3753  char buf[128];
3754  const char *ops;
3755  const char *suffix
3756    = (get_attr_mode (insn) == MODE_V4SF) ? "ps" : "<ssevecmodesuffix>";
3757
3758  switch (which_alternative)
3759    {
3760    case 0:
3761      ops = "<logic>%s\t{%%2, %%0|%%0, %%2}";
3762      break;
3763    case 2:
3764      if (!TARGET_AVX512DQ)
3765	{
3766	  suffix = <MODE>mode == DFmode ? "q" : "d";
3767	  ops = "vp<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3768	  break;
3769	}
3770      /* FALLTHRU */
3771    case 1:
3772      ops = "v<logic>%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3773      break;
3774    case 3:
3775      if (TARGET_AVX512DQ)
3776	ops = "v<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3777      else
3778	{
3779	  suffix = <MODE>mode == DFmode ? "q" : "d";
3780	  ops = "vp<logic>%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3781	}
3782      break;
3783    default:
3784      gcc_unreachable ();
3785    }
3786
3787  snprintf (buf, sizeof (buf), ops, suffix);
3788  output_asm_insn (buf, operands);
3789  return "";
3790}
3791  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3792   (set_attr "type" "sselog")
3793   (set_attr "prefix" "orig,vex,evex,evex")
3794   (set (attr "mode")
3795	(cond [(eq_attr "alternative" "2")
3796		 (if_then_else (match_test "TARGET_AVX512DQ")
3797			       (const_string "<ssevecmode>")
3798			       (const_string "TI"))
3799	       (eq_attr "alternative" "3")
3800		 (if_then_else (match_test "TARGET_AVX512DQ")
3801			       (const_string "<avx512fvecmode>")
3802			       (const_string "XI"))
3803	       (match_test "TARGET_AVX")
3804		 (const_string "<ssevecmode>")
3805	       (match_test "optimize_function_for_size_p (cfun)")
3806		 (const_string "V4SF")
3807	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3808		 (const_string "V4SF")
3809	      ]
3810	      (const_string "<ssevecmode>")))])
3811
3812(define_expand "<code>tf3"
3813  [(set (match_operand:TF 0 "register_operand")
3814	(any_logic:TF
3815	  (match_operand:TF 1 "vector_operand")
3816	  (match_operand:TF 2 "vector_operand")))]
3817  "TARGET_SSE"
3818  "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
3819
3820(define_insn "*<code>tf3"
3821  [(set (match_operand:TF 0 "register_operand" "=x,x,v,v")
3822	(any_logic:TF
3823	  (match_operand:TF 1 "vector_operand" "%0,x,v,v")
3824	  (match_operand:TF 2 "vector_operand" "xBm,xm,vm,v")))]
3825  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
3826{
3827  char buf[128];
3828  const char *ops;
3829  const char *tmp
3830    = (which_alternative >= 2 ? "p<logic>q"
3831       : get_attr_mode (insn) == MODE_V4SF ? "<logic>ps" : "p<logic>");
3832
3833  switch (which_alternative)
3834    {
3835    case 0:
3836      ops = "%s\t{%%2, %%0|%%0, %%2}";
3837      break;
3838    case 1:
3839    case 2:
3840      ops = "v%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
3841      break;
3842    case 3:
3843      ops = "v%s\t{%%g2, %%g1, %%g0|%%g0, %%g1, %%g2}";
3844      break;
3845    default:
3846      gcc_unreachable ();
3847    }
3848
3849  snprintf (buf, sizeof (buf), ops, tmp);
3850  output_asm_insn (buf, operands);
3851  return "";
3852}
3853  [(set_attr "isa" "noavx,avx,avx512vl,avx512f")
3854   (set_attr "type" "sselog")
3855   (set (attr "prefix_data16")
3856     (if_then_else
3857       (and (eq_attr "alternative" "0")
3858	    (eq_attr "mode" "TI"))
3859       (const_string "1")
3860       (const_string "*")))
3861   (set_attr "prefix" "orig,vex,evex,evex")
3862   (set (attr "mode")
3863	(cond [(eq_attr "alternative" "2")
3864		 (const_string "TI")
3865	       (eq_attr "alternative" "3")
3866		 (const_string "XI")
3867	       (match_test "TARGET_AVX")
3868		 (const_string "TI")
3869	       (ior (not (match_test "TARGET_SSE2"))
3870		    (match_test "optimize_function_for_size_p (cfun)"))
3871		 (const_string "V4SF")
3872	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
3873		 (const_string "V4SF")
3874	      ]
3875	      (const_string "TI")))])
3876
3877;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3878;;
3879;; FMA floating point multiply/accumulate instructions.  These include
3880;; scalar versions of the instructions as well as vector versions.
3881;;
3882;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3883
3884;; The standard names for scalar FMA are only available with SSE math enabled.
3885;; CPUID bit AVX512F enables EVEX-encoded scalar and 512-bit fma.  It doesn't
3886;; care about the FMA bit, so we enable fma for TARGET_AVX512F even when
3887;; TARGET_FMA and TARGET_FMA4 are both false.
3888;; TODO: In theory AVX512F does not automatically imply FMA, and without FMA
3889;; one must force the EVEX encoding of the fma insns.  Ideally we'd improve
3890;; GAS to allow proper prefix selection.  However, all current hardware
3891;; that supports AVX512F also supports FMA, so we can ignore this for now.
3892(define_mode_iterator FMAMODEM
3893  [(SF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3894   (DF "TARGET_SSE_MATH && (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F)")
3895   (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3896   (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3897   (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3898   (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3899   (V16SF "TARGET_AVX512F")
3900   (V8DF "TARGET_AVX512F")])
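
;; As a concrete illustration of the conditions above (not part of this file):
;; with SSE math in effect the fma standard name is available whenever any of
;; FMA, FMA4 or AVX512F is enabled, so the function below is expected to use a
;; fused multiply-add instruction even with -mavx512f alone (no -mfma):
;;
;;   double fused (double a, double b, double c)
;;   {
;;     return __builtin_fma (a, b, c);	/* expands through fmadf4.  */
;;   }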
3901
3902(define_expand "fma<mode>4"
3903  [(set (match_operand:FMAMODEM 0 "register_operand")
3904	(fma:FMAMODEM
3905	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3906	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3907	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3908
3909(define_expand "fms<mode>4"
3910  [(set (match_operand:FMAMODEM 0 "register_operand")
3911	(fma:FMAMODEM
3912	  (match_operand:FMAMODEM 1 "nonimmediate_operand")
3913	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3914	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3915
3916(define_expand "fnma<mode>4"
3917  [(set (match_operand:FMAMODEM 0 "register_operand")
3918	(fma:FMAMODEM
3919	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3920	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3921	  (match_operand:FMAMODEM 3 "nonimmediate_operand")))])
3922
3923(define_expand "fnms<mode>4"
3924  [(set (match_operand:FMAMODEM 0 "register_operand")
3925	(fma:FMAMODEM
3926	  (neg:FMAMODEM (match_operand:FMAMODEM 1 "nonimmediate_operand"))
3927	  (match_operand:FMAMODEM 2 "nonimmediate_operand")
3928	  (neg:FMAMODEM (match_operand:FMAMODEM 3 "nonimmediate_operand"))))])
3929
3930;; The builtins for intrinsics are not constrained by SSE math being enabled.
3931(define_mode_iterator FMAMODE_AVX512
3932 [(SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3933  (DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")
3934  (V4SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3935  (V2DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3936  (V8SF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3937  (V4DF "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512VL")
3938  (V16SF "TARGET_AVX512F")
3939  (V8DF "TARGET_AVX512F")])
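
;; Illustration (assumed flags, not part of this file): the intrinsic below is
;; routed through the builtin expanders, so it can still produce a vfmadd
;; scalar instruction even when scalar math is not done in SSE registers,
;; e.g. with -mfma -mfpmath=387, a case where the fma standard name above
;; would not be available:
;;
;;   #include <immintrin.h>
;;
;;   __m128d fma_sd (__m128d a, __m128d b, __m128d c)
;;   {
;;     return _mm_fmadd_sd (a, b, c);
;;   }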
3940
3941(define_mode_iterator FMAMODE
3942  [SF DF V4SF V2DF V8SF V4DF])
3943
3944(define_expand "fma4i_fmadd_<mode>"
3945  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3946	(fma:FMAMODE_AVX512
3947	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3948	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3949	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3950
3951(define_expand "fma4i_fmsub_<mode>"
3952  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3953	(fma:FMAMODE_AVX512
3954	  (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand")
3955	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3956	  (neg:FMAMODE_AVX512
3957	    (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3958
3959(define_expand "fma4i_fnmadd_<mode>"
3960  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3961	(fma:FMAMODE_AVX512
3962	  (neg:FMAMODE_AVX512
3963	    (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3964	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3965	  (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand")))])
3966
3967(define_expand "fma4i_fnmsub_<mode>"
3968  [(set (match_operand:FMAMODE_AVX512 0 "register_operand")
3969	(fma:FMAMODE_AVX512
3970	  (neg:FMAMODE_AVX512
3971	    (match_operand:FMAMODE_AVX512 1 "nonimmediate_operand"))
3972	  (match_operand:FMAMODE_AVX512 2 "nonimmediate_operand")
3973	  (neg:FMAMODE_AVX512
3974	    (match_operand:FMAMODE_AVX512 3 "nonimmediate_operand"))))])
3975
3976(define_expand "<avx512>_fmadd_<mode>_maskz<round_expand_name>"
3977  [(match_operand:VF_AVX512VL 0 "register_operand")
3978   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
3979   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
3980   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
3981   (match_operand:<avx512fmaskmode> 4 "register_operand")]
3982  "TARGET_AVX512F && <round_mode512bit_condition>"
3983{
3984  emit_insn (gen_fma_fmadd_<mode>_maskz_1<round_expand_name> (
3985    operands[0], operands[1], operands[2], operands[3],
3986    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
3987  DONE;
3988})
3989
3990(define_insn "*fma_fmadd_<mode>"
3991  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
3992	(fma:FMAMODE
3993	  (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x")
3994	  (match_operand:FMAMODE 2 "nonimmediate_operand" "vm,v,vm,x,m")
3995	  (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x")))]
3996  "TARGET_FMA || TARGET_FMA4"
3997  "@
3998   vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
3999   vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4000   vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4001   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4002   vfmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4003  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4004   (set_attr "type" "ssemuladd")
4005   (set_attr "mode" "<MODE>")])
4006
4007;; Assume AVX-512F as the baseline.
4008(define_mode_iterator VF_SF_AVX512VL
4009  [SF V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
4010   DF V8DF (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
4011
4012(define_insn "<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name><round_name>"
4013  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4014	(fma:VF_SF_AVX512VL
4015	  (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
4016	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4017	  (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4018  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4019  "@
4020   vfmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4021   vfmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4022   vfmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4023  [(set_attr "type" "ssemuladd")
4024   (set_attr "mode" "<MODE>")])
4025
4026(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_1"
4027  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4028	(fma:VF_AVX512
4029	  (match_operand:VF_AVX512 1 "register_operand" "%0")
4030	  (match_operand:VF_AVX512 2 "register_operand" "v")
4031	  (vec_duplicate:VF_AVX512
4032	    (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
4033  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4034  "vfmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4035  [(set_attr "type" "ssemuladd")
4036   (set_attr "mode" "<MODE>")])
4037
4038(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_2"
4039  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4040	(fma:VF_AVX512
4041	  (vec_duplicate:VF_AVX512
4042	    (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4043	  (match_operand:VF_AVX512 2 "register_operand" "0,v")
4044	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4045  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4046  "@
4047   vfmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4048   vfmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4049  [(set_attr "type" "ssemuladd")
4050   (set_attr "mode" "<MODE>")])
4051
4052(define_insn "*<sd_mask_codefor>fma_fmadd_<mode><sd_maskz_name>_bcst_3"
4053  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4054	(fma:VF_AVX512
4055	  (match_operand:VF_AVX512 1 "register_operand" "0,v")
4056	  (vec_duplicate:VF_AVX512
4057	    (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4058	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4059  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4060  "@
4061   vfmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4062   vfmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4063  [(set_attr "type" "ssemuladd")
4064   (set_attr "mode" "<MODE>")])
4065
4066(define_insn "<avx512>_fmadd_<mode>_mask<round_name>"
4067  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4068	(vec_merge:VF_AVX512VL
4069	  (fma:VF_AVX512VL
4070	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4071	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4072	    (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4073	  (match_dup 1)
4074	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4075  "TARGET_AVX512F && <round_mode512bit_condition>"
4076  "@
4077   vfmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4078   vfmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4079  [(set_attr "type" "ssemuladd")
4080   (set_attr "mode" "<MODE>")])
4081
4082(define_insn "<avx512>_fmadd_<mode>_mask3<round_name>"
4083  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4084	(vec_merge:VF_AVX512VL
4085	  (fma:VF_AVX512VL
4086	    (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4087	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4088	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4089	  (match_dup 3)
4090	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4091  "TARGET_AVX512F"
4092  "vfmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4093  [(set_attr "type" "ssemuladd")
4094   (set_attr "mode" "<MODE>")])
4095
4096(define_insn "*fma_fmsub_<mode>"
4097  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4098	(fma:FMAMODE
4099	  (match_operand:FMAMODE   1 "nonimmediate_operand" "%0,0,v,x,x")
4100	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
4101	  (neg:FMAMODE
4102	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4103  "TARGET_FMA || TARGET_FMA4"
4104  "@
4105   vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4106   vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4107   vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4108   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4109   vfmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4110  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4111   (set_attr "type" "ssemuladd")
4112   (set_attr "mode" "<MODE>")])
4113
4114(define_expand "<avx512>_fmsub_<mode>_maskz<round_expand_name>"
4115  [(match_operand:VF_AVX512VL 0 "register_operand")
4116   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4117   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4118   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4119   (match_operand:<avx512fmaskmode> 4 "register_operand")]
4120  "TARGET_AVX512F && <round_mode512bit_condition>"
4121{
4122  emit_insn (gen_fma_fmsub_<mode>_maskz_1<round_expand_name> (
4123    operands[0], operands[1], operands[2], operands[3],
4124    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4125  DONE;
4126})
4127
4128(define_insn "<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name><round_name>"
4129  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4130	(fma:VF_SF_AVX512VL
4131	  (match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
4132	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4133	  (neg:VF_SF_AVX512VL
4134	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
4135  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4136  "@
4137   vfmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4138   vfmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4139   vfmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4140  [(set_attr "type" "ssemuladd")
4141   (set_attr "mode" "<MODE>")])
4142
4143(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_1"
4144  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4145	(fma:VF_AVX512
4146	  (match_operand:VF_AVX512 1 "register_operand" "%0")
4147	  (match_operand:VF_AVX512 2 "register_operand" "v")
4148	  (neg:VF_AVX512
4149	    (vec_duplicate:VF_AVX512
4150	      (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
4151  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4152  "vfmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4153  [(set_attr "type" "ssemuladd")
4154   (set_attr "mode" "<MODE>")])
4155
4156(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_2"
4157  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4158	(fma:VF_AVX512
4159	  (vec_duplicate:VF_AVX512
4160	    (match_operand:<ssescalarmode> 1 "memory_operand" "m,m"))
4161	  (match_operand:VF_AVX512 2 "register_operand" "0,v")
4162	  (neg:VF_AVX512
4163	    (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4164  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4165  "@
4166   vfmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4167   vfmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4168  [(set_attr "type" "ssemuladd")
4169   (set_attr "mode" "<MODE>")])
4170
4171(define_insn "*<sd_mask_codefor>fma_fmsub_<mode><sd_maskz_name>_bcst_3"
4172  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4173	(fma:VF_AVX512
4174	  (match_operand:VF_AVX512 1 "register_operand" "0,v")
4175	  (vec_duplicate:VF_AVX512
4176	    (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4177	  (neg:VF_AVX512
4178	    (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
4179  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4180  "@
4181   vfmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4182   vfmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4183  [(set_attr "type" "ssemuladd")
4184   (set_attr "mode" "<MODE>")])
4185
4186(define_insn "<avx512>_fmsub_<mode>_mask<round_name>"
4187  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4188	(vec_merge:VF_AVX512VL
4189	  (fma:VF_AVX512VL
4190	    (match_operand:VF_AVX512VL 1 "register_operand" "0,0")
4191	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4192	    (neg:VF_AVX512VL
4193	      (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
4194	  (match_dup 1)
4195	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4196  "TARGET_AVX512F"
4197  "@
4198   vfmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4199   vfmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4200  [(set_attr "type" "ssemuladd")
4201   (set_attr "mode" "<MODE>")])
4202
4203(define_insn "<avx512>_fmsub_<mode>_mask3<round_name>"
4204  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4205	(vec_merge:VF_AVX512VL
4206	  (fma:VF_AVX512VL
4207	    (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v")
4208	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4209	    (neg:VF_AVX512VL
4210	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
4211	  (match_dup 3)
4212	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4213  "TARGET_AVX512F && <round_mode512bit_condition>"
4214  "vfmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4215  [(set_attr "type" "ssemuladd")
4216   (set_attr "mode" "<MODE>")])
4217
4218(define_insn "*fma_fnmadd_<mode>"
4219  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4220	(fma:FMAMODE
4221	  (neg:FMAMODE
4222	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4223	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
4224	  (match_operand:FMAMODE   3 "nonimmediate_operand" "v,vm,0,xm,x")))]
4225  "TARGET_FMA || TARGET_FMA4"
4226  "@
4227   vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4228   vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4229   vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
4230   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
4231   vfnmadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
4232  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
4233   (set_attr "type" "ssemuladd")
4234   (set_attr "mode" "<MODE>")])
4235
4236(define_expand "<avx512>_fnmadd_<mode>_maskz<round_expand_name>"
4237  [(match_operand:VF_AVX512VL 0 "register_operand")
4238   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
4239   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
4240   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
4241   (match_operand:<avx512fmaskmode> 4 "register_operand")]
4242  "TARGET_AVX512F && <round_mode512bit_condition>"
4243{
4244  emit_insn (gen_fma_fnmadd_<mode>_maskz_1<round_expand_name> (
4245    operands[0], operands[1], operands[2], operands[3],
4246    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
4247  DONE;
4248})
4249
4250(define_insn "<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name><round_name>"
4251  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
4252	(fma:VF_SF_AVX512VL
4253	  (neg:VF_SF_AVX512VL
4254	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
4255	  (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
4256	  (match_operand:VF_SF_AVX512VL   3 "<round_nimm_predicate>" "v,<round_constraint>,0")))]
4257  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
4258  "@
4259   vfnmadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
4260   vfnmadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
4261   vfnmadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
4262  [(set_attr "type" "ssemuladd")
4263   (set_attr "mode" "<MODE>")])
4264
4265(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_1"
4266  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
4267	(fma:VF_AVX512
4268	  (neg:VF_AVX512
4269	    (match_operand:VF_AVX512 1 "register_operand" "%0"))
4270	  (match_operand:VF_AVX512 2 "register_operand" "v")
4271	  (vec_duplicate:VF_AVX512
4272	    (match_operand:<ssescalarmode> 3 "memory_operand" "m"))))]
4273  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4274  "vfnmadd213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
4275  [(set_attr "type" "ssemuladd")
4276   (set_attr "mode" "<MODE>")])
4277
4278(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_2"
4279  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4280	(fma:VF_AVX512
4281	  (neg:VF_AVX512
4282	    (vec_duplicate:VF_AVX512
4283	      (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
4284	  (match_operand:VF_AVX512 2 "register_operand" "0,v")
4285	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4286  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4287  "@
4288   vfnmadd132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
4289   vfnmadd231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
4290  [(set_attr "type" "ssemuladd")
4291   (set_attr "mode" "<MODE>")])
4292
4293(define_insn "*<sd_mask_codefor>fma_fnmadd_<mode><sd_maskz_name>_bcst_3"
4294  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
4295	(fma:VF_AVX512
4296	  (neg:VF_AVX512
4297	    (match_operand:VF_AVX512 1 "register_operand" "0,v"))
4298	  (vec_duplicate:VF_AVX512
4299	    (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
4300	  (match_operand:VF_AVX512 3 "register_operand" "v,0")))]
4301  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
4302  "@
4303   vfnmadd132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
4304   vfnmadd231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
4305  [(set_attr "type" "ssemuladd")
4306   (set_attr "mode" "<MODE>")])
4307
4308(define_insn "<avx512>_fnmadd_<mode>_mask<round_name>"
4309  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
4310	(vec_merge:VF_AVX512VL
4311	  (fma:VF_AVX512VL
4312	    (neg:VF_AVX512VL
4313	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
4314	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
4315	    (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
4316	  (match_dup 1)
4317	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
4318  "TARGET_AVX512F && <round_mode512bit_condition>"
4319  "@
4320   vfnmadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
4321   vfnmadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
4322  [(set_attr "type" "ssemuladd")
4323   (set_attr "mode" "<MODE>")])
4324
4325(define_insn "<avx512>_fnmadd_<mode>_mask3<round_name>"
4326  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
4327	(vec_merge:VF_AVX512VL
4328	  (fma:VF_AVX512VL
4329	    (neg:VF_AVX512VL
4330	      (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
4331	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
4332	    (match_operand:VF_AVX512VL 3 "register_operand" "0"))
4333	  (match_dup 3)
4334	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
4335  "TARGET_AVX512F && <round_mode512bit_condition>"
4336  "vfnmadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
4337  [(set_attr "type" "ssemuladd")
4338   (set_attr "mode" "<MODE>")])
4339
4340(define_insn "*fma_fnmsub_<mode>"
4341  [(set (match_operand:FMAMODE 0 "register_operand" "=v,v,v,x,x")
4342	(fma:FMAMODE
4343	  (neg:FMAMODE
4344	    (match_operand:FMAMODE 1 "nonimmediate_operand" "%0,0,v,x,x"))
4345	  (match_operand:FMAMODE   2 "nonimmediate_operand" "vm,v,vm,x,m")
4346	  (neg:FMAMODE
4347	    (match_operand:FMAMODE 3 "nonimmediate_operand" "v,vm,0,xm,x"))))]
4348  "TARGET_FMA || TARGET_FMA4"
4349  "@
4350   vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
4351   vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
4352   vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfnmsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_expand "<avx512>_fnmsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F && <round_mode512bit_condition>"
{
  emit_insn (gen_fma_fnmsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})

(define_insn "<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(fma:VF_SF_AVX512VL
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v"))
	  (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
	  (neg:VF_SF_AVX512VL
	    (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfnmsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfnmsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_1"
  [(set (match_operand:VF_AVX512 0 "register_operand" "=v")
	(fma:VF_AVX512
	  (neg:VF_AVX512
	    (match_operand:VF_AVX512 1 "register_operand" "%0"))
	  (match_operand:VF_AVX512 2 "register_operand" "v")
	  (neg:VF_AVX512
	    (vec_duplicate:VF_AVX512
	      (match_operand:<ssescalarmode> 3 "memory_operand" "m")))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
  "vfnmsub213<ssemodesuffix>\t{%3<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<avx512bcst>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_2"
  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
	(fma:VF_AVX512
	  (neg:VF_AVX512
	    (vec_duplicate:VF_AVX512
	      (match_operand:<ssescalarmode> 1 "memory_operand" "m,m")))
	  (match_operand:VF_AVX512 2 "register_operand" "0,v")
	  (neg:VF_AVX512
	    (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{%1<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %1<avx512bcst>}
   vfnmsub231<ssemodesuffix>\t{%1<avx512bcst>, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %1<avx512bcst>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*<sd_mask_codefor>fma_fnmsub_<mode><sd_maskz_name>_bcst_3"
  [(set (match_operand:VF_AVX512 0 "register_operand" "=v,v")
	(fma:VF_AVX512
	  (neg:VF_AVX512
	    (match_operand:VF_AVX512 1 "register_operand" "0,v"))
	  (vec_duplicate:VF_AVX512
	    (match_operand:<ssescalarmode> 2 "memory_operand" "m,m"))
	  (neg:VF_AVX512
	    (match_operand:VF_AVX512 3 "register_operand" "v,0"))))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{%2<avx512bcst>, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<avx512bcst>}
   vfnmsub231<ssemodesuffix>\t{%2<avx512bcst>, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<avx512bcst>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 1 "register_operand" "0,0"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F && <round_mode512bit_condition>"
  "@
   vfnmsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfnmsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (fma:VF_AVX512VL
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 1 "<round_nimm_predicate>" "%v"))
	    (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	    (neg:VF_AVX512VL
	      (match_operand:VF_AVX512VL 3 "register_operand" "0")))
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfnmsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA parallel floating point multiply addsub and subadd operations.

;; It would be possible to represent these without the UNSPEC as
;;
;; (vec_merge
;;   (fma op1 op2 op3)
;;   (fma op1 op2 (neg op3))
;;   (merge-const))
;;
;; But this doesn't seem useful in practice.
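;; For illustration only (the patterns below keep the UNSPEC form): for
;; V4SFmode the merge constant would be (const_int 10), selecting the plain
;; FMA in the odd elements, since vfmaddsub adds in the odd elements and
;; subtracts in the even ones.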

(define_expand "fmaddsub_<mode>"
  [(set (match_operand:VF 0 "register_operand")
	(unspec:VF
	  [(match_operand:VF 1 "nonimmediate_operand")
	   (match_operand:VF 2 "nonimmediate_operand")
	   (match_operand:VF 3 "nonimmediate_operand")]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F")

(define_expand "<avx512>_fmaddsub_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_AVX512VL 0 "register_operand")
   (match_operand:VF_AVX512VL 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_AVX512VL 3 "<round_expand_nimm_predicate>")
   (match_operand:<avx512fmaskmode> 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_fma_fmaddsub_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})

(define_insn "*fma_fmaddsub_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,0,v,x,x")
	   (match_operand:VF_128_256 2 "nonimmediate_operand" "vm,v,vm,x,m")
	   (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x")]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<sd_mask_codefor>fma_fmaddsub_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(unspec:VF_SF_AVX512VL
	  [(match_operand:VF_SF_AVX512VL 1 "<round_nimm_predicate>" "%0,0,v")
	   (match_operand:VF_SF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
	   (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0")]
	  UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmaddsub213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmaddsub231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fmaddsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	     (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")]
	    UNSPEC_FMADDSUB)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "@
   vfmaddsub132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmaddsub213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fmaddsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	     (match_operand:VF_AVX512VL 3 "register_operand" "0")]
	    UNSPEC_FMADDSUB)
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfmaddsub231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma_fmsubadd_<mode>"
  [(set (match_operand:VF_128_256 0 "register_operand" "=v,v,v,x,x")
	(unspec:VF_128_256
	  [(match_operand:VF_128_256   1 "nonimmediate_operand" "%0,0,v,x,x")
	   (match_operand:VF_128_256   2 "nonimmediate_operand" "vm,v,vm,x,m")
	   (neg:VF_128_256
	     (match_operand:VF_128_256 3 "nonimmediate_operand" "v,vm,0,xm,x"))]
	  UNSPEC_FMADDSUB))]
  "TARGET_FMA || TARGET_FMA4"
  "@
   vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
   vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
   vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
   vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
  [(set_attr "isa" "fma,fma,fma,fma4,fma4")
   (set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<sd_mask_codefor>fma_fmsubadd_<mode><sd_maskz_name><round_name>"
  [(set (match_operand:VF_SF_AVX512VL 0 "register_operand" "=v,v,v")
	(unspec:VF_SF_AVX512VL
	  [(match_operand:VF_SF_AVX512VL   1 "<round_nimm_predicate>" "%0,0,v")
	   (match_operand:VF_SF_AVX512VL   2 "<round_nimm_predicate>" "<round_constraint>,v,<round_constraint>")
	   (neg:VF_SF_AVX512VL
	     (match_operand:VF_SF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>,0"))]
	  UNSPEC_FMADDSUB))]
  "TARGET_AVX512F && <sd_mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_sd_mask_op4>%2, %3, %0<sd_mask_op4>|%0<sd_mask_op4>, %3, %2<round_sd_mask_op4>}
   vfmsubadd213<ssemodesuffix>\t{<round_sd_mask_op4>%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3<round_sd_mask_op4>}
   vfmsubadd231<ssemodesuffix>\t{<round_sd_mask_op4>%2, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %2<round_sd_mask_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fmsubadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v,v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "0,0")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
	     (neg:VF_AVX512VL
	       (match_operand:VF_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
	    UNSPEC_FMADDSUB)
	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk,Yk")))]
  "TARGET_AVX512F"
  "@
   vfmsubadd132<ssemodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %3, %2<round_op5>}
   vfmsubadd213<ssemodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %2, %3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "<avx512>_fmsubadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
	(vec_merge:VF_AVX512VL
	  (unspec:VF_AVX512VL
	    [(match_operand:VF_AVX512VL 1 "register_operand" "v")
	     (match_operand:VF_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
	     (neg:VF_AVX512VL
	       (match_operand:VF_AVX512VL 3 "register_operand" "0"))]
	    UNSPEC_FMADDSUB)
	  (match_dup 3)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512F"
  "vfmsubadd231<ssemodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA3 floating point scalar intrinsics.  These merge the result with the
;; high-order elements of the destination register.
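;; For example (illustration only), _mm_fmadd_ss (a, b, c) maps here for
;; V4SFmode: element 0 of the result is a[0] * b[0] + c[0], while elements
;; 1..3 are taken unchanged from operand 1, which is what the vec_merge
;; with (match_dup 1) and (const_int 1) below expresses.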

(define_expand "fmai_vmfmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")

(define_expand "fmai_vmfmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")

(define_expand "fmai_vmfnmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
	    (match_operand:VF_128 1 "register_operand")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")

(define_expand "fmai_vmfnmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>"))
	    (match_operand:VF_128 1 "register_operand")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA")

(define_insn "*fmai_fmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "register_operand" "0,0")
	    (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
	    (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fmai_fmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128   1 "register_operand" "0,0")
	    (match_operand:VF_128   2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fmai_fnmadd_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	    (match_operand:VF_128   1 "register_operand" "0,0")
	    (match_operand:VF_128   3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fmai_fnmsub_<mode><round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
        (vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	    (match_operand:VF_128   1 "register_operand" "0,0")
	    (neg:VF_128
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_FMA || TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op4>%2, %3, %0|%0, %<iptr>3, %<iptr>2<round_op4>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op4>%3, %2, %0|%0, %<iptr>2, %<iptr>3<round_op4>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vmfmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vmfmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
	      (match_operand:VF_128 3 "register_operand" "0"))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_expand "avx512f_vmfmadd_<mode>_maskz<round_expand_name>"
  [(match_operand:VF_128 0 "register_operand")
   (match_operand:VF_128 1 "<round_expand_nimm_predicate>")
   (match_operand:VF_128 2 "<round_expand_nimm_predicate>")
   (match_operand:VF_128 3 "<round_expand_nimm_predicate>")
   (match_operand:QI 4 "register_operand")]
  "TARGET_AVX512F"
{
  emit_insn (gen_avx512f_vmfmadd_<mode>_maskz_1<round_expand_name> (
    operands[0], operands[1], operands[2], operands[3],
    CONST0_RTX (<MODE>mode), operands[4]<round_expand_operand>));
  DONE;
})

(define_insn "avx512f_vmfmadd_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vmfmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
	      (neg:VF_128
		(match_operand:VF_128 3 "register_operand" "0")))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfmsub_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmadd_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmadd_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (match_operand:VF_128 3 "register_operand" "0"))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfnmadd231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmadd_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmadd132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfnmadd213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmsub_<mode>_mask<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_dup 1)
	    (match_operand:QI 4 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op5>%2, %3, %0%{%4%}|%0%{%4%}, %<iptr>3, %<iptr>2<round_op5>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op5>%3, %2, %0%{%4%}|%0%{%4%}, %<iptr>2, %<iptr>3<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmsub_<mode>_mask3<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
	      (match_operand:VF_128 1 "<round_nimm_scalar_predicate>" "%v")
	      (neg:VF_128
		(match_operand:VF_128 3 "register_operand" "0")))
	    (match_dup 3)
	    (match_operand:QI 4 "register_operand" "Yk"))
	  (match_dup 3)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "vfnmsub231<ssescalarmodesuffix>\t{<round_op5>%2, %1, %0%{%4%}|%0%{%4%}, %1, %<iptr>2<round_op5>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*avx512f_vmfnmsub_<mode>_maskz_1<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v,v")
	(vec_merge:VF_128
	  (vec_merge:VF_128
	    (fma:VF_128
	      (neg:VF_128
		(match_operand:VF_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
	      (match_operand:VF_128 1 "register_operand" "0,0")
	      (neg:VF_128
		(match_operand:VF_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
	    (match_operand:VF_128 4 "const0_operand" "C,C")
	    (match_operand:QI 5 "register_operand" "Yk,Yk"))
	  (match_dup 1)
	  (const_int 1)))]
  "TARGET_AVX512F"
  "@
   vfnmsub132<ssescalarmodesuffix>\t{<round_op6>%2, %3, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>3, %<iptr>2<round_op6>}
   vfnmsub213<ssescalarmodesuffix>\t{<round_op6>%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %<iptr>2, %<iptr>3<round_op6>}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;; FMA4 floating point scalar intrinsics.  These write the
;; entire destination register, with the high-order elements zeroed.
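;; For example (illustration only), the FMA4 intrinsic _mm_macc_ss maps to
;; this form: element 0 of the result is the fused multiply-add, and
;; elements 1..3 are zeroed, which is why the vec_merge below merges with a
;; const0 vector rather than with operand 1.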

(define_expand "fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand")
	    (match_operand:VF_128 2 "nonimmediate_operand")
	    (match_operand:VF_128 3 "nonimmediate_operand"))
	  (match_dup 4)
	  (const_int 1)))]
  "TARGET_FMA4"
  "operands[4] = CONST0_RTX (<MODE>mode);")

(define_insn "*fma4i_vmfmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128 3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (match_operand:VF_128 1 "nonimmediate_operand" "%x,x")
	    (match_operand:VF_128 2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128 3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfnmadd_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
	    (match_operand:VF_128   3 "nonimmediate_operand" "xm,x"))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmadd<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

(define_insn "*fma4i_vmfnmsub_<mode>"
  [(set (match_operand:VF_128 0 "register_operand" "=x,x")
	(vec_merge:VF_128
	  (fma:VF_128
	    (neg:VF_128
	      (match_operand:VF_128 1 "nonimmediate_operand" "%x,x"))
	    (match_operand:VF_128   2 "nonimmediate_operand" " x,m")
	    (neg:VF_128
	      (match_operand:VF_128   3 "nonimmediate_operand" "xm,x")))
	  (match_operand:VF_128 4 "const0_operand")
	  (const_int 1)))]
  "TARGET_FMA4"
  "vfnmsub<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %<iptr>3}"
  [(set_attr "type" "ssemuladd")
   (set_attr "mode" "<MODE>")])

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_insn_and_split "sse_cvtpi2ps"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,Yv")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:V2SF (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,Yv")))
	  (match_operand:V4SF 1 "register_operand" "0,0,Yv")
	  (const_int 3)))
   (clobber (match_scratch:V4SF 3 "=X,x,Yv"))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvtpi2ps\t{%2, %0|%0, %2}
   #
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[2])"
  [(const_int 0)]
{
  rtx op2 = lowpart_subreg (V4SImode, operands[2],
			    GET_MODE (operands[2]));
  /* Generate SSE2 cvtdq2ps.  */
  emit_insn (gen_floatv4siv4sf2 (operands[3], op2));

  /* Merge operands[3] with operands[0].  */
  rtx mask, op1;
  if (TARGET_AVX)
    {
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (0), GEN_INT (1),
					  GEN_INT (6), GEN_INT (7)));
      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
      emit_insn (gen_rtx_SET (operands[0], op2));
    }
  else
    {
      /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
					  GEN_INT (4), GEN_INT (5)));
      op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
      op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
      emit_insn (gen_rtx_SET (operands[0], op2));

      /* Swap bits 0:63 with bits 64:127.  */
      mask = gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (4, GEN_INT (2), GEN_INT (3),
					  GEN_INT (0), GEN_INT (1)));
      rtx dest = lowpart_subreg (V4SImode, operands[0],
				 GET_MODE (operands[0]));
      op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
      emit_insn (gen_rtx_SET (dest, op1));
    }
  DONE;
}
  [(set_attr "mmx_isa" "native,sse_noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "mode" "V4SF")])

(define_insn_and_split "sse_cvtps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
	(vec_select:V2SI
	  (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")]
		       UNSPEC_FIX_NOTRUNC)
	  (parallel [(const_int 0) (const_int 1)])))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvtps2pi\t{%1, %0|%0, %q1}
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
			    GET_MODE (operands[1]));
  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
			    GET_MODE (operands[0]));

  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
  emit_insn (gen_rtx_SET (tmp, op1));

  rtx dest = lowpart_subreg (V4SImode, operands[0],
			    GET_MODE (operands[0]));
  emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp));
  DONE;
}
  [(set_attr "isa" "*,sse2")
   (set_attr "mmx_isa" "native,*")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "mode" "DI")])

(define_insn_and_split "sse_cvttps2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=y,Yv")
	(vec_select:V2SI
	  (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm"))
	  (parallel [(const_int 0) (const_int 1)])))]
  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE"
  "@
   cvttps2pi\t{%1, %0|%0, %q1}
   #"
  "TARGET_SSE2 && reload_completed
   && SSE_REG_P (operands[0])"
  [(const_int 0)]
{
  rtx op1 = lowpart_subreg (V2SFmode, operands[1],
			    GET_MODE (operands[1]));
  rtx tmp = lowpart_subreg (V4SFmode, operands[0],
			    GET_MODE (operands[0]));

  op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode));
  emit_insn (gen_rtx_SET (tmp, op1));

  rtx dest = lowpart_subreg (V4SImode, operands[0],
			    GET_MODE (operands[0]));
  emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp));
  DONE;
}
  [(set_attr "isa" "*,sse2")
   (set_attr "mmx_isa" "native,*")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "mmx,*")
   (set_attr "prefix_rep" "0")
   (set_attr "mode" "SF")])

(define_insn "sse_cvtsi2ss<rex64namesuffix><round_name>"
  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
	(vec_merge:V4SF
	  (vec_duplicate:V4SF
	    (float:SF (match_operand:SWI48 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
	  (match_operand:V4SF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE"
  "@
   cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
   cvtsi2ss<rex64suffix>\t{%2, %0|%0, %2}
   vcvtsi2ss<rex64suffix>\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "vector,double,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "znver1_decode" "double,double,double")
   (set (attr "length_vex")
	(if_then_else
	  (and (match_test "<MODE>mode == DImode")
	       (eq_attr "alternative" "2"))
	  (const_string "4")
	  (const_string "*")))
   (set (attr "prefix_rex")
	(if_then_else
	  (and (match_test "<MODE>mode == DImode")
	       (eq_attr "alternative" "0,1"))
	  (const_string "1")
	  (const_string "*")))
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "SF")])

(define_insn "sse_cvtss2si<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si<rex64suffix>\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "sse_cvtss2si<rex64namesuffix>_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48 [(match_operand:SF 1 "nonimmediate_operand" "v,m")]
		      UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE"
  "%vcvtss2si<rex64suffix>\t{%1, %0|%0, %1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "sse_cvttss2si<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(fix:SWI48
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE"
  "%vcvttss2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "cvtusi2<ssescalarmodesuffix>32<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
	    (unsigned_float:<ssescalarmode>
	      (match_operand:SI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F && <round_modev4sf_condition>"
  "vcvtusi2<ssescalarmodesuffix>{l}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])

(define_insn "cvtusi2<ssescalarmodesuffix>64<round_name>"
  [(set (match_operand:VF_128 0 "register_operand" "=v")
	(vec_merge:VF_128
	  (vec_duplicate:VF_128
	    (unsigned_float:<ssescalarmode>
	      (match_operand:DI 2 "<round_nimm_scalar_predicate>" "<round_constraint3>")))
	  (match_operand:VF_128 1 "register_operand" "v")
	  (const_int 1)))]
  "TARGET_AVX512F && TARGET_64BIT"
  "vcvtusi2<ssescalarmodesuffix>{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<ssescalarmode>")])

(define_insn "float<sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1 0 "register_operand" "=x,v")
	(float:VF1
	  (match_operand:<sseintvecmode> 1 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
  "TARGET_SSE2 && <mask_mode512bit_condition> && <round_mode512bit_condition>"
  "@
   cvtdq2ps\t{%1, %0|%0, %1}
   vcvtdq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "isa" "noavx,avx")
   (set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "ufloat<sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
	(unsigned_float:VF1_AVX512VL
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512F"
  "vcvtudq2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_expand "floatuns<sseintvecmodelower><mode>2"
  [(match_operand:VF1 0 "register_operand")
   (match_operand:<sseintvecmode> 1 "register_operand")]
  "TARGET_SSE2 && (<MODE>mode == V4SFmode || TARGET_AVX2)"
{
  if (<MODE>mode == V16SFmode)
    emit_insn (gen_ufloatv16siv16sf2 (operands[0], operands[1]));
  else
    if (TARGET_AVX512VL)
      {
	if (<MODE>mode == V4SFmode)
	  emit_insn (gen_ufloatv4siv4sf2 (operands[0], operands[1]));
	else
	  emit_insn (gen_ufloatv8siv8sf2 (operands[0], operands[1]));
      }
    else
      ix86_expand_vector_convert_uns_vsivsf (operands[0], operands[1]);

  DONE;
})


;; For <sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode> insn pattern
(define_mode_attr sf2simodelower
  [(V16SI "v16sf") (V8SI "v8sf") (V4SI "v4sf")])

(define_insn "<sse2_avx_avx512f>_fix_notrunc<sf2simodelower><mode><mask_name>"
  [(set (match_operand:VI4_AVX 0 "register_operand" "=v")
	(unspec:VI4_AVX
	  [(match_operand:<ssePSmode> 1 "vector_operand" "vBm")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2 && <mask_mode512bit_condition>"
  "%vcvtps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(unspec:V16SI
	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtps2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

(define_insn "<mask_codefor><avx512>_ufix_notrunc<sf2simodelower><mode><mask_name><round_name>"
  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
	(unspec:VI4_AVX512VL
	  [(match_operand:<ssePSmode> 1 "nonimmediate_operand" "<round_constraint>")]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtps2udq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<mask_codefor>avx512dq_cvtps2qq<mode><mask_name><round_name>"
  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtps2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<mask_codefor>avx512dq_cvtps2qqv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec:V2DI
	  [(vec_select:V2SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	     (parallel [(const_int 0) (const_int 1)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvtps2qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "<mask_codefor>avx512dq_cvtps2uqq<mode><mask_name><round_name>"
  [(set (match_operand:VI8_256_512 0 "register_operand" "=v")
	(unspec:VI8_256_512 [(match_operand:<ssePSmode2> 1 "nonimmediate_operand" "<round_constraint>")]
		     UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && <round_mode512bit_condition>"
  "vcvtps2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<sseinsnmode>")])

(define_insn "<mask_codefor>avx512dq_cvtps2uqqv2di<mask_name>"
  [(set (match_operand:V2DI 0 "register_operand" "=v")
	(unspec:V2DI
	  [(vec_select:V2SF
	     (match_operand:V4SF 1 "nonimmediate_operand" "vm")
	     (parallel [(const_int 0) (const_int 1)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512DQ && TARGET_AVX512VL"
  "vcvtps2uqq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "TI")])

(define_insn "<fixsuffix>fix_truncv16sfv16si2<mask_name><round_saeonly_name>"
  [(set (match_operand:V16SI 0 "register_operand" "=v")
	(any_fix:V16SI
	  (match_operand:V16SF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
  "TARGET_AVX512F"
  "vcvttps2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])

(define_insn "fix_truncv8sfv8si2<mask_name>"
  [(set (match_operand:V8SI 0 "register_operand" "=v")
	(fix:V8SI (match_operand:V8SF 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_avx512vl_condition>"
  "vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "<mask_prefix>")
   (set_attr "mode" "OI")])

(define_insn "fix_truncv4sfv4si2<mask_name>"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
	(fix:V4SI (match_operand:V4SF 1 "vector_operand" "vBm")))]
  "TARGET_SSE2 && <mask_avx512vl_condition>"
  "%vcvttps2dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set (attr "prefix_rep")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "1")))
   (set (attr "prefix_data16")
     (if_then_else
       (match_test "TARGET_AVX")
     (const_string "*")
     (const_string "0")))
   (set_attr "prefix_data16" "0")
   (set_attr "prefix" "<mask_prefix2>")
   (set_attr "mode" "TI")])

(define_expand "fixuns_trunc<mode><sseintvecmodelower>2"
  [(match_operand:<sseintvecmode> 0 "register_operand")
   (match_operand:VF1 1 "register_operand")]
  "TARGET_SSE2"
{
  if (<MODE>mode == V16SFmode)
    emit_insn (gen_ufix_truncv16sfv16si2 (operands[0],
					  operands[1]));
  else
    {
      rtx tmp[3];
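      /* No unsigned vector conversion exists before AVX512F, so this path
	 (roughly) compensates: the input is adjusted so that the signed
	 truncating conversion below is valid, and tmp[2] receives a mask
	 that the final XOR applies to recover the unsigned result.  */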
      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
      tmp[1] = gen_reg_rtx (<sseintvecmode>mode);
      emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (tmp[1], tmp[0]));
      emit_insn (gen_xor<sseintvecmodelower>3 (operands[0], tmp[1], tmp[2]));
    }
  DONE;
})

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel double-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(define_insn "sse2_cvtpi2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=v,x")
	(float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
  "TARGET_SSE2"
  "@
   %vcvtdq2pd\t{%1, %0|%0, %1}
   cvtpi2pd\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex,*")
   (set_attr "mode" "V2DF")])

(define_insn "sse2_cvtpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
	(unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm,xBm")]
		     UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "@
   * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, %0|%0, %1}\";
   cvtpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex,*")
   (set_attr "mode" "TI")])

(define_insn "sse2_cvttpd2pi"
  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
	(fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm,xBm")))]
  "TARGET_SSE2"
  "@
   * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvttpd2dq\t{%1, %0|%0, %1}\";
   cvttpd2pi\t{%1, %0|%0, %1}"
  [(set_attr "mmx_isa" "*,native")
   (set_attr "type" "ssecvt")
   (set_attr "unit" "*,mmx")
   (set_attr "amdfam10_decode" "double")
   (set_attr "athlon_decode" "vector")
   (set_attr "bdver1_decode" "double")
   (set_attr "prefix_data16" "*,1")
   (set_attr "prefix" "maybe_vex,*")
   (set_attr "mode" "TI")])

(define_insn "sse2_cvtsi2sd"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:SI 2 "nonimmediate_operand" "r,m,rm")))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2"
  "@
   cvtsi2sd{l}\t{%2, %0|%0, %2}
   cvtsi2sd{l}\t{%2, %0|%0, %2}
   vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "znver1_decode" "double,double,double")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "DF")])

(define_insn "sse2_cvtsi2sdq<round_name>"
  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
	(vec_merge:V2DF
	  (vec_duplicate:V2DF
	    (float:DF (match_operand:DI 2 "<round_nimm_scalar_predicate>" "r,m,<round_constraint3>")))
	  (match_operand:V2DF 1 "register_operand" "0,0,v")
	  (const_int 1)))]
  "TARGET_SSE2 && TARGET_64BIT"
  "@
   cvtsi2sd{q}\t{%2, %0|%0, %2}
   cvtsi2sd{q}\t{%2, %0|%0, %2}
   vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
  [(set_attr "isa" "noavx,noavx,avx")
   (set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,direct,*")
   (set_attr "amdfam10_decode" "vector,double,*")
   (set_attr "bdver1_decode" "double,direct,*")
   (set_attr "length_vex" "*,*,4")
   (set_attr "prefix_rex" "1,1,*")
   (set_attr "prefix" "orig,orig,maybe_evex")
   (set_attr "mode" "DF")])

(define_insn "avx512f_vcvtss2usi<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(vec_select:SF
	     (match_operand:V4SF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtss2usi\t{<round_op2>%1, %0|%0, %k1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vcvttss2usi<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unsigned_fix:SWI48
	  (vec_select:SF
	    (match_operand:V4SF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttss2usi\t{<round_saeonly_op2>%1, %0|%0, %k1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vcvtsd2usi<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unspec:SWI48
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_predicate>" "<round_constraint>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
  "TARGET_AVX512F"
  "vcvtsd2usi\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "avx512f_vcvttsd2usi<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r")
	(unsigned_fix:SWI48
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
	    (parallel [(const_int 0)]))))]
  "TARGET_AVX512F"
  "vcvttsd2usi\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

(define_insn "sse2_cvtsd2si<rex64namesuffix><round_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48
	  [(vec_select:DF
	     (match_operand:V2DF 1 "<round_nimm_scalar_predicate>" "v,<round_constraint2>")
	     (parallel [(const_int 0)]))]
	  UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si<rex64suffix>\t{<round_op2>%1, %0|%0, %q1<round_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "sse2_cvtsd2si<rex64namesuffix>_2"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(unspec:SWI48 [(match_operand:DF 1 "nonimmediate_operand" "v,m")]
		      UNSPEC_FIX_NOTRUNC))]
  "TARGET_SSE2"
  "%vcvtsd2si<rex64suffix>\t{%1, %0|%0, %q1}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "sse2_cvttsd2si<rex64namesuffix><round_saeonly_name>"
  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
	(fix:SWI48
	  (vec_select:DF
	    (match_operand:V2DF 1 "<round_saeonly_nimm_scalar_predicate>" "v,<round_saeonly_constraint2>")
	    (parallel [(const_int 0)]))))]
  "TARGET_SSE2"
  "%vcvttsd2si<rex64suffix>\t{<round_saeonly_op2>%1, %0|%0, %q1<round_saeonly_op2>}"
  [(set_attr "type" "sseicvt")
   (set_attr "athlon_decode" "double,vector")
   (set_attr "amdfam10_decode" "double,double")
   (set_attr "bdver1_decode" "double,double")
   (set_attr "btver2_decode" "double,double")
   (set_attr "prefix_rep" "1")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

;; For float<si2dfmode><mode>2 insn pattern
(define_mode_attr si2dfmode
  [(V8DF "V8SI") (V4DF "V4SI")])
(define_mode_attr si2dfmodelower
  [(V8DF "v8si") (V4DF "v4si")])

(define_insn "float<si2dfmodelower><mode>2<mask_name>"
  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
	(float:VF2_512_256 (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
  "TARGET_AVX && <mask_mode512bit_condition>"
  "vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "maybe_vex")
   (set_attr "mode" "<MODE>")])

(define_insn "float<floatunssuffix><sseintvecmodelower><mode>2<mask_name><round_name>"
  [(set (match_operand:VF2_AVX512VL 0 "register_operand" "=v")
	(any_float:VF2_AVX512VL
	  (match_operand:<sseintvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
  "TARGET_AVX512DQ"
  "vcvt<floatsuffix>qq2pd\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
  [(set_attr "type" "ssecvt")
   (set_attr "prefix" "evex")
   (set_attr "mode" "<MODE>")])

;; For float<floatunssuffix><sselongvecmodelower><mode>2 insn patterns
5729(define_mode_attr qq2pssuff
5730  [(V8SF "") (V4SF "{y}")])
5731
5732(define_mode_attr sselongvecmode
5733  [(V8SF "V8DI") (V4SF  "V4DI")])
5734
5735(define_mode_attr sselongvecmodelower
5736  [(V8SF "v8di") (V4SF  "v4di")])
5737
5738(define_mode_attr sseintvecmode3
5739  [(V8SF "XI") (V4SF "OI")
5740   (V8DF "OI") (V4DF "TI")])
5741
5742(define_insn "float<floatunssuffix><sselongvecmodelower><mode>2<mask_name><round_name>"
5743  [(set (match_operand:VF1_128_256VL 0 "register_operand" "=v")
5744	 (any_float:VF1_128_256VL
5745	   (match_operand:<sselongvecmode> 1 "nonimmediate_operand" "<round_constraint>")))]
5746  "TARGET_AVX512DQ && <round_modev8sf_condition>"
5747  "vcvt<floatsuffix>qq2ps<qq2pssuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5748  [(set_attr "type" "ssecvt")
5749   (set_attr "prefix" "evex")
5750   (set_attr "mode" "<MODE>")])
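;; V2DI -> V2SF conversions; the result is a V4SF with the upper two
;; elements cleared.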
5751
5752(define_expand "float<floatunssuffix>v2div2sf2"
5753  [(set (match_operand:V4SF 0 "register_operand" "=v")
5754	(vec_concat:V4SF
5755	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5756	    (match_dup 2)))]
5757  "TARGET_AVX512DQ && TARGET_AVX512VL"
5758  "operands[2] = CONST0_RTX (V2SFmode);")
5759
5760(define_insn "*float<floatunssuffix>v2div2sf2"
5761  [(set (match_operand:V4SF 0 "register_operand" "=v")
5762	(vec_concat:V4SF
5763	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5764	    (match_operand:V2SF 2 "const0_operand" "C")))]
5765  "TARGET_AVX512DQ && TARGET_AVX512VL"
5766  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0|%0, %1}"
5767  [(set_attr "type" "ssecvt")
5768   (set_attr "prefix" "evex")
5769   (set_attr "mode" "V4SF")])
5770
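;; vec_pack<floatprefix>_float_<mode>: convert both DImode-element inputs
;; to SF and concatenate the two half-width results.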
5771(define_mode_attr vpckfloat_concat_mode
5772  [(V8DI "v16sf") (V4DI "v8sf") (V2DI "v8sf")])
5773(define_mode_attr vpckfloat_temp_mode
5774  [(V8DI "V8SF") (V4DI "V4SF") (V2DI "V4SF")])
5775(define_mode_attr vpckfloat_op_mode
5776  [(V8DI "v8sf") (V4DI "v4sf") (V2DI "v2sf")])
5777
5778(define_expand "vec_pack<floatprefix>_float_<mode>"
5779  [(match_operand:<ssePSmode> 0 "register_operand")
5780   (any_float:<ssePSmode>
5781     (match_operand:VI8_AVX512VL 1 "register_operand"))
5782   (match_operand:VI8_AVX512VL 2 "register_operand")]
5783  "TARGET_AVX512DQ"
5784{
5785  rtx r1 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5786  rtx r2 = gen_reg_rtx (<vpckfloat_temp_mode>mode);
5787  rtx (*gen) (rtx, rtx) = gen_float<floatunssuffix><mode><vpckfloat_op_mode>2;
5788  emit_insn (gen (r1, operands[1]));
5789  emit_insn (gen (r2, operands[2]));
5790  if (<MODE>mode == V2DImode)
5791    emit_insn (gen_sse_movlhps (operands[0], r1, r2));
5792  else
5793    emit_insn (gen_avx_vec_concat<vpckfloat_concat_mode> (operands[0],
5794							  r1, r2));
5795  DONE;
5796})
5797
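;; Masked forms of the V2DI -> V2SF conversion: the _mask pattern merges
;; with the low half of operand 2 under the mask, *_mask_1 is the
;; zero-masked variant.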
5798(define_expand "float<floatunssuffix>v2div2sf2_mask"
5799  [(set (match_operand:V4SF 0 "register_operand" "=v")
5800    (vec_concat:V4SF
5801        (vec_merge:V2SF
5802	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5803            (vec_select:V2SF
5804                (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5805                (parallel [(const_int 0) (const_int 1)]))
5806            (match_operand:QI 3 "register_operand" "Yk"))
5807	    (match_dup 4)))]
5808  "TARGET_AVX512DQ && TARGET_AVX512VL"
5809  "operands[4] = CONST0_RTX (V2SFmode);")
5810
5811(define_insn "*float<floatunssuffix>v2div2sf2_mask"
5812  [(set (match_operand:V4SF 0 "register_operand" "=v")
5813    (vec_concat:V4SF
5814        (vec_merge:V2SF
5815	    (any_float:V2SF (match_operand:V2DI 1 "nonimmediate_operand" "vm"))
5816            (vec_select:V2SF
5817                (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
5818                (parallel [(const_int 0) (const_int 1)]))
5819            (match_operand:QI 3 "register_operand" "Yk"))
5820	    (match_operand:V2SF 4 "const0_operand" "C")))]
5821  "TARGET_AVX512DQ && TARGET_AVX512VL"
5822  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5823  [(set_attr "type" "ssecvt")
5824   (set_attr "prefix" "evex")
5825   (set_attr "mode" "V4SF")])
5826
5827(define_insn "*float<floatunssuffix>v2div2sf2_mask_1"
5828  [(set (match_operand:V4SF 0 "register_operand" "=v")
5829    (vec_concat:V4SF
5830	(vec_merge:V2SF
5831		(any_float:V2SF (match_operand:V2DI 1
5832				  "nonimmediate_operand" "vm"))
5833	    (match_operand:V2SF 3 "const0_operand" "C")
5834	    (match_operand:QI 2 "register_operand" "Yk"))
5835	    (match_operand:V2SF 4 "const0_operand" "C")))]
5836  "TARGET_AVX512DQ && TARGET_AVX512VL"
5837  "vcvt<floatsuffix>qq2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5838  [(set_attr "type" "ssecvt")
5839   (set_attr "prefix" "evex")
5840   (set_attr "mode" "V4SF")])
5841
5842(define_insn "ufloat<si2dfmodelower><mode>2<mask_name>"
5843  [(set (match_operand:VF2_512_256VL 0 "register_operand" "=v")
5844	(unsigned_float:VF2_512_256VL
5845	  (match_operand:<si2dfmode> 1 "nonimmediate_operand" "vm")))]
5846   "TARGET_AVX512F"
5847   "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5848   [(set_attr "type" "ssecvt")
5849    (set_attr "prefix" "evex")
5850    (set_attr "mode" "<MODE>")])
5851
5852(define_insn "ufloatv2siv2df2<mask_name>"
5853  [(set (match_operand:V2DF 0 "register_operand" "=v")
5854	(unsigned_float:V2DF
5855	  (vec_select:V2SI
5856	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5857	    (parallel [(const_int 0) (const_int 1)]))))]
5858  "TARGET_AVX512VL"
5859  "vcvtudq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5860  [(set_attr "type" "ssecvt")
5861   (set_attr "prefix" "evex")
5862   (set_attr "mode" "V2DF")])
5863
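;; Widening int -> DF conversions that use only the low half of the
;; integer source vector.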
5864(define_insn "avx512f_cvtdq2pd512_2"
5865  [(set (match_operand:V8DF 0 "register_operand" "=v")
5866	(float:V8DF
5867	  (vec_select:V8SI
5868	    (match_operand:V16SI 1 "nonimmediate_operand" "vm")
5869	    (parallel [(const_int 0) (const_int 1)
5870		       (const_int 2) (const_int 3)
5871		       (const_int 4) (const_int 5)
5872		       (const_int 6) (const_int 7)]))))]
5873  "TARGET_AVX512F"
5874  "vcvtdq2pd\t{%t1, %0|%0, %t1}"
5875  [(set_attr "type" "ssecvt")
5876   (set_attr "prefix" "evex")
5877   (set_attr "mode" "V8DF")])
5878
5879(define_insn "avx_cvtdq2pd256_2"
5880  [(set (match_operand:V4DF 0 "register_operand" "=v")
5881	(float:V4DF
5882	  (vec_select:V4SI
5883	    (match_operand:V8SI 1 "nonimmediate_operand" "vm")
5884	    (parallel [(const_int 0) (const_int 1)
5885		       (const_int 2) (const_int 3)]))))]
5886  "TARGET_AVX"
5887  "vcvtdq2pd\t{%x1, %0|%0, %x1}"
5888  [(set_attr "type" "ssecvt")
5889   (set_attr "prefix" "maybe_evex")
5890   (set_attr "mode" "V4DF")])
5891
5892(define_insn "sse2_cvtdq2pd<mask_name>"
5893  [(set (match_operand:V2DF 0 "register_operand" "=v")
5894	(float:V2DF
5895	  (vec_select:V2SI
5896	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")
5897	    (parallel [(const_int 0) (const_int 1)]))))]
5898  "TARGET_SSE2 && <mask_avx512vl_condition>"
5899  "%vcvtdq2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
5900  [(set_attr "type" "ssecvt")
5901   (set_attr "prefix" "maybe_vex")
5902   (set_attr "mode" "V2DF")])
5903
5904(define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
5905  [(set (match_operand:V8SI 0 "register_operand" "=v")
5906	(unspec:V8SI
5907	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
5908	  UNSPEC_FIX_NOTRUNC))]
5909  "TARGET_AVX512F"
5910  "vcvtpd2dq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
5911  [(set_attr "type" "ssecvt")
5912   (set_attr "prefix" "evex")
5913   (set_attr "mode" "OI")])
5914
5915(define_insn "avx_cvtpd2dq256<mask_name>"
5916  [(set (match_operand:V4SI 0 "register_operand" "=v")
5917	(unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5918		     UNSPEC_FIX_NOTRUNC))]
5919  "TARGET_AVX && <mask_avx512vl_condition>"
5920  "vcvtpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
5921  [(set_attr "type" "ssecvt")
5922   (set_attr "prefix" "<mask_prefix>")
5923   (set_attr "mode" "OI")])
5924
5925(define_expand "avx_cvtpd2dq256_2"
5926  [(set (match_operand:V8SI 0 "register_operand")
5927	(vec_concat:V8SI
5928	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand")]
5929		       UNSPEC_FIX_NOTRUNC)
5930	  (match_dup 2)))]
5931  "TARGET_AVX"
5932  "operands[2] = CONST0_RTX (V4SImode);")
5933
5934(define_insn "*avx_cvtpd2dq256_2"
5935  [(set (match_operand:V8SI 0 "register_operand" "=v")
5936	(vec_concat:V8SI
5937	  (unspec:V4SI [(match_operand:V4DF 1 "nonimmediate_operand" "vm")]
5938		       UNSPEC_FIX_NOTRUNC)
5939	  (match_operand:V4SI 2 "const0_operand")))]
5940  "TARGET_AVX"
5941  "vcvtpd2dq{y}\t{%1, %x0|%x0, %1}"
5942  [(set_attr "type" "ssecvt")
5943   (set_attr "prefix" "vex")
5944   (set_attr "btver2_decode" "vector")
5945   (set_attr "mode" "OI")])
5946
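;; The 128-bit cvtpd2dq forms produce only two results; the upper half of
;; the destination is zeroed.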
5947(define_insn "sse2_cvtpd2dq"
5948  [(set (match_operand:V4SI 0 "register_operand" "=v")
5949	(vec_concat:V4SI
5950	  (unspec:V2SI [(match_operand:V2DF 1 "vector_operand" "vBm")]
5951		       UNSPEC_FIX_NOTRUNC)
5952	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5953  "TARGET_SSE2"
5954{
5955  if (TARGET_AVX)
5956    return "vcvtpd2dq{x}\t{%1, %0|%0, %1}";
5957  else
5958    return "cvtpd2dq\t{%1, %0|%0, %1}";
5959}
5960  [(set_attr "type" "ssecvt")
5961   (set_attr "prefix_rep" "1")
5962   (set_attr "prefix_data16" "0")
5963   (set_attr "prefix" "maybe_vex")
5964   (set_attr "mode" "TI")
5965   (set_attr "amdfam10_decode" "double")
5966   (set_attr "athlon_decode" "vector")
5967   (set_attr "bdver1_decode" "double")])
5968
5969(define_insn "sse2_cvtpd2dq_mask"
5970  [(set (match_operand:V4SI 0 "register_operand" "=v")
5971	(vec_concat:V4SI
5972	  (vec_merge:V2SI
5973	    (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5974			  UNSPEC_FIX_NOTRUNC)
5975	    (vec_select:V2SI
5976	      (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
5977	      (parallel [(const_int 0) (const_int 1)]))
5978	    (match_operand:QI 3 "register_operand" "Yk"))
5979	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5980  "TARGET_AVX512VL"
5981  "vcvtpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
5982  [(set_attr "type" "ssecvt")
5983   (set_attr "prefix" "evex")
5984   (set_attr "mode" "TI")])
5985
5986(define_insn "*sse2_cvtpd2dq_mask_1"
5987  [(set (match_operand:V4SI 0 "register_operand" "=v")
5988	(vec_concat:V4SI
5989	  (vec_merge:V2SI
5990	    (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
5991			  UNSPEC_FIX_NOTRUNC)
5992	    (const_vector:V2SI [(const_int 0) (const_int 0)])
5993	    (match_operand:QI 2 "register_operand" "Yk"))
5994	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
5995  "TARGET_AVX512VL"
5996  "vcvtpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
5997  [(set_attr "type" "ssecvt")
5998   (set_attr "prefix" "evex")
5999   (set_attr "mode" "TI")])
6000
6001;; For ufix_notrunc* insn patterns
6002(define_mode_attr pd2udqsuff
6003  [(V8DF "") (V4DF "{y}")])
6004
6005(define_insn "ufix_notrunc<mode><si2dfmodelower>2<mask_name><round_name>"
6006  [(set (match_operand:<si2dfmode> 0 "register_operand" "=v")
6007	(unspec:<si2dfmode>
6008	  [(match_operand:VF2_512_256VL 1 "nonimmediate_operand" "<round_constraint>")]
6009	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6010  "TARGET_AVX512F"
6011  "vcvtpd2udq<pd2udqsuff>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6012  [(set_attr "type" "ssecvt")
6013   (set_attr "prefix" "evex")
6014   (set_attr "mode" "<sseinsnmode>")])
6015
6016(define_insn "ufix_notruncv2dfv2si2"
6017  [(set (match_operand:V4SI 0 "register_operand" "=v")
6018	(vec_concat:V4SI
6019	  (unspec:V2SI
6020	    [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6021	       UNSPEC_UNSIGNED_FIX_NOTRUNC)
6022	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6023  "TARGET_AVX512VL"
6024  "vcvtpd2udq{x}\t{%1, %0|%0, %1}"
6025  [(set_attr "type" "ssecvt")
6026   (set_attr "prefix" "evex")
6027   (set_attr "mode" "TI")])
6028
6029(define_insn "ufix_notruncv2dfv2si2_mask"
6030  [(set (match_operand:V4SI 0 "register_operand" "=v")
6031	(vec_concat:V4SI
6032	  (vec_merge:V2SI
6033	    (unspec:V2SI
6034	      [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6035		 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6036	    (vec_select:V2SI
6037	      (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6038	      (parallel [(const_int 0) (const_int 1)]))
6039	    (match_operand:QI 3 "register_operand" "Yk"))
6040	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6041  "TARGET_AVX512VL"
6042  "vcvtpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6043  [(set_attr "type" "ssecvt")
6044   (set_attr "prefix" "evex")
6045   (set_attr "mode" "TI")])
6046
6047(define_insn "*ufix_notruncv2dfv2si2_mask_1"
6048  [(set (match_operand:V4SI 0 "register_operand" "=v")
6049	(vec_concat:V4SI
6050	  (vec_merge:V2SI
6051	    (unspec:V2SI
6052	      [(match_operand:V2DF 1 "nonimmediate_operand" "vm")]
6053		 UNSPEC_UNSIGNED_FIX_NOTRUNC)
6054	    (const_vector:V2SI [(const_int 0) (const_int 0)])
6055	    (match_operand:QI 2 "register_operand" "Yk"))
6056	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6057  "TARGET_AVX512VL"
6058  "vcvtpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6059  [(set_attr "type" "ssecvt")
6060   (set_attr "prefix" "evex")
6061   (set_attr "mode" "TI")])
6062
6063(define_insn "fix<fixunssuffix>_truncv8dfv8si2<mask_name><round_saeonly_name>"
6064  [(set (match_operand:V8SI 0 "register_operand" "=v")
6065	(any_fix:V8SI
6066	  (match_operand:V8DF 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6067  "TARGET_AVX512F"
6068  "vcvttpd2<fixsuffix>dq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6069  [(set_attr "type" "ssecvt")
6070   (set_attr "prefix" "evex")
6071   (set_attr "mode" "OI")])
6072
6073(define_insn "ufix_truncv2dfv2si2"
6074  [(set (match_operand:V4SI 0 "register_operand" "=v")
6075	(vec_concat:V4SI
6076	  (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6077	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6078  "TARGET_AVX512VL"
6079  "vcvttpd2udq{x}\t{%1, %0|%0, %1}"
6080  [(set_attr "type" "ssecvt")
6081   (set_attr "prefix" "evex")
6082   (set_attr "mode" "TI")])
6083
6084(define_insn "ufix_truncv2dfv2si2_mask"
6085  [(set (match_operand:V4SI 0 "register_operand" "=v")
6086	(vec_concat:V4SI
6087	  (vec_merge:V2SI
6088	    (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6089	    (vec_select:V2SI
6090	      (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6091	      (parallel [(const_int 0) (const_int 1)]))
6092	    (match_operand:QI 3 "register_operand" "Yk"))
6093	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6094  "TARGET_AVX512VL"
6095  "vcvttpd2udq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6096  [(set_attr "type" "ssecvt")
6097   (set_attr "prefix" "evex")
6098   (set_attr "mode" "TI")])
6099
6100(define_insn "*ufix_truncv2dfv2si2_mask_1"
6101  [(set (match_operand:V4SI 0 "register_operand" "=v")
6102	(vec_concat:V4SI
6103	  (vec_merge:V2SI
6104	    (unsigned_fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6105	    (const_vector:V2SI [(const_int 0) (const_int 0)])
6106	    (match_operand:QI 2 "register_operand" "Yk"))
6107	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6108  "TARGET_AVX512VL"
6109  "vcvttpd2udq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6110  [(set_attr "type" "ssecvt")
6111   (set_attr "prefix" "evex")
6112   (set_attr "mode" "TI")])
6113
6114(define_insn "fix_truncv4dfv4si2<mask_name>"
6115  [(set (match_operand:V4SI 0 "register_operand" "=v")
6116	(fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6117  "TARGET_AVX || (TARGET_AVX512VL && TARGET_AVX512F)"
6118  "vcvttpd2dq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6119  [(set_attr "type" "ssecvt")
6120   (set_attr "prefix" "maybe_evex")
6121   (set_attr "mode" "OI")])
6122
6123(define_insn "ufix_truncv4dfv4si2<mask_name>"
6124  [(set (match_operand:V4SI 0 "register_operand" "=v")
6125	(unsigned_fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6126  "TARGET_AVX512VL && TARGET_AVX512F"
6127  "vcvttpd2udq{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6128  [(set_attr "type" "ssecvt")
6129   (set_attr "prefix" "maybe_evex")
6130   (set_attr "mode" "OI")])
6131
6132(define_insn "fix<fixunssuffix>_trunc<mode><sseintvecmodelower>2<mask_name><round_saeonly_name>"
6133  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6134	(any_fix:<sseintvecmode>
6135	  (match_operand:VF2_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6136  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
6137  "vcvttpd2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6138  [(set_attr "type" "ssecvt")
6139   (set_attr "prefix" "evex")
6140   (set_attr "mode" "<sseintvecmode2>")])
6141
6142(define_insn "fix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6143  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6144	(unspec:<sseintvecmode>
6145	  [(match_operand:VF2_AVX512VL 1 "<round_nimm_predicate>" "<round_constraint>")]
6146	  UNSPEC_FIX_NOTRUNC))]
6147  "TARGET_AVX512DQ && <round_mode512bit_condition>"
6148  "vcvtpd2qq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6149  [(set_attr "type" "ssecvt")
6150   (set_attr "prefix" "evex")
6151   (set_attr "mode" "<sseintvecmode2>")])
6152
6153(define_insn "ufix_notrunc<mode><sseintvecmodelower>2<mask_name><round_name>"
6154  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6155	(unspec:<sseintvecmode>
6156	  [(match_operand:VF2_AVX512VL 1 "nonimmediate_operand" "<round_constraint>")]
6157	  UNSPEC_UNSIGNED_FIX_NOTRUNC))]
6158  "TARGET_AVX512DQ && <round_mode512bit_condition>"
6159  "vcvtpd2uqq\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6160  [(set_attr "type" "ssecvt")
6161   (set_attr "prefix" "evex")
6162   (set_attr "mode" "<sseintvecmode2>")])
6163
6164(define_insn "fix<fixunssuffix>_trunc<mode><sselongvecmodelower>2<mask_name><round_saeonly_name>"
6165  [(set (match_operand:<sselongvecmode> 0 "register_operand" "=v")
6166	(any_fix:<sselongvecmode>
6167	  (match_operand:VF1_128_256VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6168  "TARGET_AVX512DQ && <round_saeonly_modev8sf_condition>"
6169  "vcvttps2<fixsuffix>qq\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6170  [(set_attr "type" "ssecvt")
6171   (set_attr "prefix" "evex")
6172   (set_attr "mode" "<sseintvecmode3>")])
6173
6174(define_insn "fix<fixunssuffix>_truncv2sfv2di2<mask_name>"
6175  [(set (match_operand:V2DI 0 "register_operand" "=v")
6176	(any_fix:V2DI
6177	  (vec_select:V2SF
6178	    (match_operand:V4SF 1 "nonimmediate_operand" "vm")
6179	    (parallel [(const_int 0) (const_int 1)]))))]
6180  "TARGET_AVX512DQ && TARGET_AVX512VL"
6181  "vcvttps2<fixsuffix>qq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6182  [(set_attr "type" "ssecvt")
6183   (set_attr "prefix" "evex")
6184   (set_attr "mode" "TI")])
6185
6186(define_mode_attr vunpckfixt_mode
6187  [(V16SF "V8DI") (V8SF "V4DI") (V4SF "V2DI")])
6188(define_mode_attr vunpckfixt_model
6189  [(V16SF "v8di") (V8SF "v4di") (V4SF "v2di")])
6190(define_mode_attr vunpckfixt_extract_mode
6191  [(V16SF "v16sf") (V8SF "v8sf") (V4SF "v8sf")])
6192
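;; SF -> DImode truncating unpacks: get the requested half of the source
;; into the low positions (using vec_extract for the wider modes, or a
;; vpermilps swap of the two halves for the V4SF high part) and convert
;; it with the half-width fix_trunc pattern.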
6193(define_expand "vec_unpack_<fixprefix>fix_trunc_lo_<mode>"
6194  [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6195   (any_fix:<vunpckfixt_mode>
6196     (match_operand:VF1_AVX512VL 1 "register_operand"))]
6197  "TARGET_AVX512DQ"
6198{
6199  rtx tem = operands[1];
6200  if (<MODE>mode != V4SFmode)
6201    {
6202      tem = gen_reg_rtx (<ssehalfvecmode>mode);
6203      emit_insn (gen_vec_extract_lo_<vunpckfixt_extract_mode> (tem,
6204							       operands[1]));
6205    }
6206  rtx (*gen) (rtx, rtx)
6207    = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6208  emit_insn (gen (operands[0], tem));
6209  DONE;
6210})
6211
6212(define_expand "vec_unpack_<fixprefix>fix_trunc_hi_<mode>"
6213  [(match_operand:<vunpckfixt_mode> 0 "register_operand")
6214   (any_fix:<vunpckfixt_mode>
6215     (match_operand:VF1_AVX512VL 1 "register_operand"))]
6216  "TARGET_AVX512DQ"
6217{
6218  rtx tem;
6219  if (<MODE>mode != V4SFmode)
6220    {
6221      tem = gen_reg_rtx (<ssehalfvecmode>mode);
6222      emit_insn (gen_vec_extract_hi_<vunpckfixt_extract_mode> (tem,
6223							       operands[1]));
6224    }
6225  else
6226    {
6227      tem = gen_reg_rtx (V4SFmode);
6228      emit_insn (gen_avx_vpermilv4sf (tem, operands[1], GEN_INT (0x4e)));
6229    }
6230  rtx (*gen) (rtx, rtx)
6231    = gen_fix<fixunssuffix>_trunc<ssehalfvecmodelower><vunpckfixt_model>2;
6232  emit_insn (gen (operands[0], tem));
6233  DONE;
6234})
6235
6236(define_insn "ufix_trunc<mode><sseintvecmodelower>2<mask_name>"
6237  [(set (match_operand:<sseintvecmode> 0 "register_operand" "=v")
6238	(unsigned_fix:<sseintvecmode>
6239	  (match_operand:VF1_128_256VL 1 "nonimmediate_operand" "vm")))]
6240  "TARGET_AVX512VL"
6241  "vcvttps2udq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6242  [(set_attr "type" "ssecvt")
6243   (set_attr "prefix" "evex")
6244   (set_attr "mode" "<sseintvecmode2>")])
6245
6246(define_expand "avx_cvttpd2dq256_2"
6247  [(set (match_operand:V8SI 0 "register_operand")
6248	(vec_concat:V8SI
6249	  (fix:V4SI (match_operand:V4DF 1 "nonimmediate_operand"))
6250	  (match_dup 2)))]
6251  "TARGET_AVX"
6252  "operands[2] = CONST0_RTX (V4SImode);")
6253
6254(define_insn "sse2_cvttpd2dq"
6255  [(set (match_operand:V4SI 0 "register_operand" "=v")
6256	(vec_concat:V4SI
6257	  (fix:V2SI (match_operand:V2DF 1 "vector_operand" "vBm"))
6258	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6259  "TARGET_SSE2"
6260{
6261  if (TARGET_AVX)
6262    return "vcvttpd2dq{x}\t{%1, %0|%0, %1}";
6263  else
6264    return "cvttpd2dq\t{%1, %0|%0, %1}";
6265}
6266  [(set_attr "type" "ssecvt")
6267   (set_attr "amdfam10_decode" "double")
6268   (set_attr "athlon_decode" "vector")
6269   (set_attr "bdver1_decode" "double")
6270   (set_attr "prefix" "maybe_vex")
6271   (set_attr "mode" "TI")])
6272
6273(define_insn "sse2_cvttpd2dq_mask"
6274  [(set (match_operand:V4SI 0 "register_operand" "=v")
6275	(vec_concat:V4SI
6276	  (vec_merge:V2SI
6277	    (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6278	    (vec_select:V2SI
6279	      (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
6280	      (parallel [(const_int 0) (const_int 1)]))
6281	    (match_operand:QI 3 "register_operand" "Yk"))
6282	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6283  "TARGET_AVX512VL"
6284  "vcvttpd2dq{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6285  [(set_attr "type" "ssecvt")
6286   (set_attr "prefix" "evex")
6287   (set_attr "mode" "TI")])
6288
6289(define_insn "*sse2_cvttpd2dq_mask_1"
6290  [(set (match_operand:V4SI 0 "register_operand" "=v")
6291	(vec_concat:V4SI
6292	  (vec_merge:V2SI
6293	    (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6294	    (const_vector:V2SI [(const_int 0) (const_int 0)])
6295	    (match_operand:QI 2 "register_operand" "Yk"))
6296	  (const_vector:V2SI [(const_int 0) (const_int 0)])))]
6297  "TARGET_AVX512VL"
6298  "vcvttpd2dq{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6299  [(set_attr "type" "ssecvt")
6300   (set_attr "prefix" "evex")
6301   (set_attr "mode" "TI")])
6302
6303(define_insn "sse2_cvtsd2ss<round_name>"
6304  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6305	(vec_merge:V4SF
6306	  (vec_duplicate:V4SF
6307	    (float_truncate:V2SF
6308	      (match_operand:V2DF 2 "nonimmediate_operand" "x,m,<round_constraint>")))
6309	  (match_operand:V4SF 1 "register_operand" "0,0,v")
6310	  (const_int 1)))]
6311  "TARGET_SSE2"
6312  "@
6313   cvtsd2ss\t{%2, %0|%0, %2}
6314   cvtsd2ss\t{%2, %0|%0, %q2}
6315   vcvtsd2ss\t{<round_op3>%2, %1, %0|%0, %1, %q2<round_op3>}"
6316  [(set_attr "isa" "noavx,noavx,avx")
6317   (set_attr "type" "ssecvt")
6318   (set_attr "athlon_decode" "vector,double,*")
6319   (set_attr "amdfam10_decode" "vector,double,*")
6320   (set_attr "bdver1_decode" "direct,direct,*")
6321   (set_attr "btver2_decode" "double,double,double")
6322   (set_attr "prefix" "orig,orig,<round_prefix>")
6323   (set_attr "mode" "SF")])
6324
6325(define_insn "*sse2_vd_cvtsd2ss"
6326  [(set (match_operand:V4SF 0 "register_operand" "=x,x,v")
6327	(vec_merge:V4SF
6328	  (vec_duplicate:V4SF
6329	    (float_truncate:SF (match_operand:DF 2 "nonimmediate_operand" "x,m,vm")))
6330	  (match_operand:V4SF 1 "register_operand" "0,0,v")
6331	  (const_int 1)))]
6332  "TARGET_SSE2"
6333  "@
6334   cvtsd2ss\t{%2, %0|%0, %2}
6335   cvtsd2ss\t{%2, %0|%0, %2}
6336   vcvtsd2ss\t{%2, %1, %0|%0, %1, %2}"
6337  [(set_attr "isa" "noavx,noavx,avx")
6338   (set_attr "type" "ssecvt")
6339   (set_attr "athlon_decode" "vector,double,*")
6340   (set_attr "amdfam10_decode" "vector,double,*")
6341   (set_attr "bdver1_decode" "direct,direct,*")
6342   (set_attr "btver2_decode" "double,double,double")
6343   (set_attr "prefix" "orig,orig,vex")
6344   (set_attr "mode" "SF")])
6345
6346(define_insn "sse2_cvtss2sd<round_saeonly_name>"
6347  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6348	(vec_merge:V2DF
6349	  (float_extend:V2DF
6350	    (vec_select:V2SF
6351	      (match_operand:V4SF 2 "<round_saeonly_nimm_scalar_predicate>" "x,m,<round_saeonly_constraint>")
6352	      (parallel [(const_int 0) (const_int 1)])))
6353	  (match_operand:V2DF 1 "register_operand" "0,0,v")
6354	  (const_int 1)))]
6355  "TARGET_SSE2"
6356  "@
6357   cvtss2sd\t{%2, %0|%0, %2}
6358   cvtss2sd\t{%2, %0|%0, %k2}
6359   vcvtss2sd\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %k2<round_saeonly_op3>}"
6360  [(set_attr "isa" "noavx,noavx,avx")
6361   (set_attr "type" "ssecvt")
6362   (set_attr "amdfam10_decode" "vector,double,*")
6363   (set_attr "athlon_decode" "direct,direct,*")
6364   (set_attr "bdver1_decode" "direct,direct,*")
6365   (set_attr "btver2_decode" "double,double,double")
6366   (set_attr "prefix" "orig,orig,<round_saeonly_prefix>")
6367   (set_attr "mode" "DF")])
6368
6369(define_insn "*sse2_vd_cvtss2sd"
6370  [(set (match_operand:V2DF 0 "register_operand" "=x,x,v")
6371	(vec_merge:V2DF
6372	  (vec_duplicate:V2DF
6373	    (float_extend:DF (match_operand:SF 2 "nonimmediate_operand" "x,m,vm")))
6374	  (match_operand:V2DF 1 "register_operand" "0,0,v")
6375	  (const_int 1)))]
6376  "TARGET_SSE2"
6377  "@
6378   cvtss2sd\t{%2, %0|%0, %2}
6379   cvtss2sd\t{%2, %0|%0, %2}
6380   vcvtss2sd\t{%2, %1, %0|%0, %1, %2}"
6381  [(set_attr "isa" "noavx,noavx,avx")
6382   (set_attr "type" "ssecvt")
6383   (set_attr "amdfam10_decode" "vector,double,*")
6384   (set_attr "athlon_decode" "direct,direct,*")
6385   (set_attr "bdver1_decode" "direct,direct,*")
6386   (set_attr "btver2_decode" "double,double,double")
6387   (set_attr "prefix" "orig,orig,vex")
6388   (set_attr "mode" "DF")])
6389
6390(define_insn "<mask_codefor>avx512f_cvtpd2ps512<mask_name><round_name>"
6391  [(set (match_operand:V8SF 0 "register_operand" "=v")
6392	(float_truncate:V8SF
6393	  (match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")))]
6394  "TARGET_AVX512F"
6395  "vcvtpd2ps\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
6396  [(set_attr "type" "ssecvt")
6397   (set_attr "prefix" "evex")
6398   (set_attr "mode" "V8SF")])
6399
6400(define_insn "avx_cvtpd2ps256<mask_name>"
6401  [(set (match_operand:V4SF 0 "register_operand" "=v")
6402	(float_truncate:V4SF
6403	  (match_operand:V4DF 1 "nonimmediate_operand" "vm")))]
6404  "TARGET_AVX && <mask_avx512vl_condition>"
6405  "vcvtpd2ps{y}\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
6406  [(set_attr "type" "ssecvt")
6407   (set_attr "prefix" "maybe_evex")
6408   (set_attr "btver2_decode" "vector")
6409   (set_attr "mode" "V4SF")])
6410
6411(define_expand "sse2_cvtpd2ps"
6412  [(set (match_operand:V4SF 0 "register_operand")
6413	(vec_concat:V4SF
6414	  (float_truncate:V2SF
6415	    (match_operand:V2DF 1 "vector_operand"))
6416	  (match_dup 2)))]
6417  "TARGET_SSE2"
6418  "operands[2] = CONST0_RTX (V2SFmode);")
6419
6420(define_expand "sse2_cvtpd2ps_mask"
6421  [(set (match_operand:V4SF 0 "register_operand")
6422	(vec_concat:V4SF
6423	  (vec_merge:V2SF
6424	    (float_truncate:V2SF
6425	      (match_operand:V2DF 1 "vector_operand"))
6426	    (vec_select:V2SF
6427	      (match_operand:V4SF 2 "nonimm_or_0_operand")
6428	      (parallel [(const_int 0) (const_int 1)]))
6429	    (match_operand:QI 3 "register_operand"))
6430	  (match_dup 4)))]
6431  "TARGET_SSE2"
6432  "operands[4] = CONST0_RTX (V2SFmode);")
6433
6434(define_insn "*sse2_cvtpd2ps"
6435  [(set (match_operand:V4SF 0 "register_operand" "=v")
6436	(vec_concat:V4SF
6437	  (float_truncate:V2SF
6438	    (match_operand:V2DF 1 "vector_operand" "vBm"))
6439	  (match_operand:V2SF 2 "const0_operand" "C")))]
6440  "TARGET_SSE2"
6441{
6442  if (TARGET_AVX)
6443    return "vcvtpd2ps{x}\t{%1, %0|%0, %1}";
6444  else
6445    return "cvtpd2ps\t{%1, %0|%0, %1}";
6446}
6447  [(set_attr "type" "ssecvt")
6448   (set_attr "amdfam10_decode" "double")
6449   (set_attr "athlon_decode" "vector")
6450   (set_attr "bdver1_decode" "double")
6451   (set_attr "prefix_data16" "1")
6452   (set_attr "prefix" "maybe_vex")
6453   (set_attr "mode" "V4SF")])
6454
6455(define_insn "*sse2_cvtpd2ps_mask"
6456  [(set (match_operand:V4SF 0 "register_operand" "=v")
6457	(vec_concat:V4SF
6458	  (vec_merge:V2SF
6459	    (float_truncate:V2SF
6460	      (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6461	    (vec_select:V2SF
6462	      (match_operand:V4SF 2 "nonimm_or_0_operand" "0C")
6463	      (parallel [(const_int 0) (const_int 1)]))
6464	    (match_operand:QI 3 "register_operand" "Yk"))
6465	  (match_operand:V2SF 4 "const0_operand" "C")))]
6466  "TARGET_AVX512VL"
6467  "vcvtpd2ps{x}\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
6468  [(set_attr "type" "ssecvt")
6469   (set_attr "prefix" "evex")
6470   (set_attr "mode" "V4SF")])
6471
6472(define_insn "*sse2_cvtpd2ps_mask_1"
6473  [(set (match_operand:V4SF 0 "register_operand" "=v")
6474	(vec_concat:V4SF
6475	  (vec_merge:V2SF
6476	    (float_truncate:V2SF
6477	      (match_operand:V2DF 1 "nonimmediate_operand" "vm"))
6478	    (match_operand:V2SF 3 "const0_operand" "C")
6479	    (match_operand:QI 2 "register_operand" "Yk"))
6480	  (match_operand:V2SF 4 "const0_operand" "C")))]
6481  "TARGET_AVX512VL"
6482  "vcvtpd2ps{x}\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
6483  [(set_attr "type" "ssecvt")
6484   (set_attr "prefix" "evex")
6485   (set_attr "mode" "V4SF")])
6486
6487;; For <sse2_avx_avx512f>_cvtps2pd<avxsizesuffix> insn pattern
6488(define_mode_attr sf2dfmode
6489  [(V8DF "V8SF") (V4DF "V4SF")])
6490
6491(define_insn "<sse2_avx_avx512f>_cvtps2pd<avxsizesuffix><mask_name><round_saeonly_name>"
6492  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
6493	(float_extend:VF2_512_256
6494	  (match_operand:<sf2dfmode> 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")))]
6495  "TARGET_AVX && <mask_mode512bit_condition> && <round_saeonly_mode512bit_condition>"
6496  "vcvtps2pd\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
6497  [(set_attr "type" "ssecvt")
6498   (set_attr "prefix" "maybe_vex")
6499   (set_attr "mode" "<MODE>")])
6500
6501(define_insn "*avx_cvtps2pd256_2"
6502  [(set (match_operand:V4DF 0 "register_operand" "=v")
6503	(float_extend:V4DF
6504	  (vec_select:V4SF
6505	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
6506	    (parallel [(const_int 0) (const_int 1)
6507		       (const_int 2) (const_int 3)]))))]
6508  "TARGET_AVX"
6509  "vcvtps2pd\t{%x1, %0|%0, %x1}"
6510  [(set_attr "type" "ssecvt")
6511   (set_attr "prefix" "vex")
6512   (set_attr "mode" "V4DF")])
6513
6514(define_insn "vec_unpacks_lo_v16sf"
6515  [(set (match_operand:V8DF 0 "register_operand" "=v")
6516	(float_extend:V8DF
6517	  (vec_select:V8SF
6518	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
6519	    (parallel [(const_int 0) (const_int 1)
6520		       (const_int 2) (const_int 3)
6521		       (const_int 4) (const_int 5)
6522		       (const_int 6) (const_int 7)]))))]
6523  "TARGET_AVX512F"
6524  "vcvtps2pd\t{%t1, %0|%0, %t1}"
6525  [(set_attr "type" "ssecvt")
6526   (set_attr "prefix" "evex")
6527   (set_attr "mode" "V8DF")])
6528
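;; Vector -> mask conversions: vpmov<ssemodesuffix>2m copies the sign bit
;; of each element into the corresponding mask bit.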
6529(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6530  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6531	(unspec:<avx512fmaskmode>
6532	 [(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
6533	 UNSPEC_CVTINT2MASK))]
6534  "TARGET_AVX512BW"
6535  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6536  [(set_attr "prefix" "evex")
6537   (set_attr "mode" "<sseinsnmode>")])
6538
6539(define_insn "<avx512>_cvt<ssemodesuffix>2mask<mode>"
6540  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
6541	(unspec:<avx512fmaskmode>
6542	 [(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
6543	 UNSPEC_CVTINT2MASK))]
6544  "TARGET_AVX512DQ"
6545  "vpmov<ssemodesuffix>2m\t{%1, %0|%0, %1}"
6546  [(set_attr "prefix" "evex")
6547   (set_attr "mode" "<sseinsnmode>")])
6548
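;; Mask -> vector conversions: vpmovm2<ssemodesuffix> expands each mask
;; bit into an all-ones or all-zeros element.  Without AVX512DQ the
;; DWORD/QWORD forms fall back to a zero-masked vpternlog; with imm8 0x81
;; and all three sources equal the ternary result is all-ones, and zero
;; masking then clears the elements whose mask bit is not set.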
6549(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6550  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
6551	(vec_merge:VI12_AVX512VL
6552	  (match_dup 2)
6553	  (match_dup 3)
6554	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6555  "TARGET_AVX512BW"
6556  {
6557    operands[2] = CONSTM1_RTX (<MODE>mode);
6558    operands[3] = CONST0_RTX (<MODE>mode);
6559  })
6560
6561(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6562  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
6563	(vec_merge:VI12_AVX512VL
6564	  (match_operand:VI12_AVX512VL 2 "vector_all_ones_operand")
6565	  (match_operand:VI12_AVX512VL 3 "const0_operand")
6566	  (match_operand:<avx512fmaskmode> 1 "register_operand" "k")))]
6567  "TARGET_AVX512BW"
6568  "vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}"
6569  [(set_attr "prefix" "evex")
6570   (set_attr "mode" "<sseinsnmode>")])
6571
6572(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
6573  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
6574	(vec_merge:VI48_AVX512VL
6575	  (match_dup 2)
6576	  (match_dup 3)
6577	  (match_operand:<avx512fmaskmode> 1 "register_operand")))]
6578  "TARGET_AVX512F"
  {
    operands[2] = CONSTM1_RTX (<MODE>mode);
    operands[3] = CONST0_RTX (<MODE>mode);
  })
6583
6584(define_insn "*<avx512>_cvtmask2<ssemodesuffix><mode>"
6585  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v,v")
6586	(vec_merge:VI48_AVX512VL
6587	  (match_operand:VI48_AVX512VL 2 "vector_all_ones_operand")
6588	  (match_operand:VI48_AVX512VL 3 "const0_operand")
6589	  (match_operand:<avx512fmaskmode> 1 "register_operand" "k,Yk")))]
6590  "TARGET_AVX512F"
6591  "@
6592   vpmovm2<ssemodesuffix>\t{%1, %0|%0, %1}
6593   vpternlog<ssemodesuffix>\t{$0x81, %0, %0, %0%{%1%}%{z%}|%0%{%1%}%{z%}, %0, %0, 0x81}"
6594  [(set_attr "isa" "avx512dq,*")
6595   (set_attr "length_immediate" "0,1")
6596   (set_attr "prefix" "evex")
6597   (set_attr "mode" "<sseinsnmode>")])
6598
6599(define_insn "sse2_cvtps2pd<mask_name>"
6600  [(set (match_operand:V2DF 0 "register_operand" "=v")
6601	(float_extend:V2DF
6602	  (vec_select:V2SF
6603	    (match_operand:V4SF 1 "vector_operand" "vm")
6604	    (parallel [(const_int 0) (const_int 1)]))))]
6605  "TARGET_SSE2 && <mask_avx512vl_condition>"
6606  "%vcvtps2pd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
6607  [(set_attr "type" "ssecvt")
6608   (set_attr "amdfam10_decode" "direct")
6609   (set_attr "athlon_decode" "double")
6610   (set_attr "bdver1_decode" "double")
6611   (set_attr "prefix_data16" "0")
6612   (set_attr "prefix" "maybe_vex")
6613   (set_attr "mode" "V2DF")])
6614
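;; SF -> DF unpacks.  In the 128-bit high-part case the upper half of the
;; source is first shuffled into the low positions and then converted
;; with cvtps2pd.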
6615(define_expand "vec_unpacks_hi_v4sf"
6616  [(set (match_dup 2)
6617   (vec_select:V4SF
6618     (vec_concat:V8SF
6619       (match_dup 2)
6620       (match_operand:V4SF 1 "vector_operand"))
6621     (parallel [(const_int 6) (const_int 7)
6622		(const_int 2) (const_int 3)])))
6623  (set (match_operand:V2DF 0 "register_operand")
6624   (float_extend:V2DF
6625     (vec_select:V2SF
6626       (match_dup 2)
6627       (parallel [(const_int 0) (const_int 1)]))))]
6628  "TARGET_SSE2"
6629  "operands[2] = gen_reg_rtx (V4SFmode);")
6630
6631(define_expand "vec_unpacks_hi_v8sf"
6632  [(set (match_dup 2)
6633	(vec_select:V4SF
6634	  (match_operand:V8SF 1 "register_operand")
6635	  (parallel [(const_int 4) (const_int 5)
6636		     (const_int 6) (const_int 7)])))
6637   (set (match_operand:V4DF 0 "register_operand")
6638	(float_extend:V4DF
6639	  (match_dup 2)))]
6640  "TARGET_AVX"
6641  "operands[2] = gen_reg_rtx (V4SFmode);")
6642
6643(define_expand "vec_unpacks_hi_v16sf"
6644  [(set (match_dup 2)
6645	(vec_select:V8SF
6646	  (match_operand:V16SF 1 "register_operand")
6647	  (parallel [(const_int 8) (const_int 9)
6648		     (const_int 10) (const_int 11)
6649		     (const_int 12) (const_int 13)
6650		     (const_int 14) (const_int 15)])))
6651   (set (match_operand:V8DF 0 "register_operand")
6652	(float_extend:V8DF
6653	  (match_dup 2)))]
6654"TARGET_AVX512F"
6655"operands[2] = gen_reg_rtx (V8SFmode);")
6656
6657(define_expand "vec_unpacks_lo_v4sf"
6658  [(set (match_operand:V2DF 0 "register_operand")
6659	(float_extend:V2DF
6660	  (vec_select:V2SF
6661	    (match_operand:V4SF 1 "vector_operand")
6662	    (parallel [(const_int 0) (const_int 1)]))))]
6663  "TARGET_SSE2")
6664
6665(define_expand "vec_unpacks_lo_v8sf"
6666  [(set (match_operand:V4DF 0 "register_operand")
6667	(float_extend:V4DF
6668	  (vec_select:V4SF
6669	    (match_operand:V8SF 1 "nonimmediate_operand")
6670	    (parallel [(const_int 0) (const_int 1)
6671		       (const_int 2) (const_int 3)]))))]
6672  "TARGET_AVX")
6673
6674(define_mode_attr sseunpackfltmode
6675  [(V8HI "V4SF") (V4SI "V2DF") (V16HI "V8SF")
   (V8SI "V4DF") (V32HI "V16SF") (V16SI "V8DF")])
6677
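;; Unpack the selected half of a HImode-element vector to SImode and
;; convert the result to float.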
6678(define_expand "vec_unpacks_float_hi_<mode>"
6679  [(match_operand:<sseunpackfltmode> 0 "register_operand")
6680   (match_operand:VI2_AVX512F 1 "register_operand")]
6681  "TARGET_SSE2"
6682{
6683  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6684
6685  emit_insn (gen_vec_unpacks_hi_<mode> (tmp, operands[1]));
6686  emit_insn (gen_rtx_SET (operands[0],
6687			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6688  DONE;
6689})
6690
6691(define_expand "vec_unpacks_float_lo_<mode>"
6692  [(match_operand:<sseunpackfltmode> 0 "register_operand")
6693   (match_operand:VI2_AVX512F 1 "register_operand")]
6694  "TARGET_SSE2"
6695{
6696  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6697
6698  emit_insn (gen_vec_unpacks_lo_<mode> (tmp, operands[1]));
6699  emit_insn (gen_rtx_SET (operands[0],
6700			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6701  DONE;
6702})
6703
6704(define_expand "vec_unpacku_float_hi_<mode>"
6705  [(match_operand:<sseunpackfltmode> 0 "register_operand")
6706   (match_operand:VI2_AVX512F 1 "register_operand")]
6707  "TARGET_SSE2"
6708{
6709  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6710
6711  emit_insn (gen_vec_unpacku_hi_<mode> (tmp, operands[1]));
6712  emit_insn (gen_rtx_SET (operands[0],
6713			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6714  DONE;
6715})
6716
6717(define_expand "vec_unpacku_float_lo_<mode>"
6718  [(match_operand:<sseunpackfltmode> 0 "register_operand")
6719   (match_operand:VI2_AVX512F 1 "register_operand")]
6720  "TARGET_SSE2"
6721{
6722  rtx tmp = gen_reg_rtx (<sseunpackmode>mode);
6723
6724  emit_insn (gen_vec_unpacku_lo_<mode> (tmp, operands[1]));
6725  emit_insn (gen_rtx_SET (operands[0],
6726			  gen_rtx_FLOAT (<sseunpackfltmode>mode, tmp)));
6727  DONE;
6728})
6729
6730(define_expand "vec_unpacks_float_hi_v4si"
6731  [(set (match_dup 2)
6732	(vec_select:V4SI
6733	  (match_operand:V4SI 1 "vector_operand")
6734	  (parallel [(const_int 2) (const_int 3)
6735		     (const_int 2) (const_int 3)])))
6736   (set (match_operand:V2DF 0 "register_operand")
6737	(float:V2DF
6738	  (vec_select:V2SI
	    (match_dup 2)
6740	    (parallel [(const_int 0) (const_int 1)]))))]
6741  "TARGET_SSE2"
6742  "operands[2] = gen_reg_rtx (V4SImode);")
6743
6744(define_expand "vec_unpacks_float_lo_v4si"
6745  [(set (match_operand:V2DF 0 "register_operand")
6746	(float:V2DF
6747	  (vec_select:V2SI
6748	    (match_operand:V4SI 1 "vector_operand")
6749	    (parallel [(const_int 0) (const_int 1)]))))]
6750  "TARGET_SSE2")
6751
6752(define_expand "vec_unpacks_float_hi_v8si"
6753  [(set (match_dup 2)
6754	(vec_select:V4SI
6755	  (match_operand:V8SI 1 "register_operand")
6756	  (parallel [(const_int 4) (const_int 5)
6757		     (const_int 6) (const_int 7)])))
6758   (set (match_operand:V4DF 0 "register_operand")
6759	(float:V4DF
6760	  (match_dup 2)))]
6761  "TARGET_AVX"
6762  "operands[2] = gen_reg_rtx (V4SImode);")
6763
6764(define_expand "vec_unpacks_float_lo_v8si"
6765  [(set (match_operand:V4DF 0 "register_operand")
6766	(float:V4DF
6767	  (vec_select:V4SI
6768	    (match_operand:V8SI 1 "nonimmediate_operand")
6769	    (parallel [(const_int 0) (const_int 1)
6770		       (const_int 2) (const_int 3)]))))]
6771  "TARGET_AVX")
6772
6773(define_expand "vec_unpacks_float_hi_v16si"
6774  [(set (match_dup 2)
6775	(vec_select:V8SI
6776	  (match_operand:V16SI 1 "nonimmediate_operand")
6777	  (parallel [(const_int 8) (const_int 9)
6778		     (const_int 10) (const_int 11)
6779		     (const_int 12) (const_int 13)
6780		     (const_int 14) (const_int 15)])))
6781   (set (match_operand:V8DF 0 "register_operand")
6782	(float:V8DF
6783	  (match_dup 2)))]
6784  "TARGET_AVX512F"
6785  "operands[2] = gen_reg_rtx (V8SImode);")
6786
6787(define_expand "vec_unpacks_float_lo_v16si"
6788  [(set (match_operand:V8DF 0 "register_operand")
6789	(float:V8DF
6790	  (vec_select:V8SI
6791	    (match_operand:V16SI 1 "nonimmediate_operand")
6792	    (parallel [(const_int 0) (const_int 1)
6793		       (const_int 2) (const_int 3)
6794		       (const_int 4) (const_int 5)
6795		       (const_int 6) (const_int 7)]))))]
6796  "TARGET_AVX512F")
6797
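;; Unsigned SI -> DF conversion without a native instruction: convert the
;; elements as signed, then add 2^32 to every result that came out
;; negative.  E.g. 0xffffffff converts to -1.0 and the correction yields
;; the expected 4294967295.0.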
6798(define_expand "vec_unpacku_float_hi_v4si"
6799  [(set (match_dup 5)
6800	(vec_select:V4SI
6801	  (match_operand:V4SI 1 "vector_operand")
6802	  (parallel [(const_int 2) (const_int 3)
6803		     (const_int 2) (const_int 3)])))
6804   (set (match_dup 6)
6805	(float:V2DF
6806	  (vec_select:V2SI
	    (match_dup 5)
6808	    (parallel [(const_int 0) (const_int 1)]))))
6809   (set (match_dup 7)
6810	(lt:V2DF (match_dup 6) (match_dup 3)))
6811   (set (match_dup 8)
6812	(and:V2DF (match_dup 7) (match_dup 4)))
6813   (set (match_operand:V2DF 0 "register_operand")
6814	(plus:V2DF (match_dup 6) (match_dup 8)))]
6815  "TARGET_SSE2"
6816{
6817  REAL_VALUE_TYPE TWO32r;
6818  rtx x;
6819  int i;
6820
6821  real_ldexp (&TWO32r, &dconst1, 32);
6822  x = const_double_from_real_value (TWO32r, DFmode);
6823
6824  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6825  operands[4] = force_reg (V2DFmode,
6826			   ix86_build_const_vector (V2DFmode, 1, x));
6827
6828  operands[5] = gen_reg_rtx (V4SImode);
6829
6830  for (i = 6; i < 9; i++)
6831    operands[i] = gen_reg_rtx (V2DFmode);
6832})
6833
6834(define_expand "vec_unpacku_float_lo_v4si"
6835  [(set (match_dup 5)
6836	(float:V2DF
6837	  (vec_select:V2SI
6838	    (match_operand:V4SI 1 "vector_operand")
6839	    (parallel [(const_int 0) (const_int 1)]))))
6840   (set (match_dup 6)
6841	(lt:V2DF (match_dup 5) (match_dup 3)))
6842   (set (match_dup 7)
6843	(and:V2DF (match_dup 6) (match_dup 4)))
6844   (set (match_operand:V2DF 0 "register_operand")
6845	(plus:V2DF (match_dup 5) (match_dup 7)))]
6846  "TARGET_SSE2"
6847{
6848  REAL_VALUE_TYPE TWO32r;
6849  rtx x;
6850  int i;
6851
6852  real_ldexp (&TWO32r, &dconst1, 32);
6853  x = const_double_from_real_value (TWO32r, DFmode);
6854
6855  operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));
6856  operands[4] = force_reg (V2DFmode,
6857			   ix86_build_const_vector (V2DFmode, 1, x));
6858
6859  for (i = 5; i < 8; i++)
6860    operands[i] = gen_reg_rtx (V2DFmode);
6861})
6862
6863(define_expand "vec_unpacku_float_hi_v8si"
6864  [(match_operand:V4DF 0 "register_operand")
6865   (match_operand:V8SI 1 "register_operand")]
6866  "TARGET_AVX"
6867{
6868  REAL_VALUE_TYPE TWO32r;
6869  rtx x, tmp[6];
6870  int i;
6871
6872  real_ldexp (&TWO32r, &dconst1, 32);
6873  x = const_double_from_real_value (TWO32r, DFmode);
6874
6875  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6876  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6877  tmp[5] = gen_reg_rtx (V4SImode);
6878
6879  for (i = 2; i < 5; i++)
6880    tmp[i] = gen_reg_rtx (V4DFmode);
6881  emit_insn (gen_vec_extract_hi_v8si (tmp[5], operands[1]));
6882  emit_insn (gen_floatv4siv4df2 (tmp[2], tmp[5]));
6883  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6884  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6885  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6886  DONE;
6887})
6888
6889(define_expand "vec_unpacku_float_hi_v16si"
6890  [(match_operand:V8DF 0 "register_operand")
6891   (match_operand:V16SI 1 "register_operand")]
6892  "TARGET_AVX512F"
6893{
6894  REAL_VALUE_TYPE TWO32r;
6895  rtx k, x, tmp[4];
6896
6897  real_ldexp (&TWO32r, &dconst1, 32);
6898  x = const_double_from_real_value (TWO32r, DFmode);
6899
6900  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6901  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6902  tmp[2] = gen_reg_rtx (V8DFmode);
6903  tmp[3] = gen_reg_rtx (V8SImode);
6904  k = gen_reg_rtx (QImode);
6905
6906  emit_insn (gen_vec_extract_hi_v16si (tmp[3], operands[1]));
6907  emit_insn (gen_floatv8siv8df2 (tmp[2], tmp[3]));
6908  ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
6909  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6910  emit_move_insn (operands[0], tmp[2]);
6911  DONE;
6912})
6913
6914(define_expand "vec_unpacku_float_lo_v8si"
6915  [(match_operand:V4DF 0 "register_operand")
6916   (match_operand:V8SI 1 "nonimmediate_operand")]
6917  "TARGET_AVX"
6918{
6919  REAL_VALUE_TYPE TWO32r;
6920  rtx x, tmp[5];
6921  int i;
6922
6923  real_ldexp (&TWO32r, &dconst1, 32);
6924  x = const_double_from_real_value (TWO32r, DFmode);
6925
6926  tmp[0] = force_reg (V4DFmode, CONST0_RTX (V4DFmode));
6927  tmp[1] = force_reg (V4DFmode, ix86_build_const_vector (V4DFmode, 1, x));
6928
6929  for (i = 2; i < 5; i++)
6930    tmp[i] = gen_reg_rtx (V4DFmode);
6931  emit_insn (gen_avx_cvtdq2pd256_2 (tmp[2], operands[1]));
6932  emit_insn (gen_rtx_SET (tmp[3], gen_rtx_LT (V4DFmode, tmp[2], tmp[0])));
6933  emit_insn (gen_andv4df3 (tmp[4], tmp[3], tmp[1]));
6934  emit_insn (gen_addv4df3 (operands[0], tmp[2], tmp[4]));
6935  DONE;
6936})
6937
6938(define_expand "vec_unpacku_float_lo_v16si"
6939  [(match_operand:V8DF 0 "register_operand")
6940   (match_operand:V16SI 1 "nonimmediate_operand")]
6941  "TARGET_AVX512F"
6942{
6943  REAL_VALUE_TYPE TWO32r;
6944  rtx k, x, tmp[3];
6945
6946  real_ldexp (&TWO32r, &dconst1, 32);
6947  x = const_double_from_real_value (TWO32r, DFmode);
6948
6949  tmp[0] = force_reg (V8DFmode, CONST0_RTX (V8DFmode));
6950  tmp[1] = force_reg (V8DFmode, ix86_build_const_vector (V8DFmode, 1, x));
6951  tmp[2] = gen_reg_rtx (V8DFmode);
6952  k = gen_reg_rtx (QImode);
6953
6954  emit_insn (gen_avx512f_cvtdq2pd512_2 (tmp[2], operands[1]));
6955  ix86_expand_mask_vec_cmp (k, LT, tmp[2], tmp[0]);
6956  emit_insn (gen_addv8df3_mask (tmp[2], tmp[2], tmp[1], tmp[2], k));
6957  emit_move_insn (operands[0], tmp[2]);
6958  DONE;
6959})
6960
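;; DF -> SF packs: narrow each input separately and concatenate the
;; halves; the V2DF variant prefers a single 256-bit vcvtpd2ps when AVX
;; is available and profitable.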
6961(define_expand "vec_pack_trunc_<mode>"
6962  [(set (match_dup 3)
6963	(float_truncate:<sf2dfmode>
6964	  (match_operand:VF2_512_256 1 "nonimmediate_operand")))
6965   (set (match_dup 4)
6966	(float_truncate:<sf2dfmode>
6967	  (match_operand:VF2_512_256 2 "nonimmediate_operand")))
6968   (set (match_operand:<ssePSmode> 0 "register_operand")
6969	(vec_concat:<ssePSmode>
6970	  (match_dup 3)
6971	  (match_dup 4)))]
6972  "TARGET_AVX"
6973{
6974  operands[3] = gen_reg_rtx (<sf2dfmode>mode);
6975  operands[4] = gen_reg_rtx (<sf2dfmode>mode);
6976})
6977
6978(define_expand "vec_pack_trunc_v2df"
6979  [(match_operand:V4SF 0 "register_operand")
6980   (match_operand:V2DF 1 "vector_operand")
6981   (match_operand:V2DF 2 "vector_operand")]
6982  "TARGET_SSE2"
6983{
6984  rtx tmp0, tmp1;
6985
6986  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
6987    {
6988      tmp0 = gen_reg_rtx (V4DFmode);
6989      tmp1 = force_reg (V2DFmode, operands[1]);
6990
6991      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
6992      emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0));
6993    }
6994  else
6995    {
6996      tmp0 = gen_reg_rtx (V4SFmode);
6997      tmp1 = gen_reg_rtx (V4SFmode);
6998
6999      emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1]));
7000      emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2]));
7001      emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1));
7002    }
7003  DONE;
7004})
7005
7006(define_expand "vec_pack_sfix_trunc_v8df"
7007  [(match_operand:V16SI 0 "register_operand")
7008   (match_operand:V8DF 1 "nonimmediate_operand")
7009   (match_operand:V8DF 2 "nonimmediate_operand")]
7010  "TARGET_AVX512F"
7011{
7012  rtx r1, r2;
7013
7014  r1 = gen_reg_rtx (V8SImode);
7015  r2 = gen_reg_rtx (V8SImode);
7016
7017  emit_insn (gen_fix_truncv8dfv8si2 (r1, operands[1]));
7018  emit_insn (gen_fix_truncv8dfv8si2 (r2, operands[2]));
7019  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7020  DONE;
7021})
7022
7023(define_expand "vec_pack_sfix_trunc_v4df"
7024  [(match_operand:V8SI 0 "register_operand")
7025   (match_operand:V4DF 1 "nonimmediate_operand")
7026   (match_operand:V4DF 2 "nonimmediate_operand")]
7027  "TARGET_AVX"
7028{
7029  rtx r1, r2;
7030
7031  r1 = gen_reg_rtx (V4SImode);
7032  r2 = gen_reg_rtx (V4SImode);
7033
7034  emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1]));
7035  emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2]));
7036  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7037  DONE;
7038})
7039
7040(define_expand "vec_pack_sfix_trunc_v2df"
7041  [(match_operand:V4SI 0 "register_operand")
7042   (match_operand:V2DF 1 "vector_operand")
7043   (match_operand:V2DF 2 "vector_operand")]
7044  "TARGET_SSE2"
7045{
7046  rtx tmp0, tmp1, tmp2;
7047
7048  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7049    {
7050      tmp0 = gen_reg_rtx (V4DFmode);
7051      tmp1 = force_reg (V2DFmode, operands[1]);
7052
7053      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7054      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0));
7055    }
7056  else
7057    {
7058      tmp0 = gen_reg_rtx (V4SImode);
7059      tmp1 = gen_reg_rtx (V4SImode);
7060      tmp2 = gen_reg_rtx (V2DImode);
7061
7062      emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1]));
7063      emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2]));
7064      emit_insn (gen_vec_interleave_lowv2di (tmp2,
7065					     gen_lowpart (V2DImode, tmp0),
7066					     gen_lowpart (V2DImode, tmp1)));
7067      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7068    }
7069  DONE;
7070})
7071
7072(define_mode_attr ssepackfltmode
7073  [(V8DF "V16SI") (V4DF "V8SI") (V2DF "V4SI")])
7074
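;; Unsigned pack-and-truncate.  Without native AVX512F support each input
;; is first rewritten by ix86_expand_adjust_ufix_to_sfix_si so that the
;; signed vec_pack_sfix_trunc sequence can be used; the correction values
;; produced by that helper are interleaved to match the packed layout and
;; XORed into the converted result.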
7075(define_expand "vec_pack_ufix_trunc_<mode>"
7076  [(match_operand:<ssepackfltmode> 0 "register_operand")
7077   (match_operand:VF2 1 "register_operand")
7078   (match_operand:VF2 2 "register_operand")]
7079  "TARGET_SSE2"
7080{
7081  if (<MODE>mode == V8DFmode)
7082    {
7083      rtx r1, r2;
7084
7085      r1 = gen_reg_rtx (V8SImode);
7086      r2 = gen_reg_rtx (V8SImode);
7087
7088      emit_insn (gen_fixuns_truncv8dfv8si2 (r1, operands[1]));
7089      emit_insn (gen_fixuns_truncv8dfv8si2 (r2, operands[2]));
7090      emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7091    }
7092  else
7093    {
7094      rtx tmp[7];
7095      tmp[0] = ix86_expand_adjust_ufix_to_sfix_si (operands[1], &tmp[2]);
7096      tmp[1] = ix86_expand_adjust_ufix_to_sfix_si (operands[2], &tmp[3]);
7097      tmp[4] = gen_reg_rtx (<ssepackfltmode>mode);
7098      emit_insn (gen_vec_pack_sfix_trunc_<mode> (tmp[4], tmp[0], tmp[1]));
7099      if (<ssepackfltmode>mode == V4SImode || TARGET_AVX2)
7100	{
7101	  tmp[5] = gen_reg_rtx (<ssepackfltmode>mode);
7102	  ix86_expand_vec_extract_even_odd (tmp[5], tmp[2], tmp[3], 0);
7103	}
7104      else
7105	{
7106	  tmp[5] = gen_reg_rtx (V8SFmode);
7107	  ix86_expand_vec_extract_even_odd (tmp[5],
7108					    gen_lowpart (V8SFmode, tmp[2]),
7109					    gen_lowpart (V8SFmode, tmp[3]), 0);
7110	  tmp[5] = gen_lowpart (V8SImode, tmp[5]);
7111	}
7112      tmp[6] = expand_simple_binop (<ssepackfltmode>mode, XOR, tmp[4], tmp[5],
7113				    operands[0], 0, OPTAB_DIRECT);
7114      if (tmp[6] != operands[0])
7115	emit_move_insn (operands[0], tmp[6]);
7116    }
7117
7118  DONE;
7119})
7120
7121(define_expand "avx512f_vec_pack_sfix_v8df"
7122  [(match_operand:V16SI 0 "register_operand")
7123   (match_operand:V8DF 1 "nonimmediate_operand")
7124   (match_operand:V8DF 2 "nonimmediate_operand")]
7125  "TARGET_AVX512F"
7126{
7127  rtx r1, r2;
7128
7129  r1 = gen_reg_rtx (V8SImode);
7130  r2 = gen_reg_rtx (V8SImode);
7131
7132  emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
7133  emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
7134  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
7135  DONE;
7136})
7137
7138(define_expand "vec_pack_sfix_v4df"
7139  [(match_operand:V8SI 0 "register_operand")
7140   (match_operand:V4DF 1 "nonimmediate_operand")
7141   (match_operand:V4DF 2 "nonimmediate_operand")]
7142  "TARGET_AVX"
7143{
7144  rtx r1, r2;
7145
7146  r1 = gen_reg_rtx (V4SImode);
7147  r2 = gen_reg_rtx (V4SImode);
7148
7149  emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1]));
7150  emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2]));
7151  emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2));
7152  DONE;
7153})
7154
7155(define_expand "vec_pack_sfix_v2df"
7156  [(match_operand:V4SI 0 "register_operand")
7157   (match_operand:V2DF 1 "vector_operand")
7158   (match_operand:V2DF 2 "vector_operand")]
7159  "TARGET_SSE2"
7160{
7161  rtx tmp0, tmp1, tmp2;
7162
7163  if (TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
7164    {
7165      tmp0 = gen_reg_rtx (V4DFmode);
7166      tmp1 = force_reg (V2DFmode, operands[1]);
7167
7168      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
7169      emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0));
7170    }
7171  else
7172    {
7173      tmp0 = gen_reg_rtx (V4SImode);
7174      tmp1 = gen_reg_rtx (V4SImode);
7175      tmp2 = gen_reg_rtx (V2DImode);
7176
7177      emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1]));
7178      emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2]));
7179      emit_insn (gen_vec_interleave_lowv2di (tmp2,
7180					     gen_lowpart (V2DImode, tmp0),
7181					     gen_lowpart (V2DImode, tmp1)));
7182      emit_move_insn (operands[0], gen_lowpart (V4SImode, tmp2));
7183    }
7184  DONE;
7185})
7186
7187;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7188;;
7189;; Parallel single-precision floating point element swizzling
7190;;
7191;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7192
7193(define_expand "sse_movhlps_exp"
7194  [(set (match_operand:V4SF 0 "nonimmediate_operand")
7195	(vec_select:V4SF
7196	  (vec_concat:V8SF
7197	    (match_operand:V4SF 1 "nonimmediate_operand")
7198	    (match_operand:V4SF 2 "nonimmediate_operand"))
7199	  (parallel [(const_int 6)
7200		     (const_int 7)
7201		     (const_int 2)
7202		     (const_int 3)])))]
7203  "TARGET_SSE"
7204{
7205  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7206
7207  emit_insn (gen_sse_movhlps (dst, operands[1], operands[2]));
7208
7209  /* Fix up the destination if needed.  */
7210  if (dst != operands[0])
7211    emit_move_insn (operands[0], dst);
7212
7213  DONE;
7214})
7215
7216(define_insn "sse_movhlps"
7217  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
7218	(vec_select:V4SF
7219	  (vec_concat:V8SF
7220	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7221	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,o,o,v"))
7222	  (parallel [(const_int 6)
7223		     (const_int 7)
7224		     (const_int 2)
7225		     (const_int 3)])))]
7226  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7227  "@
7228   movhlps\t{%2, %0|%0, %2}
7229   vmovhlps\t{%2, %1, %0|%0, %1, %2}
7230   movlps\t{%H2, %0|%0, %H2}
7231   vmovlps\t{%H2, %1, %0|%0, %1, %H2}
7232   %vmovhps\t{%2, %0|%q0, %2}"
7233  [(set_attr "isa" "noavx,avx,noavx,avx,*")
7234   (set_attr "type" "ssemov")
7235   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7236   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7237
7238(define_expand "sse_movlhps_exp"
7239  [(set (match_operand:V4SF 0 "nonimmediate_operand")
7240	(vec_select:V4SF
7241	  (vec_concat:V8SF
7242	    (match_operand:V4SF 1 "nonimmediate_operand")
7243	    (match_operand:V4SF 2 "nonimmediate_operand"))
7244	  (parallel [(const_int 0)
7245		     (const_int 1)
7246		     (const_int 4)
7247		     (const_int 5)])))]
7248  "TARGET_SSE"
7249{
7250  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7251
7252  emit_insn (gen_sse_movlhps (dst, operands[1], operands[2]));
7253
7254  /* Fix up the destination if needed.  */
7255  if (dst != operands[0])
7256    emit_move_insn (operands[0], dst);
7257
7258  DONE;
7259})
7260
7261(define_insn "sse_movlhps"
7262  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
7263	(vec_select:V4SF
7264	  (vec_concat:V8SF
7265	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7266	    (match_operand:V4SF 2 "nonimmediate_operand" " x,v,m,v,v"))
7267	  (parallel [(const_int 0)
7268		     (const_int 1)
7269		     (const_int 4)
7270		     (const_int 5)])))]
7271  "TARGET_SSE && ix86_binary_operator_ok (UNKNOWN, V4SFmode, operands)"
7272  "@
7273   movlhps\t{%2, %0|%0, %2}
7274   vmovlhps\t{%2, %1, %0|%0, %1, %2}
7275   movhps\t{%2, %0|%0, %q2}
7276   vmovhps\t{%2, %1, %0|%0, %1, %q2}
7277   %vmovlps\t{%2, %H0|%H0, %2}"
7278  [(set_attr "isa" "noavx,avx,noavx,avx,*")
7279   (set_attr "type" "ssemov")
7280   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7281   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7282
7283(define_insn "<mask_codefor>avx512f_unpckhps512<mask_name>"
7284  [(set (match_operand:V16SF 0 "register_operand" "=v")
7285	(vec_select:V16SF
7286	  (vec_concat:V32SF
7287	    (match_operand:V16SF 1 "register_operand" "v")
7288	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7289	  (parallel [(const_int 2) (const_int 18)
7290		     (const_int 3) (const_int 19)
7291		     (const_int 6) (const_int 22)
7292		     (const_int 7) (const_int 23)
7293		     (const_int 10) (const_int 26)
7294		     (const_int 11) (const_int 27)
7295		     (const_int 14) (const_int 30)
7296		     (const_int 15) (const_int 31)])))]
7297  "TARGET_AVX512F"
7298  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7299  [(set_attr "type" "sselog")
7300   (set_attr "prefix" "evex")
7301   (set_attr "mode" "V16SF")])
7302
7303;; Recall that the 256-bit unpck insns only shuffle within their lanes.
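;; For example, unpacking the high halves of each lane of A = {a0,...,a7} and
;; B = {b0,...,b7} below yields {a2,b2,a3,b3 | a6,b6,a7,b7}: indices 0-7 of the
;; vec_concat refer to operand 1 and indices 8-15 to operand 2, and the two
;; 128-bit lanes are unpacked independently.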
7304(define_insn "avx_unpckhps256<mask_name>"
7305  [(set (match_operand:V8SF 0 "register_operand" "=v")
7306	(vec_select:V8SF
7307	  (vec_concat:V16SF
7308	    (match_operand:V8SF 1 "register_operand" "v")
7309	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7310	  (parallel [(const_int 2) (const_int 10)
7311		     (const_int 3) (const_int 11)
7312		     (const_int 6) (const_int 14)
7313		     (const_int 7) (const_int 15)])))]
7314  "TARGET_AVX && <mask_avx512vl_condition>"
7315  "vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7316  [(set_attr "type" "sselog")
7317   (set_attr "prefix" "vex")
7318   (set_attr "mode" "V8SF")])
7319
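;; A true full-width interleave-high of two V8SF vectors cannot be expressed
;; with a single in-lane unpck, so the expander below combines an in-lane
;; unpcklps (temporary 3), an in-lane unpckhps (temporary 4) and a final
;; cross-lane select of their high halves, giving {a4,b4,a5,b5,a6,b6,a7,b7}.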
7320(define_expand "vec_interleave_highv8sf"
7321  [(set (match_dup 3)
7322	(vec_select:V8SF
7323	  (vec_concat:V16SF
7324	    (match_operand:V8SF 1 "register_operand")
7325	    (match_operand:V8SF 2 "nonimmediate_operand"))
7326	  (parallel [(const_int 0) (const_int 8)
7327		     (const_int 1) (const_int 9)
7328		     (const_int 4) (const_int 12)
7329		     (const_int 5) (const_int 13)])))
7330   (set (match_dup 4)
7331	(vec_select:V8SF
7332	  (vec_concat:V16SF
7333	    (match_dup 1)
7334	    (match_dup 2))
7335	  (parallel [(const_int 2) (const_int 10)
7336		     (const_int 3) (const_int 11)
7337		     (const_int 6) (const_int 14)
7338		     (const_int 7) (const_int 15)])))
7339   (set (match_operand:V8SF 0 "register_operand")
7340	(vec_select:V8SF
7341	  (vec_concat:V16SF
7342	    (match_dup 3)
7343	    (match_dup 4))
7344	  (parallel [(const_int 4) (const_int 5)
7345		     (const_int 6) (const_int 7)
7346		     (const_int 12) (const_int 13)
7347		     (const_int 14) (const_int 15)])))]
7348 "TARGET_AVX"
7349{
7350  operands[3] = gen_reg_rtx (V8SFmode);
7351  operands[4] = gen_reg_rtx (V8SFmode);
7352})
7353
7354(define_insn "vec_interleave_highv4sf<mask_name>"
7355  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7356	(vec_select:V4SF
7357	  (vec_concat:V8SF
7358	    (match_operand:V4SF 1 "register_operand" "0,v")
7359	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7360	  (parallel [(const_int 2) (const_int 6)
7361		     (const_int 3) (const_int 7)])))]
7362  "TARGET_SSE && <mask_avx512vl_condition>"
7363  "@
7364   unpckhps\t{%2, %0|%0, %2}
7365   vunpckhps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7366  [(set_attr "isa" "noavx,avx")
7367   (set_attr "type" "sselog")
7368   (set_attr "prefix" "orig,vex")
7369   (set_attr "mode" "V4SF")])
7370
7371(define_insn "<mask_codefor>avx512f_unpcklps512<mask_name>"
7372  [(set (match_operand:V16SF 0 "register_operand" "=v")
7373	(vec_select:V16SF
7374	  (vec_concat:V32SF
7375	    (match_operand:V16SF 1 "register_operand" "v")
7376	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
7377	  (parallel [(const_int 0) (const_int 16)
7378		     (const_int 1) (const_int 17)
7379		     (const_int 4) (const_int 20)
7380		     (const_int 5) (const_int 21)
7381		     (const_int 8) (const_int 24)
7382		     (const_int 9) (const_int 25)
7383		     (const_int 12) (const_int 28)
7384		     (const_int 13) (const_int 29)])))]
7385  "TARGET_AVX512F"
7386  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7387  [(set_attr "type" "sselog")
7388   (set_attr "prefix" "evex")
7389   (set_attr "mode" "V16SF")])
7390
7391;; Recall that the 256-bit unpck insns only shuffle within their lanes.
7392(define_insn "avx_unpcklps256<mask_name>"
7393  [(set (match_operand:V8SF 0 "register_operand" "=v")
7394	(vec_select:V8SF
7395	  (vec_concat:V16SF
7396	    (match_operand:V8SF 1 "register_operand" "v")
7397	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7398	  (parallel [(const_int 0) (const_int 8)
7399		     (const_int 1) (const_int 9)
7400		     (const_int 4) (const_int 12)
7401		     (const_int 5) (const_int 13)])))]
7402  "TARGET_AVX && <mask_avx512vl_condition>"
7403  "vunpcklps\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
7404  [(set_attr "type" "sselog")
7405   (set_attr "prefix" "vex")
7406   (set_attr "mode" "V8SF")])
7407
7408(define_insn "unpcklps128_mask"
7409  [(set (match_operand:V4SF 0 "register_operand" "=v")
7410	(vec_merge:V4SF
7411	  (vec_select:V4SF
7412	    (vec_concat:V8SF
7413	      (match_operand:V4SF 1 "register_operand" "v")
7414	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7415	    (parallel [(const_int 0) (const_int 4)
7416		      (const_int 1) (const_int 5)]))
7417	  (match_operand:V4SF 3 "nonimm_or_0_operand" "0C")
7418	  (match_operand:QI 4 "register_operand" "Yk")))]
7419  "TARGET_AVX512VL"
7420  "vunpcklps\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
7421  [(set_attr "type" "sselog")
7422   (set_attr "prefix" "evex")
7423   (set_attr "mode" "V4SF")])
7424
7425(define_expand "vec_interleave_lowv8sf"
7426  [(set (match_dup 3)
7427	(vec_select:V8SF
7428	  (vec_concat:V16SF
7429	    (match_operand:V8SF 1 "register_operand")
7430	    (match_operand:V8SF 2 "nonimmediate_operand"))
7431	  (parallel [(const_int 0) (const_int 8)
7432		     (const_int 1) (const_int 9)
7433		     (const_int 4) (const_int 12)
7434		     (const_int 5) (const_int 13)])))
7435   (set (match_dup 4)
7436	(vec_select:V8SF
7437	  (vec_concat:V16SF
7438	    (match_dup 1)
7439	    (match_dup 2))
7440	  (parallel [(const_int 2) (const_int 10)
7441		     (const_int 3) (const_int 11)
7442		     (const_int 6) (const_int 14)
7443		     (const_int 7) (const_int 15)])))
7444   (set (match_operand:V8SF 0 "register_operand")
7445	(vec_select:V8SF
7446	  (vec_concat:V16SF
7447	    (match_dup 3)
7448	    (match_dup 4))
7449	  (parallel [(const_int 0) (const_int 1)
7450		     (const_int 2) (const_int 3)
7451		     (const_int 8) (const_int 9)
7452		     (const_int 10) (const_int 11)])))]
7453 "TARGET_AVX"
7454{
7455  operands[3] = gen_reg_rtx (V8SFmode);
7456  operands[4] = gen_reg_rtx (V8SFmode);
7457})
7458
7459(define_insn "vec_interleave_lowv4sf"
7460  [(set (match_operand:V4SF 0 "register_operand" "=x,v")
7461	(vec_select:V4SF
7462	  (vec_concat:V8SF
7463	    (match_operand:V4SF 1 "register_operand" "0,v")
7464	    (match_operand:V4SF 2 "vector_operand" "xBm,vm"))
7465	  (parallel [(const_int 0) (const_int 4)
7466		     (const_int 1) (const_int 5)])))]
7467  "TARGET_SSE"
7468  "@
7469   unpcklps\t{%2, %0|%0, %2}
7470   vunpcklps\t{%2, %1, %0|%0, %1, %2}"
7471  [(set_attr "isa" "noavx,avx")
7472   (set_attr "type" "sselog")
7473   (set_attr "prefix" "orig,maybe_evex")
7474   (set_attr "mode" "V4SF")])
7475
;; These are modeled with the same vec_concat as the others so that we
;; capture users of shufps that can use the new instructions.
7478(define_insn "avx_movshdup256<mask_name>"
7479  [(set (match_operand:V8SF 0 "register_operand" "=v")
7480	(vec_select:V8SF
7481	  (vec_concat:V16SF
7482	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7483	    (match_dup 1))
7484	  (parallel [(const_int 1) (const_int 1)
7485		     (const_int 3) (const_int 3)
7486		     (const_int 5) (const_int 5)
7487		     (const_int 7) (const_int 7)])))]
7488  "TARGET_AVX && <mask_avx512vl_condition>"
7489  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7490  [(set_attr "type" "sse")
7491   (set_attr "prefix" "vex")
7492   (set_attr "mode" "V8SF")])
7493
7494(define_insn "sse3_movshdup<mask_name>"
7495  [(set (match_operand:V4SF 0 "register_operand" "=v")
7496	(vec_select:V4SF
7497	  (vec_concat:V8SF
7498	    (match_operand:V4SF 1 "vector_operand" "vBm")
7499	    (match_dup 1))
7500	  (parallel [(const_int 1)
7501		     (const_int 1)
7502		     (const_int 7)
7503		     (const_int 7)])))]
7504  "TARGET_SSE3 && <mask_avx512vl_condition>"
7505  "%vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7506  [(set_attr "type" "sse")
7507   (set_attr "prefix_rep" "1")
7508   (set_attr "prefix" "maybe_vex")
7509   (set_attr "mode" "V4SF")])
7510
7511(define_insn "<mask_codefor>avx512f_movshdup512<mask_name>"
7512  [(set (match_operand:V16SF 0 "register_operand" "=v")
7513	(vec_select:V16SF
7514	  (vec_concat:V32SF
7515	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7516	    (match_dup 1))
7517	  (parallel [(const_int 1) (const_int 1)
7518		     (const_int 3) (const_int 3)
7519		     (const_int 5) (const_int 5)
7520		     (const_int 7) (const_int 7)
7521		     (const_int 9) (const_int 9)
7522		     (const_int 11) (const_int 11)
7523		     (const_int 13) (const_int 13)
7524		     (const_int 15) (const_int 15)])))]
7525  "TARGET_AVX512F"
7526  "vmovshdup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7527  [(set_attr "type" "sse")
7528   (set_attr "prefix" "evex")
7529   (set_attr "mode" "V16SF")])
7530
7531(define_insn "avx_movsldup256<mask_name>"
7532  [(set (match_operand:V8SF 0 "register_operand" "=v")
7533	(vec_select:V8SF
7534	  (vec_concat:V16SF
7535	    (match_operand:V8SF 1 "nonimmediate_operand" "vm")
7536	    (match_dup 1))
7537	  (parallel [(const_int 0) (const_int 0)
7538		     (const_int 2) (const_int 2)
7539		     (const_int 4) (const_int 4)
7540		     (const_int 6) (const_int 6)])))]
7541  "TARGET_AVX && <mask_avx512vl_condition>"
7542  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7543  [(set_attr "type" "sse")
7544   (set_attr "prefix" "vex")
7545   (set_attr "mode" "V8SF")])
7546
7547(define_insn "sse3_movsldup<mask_name>"
7548  [(set (match_operand:V4SF 0 "register_operand" "=v")
7549	(vec_select:V4SF
7550	  (vec_concat:V8SF
7551	    (match_operand:V4SF 1 "vector_operand" "vBm")
7552	    (match_dup 1))
7553	  (parallel [(const_int 0)
7554		     (const_int 0)
7555		     (const_int 6)
7556		     (const_int 6)])))]
7557  "TARGET_SSE3 && <mask_avx512vl_condition>"
7558  "%vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7559  [(set_attr "type" "sse")
7560   (set_attr "prefix_rep" "1")
7561   (set_attr "prefix" "maybe_vex")
7562   (set_attr "mode" "V4SF")])
7563
7564(define_insn "<mask_codefor>avx512f_movsldup512<mask_name>"
7565  [(set (match_operand:V16SF 0 "register_operand" "=v")
7566	(vec_select:V16SF
7567	  (vec_concat:V32SF
7568	    (match_operand:V16SF 1 "nonimmediate_operand" "vm")
7569	    (match_dup 1))
7570	  (parallel [(const_int 0) (const_int 0)
7571		     (const_int 2) (const_int 2)
7572		     (const_int 4) (const_int 4)
7573		     (const_int 6) (const_int 6)
7574		     (const_int 8) (const_int 8)
7575		     (const_int 10) (const_int 10)
7576		     (const_int 12) (const_int 12)
7577		     (const_int 14) (const_int 14)])))]
7578  "TARGET_AVX512F"
7579  "vmovsldup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
7580  [(set_attr "type" "sse")
7581   (set_attr "prefix" "evex")
7582   (set_attr "mode" "V16SF")])
7583
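;; The expander below rewrites the 8-bit shufps immediate as explicit element
;; indices into the vec_concat of the two sources (0-7 = operand 1, 8-15 =
;; operand 2), duplicated for the high lane.  As an illustrative example,
;; mask 0x1b becomes the selection {3, 2, 9, 8, 7, 6, 13, 12}, i.e.
;; {a3,a2,b1,b0 | a7,a6,b5,b4}.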
7584(define_expand "avx_shufps256<mask_expand4_name>"
7585  [(match_operand:V8SF 0 "register_operand")
7586   (match_operand:V8SF 1 "register_operand")
7587   (match_operand:V8SF 2 "nonimmediate_operand")
7588   (match_operand:SI 3 "const_int_operand")]
7589  "TARGET_AVX"
7590{
7591  int mask = INTVAL (operands[3]);
7592  emit_insn (gen_avx_shufps256_1<mask_expand4_name> (operands[0],
7593						     operands[1],
7594						     operands[2],
7595						     GEN_INT ((mask >> 0) & 3),
7596						     GEN_INT ((mask >> 2) & 3),
7597						     GEN_INT (((mask >> 4) & 3) + 8),
7598						     GEN_INT (((mask >> 6) & 3) + 8),
7599						     GEN_INT (((mask >> 0) & 3) + 4),
7600						     GEN_INT (((mask >> 2) & 3) + 4),
7601						     GEN_INT (((mask >> 4) & 3) + 12),
7602						     GEN_INT (((mask >> 6) & 3) + 12)
7603						     <mask_expand4_args>));
7604  DONE;
7605})
7606
;; Each 2-bit field of the shufps immediate applies to both 128-bit lanes, so
;; one field in the mask selects 2 result elements; this is why operands 7-10
;; must equal operands 3-6 plus 4.
7608(define_insn "avx_shufps256_1<mask_name>"
7609  [(set (match_operand:V8SF 0 "register_operand" "=v")
7610	(vec_select:V8SF
7611	  (vec_concat:V16SF
7612	    (match_operand:V8SF 1 "register_operand" "v")
7613	    (match_operand:V8SF 2 "nonimmediate_operand" "vm"))
7614	  (parallel [(match_operand 3  "const_0_to_3_operand"  )
7615		     (match_operand 4  "const_0_to_3_operand"  )
7616		     (match_operand 5  "const_8_to_11_operand" )
7617		     (match_operand 6  "const_8_to_11_operand" )
7618		     (match_operand 7  "const_4_to_7_operand"  )
7619		     (match_operand 8  "const_4_to_7_operand"  )
7620		     (match_operand 9  "const_12_to_15_operand")
7621		     (match_operand 10 "const_12_to_15_operand")])))]
7622  "TARGET_AVX
7623   && <mask_avx512vl_condition>
7624   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
7625       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
7626       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
7627       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4))"
7628{
7629  int mask;
7630  mask = INTVAL (operands[3]);
7631  mask |= INTVAL (operands[4]) << 2;
7632  mask |= (INTVAL (operands[5]) - 8) << 4;
7633  mask |= (INTVAL (operands[6]) - 8) << 6;
7634  operands[3] = GEN_INT (mask);
7635
7636  return "vshufps\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
7637}
7638  [(set_attr "type" "sseshuf")
7639   (set_attr "length_immediate" "1")
7640   (set_attr "prefix" "<mask_prefix>")
7641   (set_attr "mode" "V8SF")])
7642
7643(define_expand "sse_shufps<mask_expand4_name>"
7644  [(match_operand:V4SF 0 "register_operand")
7645   (match_operand:V4SF 1 "register_operand")
7646   (match_operand:V4SF 2 "vector_operand")
7647   (match_operand:SI 3 "const_int_operand")]
7648  "TARGET_SSE"
7649{
7650  int mask = INTVAL (operands[3]);
7651  emit_insn (gen_sse_shufps_v4sf<mask_expand4_name> (operands[0],
7652						     operands[1],
7653						     operands[2],
7654						     GEN_INT ((mask >> 0) & 3),
7655						     GEN_INT ((mask >> 2) & 3),
7656						     GEN_INT (((mask >> 4) & 3) + 4),
7657						     GEN_INT (((mask >> 6) & 3) + 4)
7658						     <mask_expand4_args>));
7659  DONE;
7660})
7661
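;; In the 128-bit shufps patterns below, operands 5 and 6 index elements of
;; operand 2 within the vec_concat (4-7), so 4 is subtracted when the shufps
;; immediate is reassembled.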
7662(define_insn "sse_shufps_v4sf_mask"
7663  [(set (match_operand:V4SF 0 "register_operand" "=v")
7664    (vec_merge:V4SF
7665	  (vec_select:V4SF
7666	    (vec_concat:V8SF
7667	      (match_operand:V4SF 1 "register_operand" "v")
7668	      (match_operand:V4SF 2 "nonimmediate_operand" "vm"))
7669	    (parallel [(match_operand 3 "const_0_to_3_operand")
7670	               (match_operand 4 "const_0_to_3_operand")
7671	               (match_operand 5 "const_4_to_7_operand")
7672	               (match_operand 6 "const_4_to_7_operand")]))
7673      (match_operand:V4SF 7 "nonimm_or_0_operand" "0C")
7674      (match_operand:QI 8 "register_operand" "Yk")))]
7675  "TARGET_AVX512VL"
7676{
7677  int mask = 0;
7678  mask |= INTVAL (operands[3]) << 0;
7679  mask |= INTVAL (operands[4]) << 2;
7680  mask |= (INTVAL (operands[5]) - 4) << 4;
7681  mask |= (INTVAL (operands[6]) - 4) << 6;
7682  operands[3] = GEN_INT (mask);
7683
7684  return "vshufps\t{%3, %2, %1, %0%{%8%}%N7|%0%{%8%}%N7, %1, %2, %3}";
7685}
7686  [(set_attr "type" "sseshuf")
7687   (set_attr "length_immediate" "1")
7688   (set_attr "prefix" "evex")
7689   (set_attr "mode" "V4SF")])
7690
7691(define_insn "sse_shufps_<mode>"
7692  [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
7693	(vec_select:VI4F_128
7694	  (vec_concat:<ssedoublevecmode>
7695	    (match_operand:VI4F_128 1 "register_operand" "0,v")
7696	    (match_operand:VI4F_128 2 "vector_operand" "xBm,vm"))
7697	  (parallel [(match_operand 3 "const_0_to_3_operand")
7698		     (match_operand 4 "const_0_to_3_operand")
7699		     (match_operand 5 "const_4_to_7_operand")
7700		     (match_operand 6 "const_4_to_7_operand")])))]
7701  "TARGET_SSE"
7702{
7703  int mask = 0;
7704  mask |= INTVAL (operands[3]) << 0;
7705  mask |= INTVAL (operands[4]) << 2;
7706  mask |= (INTVAL (operands[5]) - 4) << 4;
7707  mask |= (INTVAL (operands[6]) - 4) << 6;
7708  operands[3] = GEN_INT (mask);
7709
7710  switch (which_alternative)
7711    {
7712    case 0:
7713      return "shufps\t{%3, %2, %0|%0, %2, %3}";
7714    case 1:
7715      return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
7716    default:
7717      gcc_unreachable ();
7718    }
7719}
7720  [(set_attr "isa" "noavx,avx")
7721   (set_attr "type" "sseshuf")
7722   (set_attr "length_immediate" "1")
7723   (set_attr "prefix" "orig,maybe_evex")
7724   (set_attr "mode" "V4SF")])
7725
7726(define_insn "sse_storehps"
7727  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,v,v")
7728	(vec_select:V2SF
7729	  (match_operand:V4SF 1 "nonimmediate_operand" "v,v,o")
7730	  (parallel [(const_int 2) (const_int 3)])))]
7731  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7732  "@
7733   %vmovhps\t{%1, %0|%q0, %1}
7734   %vmovhlps\t{%1, %d0|%d0, %1}
7735   %vmovlps\t{%H1, %d0|%d0, %H1}"
7736  [(set_attr "type" "ssemov")
7737   (set_attr "prefix" "maybe_vex")
7738   (set_attr "mode" "V2SF,V4SF,V2SF")])
7739
7740(define_expand "sse_loadhps_exp"
7741  [(set (match_operand:V4SF 0 "nonimmediate_operand")
7742	(vec_concat:V4SF
7743	  (vec_select:V2SF
7744	    (match_operand:V4SF 1 "nonimmediate_operand")
7745	    (parallel [(const_int 0) (const_int 1)]))
7746	  (match_operand:V2SF 2 "nonimmediate_operand")))]
7747  "TARGET_SSE"
7748{
7749  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7750
7751  emit_insn (gen_sse_loadhps (dst, operands[1], operands[2]));
7752
7753  /* Fix up the destination if needed.  */
7754  if (dst != operands[0])
7755    emit_move_insn (operands[0], dst);
7756
7757  DONE;
7758})
7759
7760(define_insn "sse_loadhps"
7761  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,o")
7762	(vec_concat:V4SF
7763	  (vec_select:V2SF
7764	    (match_operand:V4SF 1 "nonimmediate_operand" " 0,v,0,v,0")
7765	    (parallel [(const_int 0) (const_int 1)]))
7766	  (match_operand:V2SF 2 "nonimmediate_operand"   " m,m,x,v,v")))]
7767  "TARGET_SSE"
7768  "@
7769   movhps\t{%2, %0|%0, %q2}
7770   vmovhps\t{%2, %1, %0|%0, %1, %q2}
7771   movlhps\t{%2, %0|%0, %2}
7772   vmovlhps\t{%2, %1, %0|%0, %1, %2}
7773   %vmovlps\t{%2, %H0|%H0, %2}"
7774  [(set_attr "isa" "noavx,avx,noavx,avx,*")
7775   (set_attr "type" "ssemov")
7776   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7777   (set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
7778
7779(define_insn "sse_storelps"
7780  [(set (match_operand:V2SF 0 "nonimmediate_operand"   "=m,v,v")
7781	(vec_select:V2SF
7782	  (match_operand:V4SF 1 "nonimmediate_operand" " v,v,m")
7783	  (parallel [(const_int 0) (const_int 1)])))]
7784  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
7785  "@
7786   %vmovlps\t{%1, %0|%q0, %1}
7787   %vmovaps\t{%1, %0|%0, %1}
7788   %vmovlps\t{%1, %d0|%d0, %q1}"
7789  [(set_attr "type" "ssemov")
7790   (set_attr "prefix" "maybe_vex")
7791   (set_attr "mode" "V2SF,V4SF,V2SF")])
7792
7793(define_expand "sse_loadlps_exp"
7794  [(set (match_operand:V4SF 0 "nonimmediate_operand")
7795	(vec_concat:V4SF
7796	  (match_operand:V2SF 2 "nonimmediate_operand")
7797	  (vec_select:V2SF
7798	    (match_operand:V4SF 1 "nonimmediate_operand")
7799	    (parallel [(const_int 2) (const_int 3)]))))]
7800  "TARGET_SSE"
7801{
7802  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V4SFmode, operands);
7803
7804  emit_insn (gen_sse_loadlps (dst, operands[1], operands[2]));
7805
7806  /* Fix up the destination if needed.  */
7807  if (dst != operands[0])
7808    emit_move_insn (operands[0], dst);
7809
7810  DONE;
7811})
7812
7813(define_insn "sse_loadlps"
7814  [(set (match_operand:V4SF 0 "nonimmediate_operand"     "=x,v,x,v,m")
7815	(vec_concat:V4SF
7816	  (match_operand:V2SF 2 "nonimmediate_operand"   " 0,v,m,m,v")
7817	  (vec_select:V2SF
7818	    (match_operand:V4SF 1 "nonimmediate_operand" " x,v,0,v,0")
7819	    (parallel [(const_int 2) (const_int 3)]))))]
7820  "TARGET_SSE"
7821  "@
7822   shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
7823   vshufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
7824   movlps\t{%2, %0|%0, %q2}
7825   vmovlps\t{%2, %1, %0|%0, %1, %q2}
7826   %vmovlps\t{%2, %0|%q0, %2}"
7827  [(set_attr "isa" "noavx,avx,noavx,avx,*")
7828   (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
7829   (set (attr "length_immediate")
7830     (if_then_else (eq_attr "alternative" "0,1")
7831		   (const_string "1")
7832		   (const_string "*")))
7833   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
7834   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
7835
7836(define_insn "sse_movss"
7837  [(set (match_operand:V4SF 0 "register_operand"   "=x,v")
7838	(vec_merge:V4SF
7839	  (match_operand:V4SF 2 "register_operand" " x,v")
7840	  (match_operand:V4SF 1 "register_operand" " 0,v")
7841	  (const_int 1)))]
7842  "TARGET_SSE"
7843  "@
7844   movss\t{%2, %0|%0, %2}
7845   vmovss\t{%2, %1, %0|%0, %1, %2}"
7846  [(set_attr "isa" "noavx,avx")
7847   (set_attr "type" "ssemov")
7848   (set_attr "prefix" "orig,maybe_evex")
7849   (set_attr "mode" "SF")])
7850
7851(define_insn "avx2_vec_dup<mode>"
7852  [(set (match_operand:VF1_128_256 0 "register_operand" "=v")
7853	(vec_duplicate:VF1_128_256
7854	  (vec_select:SF
7855	    (match_operand:V4SF 1 "register_operand" "v")
7856	    (parallel [(const_int 0)]))))]
7857  "TARGET_AVX2"
7858  "vbroadcastss\t{%1, %0|%0, %1}"
7859  [(set_attr "type" "sselog1")
7860    (set_attr "prefix" "maybe_evex")
7861    (set_attr "mode" "<MODE>")])
7862
7863(define_insn "avx2_vec_dupv8sf_1"
7864  [(set (match_operand:V8SF 0 "register_operand" "=v")
7865	(vec_duplicate:V8SF
7866	  (vec_select:SF
7867	    (match_operand:V8SF 1 "register_operand" "v")
7868	    (parallel [(const_int 0)]))))]
7869  "TARGET_AVX2"
7870  "vbroadcastss\t{%x1, %0|%0, %x1}"
7871  [(set_attr "type" "sselog1")
7872    (set_attr "prefix" "maybe_evex")
7873    (set_attr "mode" "V8SF")])
7874
7875(define_insn "avx512f_vec_dup<mode>_1"
7876  [(set (match_operand:VF_512 0 "register_operand" "=v")
7877	(vec_duplicate:VF_512
7878	  (vec_select:<ssescalarmode>
7879	    (match_operand:VF_512 1 "register_operand" "v")
7880	    (parallel [(const_int 0)]))))]
7881  "TARGET_AVX512F"
7882  "vbroadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}"
7883  [(set_attr "type" "sselog1")
7884    (set_attr "prefix" "evex")
7885    (set_attr "mode" "<MODE>")])
7886
;; Although insertps can take a register source, we prefer unpcklps with a
;; register source since its encoding is shorter.
7889(define_insn "*vec_concatv2sf_sse4_1"
7890  [(set (match_operand:V2SF 0 "register_operand"
7891	  "=Yr,*x, v,Yr,*x,v,v,*y ,*y")
7892	(vec_concat:V2SF
7893	  (match_operand:SF 1 "nonimmediate_operand"
7894	  "  0, 0,Yv, 0,0, v,m, 0 , m")
7895	  (match_operand:SF 2 "nonimm_or_0_operand"
7896	  " Yr,*x,Yv, m,m, m,C,*ym, C")))]
7897  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
7898  "@
7899   unpcklps\t{%2, %0|%0, %2}
7900   unpcklps\t{%2, %0|%0, %2}
7901   vunpcklps\t{%2, %1, %0|%0, %1, %2}
7902   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7903   insertps\t{$0x10, %2, %0|%0, %2, 0x10}
7904   vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
7905   %vmovss\t{%1, %0|%0, %1}
7906   punpckldq\t{%2, %0|%0, %2}
7907   movd\t{%1, %0|%0, %1}"
7908  [(set (attr "isa")
7909     (cond [(eq_attr "alternative" "0,1,3,4")
7910	      (const_string "noavx")
7911	    (eq_attr "alternative" "2,5")
7912	      (const_string "avx")
7913	   ]
7914	   (const_string "*")))
7915   (set (attr "type")
7916     (cond [(eq_attr "alternative" "6")
7917	      (const_string "ssemov")
7918	    (eq_attr "alternative" "7")
7919	      (const_string "mmxcvt")
7920	    (eq_attr "alternative" "8")
7921	      (const_string "mmxmov")
7922	   ]
7923	   (const_string "sselog")))
7924   (set (attr "mmx_isa")
7925     (if_then_else (eq_attr "alternative" "7,8")
7926		   (const_string "native")
7927		   (const_string "*")))
7928   (set (attr "prefix_data16")
7929     (if_then_else (eq_attr "alternative" "3,4")
7930		   (const_string "1")
7931		   (const_string "*")))
7932   (set (attr "prefix_extra")
7933     (if_then_else (eq_attr "alternative" "3,4,5")
7934		   (const_string "1")
7935		   (const_string "*")))
7936   (set (attr "length_immediate")
7937     (if_then_else (eq_attr "alternative" "3,4,5")
7938		   (const_string "1")
7939		   (const_string "*")))
7940   (set (attr "prefix")
7941     (cond [(eq_attr "alternative" "2,5")
7942	      (const_string "maybe_evex")
7943	    (eq_attr "alternative" "6")
7944	      (const_string "maybe_vex")
7945	   ]
7946	   (const_string "orig")))
7947   (set_attr "mode" "V4SF,V4SF,V4SF,V4SF,V4SF,V4SF,SF,DI,DI")])
7948
7949;; ??? In theory we can match memory for the MMX alternative, but allowing
7950;; vector_operand for operand 2 and *not* allowing memory for the SSE
7951;; alternatives pretty much forces the MMX alternative to be chosen.
7952(define_insn "*vec_concatv2sf_sse"
7953  [(set (match_operand:V2SF 0 "register_operand"     "=x,x,*y,*y")
7954	(vec_concat:V2SF
7955	  (match_operand:SF 1 "nonimmediate_operand" " 0,m, 0, m")
7956	  (match_operand:SF 2 "reg_or_0_operand"     " x,C,*y, C")))]
7957  "TARGET_SSE"
7958  "@
7959   unpcklps\t{%2, %0|%0, %2}
7960   movss\t{%1, %0|%0, %1}
7961   punpckldq\t{%2, %0|%0, %2}
7962   movd\t{%1, %0|%0, %1}"
7963  [(set_attr "mmx_isa" "*,*,native,native")
7964   (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
7965   (set_attr "mode" "V4SF,SF,DI,DI")])
7966
7967(define_insn "*vec_concatv4sf"
7968  [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
7969	(vec_concat:V4SF
7970	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
7971	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
7972  "TARGET_SSE"
7973  "@
7974   movlhps\t{%2, %0|%0, %2}
7975   vmovlhps\t{%2, %1, %0|%0, %1, %2}
7976   movhps\t{%2, %0|%0, %q2}
7977   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
7978  [(set_attr "isa" "noavx,avx,noavx,avx")
7979   (set_attr "type" "ssemov")
7980   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
7981   (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
7982
7983(define_insn "*vec_concatv4sf_0"
7984  [(set (match_operand:V4SF 0 "register_operand"       "=v")
7985	(vec_concat:V4SF
7986	  (match_operand:V2SF 1 "nonimmediate_operand" "vm")
7987	  (match_operand:V2SF 2 "const0_operand"       " C")))]
7988  "TARGET_SSE2"
7989  "%vmovq\t{%1, %0|%0, %1}"
7990  [(set_attr "type" "ssemov")
7991   (set_attr "prefix" "maybe_vex")
7992   (set_attr "mode" "DF")])
7993
7994;; Avoid combining registers from different units in a single alternative,
7995;; see comment above inline_secondary_memory_needed function in i386.c
7996(define_insn "vec_set<mode>_0"
7997  [(set (match_operand:VI4F_128 0 "nonimmediate_operand"
7998	  "=Yr,*x,v,v,v,x,x,v,Yr ,*x ,x  ,m ,m   ,m")
7999	(vec_merge:VI4F_128
8000	  (vec_duplicate:VI4F_128
8001	    (match_operand:<ssescalarmode> 2 "general_operand"
8002	  " Yr,*x,v,m,r ,m,x,v,*rm,*rm,*rm,!x,!*re,!*fF"))
8003	  (match_operand:VI4F_128 1 "nonimm_or_0_operand"
8004	  " C , C,C,C,C ,C,0,v,0  ,0  ,x  ,0 ,0   ,0")
8005	  (const_int 1)))]
8006  "TARGET_SSE"
8007  "@
8008   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8009   insertps\t{$0xe, %2, %0|%0, %2, 0xe}
8010   vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
8011   %vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
8012   %vmovd\t{%2, %0|%0, %2}
8013   movss\t{%2, %0|%0, %2}
8014   movss\t{%2, %0|%0, %2}
8015   vmovss\t{%2, %1, %0|%0, %1, %2}
8016   pinsrd\t{$0, %2, %0|%0, %2, 0}
8017   pinsrd\t{$0, %2, %0|%0, %2, 0}
8018   vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
8019   #
8020   #
8021   #"
8022  [(set (attr "isa")
8023     (cond [(eq_attr "alternative" "0,1,8,9")
8024	      (const_string "sse4_noavx")
8025	    (eq_attr "alternative" "2,7,10")
8026	      (const_string "avx")
8027	    (eq_attr "alternative" "3,4")
8028	      (const_string "sse2")
8029	    (eq_attr "alternative" "5,6")
8030	      (const_string "noavx")
8031	   ]
8032	   (const_string "*")))
8033   (set (attr "type")
8034     (cond [(eq_attr "alternative" "0,1,2,8,9,10")
8035	      (const_string "sselog")
8036	    (eq_attr "alternative" "12")
8037	      (const_string "imov")
8038	    (eq_attr "alternative" "13")
8039	      (const_string "fmov")
8040	   ]
8041	   (const_string "ssemov")))
8042   (set (attr "prefix_extra")
8043     (if_then_else (eq_attr "alternative" "8,9,10")
8044		   (const_string "1")
8045		   (const_string "*")))
8046   (set (attr "length_immediate")
8047     (if_then_else (eq_attr "alternative" "8,9,10")
8048		   (const_string "1")
8049		   (const_string "*")))
8050   (set (attr "prefix")
8051     (cond [(eq_attr "alternative" "0,1,5,6,8,9")
8052	      (const_string "orig")
8053	    (eq_attr "alternative" "2")
8054	      (const_string "maybe_evex")
8055	    (eq_attr "alternative" "3,4")
8056	      (const_string "maybe_vex")
8057	    (eq_attr "alternative" "7,10")
8058	      (const_string "vex")
8059	   ]
8060	   (const_string "*")))
8061   (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
8062   (set (attr "preferred_for_speed")
8063     (cond [(eq_attr "alternative" "4")
8064	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8065	   ]
8066	   (symbol_ref "true")))])
8067
8068;; A subset is vec_setv4sf.
8069(define_insn "*vec_setv4sf_sse4_1"
8070  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8071	(vec_merge:V4SF
8072	  (vec_duplicate:V4SF
8073	    (match_operand:SF 2 "nonimmediate_operand" "Yrm,*xm,vm"))
8074	  (match_operand:V4SF 1 "register_operand" "0,0,v")
8075	  (match_operand:SI 3 "const_int_operand")))]
8076  "TARGET_SSE4_1
8077   && ((unsigned) exact_log2 (INTVAL (operands[3]))
8078       < GET_MODE_NUNITS (V4SFmode))"
8079{
8080  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
8081  switch (which_alternative)
8082    {
8083    case 0:
8084    case 1:
8085      return "insertps\t{%3, %2, %0|%0, %2, %3}";
8086    case 2:
8087      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8088    default:
8089      gcc_unreachable ();
8090    }
8091}
8092  [(set_attr "isa" "noavx,noavx,avx")
8093   (set_attr "type" "sselog")
8094   (set_attr "prefix_data16" "1,1,*")
8095   (set_attr "prefix_extra" "1")
8096   (set_attr "length_immediate" "1")
8097   (set_attr "prefix" "orig,orig,maybe_evex")
8098   (set_attr "mode" "V4SF")])
8099
;; vinsertps, vmovss and vmovd all clear the higher bits of the destination
;; as well.
8101(define_insn "vec_set<mode>_0"
8102  [(set (match_operand:VI4F_256_512 0 "register_operand" "=v,v,v")
8103	(vec_merge:VI4F_256_512
8104	  (vec_duplicate:VI4F_256_512
8105	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "v,m,r"))
8106	  (match_operand:VI4F_256_512 1 "const0_operand" "C,C,C")
8107	  (const_int 1)))]
8108  "TARGET_AVX"
8109  "@
8110   vinsertps\t{$0xe, %2, %2, %x0|%x0, %2, %2, 0xe}
8111   vmov<ssescalarmodesuffix>\t{%x2, %x0|%x0, %2}
8112   vmovd\t{%2, %x0|%x0, %2}"
8113  [(set (attr "type")
8114     (if_then_else (eq_attr "alternative" "0")
8115		   (const_string "sselog")
8116		   (const_string "ssemov")))
8117   (set_attr "prefix" "maybe_evex")
8118   (set_attr "mode" "SF,<ssescalarmode>,SI")
8119   (set (attr "preferred_for_speed")
8120     (cond [(eq_attr "alternative" "2")
8121	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
8122	   ]
8123	   (symbol_ref "true")))])
8124
8125(define_insn "sse4_1_insertps"
8126  [(set (match_operand:V4SF 0 "register_operand" "=Yr,*x,v")
8127	(unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "Yrm,*xm,vm")
8128		      (match_operand:V4SF 1 "register_operand" "0,0,v")
8129		      (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
8130		     UNSPEC_INSERTPS))]
8131  "TARGET_SSE4_1"
8132{
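  /* With a memory source, insertps loads a single 32-bit element, so fold
     the source-element field (bits 7:6 of the immediate) into the address
     as a byte offset and clear it from the immediate.  */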
8133  if (MEM_P (operands[2]))
8134    {
8135      unsigned count_s = INTVAL (operands[3]) >> 6;
8136      if (count_s)
8137	operands[3] = GEN_INT (INTVAL (operands[3]) & 0x3f);
8138      operands[2] = adjust_address_nv (operands[2], SFmode, count_s * 4);
8139    }
8140  switch (which_alternative)
8141    {
8142    case 0:
8143    case 1:
8144      return "insertps\t{%3, %2, %0|%0, %2, %3}";
8145    case 2:
8146      return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8147    default:
8148      gcc_unreachable ();
8149    }
8150}
8151  [(set_attr "isa" "noavx,noavx,avx")
8152   (set_attr "type" "sselog")
8153   (set_attr "prefix_data16" "1,1,*")
8154   (set_attr "prefix_extra" "1")
8155   (set_attr "length_immediate" "1")
8156   (set_attr "prefix" "orig,orig,maybe_evex")
8157   (set_attr "mode" "V4SF")])
8158
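;; Writing element 0 of a vector in memory while the remaining elements come
;; from the same memory location is just a scalar store, so split it into one
;; after reload.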
8159(define_split
8160  [(set (match_operand:VI4F_128 0 "memory_operand")
8161	(vec_merge:VI4F_128
8162	  (vec_duplicate:VI4F_128
8163	    (match_operand:<ssescalarmode> 1 "nonmemory_operand"))
8164	  (match_dup 0)
8165	  (const_int 1)))]
8166  "TARGET_SSE && reload_completed"
8167  [(set (match_dup 0) (match_dup 1))]
8168  "operands[0] = adjust_address (operands[0], <ssescalarmode>mode, 0);")
8169
;; Standard scalar operation patterns which preserve the rest of the
;; vector for the combiner.
8172(define_insn "vec_setv2df_0"
8173  [(set (match_operand:V2DF 0 "register_operand"       "=x,v,x,v")
8174	(vec_merge:V2DF
8175	  (vec_duplicate:V2DF
8176	    (match_operand:DF 2 "nonimmediate_operand" " x,v,m,m"))
8177	  (match_operand:V2DF 1 "register_operand"     " 0,v,0,v")
8178	  (const_int 1)))]
8179  "TARGET_SSE2"
8180  "@
8181   movsd\t{%2, %0|%0, %2}
8182   vmovsd\t{%2, %1, %0|%0, %1, %2}
8183   movlpd\t{%2, %0|%0, %2}
8184   vmovlpd\t{%2, %1, %0|%0, %1, %2}"
8185  [(set_attr "isa" "noavx,avx,noavx,avx")
8186   (set_attr "type" "ssemov")
8187   (set_attr "mode" "DF")])
8188
8189(define_expand "vec_set<mode>"
8190  [(match_operand:V 0 "register_operand")
8191   (match_operand:<ssescalarmode> 1 "register_operand")
8192   (match_operand 2 "const_int_operand")]
8193  "TARGET_SSE"
8194{
8195  ix86_expand_vector_set (false, operands[0], operands[1],
8196			  INTVAL (operands[2]));
8197  DONE;
8198})
8199
8200(define_insn_and_split "*vec_extractv4sf_0"
8201  [(set (match_operand:SF 0 "nonimmediate_operand" "=v,m,f,r")
8202	(vec_select:SF
8203	  (match_operand:V4SF 1 "nonimmediate_operand" "vm,v,m,m")
8204	  (parallel [(const_int 0)])))]
8205  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8206  "#"
8207  "&& reload_completed"
8208  [(set (match_dup 0) (match_dup 1))]
8209  "operands[1] = gen_lowpart (SFmode, operands[1]);")
8210
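;; extractps can only store to a general register or to memory.  When the
;; destination ends up in an SSE register, the splitter below instead moves
;; the selected element into position 0 of the destination viewed as V4SF:
;; shufps for elements 1 and 3, unpckhps for element 2; element 0 is handled
;; by the *vec_extractv4sf_0 pattern above.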
8211(define_insn_and_split "*sse4_1_extractps"
8212  [(set (match_operand:SF 0 "nonimmediate_operand" "=rm,rm,rm,Yv,Yv")
8213	(vec_select:SF
8214	  (match_operand:V4SF 1 "register_operand" "Yr,*x,v,0,v")
8215	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n,n,n,n")])))]
8216  "TARGET_SSE4_1"
8217  "@
8218   extractps\t{%2, %1, %0|%0, %1, %2}
8219   extractps\t{%2, %1, %0|%0, %1, %2}
8220   vextractps\t{%2, %1, %0|%0, %1, %2}
8221   #
8222   #"
8223  "&& reload_completed && SSE_REG_P (operands[0])"
8224  [(const_int 0)]
8225{
8226  rtx dest = lowpart_subreg (V4SFmode, operands[0], SFmode);
8227  switch (INTVAL (operands[2]))
8228    {
8229    case 1:
8230    case 3:
8231      emit_insn (gen_sse_shufps_v4sf (dest, operands[1], operands[1],
8232				      operands[2], operands[2],
8233				      GEN_INT (INTVAL (operands[2]) + 4),
8234				      GEN_INT (INTVAL (operands[2]) + 4)));
8235      break;
8236    case 2:
8237      emit_insn (gen_vec_interleave_highv4sf (dest, operands[1], operands[1]));
8238      break;
8239    default:
8240      /* 0 should be handled by the *vec_extractv4sf_0 pattern above.  */
8241      gcc_unreachable ();
8242    }
8243  DONE;
8244}
8245  [(set_attr "isa" "noavx,noavx,avx,noavx,avx")
8246   (set_attr "type" "sselog,sselog,sselog,*,*")
8247   (set_attr "prefix_data16" "1,1,1,*,*")
8248   (set_attr "prefix_extra" "1,1,1,*,*")
8249   (set_attr "length_immediate" "1,1,1,*,*")
8250   (set_attr "prefix" "orig,orig,maybe_evex,*,*")
8251   (set_attr "mode" "V4SF,V4SF,V4SF,*,*")])
8252
8253(define_insn_and_split "*vec_extractv4sf_mem"
8254  [(set (match_operand:SF 0 "register_operand" "=v,*r,f")
8255	(vec_select:SF
8256	  (match_operand:V4SF 1 "memory_operand" "o,o,o")
8257	  (parallel [(match_operand 2 "const_0_to_3_operand" "n,n,n")])))]
8258  "TARGET_SSE"
8259  "#"
8260  "&& reload_completed"
8261  [(set (match_dup 0) (match_dup 1))]
8262{
8263  operands[1] = adjust_address (operands[1], SFmode, INTVAL (operands[2]) * 4);
8264})
8265
8266(define_mode_attr extract_type
8267  [(V16SF "avx512f") (V16SI "avx512f") (V8DF "avx512dq") (V8DI "avx512dq")])
8268
8269(define_mode_attr extract_suf
8270  [(V16SF "32x4") (V16SI "32x4") (V8DF "64x2") (V8DI "64x2")])
8271
8272(define_mode_iterator AVX512_VEC
8273  [(V8DF "TARGET_AVX512DQ") (V8DI "TARGET_AVX512DQ") V16SF V16SI])
8274
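;; The expander below encodes the selected 128-bit quarter as explicit element
;; indices (mask * 4 ... mask * 4 + 3 for the 32x4 forms, mask * 2 and
;; mask * 2 + 1 for the 64x2 forms); the *_1 insn patterns recover the
;; instruction immediate by shifting the first index back down.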
8275(define_expand "<extract_type>_vextract<shuffletype><extract_suf>_mask"
8276  [(match_operand:<ssequartermode> 0 "nonimmediate_operand")
8277   (match_operand:AVX512_VEC 1 "register_operand")
8278   (match_operand:SI 2 "const_0_to_3_operand")
8279   (match_operand:<ssequartermode> 3 "nonimmediate_operand")
8280   (match_operand:QI 4 "register_operand")]
8281  "TARGET_AVX512F"
8282{
8283  int mask;
8284  mask = INTVAL (operands[2]);
8285  rtx dest = operands[0];
8286
8287  if (MEM_P (operands[0]) && !rtx_equal_p (operands[0], operands[3]))
8288    dest = gen_reg_rtx (<ssequartermode>mode);
8289
8290  if (<MODE>mode == V16SImode || <MODE>mode == V16SFmode)
8291    emit_insn (gen_avx512f_vextract<shuffletype>32x4_1_mask (dest,
8292        operands[1], GEN_INT (mask * 4), GEN_INT (mask * 4 + 1),
8293	GEN_INT (mask * 4 + 2), GEN_INT (mask * 4 + 3), operands[3],
8294	operands[4]));
8295  else
8296    emit_insn (gen_avx512dq_vextract<shuffletype>64x2_1_mask (dest,
8297        operands[1], GEN_INT (mask * 2), GEN_INT (mask * 2 + 1), operands[3],
8298	operands[4]));
8299  if (dest != operands[0])
8300    emit_move_insn (operands[0], dest);
8301  DONE;
8302})
8303
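;; The *_maskm patterns describe merge-masked stores: the vec_merge fallback
;; operand is required to be the very memory location being stored to, which
;; matches the behavior of an EVEX masked store (elements whose mask bit is
;; clear are left unchanged in the destination).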
8304(define_insn "avx512dq_vextract<shuffletype>64x2_1_maskm"
8305  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8306	(vec_merge:<ssequartermode>
8307	  (vec_select:<ssequartermode>
8308	    (match_operand:V8FI 1 "register_operand" "v")
8309	    (parallel [(match_operand 2  "const_0_to_7_operand")
8310	      (match_operand 3  "const_0_to_7_operand")]))
8311	  (match_operand:<ssequartermode> 4 "memory_operand" "0")
8312	  (match_operand:QI 5 "register_operand" "Yk")))]
8313  "TARGET_AVX512DQ
8314   && INTVAL (operands[2]) % 2 == 0
8315   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8316   && rtx_equal_p (operands[4], operands[0])"
8317{
8318  operands[2] = GEN_INT ((INTVAL (operands[2])) >> 1);
8319  return "vextract<shuffletype>64x2\t{%2, %1, %0%{%5%}|%0%{%5%}, %1, %2}";
8320}
8321  [(set_attr "type" "sselog")
8322   (set_attr "prefix_extra" "1")
8323   (set_attr "length_immediate" "1")
8324   (set_attr "memory" "store")
8325   (set_attr "prefix" "evex")
8326   (set_attr "mode" "<sseinsnmode>")])
8327
8328(define_insn "avx512f_vextract<shuffletype>32x4_1_maskm"
8329  [(set (match_operand:<ssequartermode> 0 "memory_operand" "=m")
8330	(vec_merge:<ssequartermode>
8331	  (vec_select:<ssequartermode>
8332	    (match_operand:V16FI 1 "register_operand" "v")
8333	    (parallel [(match_operand 2  "const_0_to_15_operand")
8334	      (match_operand 3  "const_0_to_15_operand")
8335	      (match_operand 4  "const_0_to_15_operand")
8336	      (match_operand 5  "const_0_to_15_operand")]))
8337	  (match_operand:<ssequartermode> 6 "memory_operand" "0")
8338	  (match_operand:QI 7 "register_operand" "Yk")))]
8339  "TARGET_AVX512F
8340   && INTVAL (operands[2]) % 4 == 0
8341   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8342   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8343   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1
8344   && rtx_equal_p (operands[6], operands[0])"
8345{
8346  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8347  return "vextract<shuffletype>32x4\t{%2, %1, %0%{%7%}|%0%{%7%}, %1, %2}";
8348}
8349  [(set_attr "type" "sselog")
8350   (set_attr "prefix_extra" "1")
8351   (set_attr "length_immediate" "1")
8352   (set_attr "memory" "store")
8353   (set_attr "prefix" "evex")
8354   (set_attr "mode" "<sseinsnmode>")])
8355
8356(define_insn "<mask_codefor>avx512dq_vextract<shuffletype>64x2_1<mask_name>"
8357  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8358	(vec_select:<ssequartermode>
8359	  (match_operand:V8FI 1 "register_operand" "v")
8360	  (parallel [(match_operand 2  "const_0_to_7_operand")
8361            (match_operand 3  "const_0_to_7_operand")])))]
8362  "TARGET_AVX512DQ
8363   && INTVAL (operands[2]) % 2 == 0
8364   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1"
8365{
8366  operands[2] = GEN_INT (INTVAL (operands[2]) >> 1);
8367  return "vextract<shuffletype>64x2\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
8368}
8369  [(set_attr "type" "sselog1")
8370   (set_attr "prefix_extra" "1")
8371   (set_attr "length_immediate" "1")
8372   (set_attr "prefix" "evex")
8373   (set_attr "mode" "<sseinsnmode>")])
8374
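;; Without AVX512VL a 128-bit move from one of the EVEX-only registers
;; %xmm16-%xmm31 cannot be encoded.  In that case the split below widens the
;; destination register to the full vector mode, turning the extract into a
;; full-width register move whose low 128 bits are the desired value.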
8375(define_split
8376  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8377	(vec_select:<ssequartermode>
8378	  (match_operand:V8FI 1 "register_operand")
8379	  (parallel [(const_int 0) (const_int 1)])))]
8380  "TARGET_AVX512DQ
8381   && reload_completed
8382   && (TARGET_AVX512VL
8383       || REG_P (operands[0])
8384       || !EXT_REX_SSE_REG_P (operands[1]))"
8385  [(set (match_dup 0) (match_dup 1))]
8386{
8387  if (!TARGET_AVX512VL
8388      && REG_P (operands[0])
8389      && EXT_REX_SSE_REG_P (operands[1]))
8390    operands[0]
8391      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8392  else
8393    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8394})
8395
8396(define_insn "<mask_codefor>avx512f_vextract<shuffletype>32x4_1<mask_name>"
8397  [(set (match_operand:<ssequartermode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8398	(vec_select:<ssequartermode>
8399	  (match_operand:V16FI 1 "register_operand" "v")
8400	  (parallel [(match_operand 2  "const_0_to_15_operand")
8401            (match_operand 3  "const_0_to_15_operand")
8402            (match_operand 4  "const_0_to_15_operand")
8403            (match_operand 5  "const_0_to_15_operand")])))]
8404  "TARGET_AVX512F
8405   && INTVAL (operands[2]) % 4 == 0
8406   && INTVAL (operands[2]) == INTVAL (operands[3]) - 1
8407   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
8408   && INTVAL (operands[4]) == INTVAL (operands[5]) - 1"
8409{
8410  operands[2] = GEN_INT (INTVAL (operands[2]) >> 2);
8411  return "vextract<shuffletype>32x4\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
8412}
8413  [(set_attr "type" "sselog1")
8414   (set_attr "prefix_extra" "1")
8415   (set_attr "length_immediate" "1")
8416   (set_attr "prefix" "evex")
8417   (set_attr "mode" "<sseinsnmode>")])
8418
8419(define_split
8420  [(set (match_operand:<ssequartermode> 0 "nonimmediate_operand")
8421	(vec_select:<ssequartermode>
8422	  (match_operand:V16FI 1 "register_operand")
8423	  (parallel [(const_int 0) (const_int 1)
8424		     (const_int 2) (const_int 3)])))]
8425  "TARGET_AVX512F
8426   && reload_completed
8427   && (TARGET_AVX512VL
8428       || REG_P (operands[0])
8429       || !EXT_REX_SSE_REG_P (operands[1]))"
8430  [(set (match_dup 0) (match_dup 1))]
8431{
8432  if (!TARGET_AVX512VL
8433      && REG_P (operands[0])
8434      && EXT_REX_SSE_REG_P (operands[1]))
8435    operands[0]
8436      = lowpart_subreg (<MODE>mode, operands[0], <ssequartermode>mode);
8437  else
8438    operands[1] = gen_lowpart (<ssequartermode>mode, operands[1]);
8439})
8440
8441(define_mode_attr extract_type_2
8442  [(V16SF "avx512dq") (V16SI "avx512dq") (V8DF "avx512f") (V8DI "avx512f")])
8443
8444(define_mode_attr extract_suf_2
8445  [(V16SF "32x8") (V16SI "32x8") (V8DF "64x4") (V8DI "64x4")])
8446
8447(define_mode_iterator AVX512_VEC_2
8448  [(V16SF "TARGET_AVX512DQ") (V16SI "TARGET_AVX512DQ") V8DF V8DI])
8449
8450(define_expand "<extract_type_2>_vextract<shuffletype><extract_suf_2>_mask"
8451  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8452   (match_operand:AVX512_VEC_2 1 "register_operand")
8453   (match_operand:SI 2 "const_0_to_1_operand")
8454   (match_operand:<ssehalfvecmode> 3 "nonimmediate_operand")
8455   (match_operand:QI 4 "register_operand")]
8456  "TARGET_AVX512F"
8457{
8458  rtx (*insn)(rtx, rtx, rtx, rtx);
8459  rtx dest = operands[0];
8460
8461  if (MEM_P (dest) && !rtx_equal_p (dest, operands[3]))
8462    dest = gen_reg_rtx (<ssehalfvecmode>mode);
8463
8464  switch (INTVAL (operands[2]))
8465    {
8466    case 0:
8467      insn = gen_vec_extract_lo_<mode>_mask;
8468      break;
8469    case 1:
8470      insn = gen_vec_extract_hi_<mode>_mask;
8471      break;
8472    default:
8473      gcc_unreachable ();
8474    }
8475
8476  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8477  if (dest != operands[0])
8478    emit_move_insn (operands[0], dest);
8479  DONE;
8480})
8481
8482(define_split
8483  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8484	(vec_select:<ssehalfvecmode>
8485	  (match_operand:V8FI 1 "nonimmediate_operand")
8486	  (parallel [(const_int 0) (const_int 1)
8487            (const_int 2) (const_int 3)])))]
8488  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8489   && reload_completed
8490   && (TARGET_AVX512VL
8491       || (REG_P (operands[0]) && !EXT_REX_SSE_REG_P (operands[1])))"
8492  [(set (match_dup 0) (match_dup 1))]
8493  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8494
8495(define_insn "vec_extract_lo_<mode>_maskm"
8496  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8497	(vec_merge:<ssehalfvecmode>
8498	  (vec_select:<ssehalfvecmode>
8499	    (match_operand:V8FI 1 "register_operand" "v")
8500	    (parallel [(const_int 0) (const_int 1)
8501	      (const_int 2) (const_int 3)]))
8502	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8503	  (match_operand:QI 3 "register_operand" "Yk")))]
8504  "TARGET_AVX512F
8505   && rtx_equal_p (operands[2], operands[0])"
8506  "vextract<shuffletype>64x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8507  [(set_attr "type" "sselog1")
8508   (set_attr "prefix_extra" "1")
8509   (set_attr "length_immediate" "1")
8510   (set_attr "prefix" "evex")
8511   (set_attr "mode" "<sseinsnmode>")])
8512
8513(define_insn "vec_extract_lo_<mode><mask_name>"
8514  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>,v")
8515	(vec_select:<ssehalfvecmode>
8516	  (match_operand:V8FI 1 "<store_mask_predicate>" "v,v,<store_mask_constraint>")
8517	  (parallel [(const_int 0) (const_int 1)
8518            (const_int 2) (const_int 3)])))]
8519  "TARGET_AVX512F
8520   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8521{
8522  if (<mask_applied> || (!TARGET_AVX512VL && !MEM_P (operands[1])))
8523    return "vextract<shuffletype>64x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8524  else
8525    return "#";
8526}
8527  [(set_attr "type" "sselog1")
8528   (set_attr "prefix_extra" "1")
8529   (set_attr "length_immediate" "1")
8530   (set_attr "memory" "none,store,load")
8531   (set_attr "prefix" "evex")
8532   (set_attr "mode" "<sseinsnmode>")])
8533
8534(define_insn "vec_extract_hi_<mode>_maskm"
8535  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8536	(vec_merge:<ssehalfvecmode>
8537	  (vec_select:<ssehalfvecmode>
8538	    (match_operand:V8FI 1 "register_operand" "v")
8539	    (parallel [(const_int 4) (const_int 5)
8540	      (const_int 6) (const_int 7)]))
8541	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8542	  (match_operand:QI 3 "register_operand" "Yk")))]
8543  "TARGET_AVX512F
8544   && rtx_equal_p (operands[2], operands[0])"
8545  "vextract<shuffletype>64x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8546  [(set_attr "type" "sselog")
8547   (set_attr "prefix_extra" "1")
8548   (set_attr "length_immediate" "1")
8549   (set_attr "memory" "store")
8550   (set_attr "prefix" "evex")
8551   (set_attr "mode" "<sseinsnmode>")])
8552
8553(define_insn "vec_extract_hi_<mode><mask_name>"
8554  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>")
8555	(vec_select:<ssehalfvecmode>
8556	  (match_operand:V8FI 1 "register_operand" "v")
8557	  (parallel [(const_int 4) (const_int 5)
8558            (const_int 6) (const_int 7)])))]
8559  "TARGET_AVX512F"
8560  "vextract<shuffletype>64x4\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}"
8561  [(set_attr "type" "sselog1")
8562   (set_attr "prefix_extra" "1")
8563   (set_attr "length_immediate" "1")
8564   (set_attr "prefix" "evex")
8565   (set_attr "mode" "<sseinsnmode>")])
8566
8567(define_insn "vec_extract_hi_<mode>_maskm"
8568   [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8569	(vec_merge:<ssehalfvecmode>
8570	  (vec_select:<ssehalfvecmode>
8571	    (match_operand:V16FI 1 "register_operand" "v")
8572	    (parallel [(const_int 8) (const_int 9)
8573	      (const_int 10) (const_int 11)
8574	      (const_int 12) (const_int 13)
8575	      (const_int 14) (const_int 15)]))
8576	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8577	  (match_operand:QI 3 "register_operand" "Yk")))]
8578  "TARGET_AVX512DQ
8579   && rtx_equal_p (operands[2], operands[0])"
8580  "vextract<shuffletype>32x8\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8581  [(set_attr "type" "sselog1")
8582   (set_attr "prefix_extra" "1")
8583   (set_attr "length_immediate" "1")
8584   (set_attr "prefix" "evex")
8585   (set_attr "mode" "<sseinsnmode>")])
8586
8587(define_insn "vec_extract_hi_<mode><mask_name>"
8588  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=<store_mask_constraint>,vm")
8589	(vec_select:<ssehalfvecmode>
8590	  (match_operand:V16FI 1 "register_operand" "v,v")
8591	  (parallel [(const_int 8) (const_int 9)
8592            (const_int 10) (const_int 11)
8593	    (const_int 12) (const_int 13)
8594	    (const_int 14) (const_int 15)])))]
8595  "TARGET_AVX512F && <mask_avx512dq_condition>"
8596  "@
8597   vextract<shuffletype>32x8\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}
8598   vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8599  [(set_attr "type" "sselog1")
8600   (set_attr "prefix_extra" "1")
8601   (set_attr "isa" "avx512dq,noavx512dq")
8602   (set_attr "length_immediate" "1")
8603   (set_attr "prefix" "evex")
8604   (set_attr "mode" "<sseinsnmode>")])
8605
8606(define_mode_iterator VI48F_256_DQ
8607  [V8SI V8SF (V4DI "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ")])
8608
8609(define_expand "avx512vl_vextractf128<mode>"
8610  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8611   (match_operand:VI48F_256_DQ 1 "register_operand")
8612   (match_operand:SI 2 "const_0_to_1_operand")
8613   (match_operand:<ssehalfvecmode> 3 "nonimm_or_0_operand")
8614   (match_operand:QI 4 "register_operand")]
8615  "TARGET_AVX512VL"
8616{
8617  rtx (*insn)(rtx, rtx, rtx, rtx);
8618  rtx dest = operands[0];
8619
8620  if (MEM_P (dest)
8621      && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4
8622	  /* For V8S[IF]mode there are maskm insns with =m and 0
8623	     constraints.  */
8624	  ? !rtx_equal_p (dest, operands[3])
8625	  /* For V4D[IF]mode, hi insns don't allow memory, and
8626	     lo insns have =m and 0C constraints.  */
8627	  : (operands[2] != const0_rtx
8628	     || (!rtx_equal_p (dest, operands[3])
8629		 && GET_CODE (operands[3]) != CONST_VECTOR))))
8630    dest = gen_reg_rtx (<ssehalfvecmode>mode);
8631  switch (INTVAL (operands[2]))
8632    {
8633    case 0:
8634      insn = gen_vec_extract_lo_<mode>_mask;
8635      break;
8636    case 1:
8637      insn = gen_vec_extract_hi_<mode>_mask;
8638      break;
8639    default:
8640      gcc_unreachable ();
8641    }
8642
8643  emit_insn (insn (dest, operands[1], operands[3], operands[4]));
8644  if (dest != operands[0])
8645    emit_move_insn (operands[0], dest);
8646  DONE;
8647})
8648
8649(define_expand "avx_vextractf128<mode>"
8650  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8651   (match_operand:V_256 1 "register_operand")
8652   (match_operand:SI 2 "const_0_to_1_operand")]
8653  "TARGET_AVX"
8654{
8655  rtx (*insn)(rtx, rtx);
8656
8657  switch (INTVAL (operands[2]))
8658    {
8659    case 0:
8660      insn = gen_vec_extract_lo_<mode>;
8661      break;
8662    case 1:
8663      insn = gen_vec_extract_hi_<mode>;
8664      break;
8665    default:
8666      gcc_unreachable ();
8667    }
8668
8669  emit_insn (insn (operands[0], operands[1]));
8670  DONE;
8671})
8672
8673(define_insn "vec_extract_lo_<mode><mask_name>"
8674  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8675					  "=v,v,<store_mask_constraint>")
8676	(vec_select:<ssehalfvecmode>
8677	  (match_operand:V16FI 1 "<store_mask_predicate>"
8678				 "v,<store_mask_constraint>,v")
8679	  (parallel [(const_int 0) (const_int 1)
8680                     (const_int 2) (const_int 3)
8681                     (const_int 4) (const_int 5)
8682                     (const_int 6) (const_int 7)])))]
8683  "TARGET_AVX512F
8684   && <mask_avx512dq_condition>
8685   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8686{
8687  if (<mask_applied>
8688      || (!TARGET_AVX512VL
8689	  && !REG_P (operands[0])
8690	  && EXT_REX_SSE_REG_P (operands[1])))
8691    {
8692      if (TARGET_AVX512DQ)
8693	return "vextract<shuffletype>32x8\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8694      else
8695	return "vextract<shuffletype>64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8696    }
8697  else
8698    return "#";
8699}
8700  [(set_attr "type" "sselog1")
8701   (set_attr "prefix_extra" "1")
8702   (set_attr "length_immediate" "1")
8703   (set_attr "memory" "none,load,store")
8704   (set_attr "prefix" "evex")
8705   (set_attr "mode" "<sseinsnmode>")])
8706
8707(define_split
8708  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8709	(vec_select:<ssehalfvecmode>
8710	  (match_operand:V16FI 1 "nonimmediate_operand")
8711	  (parallel [(const_int 0) (const_int 1)
8712            (const_int 2) (const_int 3)
8713	    (const_int 4) (const_int 5)
8714	    (const_int 6) (const_int 7)])))]
8715  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8716   && reload_completed
8717   && (TARGET_AVX512VL
8718       || REG_P (operands[0])
8719       || !EXT_REX_SSE_REG_P (operands[1]))"
8720  [(set (match_dup 0) (match_dup 1))]
8721{
8722  if (!TARGET_AVX512VL
8723      && REG_P (operands[0])
8724      && EXT_REX_SSE_REG_P (operands[1]))
8725    operands[0]
8726      = lowpart_subreg (<MODE>mode, operands[0], <ssehalfvecmode>mode);
8727  else
8728    operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);
8729})
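;; In the split above the low-half extract is just a move.  Without
;; AVX512VL a 256-bit move cannot refer to %zmm16-%zmm31, so if the source
;; is still in such a register the destination is widened to the 512-bit
;; mode via lowpart_subreg and a full-width move is emitted instead;
;; otherwise the source is simply narrowed with gen_lowpart.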
8730
8731(define_insn "vec_extract_lo_<mode><mask_name>"
8732  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8733					  "=v,v,<store_mask_constraint>")
8734	(vec_select:<ssehalfvecmode>
8735	  (match_operand:VI8F_256 1 "<store_mask_predicate>"
8736				    "v,<store_mask_constraint>,v")
8737	  (parallel [(const_int 0) (const_int 1)])))]
8738  "TARGET_AVX
8739   && <mask_avx512vl_condition> && <mask_avx512dq_condition>
8740   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8741{
8742  if (<mask_applied>)
8743    return "vextract<shuffletype>64x2\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8744  else
8745    return "#";
8746}
8747   [(set_attr "type" "sselog1")
8748    (set_attr "prefix_extra" "1")
8749    (set_attr "length_immediate" "1")
8750    (set_attr "memory" "none,load,store")
8751    (set_attr "prefix" "evex")
8752    (set_attr "mode" "XI")])
8753
8754(define_split
8755  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8756	(vec_select:<ssehalfvecmode>
8757	  (match_operand:VI8F_256 1 "nonimmediate_operand")
8758	  (parallel [(const_int 0) (const_int 1)])))]
8759  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8760   && reload_completed"
8761  [(set (match_dup 0) (match_dup 1))]
8762  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8763
8764(define_insn "vec_extract_hi_<mode><mask_name>"
8765  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>" "=v,<store_mask_constraint>")
8766	(vec_select:<ssehalfvecmode>
8767	  (match_operand:VI8F_256 1 "register_operand" "v,v")
8768	  (parallel [(const_int 2) (const_int 3)])))]
8769  "TARGET_AVX && <mask_avx512vl_condition> && <mask_avx512dq_condition>"
8770{
8771  if (TARGET_AVX512VL)
8772  {
8773    if (TARGET_AVX512DQ)
8774      return "vextract<shuffletype>64x2\t{$0x1, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x1}";
8775    else
8776      return "vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}";
8777  }
8778  else
8779    return "vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}";
8780}
8781  [(set_attr "type" "sselog1")
8782   (set_attr "prefix_extra" "1")
8783   (set_attr "length_immediate" "1")
8784   (set_attr "prefix" "vex")
8785   (set_attr "mode" "<sseinsnmode>")])
8786
8787(define_split
8788  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
8789	(vec_select:<ssehalfvecmode>
8790	  (match_operand:VI4F_256 1 "nonimmediate_operand")
8791	  (parallel [(const_int 0) (const_int 1)
8792		     (const_int 2) (const_int 3)])))]
8793  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))
8794   && reload_completed"
8795  [(set (match_dup 0) (match_dup 1))]
8796  "operands[1] = gen_lowpart (<ssehalfvecmode>mode, operands[1]);")
8797
8798(define_insn "vec_extract_lo_<mode><mask_name>"
8799  [(set (match_operand:<ssehalfvecmode> 0 "<store_mask_predicate>"
8800					  "=<store_mask_constraint>,v")
8801	(vec_select:<ssehalfvecmode>
8802	  (match_operand:VI4F_256 1 "<store_mask_predicate>"
8803				    "v,<store_mask_constraint>")
8804	  (parallel [(const_int 0) (const_int 1)
8805		     (const_int 2) (const_int 3)])))]
8806  "TARGET_AVX
8807   && <mask_avx512vl_condition>
8808   && (<mask_applied> || !(MEM_P (operands[0]) && MEM_P (operands[1])))"
8809{
8810  if (<mask_applied>)
8811    return "vextract<shuffletype>32x4\t{$0x0, %1, %0<mask_operand2>|%0<mask_operand2>, %1, 0x0}";
8812  else
8813    return "#";
8814}
8815  [(set_attr "type" "sselog1")
8816   (set_attr "prefix_extra" "1")
8817   (set_attr "length_immediate" "1")
8818   (set_attr "prefix" "evex")
8819   (set_attr "mode" "<sseinsnmode>")])
8820
8821(define_insn "vec_extract_lo_<mode>_maskm"
8822  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8823	(vec_merge:<ssehalfvecmode>
8824	  (vec_select:<ssehalfvecmode>
8825	    (match_operand:VI4F_256 1 "register_operand" "v")
8826	    (parallel [(const_int 0) (const_int 1)
8827		      (const_int 2) (const_int 3)]))
8828	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
8829	  (match_operand:QI 3 "register_operand" "Yk")))]
8830  "TARGET_AVX512VL && TARGET_AVX512F
8831   && rtx_equal_p (operands[2], operands[0])"
8832  "vextract<shuffletype>32x4\t{$0x0, %1, %0%{%3%}|%0%{%3%}, %1, 0x0}"
8833  [(set_attr "type" "sselog1")
8834   (set_attr "prefix_extra" "1")
8835   (set_attr "length_immediate" "1")
8836   (set_attr "prefix" "evex")
8837   (set_attr "mode" "<sseinsnmode>")])
8838
8839(define_insn "vec_extract_hi_<mode>_maskm"
8840  [(set (match_operand:<ssehalfvecmode> 0 "memory_operand" "=m")
8841	(vec_merge:<ssehalfvecmode>
8842	  (vec_select:<ssehalfvecmode>
8843	    (match_operand:VI4F_256 1 "register_operand" "v")
8844	    (parallel [(const_int 4) (const_int 5)
8845		      (const_int 6) (const_int 7)]))
8846	  (match_operand:<ssehalfvecmode> 2 "memory_operand" "0")
	  (match_operand:QI 3 "register_operand" "Yk")))]
8848  "TARGET_AVX512F && TARGET_AVX512VL
8849   && rtx_equal_p (operands[2], operands[0])"
8850  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}|%0%{%3%}, %1, 0x1}"
8851  [(set_attr "type" "sselog1")
8852   (set_attr "length_immediate" "1")
8853   (set_attr "prefix" "evex")
8854   (set_attr "mode" "<sseinsnmode>")])
8855
8856(define_insn "vec_extract_hi_<mode>_mask"
8857  [(set (match_operand:<ssehalfvecmode> 0 "register_operand" "=v")
8858	(vec_merge:<ssehalfvecmode>
8859	  (vec_select:<ssehalfvecmode>
8860	    (match_operand:VI4F_256 1 "register_operand" "v")
8861	    (parallel [(const_int 4) (const_int 5)
8862		       (const_int 6) (const_int 7)]))
8863	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "0C")
8864	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
8865  "TARGET_AVX512VL"
8866  "vextract<shuffletype>32x4\t{$0x1, %1, %0%{%3%}%N2|%0%{%3%}%N2, %1, 0x1}"
8867  [(set_attr "type" "sselog1")
8868   (set_attr "length_immediate" "1")
8869   (set_attr "prefix" "evex")
8870   (set_attr "mode" "<sseinsnmode>")])
8871
8872(define_insn "vec_extract_hi_<mode>"
8873  [(set (match_operand:<ssehalfvecmode> 0 "nonimmediate_operand" "=xm, vm")
8874	(vec_select:<ssehalfvecmode>
8875	  (match_operand:VI4F_256 1 "register_operand" "x, v")
8876	  (parallel [(const_int 4) (const_int 5)
8877		     (const_int 6) (const_int 7)])))]
8878  "TARGET_AVX"
8879  "@
8880    vextract<i128>\t{$0x1, %1, %0|%0, %1, 0x1}
8881    vextract<shuffletype>32x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8882  [(set_attr "isa" "*, avx512vl")
8883   (set_attr "prefix" "vex, evex")
8884   (set_attr "type" "sselog1")
8885   (set_attr "length_immediate" "1")
8886   (set_attr "mode" "<sseinsnmode>")])
8887
8888(define_insn_and_split "vec_extract_lo_v32hi"
8889  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=v,v,m")
8890	(vec_select:V16HI
8891	  (match_operand:V32HI 1 "nonimmediate_operand" "v,m,v")
8892	  (parallel [(const_int 0) (const_int 1)
8893		     (const_int 2) (const_int 3)
8894		     (const_int 4) (const_int 5)
8895		     (const_int 6) (const_int 7)
8896		     (const_int 8) (const_int 9)
8897		     (const_int 10) (const_int 11)
8898		     (const_int 12) (const_int 13)
8899		     (const_int 14) (const_int 15)])))]
8900  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8901{
8902  if (TARGET_AVX512VL
8903      || REG_P (operands[0])
8904      || !EXT_REX_SSE_REG_P (operands[1]))
8905    return "#";
8906  else
8907    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
8908}
8909  "&& reload_completed
8910   && (TARGET_AVX512VL
8911       || REG_P (operands[0])
8912       || !EXT_REX_SSE_REG_P (operands[1]))"
8913  [(set (match_dup 0) (match_dup 1))]
8914{
8915  if (!TARGET_AVX512VL
8916      && REG_P (operands[0])
8917      && EXT_REX_SSE_REG_P (operands[1]))
8918    operands[0] = lowpart_subreg (V32HImode, operands[0], V16HImode);
8919  else
8920    operands[1] = gen_lowpart (V16HImode, operands[1]);
8921}
8922  [(set_attr "type" "sselog1")
8923   (set_attr "prefix_extra" "1")
8924   (set_attr "length_immediate" "1")
8925   (set_attr "memory" "none,load,store")
8926   (set_attr "prefix" "evex")
8927   (set_attr "mode" "XI")])
8928
8929(define_insn "vec_extract_hi_v32hi"
8930  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
8931	(vec_select:V16HI
8932	  (match_operand:V32HI 1 "register_operand" "v")
8933	  (parallel [(const_int 16) (const_int 17)
8934		     (const_int 18) (const_int 19)
8935		     (const_int 20) (const_int 21)
8936		     (const_int 22) (const_int 23)
8937		     (const_int 24) (const_int 25)
8938		     (const_int 26) (const_int 27)
8939		     (const_int 28) (const_int 29)
8940		     (const_int 30) (const_int 31)])))]
8941  "TARGET_AVX512F"
8942  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
8943  [(set_attr "type" "sselog1")
8944   (set_attr "prefix_extra" "1")
8945   (set_attr "length_immediate" "1")
8946   (set_attr "prefix" "evex")
8947   (set_attr "mode" "XI")])
8948
8949(define_insn_and_split "vec_extract_lo_v16hi"
8950  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,m")
8951	(vec_select:V8HI
8952	  (match_operand:V16HI 1 "nonimmediate_operand" "vm,v")
8953	  (parallel [(const_int 0) (const_int 1)
8954		     (const_int 2) (const_int 3)
8955		     (const_int 4) (const_int 5)
8956		     (const_int 6) (const_int 7)])))]
8957  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
8958  "#"
8959  "&& reload_completed"
8960  [(set (match_dup 0) (match_dup 1))]
8961  "operands[1] = gen_lowpart (V8HImode, operands[1]);")
8962
8963(define_insn "vec_extract_hi_v16hi"
8964  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=xm,vm,vm")
8965	(vec_select:V8HI
8966	  (match_operand:V16HI 1 "register_operand" "x,v,v")
8967	  (parallel [(const_int 8) (const_int 9)
8968		     (const_int 10) (const_int 11)
8969		     (const_int 12) (const_int 13)
8970		     (const_int 14) (const_int 15)])))]
8971  "TARGET_AVX"
8972  "@
8973   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
8974   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
8975   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
8976  [(set_attr "type" "sselog1")
8977   (set_attr "prefix_extra" "1")
8978   (set_attr "length_immediate" "1")
8979   (set_attr "isa" "*,avx512dq,avx512f")
8980   (set_attr "prefix" "vex,evex,evex")
8981   (set_attr "mode" "OI")])
8982
8983(define_insn_and_split "vec_extract_lo_v64qi"
8984  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,v,m")
8985	(vec_select:V32QI
8986	  (match_operand:V64QI 1 "nonimmediate_operand" "v,m,v")
8987	  (parallel [(const_int 0) (const_int 1)
8988		     (const_int 2) (const_int 3)
8989		     (const_int 4) (const_int 5)
8990		     (const_int 6) (const_int 7)
8991		     (const_int 8) (const_int 9)
8992		     (const_int 10) (const_int 11)
8993		     (const_int 12) (const_int 13)
8994		     (const_int 14) (const_int 15)
8995		     (const_int 16) (const_int 17)
8996		     (const_int 18) (const_int 19)
8997		     (const_int 20) (const_int 21)
8998		     (const_int 22) (const_int 23)
8999		     (const_int 24) (const_int 25)
9000		     (const_int 26) (const_int 27)
9001		     (const_int 28) (const_int 29)
9002		     (const_int 30) (const_int 31)])))]
9003  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9004{
9005  if (TARGET_AVX512VL
9006      || REG_P (operands[0])
9007      || !EXT_REX_SSE_REG_P (operands[1]))
9008    return "#";
9009  else
9010    return "vextracti64x4\t{$0x0, %1, %0|%0, %1, 0x0}";
9011}
9012  "&& reload_completed
9013   && (TARGET_AVX512VL
9014       || REG_P (operands[0])
9015       || !EXT_REX_SSE_REG_P (operands[1]))"
9016  [(set (match_dup 0) (match_dup 1))]
9017{
9018  if (!TARGET_AVX512VL
9019      && REG_P (operands[0])
9020      && EXT_REX_SSE_REG_P (operands[1]))
9021    operands[0] = lowpart_subreg (V64QImode, operands[0], V32QImode);
9022  else
9023    operands[1] = gen_lowpart (V32QImode, operands[1]);
9024}
9025  [(set_attr "type" "sselog1")
9026   (set_attr "prefix_extra" "1")
9027   (set_attr "length_immediate" "1")
9028   (set_attr "memory" "none,load,store")
9029   (set_attr "prefix" "evex")
9030   (set_attr "mode" "XI")])
9031
9032(define_insn "vec_extract_hi_v64qi"
9033  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=vm")
9034	(vec_select:V32QI
9035	  (match_operand:V64QI 1 "register_operand" "v")
9036	  (parallel [(const_int 32) (const_int 33)
9037		     (const_int 34) (const_int 35)
9038		     (const_int 36) (const_int 37)
9039		     (const_int 38) (const_int 39)
9040		     (const_int 40) (const_int 41)
9041		     (const_int 42) (const_int 43)
9042		     (const_int 44) (const_int 45)
9043		     (const_int 46) (const_int 47)
9044		     (const_int 48) (const_int 49)
9045		     (const_int 50) (const_int 51)
9046		     (const_int 52) (const_int 53)
9047		     (const_int 54) (const_int 55)
9048		     (const_int 56) (const_int 57)
9049		     (const_int 58) (const_int 59)
9050		     (const_int 60) (const_int 61)
9051		     (const_int 62) (const_int 63)])))]
9052  "TARGET_AVX512F"
9053  "vextracti64x4\t{$0x1, %1, %0|%0, %1, 0x1}"
9054  [(set_attr "type" "sselog1")
9055   (set_attr "prefix_extra" "1")
9056   (set_attr "length_immediate" "1")
9057   (set_attr "prefix" "evex")
9058   (set_attr "mode" "XI")])
9059
9060(define_insn_and_split "vec_extract_lo_v32qi"
9061  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,m")
9062	(vec_select:V16QI
9063	  (match_operand:V32QI 1 "nonimmediate_operand" "vm,v")
9064	  (parallel [(const_int 0) (const_int 1)
9065		     (const_int 2) (const_int 3)
9066		     (const_int 4) (const_int 5)
9067		     (const_int 6) (const_int 7)
9068		     (const_int 8) (const_int 9)
9069		     (const_int 10) (const_int 11)
9070		     (const_int 12) (const_int 13)
9071		     (const_int 14) (const_int 15)])))]
9072  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
9073  "#"
9074  "&& reload_completed"
9075  [(set (match_dup 0) (match_dup 1))]
9076  "operands[1] = gen_lowpart (V16QImode, operands[1]);")
9077
9078(define_insn "vec_extract_hi_v32qi"
9079  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=xm,vm,vm")
9080	(vec_select:V16QI
9081	  (match_operand:V32QI 1 "register_operand" "x,v,v")
9082	  (parallel [(const_int 16) (const_int 17)
9083		     (const_int 18) (const_int 19)
9084		     (const_int 20) (const_int 21)
9085		     (const_int 22) (const_int 23)
9086		     (const_int 24) (const_int 25)
9087		     (const_int 26) (const_int 27)
9088		     (const_int 28) (const_int 29)
9089		     (const_int 30) (const_int 31)])))]
9090  "TARGET_AVX"
9091  "@
9092   vextract%~128\t{$0x1, %1, %0|%0, %1, 0x1}
9093   vextracti32x4\t{$0x1, %1, %0|%0, %1, 0x1}
9094   vextracti32x4\t{$0x1, %g1, %0|%0, %g1, 0x1}"
9095  [(set_attr "type" "sselog1")
9096   (set_attr "prefix_extra" "1")
9097   (set_attr "length_immediate" "1")
9098   (set_attr "isa" "*,avx512dq,avx512f")
9099   (set_attr "prefix" "vex,evex,evex")
9100   (set_attr "mode" "OI")])
9101
9102;; Modes handled by vec_extract patterns.
9103(define_mode_iterator VEC_EXTRACT_MODE
9104  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX") V16QI
9105   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX") V8HI
9106   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
9107   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
9108   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
9109   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") V2DF
9110   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
9111
9112(define_expand "vec_extract<mode><ssescalarmodelower>"
9113  [(match_operand:<ssescalarmode> 0 "register_operand")
9114   (match_operand:VEC_EXTRACT_MODE 1 "register_operand")
9115   (match_operand 2 "const_int_operand")]
9116  "TARGET_SSE"
9117{
9118  ix86_expand_vector_extract (false, operands[0], operands[1],
9119			      INTVAL (operands[2]));
9120  DONE;
9121})
9122
9123(define_expand "vec_extract<mode><ssehalfvecmodelower>"
9124  [(match_operand:<ssehalfvecmode> 0 "nonimmediate_operand")
9125   (match_operand:V_256_512 1 "register_operand")
9126   (match_operand 2 "const_0_to_1_operand")]
9127  "TARGET_AVX"
9128{
9129  if (INTVAL (operands[2]))
9130    emit_insn (gen_vec_extract_hi_<mode> (operands[0], operands[1]));
9131  else
9132    emit_insn (gen_vec_extract_lo_<mode> (operands[0], operands[1]));
9133  DONE;
9134})
9135
9136;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9137;;
9138;; Parallel double-precision floating point element swizzling
9139;;
9140;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9141
9142(define_insn "<mask_codefor>avx512f_unpckhpd512<mask_name>"
9143  [(set (match_operand:V8DF 0 "register_operand" "=v")
9144	(vec_select:V8DF
9145	  (vec_concat:V16DF
9146	    (match_operand:V8DF 1 "register_operand" "v")
9147	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9148	  (parallel [(const_int 1) (const_int 9)
9149		     (const_int 3) (const_int 11)
9150		     (const_int 5) (const_int 13)
9151		     (const_int 7) (const_int 15)])))]
9152  "TARGET_AVX512F"
9153  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9154  [(set_attr "type" "sselog")
9155   (set_attr "prefix" "evex")
9156   (set_attr "mode" "V8DF")])
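;; The selection above takes the odd-numbered double of each 128-bit pair,
;; i.e. the result is { op1[1], op2[1], op1[3], op2[3],
;;                      op1[5], op2[5], op1[7], op2[7] }.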
9157
9158;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9159(define_insn "avx_unpckhpd256<mask_name>"
9160  [(set (match_operand:V4DF 0 "register_operand" "=v")
9161	(vec_select:V4DF
9162	  (vec_concat:V8DF
9163	    (match_operand:V4DF 1 "register_operand" "v")
9164	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9165	  (parallel [(const_int 1) (const_int 5)
9166		     (const_int 3) (const_int 7)])))]
9167  "TARGET_AVX && <mask_avx512vl_condition>"
9168  "vunpckhpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9169  [(set_attr "type" "sselog")
9170   (set_attr "prefix" "vex")
9171   (set_attr "mode" "V4DF")])
9172
9173(define_expand "vec_interleave_highv4df"
9174  [(set (match_dup 3)
9175	(vec_select:V4DF
9176	  (vec_concat:V8DF
9177	    (match_operand:V4DF 1 "register_operand")
9178	    (match_operand:V4DF 2 "nonimmediate_operand"))
9179	  (parallel [(const_int 0) (const_int 4)
9180		     (const_int 2) (const_int 6)])))
9181   (set (match_dup 4)
9182	(vec_select:V4DF
9183	  (vec_concat:V8DF
9184	    (match_dup 1)
9185	    (match_dup 2))
9186	  (parallel [(const_int 1) (const_int 5)
9187		     (const_int 3) (const_int 7)])))
9188   (set (match_operand:V4DF 0 "register_operand")
9189	(vec_select:V4DF
9190	  (vec_concat:V8DF
9191	    (match_dup 3)
9192	    (match_dup 4))
9193	  (parallel [(const_int 2) (const_int 3)
9194		     (const_int 6) (const_int 7)])))]
9195 "TARGET_AVX"
9196{
9197  operands[3] = gen_reg_rtx (V4DFmode);
9198  operands[4] = gen_reg_rtx (V4DFmode);
9199})
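;; Worked example for the expansion above, with A = { a0 a1 a2 a3 } and
;; B = { b0 b1 b2 b3 }: the two in-lane unpacks give
;;   op3 = { a0 b0 a2 b2 }   and   op4 = { a1 b1 a3 b3 },
;; and the final cross-lane shuffle picks (2 3 6 7), i.e. { a2 b2 a3 b3 },
;; the interleave of the high halves of A and B.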
9200
9201
9202(define_insn "avx512vl_unpckhpd128_mask"
9203  [(set (match_operand:V2DF 0 "register_operand" "=v")
9204	(vec_merge:V2DF
9205	  (vec_select:V2DF
9206	    (vec_concat:V4DF
9207	      (match_operand:V2DF 1 "register_operand" "v")
9208	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9209	    (parallel [(const_int 1) (const_int 3)]))
9210	  (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9211	  (match_operand:QI 4 "register_operand" "Yk")))]
9212  "TARGET_AVX512VL"
9213  "vunpckhpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9214  [(set_attr "type" "sselog")
9215   (set_attr "prefix" "evex")
9216   (set_attr "mode" "V2DF")])
9217
9218(define_expand "vec_interleave_highv2df"
9219  [(set (match_operand:V2DF 0 "register_operand")
9220	(vec_select:V2DF
9221	  (vec_concat:V4DF
9222	    (match_operand:V2DF 1 "nonimmediate_operand")
9223	    (match_operand:V2DF 2 "nonimmediate_operand"))
9224	  (parallel [(const_int 1)
9225		     (const_int 3)])))]
9226  "TARGET_SSE2"
9227{
9228  if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
9229    operands[2] = force_reg (V2DFmode, operands[2]);
9230})
9231
9232(define_insn "*vec_interleave_highv2df"
9233  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,m")
9234	(vec_select:V2DF
9235	  (vec_concat:V4DF
9236	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,o,o,o,v")
9237	    (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,0,v,0"))
9238	  (parallel [(const_int 1)
9239		     (const_int 3)])))]
9240  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
9241  "@
9242   unpckhpd\t{%2, %0|%0, %2}
9243   vunpckhpd\t{%2, %1, %0|%0, %1, %2}
9244   %vmovddup\t{%H1, %0|%0, %H1}
9245   movlpd\t{%H1, %0|%0, %H1}
9246   vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
9247   %vmovhpd\t{%1, %0|%q0, %1}"
9248  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9249   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9250   (set (attr "prefix_data16")
9251     (if_then_else (eq_attr "alternative" "3,5")
9252		   (const_string "1")
9253		   (const_string "*")))
9254   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9255   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9256
9257(define_expand "avx512f_movddup512<mask_name>"
9258  [(set (match_operand:V8DF 0 "register_operand")
9259	(vec_select:V8DF
9260	  (vec_concat:V16DF
9261	    (match_operand:V8DF 1 "nonimmediate_operand")
9262	    (match_dup 1))
9263	  (parallel [(const_int 0) (const_int 8)
9264		     (const_int 2) (const_int 10)
9265		     (const_int 4) (const_int 12)
9266		     (const_int 6) (const_int 14)])))]
9267  "TARGET_AVX512F")
9268
9269(define_expand "avx512f_unpcklpd512<mask_name>"
9270  [(set (match_operand:V8DF 0 "register_operand")
9271	(vec_select:V8DF
9272	  (vec_concat:V16DF
9273	    (match_operand:V8DF 1 "register_operand")
9274	    (match_operand:V8DF 2 "nonimmediate_operand"))
9275	  (parallel [(const_int 0) (const_int 8)
9276		     (const_int 2) (const_int 10)
9277		     (const_int 4) (const_int 12)
9278		     (const_int 6) (const_int 14)])))]
9279  "TARGET_AVX512F")
9280
9281(define_insn "*avx512f_unpcklpd512<mask_name>"
9282  [(set (match_operand:V8DF 0 "register_operand" "=v,v")
9283	(vec_select:V8DF
9284	  (vec_concat:V16DF
9285	    (match_operand:V8DF 1 "nonimmediate_operand" "vm, v")
9286	    (match_operand:V8DF 2 "nonimmediate_operand" "1 ,vm"))
9287	  (parallel [(const_int 0) (const_int 8)
9288		     (const_int 2) (const_int 10)
9289		     (const_int 4) (const_int 12)
9290		     (const_int 6) (const_int 14)])))]
9291  "TARGET_AVX512F"
9292  "@
9293   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}
9294   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9295  [(set_attr "type" "sselog")
9296   (set_attr "prefix" "evex")
9297   (set_attr "mode" "V8DF")])
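;; The "1" matching constraint in the first alternative covers the case
;; where both inputs are the same operand; unpcklpd of a vector with itself
;; just duplicates the even-numbered elements, which vmovddup implements
;; directly.  The 256-bit pattern below uses the same trick.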
9298
9299;; Recall that the 256-bit unpck insns only shuffle within their lanes.
9300(define_expand "avx_movddup256<mask_name>"
9301  [(set (match_operand:V4DF 0 "register_operand")
9302	(vec_select:V4DF
9303	  (vec_concat:V8DF
9304	    (match_operand:V4DF 1 "nonimmediate_operand")
9305	    (match_dup 1))
9306	  (parallel [(const_int 0) (const_int 4)
9307		     (const_int 2) (const_int 6)])))]
9308  "TARGET_AVX && <mask_avx512vl_condition>")
9309
9310(define_expand "avx_unpcklpd256<mask_name>"
9311  [(set (match_operand:V4DF 0 "register_operand")
9312	(vec_select:V4DF
9313	  (vec_concat:V8DF
9314	    (match_operand:V4DF 1 "register_operand")
9315	    (match_operand:V4DF 2 "nonimmediate_operand"))
9316	  (parallel [(const_int 0) (const_int 4)
9317		     (const_int 2) (const_int 6)])))]
9318  "TARGET_AVX && <mask_avx512vl_condition>")
9319
9320(define_insn "*avx_unpcklpd256<mask_name>"
9321  [(set (match_operand:V4DF 0 "register_operand"         "=v,v")
9322	(vec_select:V4DF
9323	  (vec_concat:V8DF
9324	    (match_operand:V4DF 1 "nonimmediate_operand" " v,m")
9325	    (match_operand:V4DF 2 "nonimmediate_operand" "vm,1"))
9326	  (parallel [(const_int 0) (const_int 4)
9327		     (const_int 2) (const_int 6)])))]
9328  "TARGET_AVX && <mask_avx512vl_condition>"
9329  "@
9330   vunpcklpd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
9331   vmovddup\t{%1, %0<mask_operand3>|%0<mask_operand3>, %1}"
9332  [(set_attr "type" "sselog")
9333   (set_attr "prefix" "vex")
9334   (set_attr "mode" "V4DF")])
9335
9336(define_expand "vec_interleave_lowv4df"
9337  [(set (match_dup 3)
9338	(vec_select:V4DF
9339	  (vec_concat:V8DF
9340	    (match_operand:V4DF 1 "register_operand")
9341	    (match_operand:V4DF 2 "nonimmediate_operand"))
9342	  (parallel [(const_int 0) (const_int 4)
9343		     (const_int 2) (const_int 6)])))
9344   (set (match_dup 4)
9345	(vec_select:V4DF
9346	  (vec_concat:V8DF
9347	    (match_dup 1)
9348	    (match_dup 2))
9349	  (parallel [(const_int 1) (const_int 5)
9350		     (const_int 3) (const_int 7)])))
9351   (set (match_operand:V4DF 0 "register_operand")
9352	(vec_select:V4DF
9353	  (vec_concat:V8DF
9354	    (match_dup 3)
9355	    (match_dup 4))
9356	  (parallel [(const_int 0) (const_int 1)
9357		     (const_int 4) (const_int 5)])))]
9358 "TARGET_AVX"
9359{
9360  operands[3] = gen_reg_rtx (V4DFmode);
9361  operands[4] = gen_reg_rtx (V4DFmode);
9362})
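;; As in vec_interleave_highv4df above: with the same A and B the final
;; selection (0 1 4 5) yields { a0 b0 a1 b1 }, the interleave of the low
;; halves.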
9363
9364(define_insn "avx512vl_unpcklpd128_mask"
9365  [(set (match_operand:V2DF 0 "register_operand" "=v")
9366	(vec_merge:V2DF
9367	  (vec_select:V2DF
9368	    (vec_concat:V4DF
9369	      (match_operand:V2DF 1 "register_operand" "v")
9370	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9371	    (parallel [(const_int 0) (const_int 2)]))
9372	  (match_operand:V2DF 3 "nonimm_or_0_operand" "0C")
9373	  (match_operand:QI 4 "register_operand" "Yk")))]
9374  "TARGET_AVX512VL"
9375  "vunpcklpd\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
9376  [(set_attr "type" "sselog")
9377   (set_attr "prefix" "evex")
9378   (set_attr "mode" "V2DF")])
9379
9380(define_expand "vec_interleave_lowv2df"
9381  [(set (match_operand:V2DF 0 "register_operand")
9382	(vec_select:V2DF
9383	  (vec_concat:V4DF
9384	    (match_operand:V2DF 1 "nonimmediate_operand")
9385	    (match_operand:V2DF 2 "nonimmediate_operand"))
9386	  (parallel [(const_int 0)
9387		     (const_int 2)])))]
9388  "TARGET_SSE2"
9389{
9390  if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
9391    operands[1] = force_reg (V2DFmode, operands[1]);
9392})
9393
9394(define_insn "*vec_interleave_lowv2df"
9395  [(set (match_operand:V2DF 0 "nonimmediate_operand"     "=x,v,v,x,v,o")
9396	(vec_select:V2DF
9397	  (vec_concat:V4DF
9398	    (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,m,0,v,0")
9399	    (match_operand:V2DF 2 "nonimmediate_operand" " x,v,1,m,m,v"))
9400	  (parallel [(const_int 0)
9401		     (const_int 2)])))]
9402  "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
9403  "@
9404   unpcklpd\t{%2, %0|%0, %2}
9405   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
9406   %vmovddup\t{%1, %0|%0, %q1}
9407   movhpd\t{%2, %0|%0, %q2}
9408   vmovhpd\t{%2, %1, %0|%0, %1, %q2}
9409   %vmovlpd\t{%2, %H0|%H0, %2}"
9410  [(set_attr "isa" "noavx,avx,sse3,noavx,avx,*")
9411   (set_attr "type" "sselog,sselog,sselog,ssemov,ssemov,ssemov")
9412   (set (attr "prefix_data16")
9413     (if_then_else (eq_attr "alternative" "3,5")
9414		   (const_string "1")
9415		   (const_string "*")))
9416   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig,maybe_evex,maybe_vex")
9417   (set_attr "mode" "V2DF,V2DF,DF,V1DF,V1DF,V1DF")])
9418
9419(define_split
9420  [(set (match_operand:V2DF 0 "memory_operand")
9421	(vec_select:V2DF
9422	  (vec_concat:V4DF
9423	    (match_operand:V2DF 1 "register_operand")
9424	    (match_dup 1))
9425	  (parallel [(const_int 0)
9426		     (const_int 2)])))]
9427  "TARGET_SSE3 && reload_completed"
9428  [(const_int 0)]
9429{
9430  rtx low = gen_lowpart (DFmode, operands[1]);
9431
9432  emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
9433  emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
9434  DONE;
9435})
9436
9437(define_split
9438  [(set (match_operand:V2DF 0 "register_operand")
9439	(vec_select:V2DF
9440	  (vec_concat:V4DF
9441	    (match_operand:V2DF 1 "memory_operand")
9442	    (match_dup 1))
9443	  (parallel [(match_operand:SI 2 "const_0_to_1_operand")
9444		     (match_operand:SI 3 "const_int_operand")])))]
9445  "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
9446  [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
9447{
9448  operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
9449})
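;; E.g. when operands[2] is 1 the selection above is (1 3), so both result
;; elements come from the second double of the memory operand and the split
;; turns this into a vec_duplicate of the DFmode word at offset 8 (movddup).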
9450
9451(define_insn "avx512f_vmscalef<mode><mask_scalar_name><round_scalar_name>"
9452  [(set (match_operand:VF_128 0 "register_operand" "=v")
9453	(vec_merge:VF_128
9454	  (unspec:VF_128
9455	    [(match_operand:VF_128 1 "register_operand" "v")
9456	     (match_operand:VF_128 2 "<round_scalar_nimm_predicate>" "<round_scalar_constraint>")]
9457	    UNSPEC_SCALEF)
9458	  (match_dup 1)
9459	  (const_int 1)))]
9460  "TARGET_AVX512F"
9461  "vscalef<ssescalarmodesuffix>\t{<round_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %2<round_scalar_mask_op3>}"
9462  [(set_attr "prefix" "evex")
9463   (set_attr "mode"  "<ssescalarmode>")])
9464
9465(define_insn "<avx512>_scalef<mode><mask_name><round_name>"
9466  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9467	(unspec:VF_AVX512VL
9468	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
9469	   (match_operand:VF_AVX512VL 2 "nonimmediate_operand" "<round_constraint>")]
9470	  UNSPEC_SCALEF))]
9471  "TARGET_AVX512F"
9472  "vscalef<ssemodesuffix>\t{<round_mask_op3>%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2<round_mask_op3>}"
9473  [(set_attr "prefix" "evex")
9474   (set_attr "mode"  "<MODE>")])
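;; vscalef computes operand 1 * 2**floor (operand 2) for each element,
;; e.g. scalef (3.0, 4.7) is 3.0 * 2**4 = 48.0.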
9475
9476(define_expand "<avx512>_vternlog<mode>_maskz"
9477  [(match_operand:VI48_AVX512VL 0 "register_operand")
9478   (match_operand:VI48_AVX512VL 1 "register_operand")
9479   (match_operand:VI48_AVX512VL 2 "register_operand")
9480   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand")
9481   (match_operand:SI 4 "const_0_to_255_operand")
9482   (match_operand:<avx512fmaskmode> 5 "register_operand")]
9483  "TARGET_AVX512F"
9484{
9485  emit_insn (gen_<avx512>_vternlog<mode>_maskz_1 (
9486    operands[0], operands[1], operands[2], operands[3],
9487    operands[4], CONST0_RTX (<MODE>mode), operands[5]));
9488  DONE;
9489})
9490
9491(define_insn "<avx512>_vternlog<mode><sd_maskz_name>"
9492  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9493	(unspec:VI48_AVX512VL
9494	  [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9495	   (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9496	   (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9497	   (match_operand:SI 4 "const_0_to_255_operand")]
9498	  UNSPEC_VTERNLOG))]
9499  "TARGET_AVX512F"
9500  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3, %4}"
9501  [(set_attr "type" "sselog")
9502   (set_attr "prefix" "evex")
9503   (set_attr "mode" "<sseinsnmode>")])
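;; The vpternlog immediate is the truth table of an arbitrary three-input
;; boolean function: for input bits a, b and c taken from operands 1, 2 and
;; 3, the result bit is bit (a*4 + b*2 + c) of the immediate.  E.g. 0x96 is
;; the three-way xor and 0xe8 the bitwise majority function.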
9504
9505(define_insn "<avx512>_vternlog<mode>_mask"
9506  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9507	(vec_merge:VI48_AVX512VL
9508	  (unspec:VI48_AVX512VL
9509	    [(match_operand:VI48_AVX512VL 1 "register_operand" "0")
9510	     (match_operand:VI48_AVX512VL 2 "register_operand" "v")
9511	     (match_operand:VI48_AVX512VL 3 "nonimmediate_operand" "vm")
9512	     (match_operand:SI 4 "const_0_to_255_operand")]
9513	    UNSPEC_VTERNLOG)
9514	  (match_dup 1)
9515	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9516  "TARGET_AVX512F"
9517  "vpternlog<ssemodesuffix>\t{%4, %3, %2, %0%{%5%}|%0%{%5%}, %2, %3, %4}"
9518  [(set_attr "type" "sselog")
9519   (set_attr "prefix" "evex")
9520   (set_attr "mode" "<sseinsnmode>")])
9521
9522(define_insn "<avx512>_getexp<mode><mask_name><round_saeonly_name>"
9523  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9524        (unspec:VF_AVX512VL [(match_operand:VF_AVX512VL 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
9525                        UNSPEC_GETEXP))]
9526   "TARGET_AVX512F"
9527   "vgetexp<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}";
9528    [(set_attr "prefix" "evex")
9529     (set_attr "mode" "<MODE>")])
9530
9531(define_insn "avx512f_sgetexp<mode><mask_scalar_name><round_saeonly_scalar_name>"
9532  [(set (match_operand:VF_128 0 "register_operand" "=v")
9533	(vec_merge:VF_128
9534	  (unspec:VF_128
9535	    [(match_operand:VF_128 1 "register_operand" "v")
9536	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")]
9537	    UNSPEC_GETEXP)
9538	  (match_dup 1)
9539	  (const_int 1)))]
9540   "TARGET_AVX512F"
9541   "vgetexp<ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}";
9542    [(set_attr "prefix" "evex")
9543     (set_attr "mode" "<ssescalarmode>")])
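;; vgetexp returns the unbiased exponent of each input as a floating-point
;; value, i.e. floor (log2 |x|); e.g. getexp (20.0) is 4.0.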
9544
9545(define_insn "<mask_codefor><avx512>_align<mode><mask_name>"
9546  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
9547        (unspec:VI48_AVX512VL [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
9548			       (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
9549			       (match_operand:SI 3 "const_0_to_255_operand")]
9550			      UNSPEC_ALIGN))]
9551  "TARGET_AVX512F"
9552  "valign<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
9553  [(set_attr "prefix" "evex")
9554   (set_attr "mode" "<sseinsnmode>")])
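;; valignd/valignq concatenate operand 1 (upper half) and operand 2 (lower
;; half), shift the pair right by the immediate number of elements and keep
;; the low half; e.g. valignd with an immediate of 3 on V16SI yields
;; elements 3..15 of operand 2 followed by elements 0..2 of operand 1.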
9555
9556(define_expand "avx512f_shufps512_mask"
9557  [(match_operand:V16SF 0 "register_operand")
9558   (match_operand:V16SF 1 "register_operand")
9559   (match_operand:V16SF 2 "nonimmediate_operand")
9560   (match_operand:SI 3 "const_0_to_255_operand")
9561   (match_operand:V16SF 4 "register_operand")
9562   (match_operand:HI 5 "register_operand")]
9563  "TARGET_AVX512F"
9564{
9565  int mask = INTVAL (operands[3]);
9566  emit_insn (gen_avx512f_shufps512_1_mask (operands[0], operands[1], operands[2],
9567					  GEN_INT ((mask >> 0) & 3),
9568					  GEN_INT ((mask >> 2) & 3),
9569					  GEN_INT (((mask >> 4) & 3) + 16),
9570					  GEN_INT (((mask >> 6) & 3) + 16),
9571					  GEN_INT (((mask >> 0) & 3) + 4),
9572					  GEN_INT (((mask >> 2) & 3) + 4),
9573					  GEN_INT (((mask >> 4) & 3) + 20),
9574					  GEN_INT (((mask >> 6) & 3) + 20),
9575					  GEN_INT (((mask >> 0) & 3) + 8),
9576					  GEN_INT (((mask >> 2) & 3) + 8),
9577					  GEN_INT (((mask >> 4) & 3) + 24),
9578					  GEN_INT (((mask >> 6) & 3) + 24),
9579					  GEN_INT (((mask >> 0) & 3) + 12),
9580					  GEN_INT (((mask >> 2) & 3) + 12),
9581					  GEN_INT (((mask >> 4) & 3) + 28),
9582					  GEN_INT (((mask >> 6) & 3) + 28),
9583					  operands[4], operands[5]));
9584  DONE;
9585})
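;; Worked example for the decomposition above: an immediate of 0x1b has the
;; 2-bit fields (3, 2, 1, 0), so each 128-bit lane of the result becomes
;; { op1[3], op1[2], op2[1], op2[0] } (indices relative to that lane), which
;; is what the generated selection 3, 2, 17, 16, 7, 6, 21, 20, ... encodes.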
9586
9587
9588(define_expand "<avx512>_fixupimm<mode>_maskz<round_saeonly_expand_name>"
9589  [(match_operand:VF_AVX512VL 0 "register_operand")
9590   (match_operand:VF_AVX512VL 1 "register_operand")
9591   (match_operand:VF_AVX512VL 2 "register_operand")
9592   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9593   (match_operand:SI 4 "const_0_to_255_operand")
9594   (match_operand:<avx512fmaskmode> 5 "register_operand")]
9595  "TARGET_AVX512F"
9596{
9597  emit_insn (gen_<avx512>_fixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9598	operands[0], operands[1], operands[2], operands[3],
9599	operands[4], CONST0_RTX (<MODE>mode), operands[5]
9600	<round_saeonly_expand_operand6>));
9601  DONE;
9602})
9603
9604(define_insn "<avx512>_fixupimm<mode><sd_maskz_name><round_saeonly_name>"
9605  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9606        (unspec:VF_AVX512VL
9607          [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9608	   (match_operand:VF_AVX512VL 2 "register_operand" "v")
9609           (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9610           (match_operand:SI 4 "const_0_to_255_operand")]
9611           UNSPEC_FIXUPIMM))]
9612  "TARGET_AVX512F"
9613  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %3<round_saeonly_sd_mask_op5>, %4}";
9614  [(set_attr "prefix" "evex")
9615   (set_attr "mode" "<MODE>")])
9616
9617(define_insn "<avx512>_fixupimm<mode>_mask<round_saeonly_name>"
9618  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9619	(vec_merge:VF_AVX512VL
9620          (unspec:VF_AVX512VL
9621            [(match_operand:VF_AVX512VL 1 "register_operand" "0")
9622	     (match_operand:VF_AVX512VL 2 "register_operand" "v")
9623             (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "<round_saeonly_constraint>")
9624             (match_operand:SI 4 "const_0_to_255_operand")]
9625             UNSPEC_FIXUPIMM)
9626	  (match_dup 1)
9627	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9628  "TARGET_AVX512F"
9629  "vfixupimm<ssemodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %3<round_saeonly_op6>, %4}";
9630  [(set_attr "prefix" "evex")
9631   (set_attr "mode" "<MODE>")])
9632
9633(define_expand "avx512f_sfixupimm<mode>_maskz<round_saeonly_expand_name>"
9634  [(match_operand:VF_128 0 "register_operand")
9635   (match_operand:VF_128 1 "register_operand")
9636   (match_operand:VF_128 2 "register_operand")
9637   (match_operand:<sseintvecmode> 3 "<round_saeonly_expand_nimm_predicate>")
9638   (match_operand:SI 4 "const_0_to_255_operand")
9639   (match_operand:<avx512fmaskmode> 5 "register_operand")]
9640  "TARGET_AVX512F"
9641{
9642  emit_insn (gen_avx512f_sfixupimm<mode>_maskz_1<round_saeonly_expand_name> (
9643	operands[0], operands[1], operands[2], operands[3],
9644	operands[4], CONST0_RTX (<MODE>mode), operands[5]
9645	<round_saeonly_expand_operand6>));
9646  DONE;
9647})
9648
9649(define_insn "avx512f_sfixupimm<mode><sd_maskz_name><round_saeonly_name>"
9650  [(set (match_operand:VF_128 0 "register_operand" "=v")
9651	(vec_merge:VF_128
9652          (unspec:VF_128
9653            [(match_operand:VF_128 1 "register_operand" "0")
9654	     (match_operand:VF_128 2 "register_operand" "v")
9655	     (match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9656	     (match_operand:SI 4 "const_0_to_255_operand")]
9657	    UNSPEC_FIXUPIMM)
9658	  (match_dup 1)
9659	  (const_int 1)))]
9660   "TARGET_AVX512F"
9661   "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_sd_mask_op5>%3, %2, %0<sd_mask_op5>|%0<sd_mask_op5>, %2, %<iptr>3<round_saeonly_sd_mask_op5>, %4}";
9662   [(set_attr "prefix" "evex")
9663   (set_attr "mode" "<ssescalarmode>")])
9664
9665(define_insn "avx512f_sfixupimm<mode>_mask<round_saeonly_name>"
9666  [(set (match_operand:VF_128 0 "register_operand" "=v")
9667	(vec_merge:VF_128
9668	  (vec_merge:VF_128
9669	    (unspec:VF_128
9670	       [(match_operand:VF_128 1 "register_operand" "0")
9671		(match_operand:VF_128 2 "register_operand" "v")
9672		(match_operand:<sseintvecmode> 3 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")
9673		(match_operand:SI 4 "const_0_to_255_operand")]
9674	       UNSPEC_FIXUPIMM)
9675	    (match_dup 1)
9676	    (const_int 1))
9677	  (match_dup 1)
9678	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
9679  "TARGET_AVX512F"
9680  "vfixupimm<ssescalarmodesuffix>\t{%4, <round_saeonly_op6>%3, %2, %0%{%5%}|%0%{%5%}, %2, %<iptr>3<round_saeonly_op6>, %4}";
9681  [(set_attr "prefix" "evex")
9682   (set_attr "mode" "<ssescalarmode>")])
9683
9684(define_insn "<avx512>_rndscale<mode><mask_name><round_saeonly_name>"
9685  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
9686	(unspec:VF_AVX512VL
9687	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
9688	   (match_operand:SI 2 "const_0_to_255_operand")]
9689	  UNSPEC_ROUND))]
9690  "TARGET_AVX512F"
9691  "vrndscale<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
9692  [(set_attr "length_immediate" "1")
9693   (set_attr "prefix" "evex")
9694   (set_attr "mode" "<MODE>")])
9695
9696(define_insn "avx512f_rndscale<mode><mask_scalar_name><round_saeonly_scalar_name>"
9697  [(set (match_operand:VF_128 0 "register_operand" "=v")
9698	(vec_merge:VF_128
9699	  (unspec:VF_128
9700	    [(match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
9701	     (match_operand:SI 3 "const_0_to_255_operand")]
9702	    UNSPEC_ROUND)
9703	  (match_operand:VF_128 1 "register_operand" "v")
9704	  (const_int 1)))]
9705  "TARGET_AVX512F"
9706  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
9707  [(set_attr "length_immediate" "1")
9708   (set_attr "prefix" "evex")
9709   (set_attr "mode" "<MODE>")])
9710
9711(define_insn "*avx512f_rndscale<mode><round_saeonly_name>"
9712  [(set (match_operand:VF_128 0 "register_operand" "=v")
9713	(vec_merge:VF_128
9714	  (vec_duplicate:VF_128
9715	    (unspec:<ssescalarmode>
9716	      [(match_operand:<ssescalarmode> 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
9717	       (match_operand:SI 3 "const_0_to_255_operand")]
9718	      UNSPEC_ROUND))
9719          (match_operand:VF_128 1 "register_operand" "v")
9720	  (const_int 1)))]
9721  "TARGET_AVX512F"
9722  "vrndscale<ssescalarmodesuffix>\t{%3, <round_saeonly_op4>%2, %1, %0|%0, %1, %2<round_saeonly_op4>, %3}"
9723  [(set_attr "length_immediate" "1")
9724   (set_attr "prefix" "evex")
9725   (set_attr "mode" "<MODE>")])
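;; The vrndscale immediate encodes M = imm[7:4] fraction bits to keep and
;; the rounding mode in imm[1:0] (imm[2] set defers to MXCSR instead), so
;; the result is round (x * 2**M) / 2**M; an immediate of 0 rounds to the
;; nearest integer.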
9726
;; The 8-bit shufps immediate drives all 16 result elements: each 2-bit
;; field selects one source position that is reused in every 128-bit lane.
9728(define_insn "avx512f_shufps512_1<mask_name>"
9729  [(set (match_operand:V16SF 0 "register_operand" "=v")
9730	(vec_select:V16SF
9731	  (vec_concat:V32SF
9732	    (match_operand:V16SF 1 "register_operand" "v")
9733	    (match_operand:V16SF 2 "nonimmediate_operand" "vm"))
9734	  (parallel [(match_operand 3  "const_0_to_3_operand")
9735		     (match_operand 4  "const_0_to_3_operand")
9736		     (match_operand 5  "const_16_to_19_operand")
9737		     (match_operand 6  "const_16_to_19_operand")
9738		     (match_operand 7  "const_4_to_7_operand")
9739		     (match_operand 8  "const_4_to_7_operand")
9740		     (match_operand 9  "const_20_to_23_operand")
9741		     (match_operand 10  "const_20_to_23_operand")
9742		     (match_operand 11  "const_8_to_11_operand")
9743		     (match_operand 12  "const_8_to_11_operand")
9744		     (match_operand 13  "const_24_to_27_operand")
9745		     (match_operand 14  "const_24_to_27_operand")
9746		     (match_operand 15  "const_12_to_15_operand")
9747		     (match_operand 16  "const_12_to_15_operand")
9748		     (match_operand 17  "const_28_to_31_operand")
9749		     (match_operand 18  "const_28_to_31_operand")])))]
9750  "TARGET_AVX512F
9751   && (INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
9752       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
9753       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4)
9754       && INTVAL (operands[6]) == (INTVAL (operands[10]) - 4)
9755       && INTVAL (operands[3]) == (INTVAL (operands[11]) - 8)
9756       && INTVAL (operands[4]) == (INTVAL (operands[12]) - 8)
9757       && INTVAL (operands[5]) == (INTVAL (operands[13]) - 8)
9758       && INTVAL (operands[6]) == (INTVAL (operands[14]) - 8)
9759       && INTVAL (operands[3]) == (INTVAL (operands[15]) - 12)
9760       && INTVAL (operands[4]) == (INTVAL (operands[16]) - 12)
9761       && INTVAL (operands[5]) == (INTVAL (operands[17]) - 12)
9762       && INTVAL (operands[6]) == (INTVAL (operands[18]) - 12))"
9763{
9764  int mask;
9765  mask = INTVAL (operands[3]);
9766  mask |= INTVAL (operands[4]) << 2;
9767  mask |= (INTVAL (operands[5]) - 16) << 4;
9768  mask |= (INTVAL (operands[6]) - 16) << 6;
9769  operands[3] = GEN_INT (mask);
9770
9771  return "vshufps\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
9772}
9773  [(set_attr "type" "sselog")
9774   (set_attr "length_immediate" "1")
9775   (set_attr "prefix" "evex")
9776   (set_attr "mode" "V16SF")])
9777
9778(define_expand "avx512f_shufpd512_mask"
9779  [(match_operand:V8DF 0 "register_operand")
9780   (match_operand:V8DF 1 "register_operand")
9781   (match_operand:V8DF 2 "nonimmediate_operand")
9782   (match_operand:SI 3 "const_0_to_255_operand")
9783   (match_operand:V8DF 4 "register_operand")
9784   (match_operand:QI 5 "register_operand")]
9785  "TARGET_AVX512F"
9786{
9787  int mask = INTVAL (operands[3]);
9788  emit_insn (gen_avx512f_shufpd512_1_mask (operands[0], operands[1], operands[2],
9789					GEN_INT (mask & 1),
9790					GEN_INT (mask & 2 ? 9 : 8),
9791					GEN_INT (mask & 4 ? 3 : 2),
9792					GEN_INT (mask & 8 ? 11 : 10),
9793					GEN_INT (mask & 16 ? 5 : 4),
9794					GEN_INT (mask & 32 ? 13 : 12),
9795					GEN_INT (mask & 64 ? 7 : 6),
9796					GEN_INT (mask & 128 ? 15 : 14),
9797					operands[4], operands[5]));
9798  DONE;
9799})
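;; E.g. an immediate of 0x01 expands to the selection (1 8 2 10 4 12 6 14):
;; only the lowest result element switches to the odd double of its 128-bit
;; pair, giving { op1[1], op2[0], op1[2], op2[2], op1[4], op2[4],
;; op1[6], op2[6] }.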
9800
9801(define_insn "avx512f_shufpd512_1<mask_name>"
9802  [(set (match_operand:V8DF 0 "register_operand" "=v")
9803	(vec_select:V8DF
9804	  (vec_concat:V16DF
9805	    (match_operand:V8DF 1 "register_operand" "v")
9806	    (match_operand:V8DF 2 "nonimmediate_operand" "vm"))
9807	  (parallel [(match_operand 3 "const_0_to_1_operand")
9808		     (match_operand 4 "const_8_to_9_operand")
9809		     (match_operand 5 "const_2_to_3_operand")
9810		     (match_operand 6 "const_10_to_11_operand")
9811		     (match_operand 7 "const_4_to_5_operand")
9812		     (match_operand 8 "const_12_to_13_operand")
9813		     (match_operand 9 "const_6_to_7_operand")
9814		     (match_operand 10 "const_14_to_15_operand")])))]
9815  "TARGET_AVX512F"
9816{
9817  int mask;
9818  mask = INTVAL (operands[3]);
9819  mask |= (INTVAL (operands[4]) - 8) << 1;
9820  mask |= (INTVAL (operands[5]) - 2) << 2;
9821  mask |= (INTVAL (operands[6]) - 10) << 3;
9822  mask |= (INTVAL (operands[7]) - 4) << 4;
9823  mask |= (INTVAL (operands[8]) - 12) << 5;
9824  mask |= (INTVAL (operands[9]) - 6) << 6;
9825  mask |= (INTVAL (operands[10]) - 14) << 7;
9826  operands[3] = GEN_INT (mask);
9827
9828  return "vshufpd\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
9829}
9830  [(set_attr "type" "sselog")
9831   (set_attr "length_immediate" "1")
9832   (set_attr "prefix" "evex")
9833   (set_attr "mode" "V8DF")])
9834
9835(define_expand "avx_shufpd256<mask_expand4_name>"
9836  [(match_operand:V4DF 0 "register_operand")
9837   (match_operand:V4DF 1 "register_operand")
9838   (match_operand:V4DF 2 "nonimmediate_operand")
9839   (match_operand:SI 3 "const_int_operand")]
9840  "TARGET_AVX"
9841{
9842  int mask = INTVAL (operands[3]);
9843  emit_insn (gen_avx_shufpd256_1<mask_expand4_name> (operands[0],
9844						     operands[1],
9845						     operands[2],
9846						     GEN_INT (mask & 1),
9847						     GEN_INT (mask & 2 ? 5 : 4),
9848						     GEN_INT (mask & 4 ? 3 : 2),
9849						     GEN_INT (mask & 8 ? 7 : 6)
9850						     <mask_expand4_args>));
9851  DONE;
9852})
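;; E.g. an immediate of 0x5 expands to the selection (1 4 3 6), i.e.
;; { op1[1], op2[0], op1[3], op2[2] }.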
9853
9854(define_insn "avx_shufpd256_1<mask_name>"
9855  [(set (match_operand:V4DF 0 "register_operand" "=v")
9856	(vec_select:V4DF
9857	  (vec_concat:V8DF
9858	    (match_operand:V4DF 1 "register_operand" "v")
9859	    (match_operand:V4DF 2 "nonimmediate_operand" "vm"))
9860	  (parallel [(match_operand 3 "const_0_to_1_operand")
9861		     (match_operand 4 "const_4_to_5_operand")
9862		     (match_operand 5 "const_2_to_3_operand")
9863		     (match_operand 6 "const_6_to_7_operand")])))]
9864  "TARGET_AVX && <mask_avx512vl_condition>"
9865{
9866  int mask;
9867  mask = INTVAL (operands[3]);
9868  mask |= (INTVAL (operands[4]) - 4) << 1;
9869  mask |= (INTVAL (operands[5]) - 2) << 2;
9870  mask |= (INTVAL (operands[6]) - 6) << 3;
9871  operands[3] = GEN_INT (mask);
9872
9873  return "vshufpd\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
9874}
9875  [(set_attr "type" "sseshuf")
9876   (set_attr "length_immediate" "1")
9877   (set_attr "prefix" "vex")
9878   (set_attr "mode" "V4DF")])
9879
9880(define_expand "sse2_shufpd<mask_expand4_name>"
9881  [(match_operand:V2DF 0 "register_operand")
9882   (match_operand:V2DF 1 "register_operand")
9883   (match_operand:V2DF 2 "vector_operand")
9884   (match_operand:SI 3 "const_int_operand")]
9885  "TARGET_SSE2"
9886{
9887  int mask = INTVAL (operands[3]);
9888  emit_insn (gen_sse2_shufpd_v2df<mask_expand4_name> (operands[0], operands[1],
9889						      operands[2], GEN_INT (mask & 1),
9890						      GEN_INT (mask & 2 ? 3 : 2)
9891						      <mask_expand4_args>));
9892  DONE;
9893})
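;; E.g. an immediate of 0x1 expands to the selection (1 2), i.e. the result
;; is { op1[1], op2[0] }; bit 0 picks the element taken from operand 1 and
;; bit 1 the element taken from operand 2.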
9894
9895(define_insn "sse2_shufpd_v2df_mask"
9896  [(set (match_operand:V2DF 0 "register_operand" "=v")
9897    (vec_merge:V2DF
9898	  (vec_select:V2DF
9899	    (vec_concat:V4DF
9900	      (match_operand:V2DF 1 "register_operand" "v")
9901	      (match_operand:V2DF 2 "nonimmediate_operand" "vm"))
9902	    (parallel [(match_operand 3 "const_0_to_1_operand")
9903		           (match_operand 4 "const_2_to_3_operand")]))
9904      (match_operand:V2DF 5 "nonimm_or_0_operand" "0C")
9905      (match_operand:QI 6 "register_operand" "Yk")))]
9906  "TARGET_AVX512VL"
9907{
9908  int mask;
9909  mask = INTVAL (operands[3]);
9910  mask |= (INTVAL (operands[4]) - 2) << 1;
9911  operands[3] = GEN_INT (mask);
9912
9913  return "vshufpd\t{%3, %2, %1, %0%{%6%}%N5|%0%{%6%}%N5, %1, %2, %3}";
9914}
9915  [(set_attr "type" "sseshuf")
9916   (set_attr "length_immediate" "1")
9917   (set_attr "prefix" "evex")
9918   (set_attr "mode" "V2DF")])
9919
9920;; punpcklqdq and punpckhqdq are shorter than shufpd.
9921(define_insn "avx2_interleave_highv4di<mask_name>"
9922  [(set (match_operand:V4DI 0 "register_operand" "=v")
9923	(vec_select:V4DI
9924	  (vec_concat:V8DI
9925	    (match_operand:V4DI 1 "register_operand" "v")
9926	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9927	  (parallel [(const_int 1)
9928		     (const_int 5)
9929		     (const_int 3)
9930		     (const_int 7)])))]
9931  "TARGET_AVX2 && <mask_avx512vl_condition>"
9932  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9933  [(set_attr "type" "sselog")
9934   (set_attr "prefix" "vex")
9935   (set_attr "mode" "OI")])
9936
9937(define_insn "<mask_codefor>avx512f_interleave_highv8di<mask_name>"
9938  [(set (match_operand:V8DI 0 "register_operand" "=v")
9939	(vec_select:V8DI
9940	  (vec_concat:V16DI
9941	    (match_operand:V8DI 1 "register_operand" "v")
9942	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9943	  (parallel [(const_int 1) (const_int 9)
9944		     (const_int 3) (const_int 11)
9945		     (const_int 5) (const_int 13)
9946		     (const_int 7) (const_int 15)])))]
9947  "TARGET_AVX512F"
9948  "vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9949  [(set_attr "type" "sselog")
9950   (set_attr "prefix" "evex")
9951   (set_attr "mode" "XI")])
9952
9953(define_insn "vec_interleave_highv2di<mask_name>"
9954  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
9955	(vec_select:V2DI
9956	  (vec_concat:V4DI
9957	    (match_operand:V2DI 1 "register_operand" "0,v")
9958	    (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
9959	  (parallel [(const_int 1)
9960		     (const_int 3)])))]
9961  "TARGET_SSE2 && <mask_avx512vl_condition>"
9962  "@
9963   punpckhqdq\t{%2, %0|%0, %2}
9964   vpunpckhqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9965  [(set_attr "isa" "noavx,avx")
9966   (set_attr "type" "sselog")
9967   (set_attr "prefix_data16" "1,*")
9968   (set_attr "prefix" "orig,<mask_prefix>")
9969   (set_attr "mode" "TI")])
9970
9971(define_insn "avx2_interleave_lowv4di<mask_name>"
9972  [(set (match_operand:V4DI 0 "register_operand" "=v")
9973	(vec_select:V4DI
9974	  (vec_concat:V8DI
9975	    (match_operand:V4DI 1 "register_operand" "v")
9976	    (match_operand:V4DI 2 "nonimmediate_operand" "vm"))
9977	  (parallel [(const_int 0)
9978		     (const_int 4)
9979		     (const_int 2)
9980		     (const_int 6)])))]
9981  "TARGET_AVX2 && <mask_avx512vl_condition>"
9982  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9983  [(set_attr "type" "sselog")
9984   (set_attr "prefix" "vex")
9985   (set_attr "mode" "OI")])
9986
9987(define_insn "<mask_codefor>avx512f_interleave_lowv8di<mask_name>"
9988  [(set (match_operand:V8DI 0 "register_operand" "=v")
9989	(vec_select:V8DI
9990	  (vec_concat:V16DI
9991	    (match_operand:V8DI 1 "register_operand" "v")
9992	    (match_operand:V8DI 2 "nonimmediate_operand" "vm"))
9993	  (parallel [(const_int 0) (const_int 8)
9994		     (const_int 2) (const_int 10)
9995		     (const_int 4) (const_int 12)
9996		     (const_int 6) (const_int 14)])))]
9997  "TARGET_AVX512F"
9998  "vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
9999  [(set_attr "type" "sselog")
10000   (set_attr "prefix" "evex")
10001   (set_attr "mode" "XI")])
10002
10003(define_insn "vec_interleave_lowv2di<mask_name>"
10004  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
10005	(vec_select:V2DI
10006	  (vec_concat:V4DI
10007	    (match_operand:V2DI 1 "register_operand" "0,v")
10008	    (match_operand:V2DI 2 "vector_operand" "xBm,vm"))
10009	  (parallel [(const_int 0)
10010		     (const_int 2)])))]
10011  "TARGET_SSE2 && <mask_avx512vl_condition>"
10012  "@
10013   punpcklqdq\t{%2, %0|%0, %2}
10014   vpunpcklqdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
10015  [(set_attr "isa" "noavx,avx")
10016   (set_attr "type" "sselog")
10017   (set_attr "prefix_data16" "1,*")
10018   (set_attr "prefix" "orig,vex")
10019   (set_attr "mode" "TI")])
10020
10021(define_insn "sse2_shufpd_<mode>"
10022  [(set (match_operand:VI8F_128 0 "register_operand" "=x,v")
10023	(vec_select:VI8F_128
10024	  (vec_concat:<ssedoublevecmode>
10025	    (match_operand:VI8F_128 1 "register_operand" "0,v")
10026	    (match_operand:VI8F_128 2 "vector_operand" "xBm,vm"))
10027	  (parallel [(match_operand 3 "const_0_to_1_operand")
10028		     (match_operand 4 "const_2_to_3_operand")])))]
10029  "TARGET_SSE2"
10030{
10031  int mask;
10032  mask = INTVAL (operands[3]);
10033  mask |= (INTVAL (operands[4]) - 2) << 1;
10034  operands[3] = GEN_INT (mask);
10035
10036  switch (which_alternative)
10037    {
10038    case 0:
10039      return "shufpd\t{%3, %2, %0|%0, %2, %3}";
10040    case 1:
10041      return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
10042    default:
10043      gcc_unreachable ();
10044    }
10045}
10046  [(set_attr "isa" "noavx,avx")
10047   (set_attr "type" "sseshuf")
10048   (set_attr "length_immediate" "1")
10049   (set_attr "prefix" "orig,maybe_evex")
10050   (set_attr "mode" "V2DF")])
10051
;; Avoid combining registers from different units in a single alternative;
;; see the comment above the inline_secondary_memory_needed function in i386.c
10054(define_insn "sse2_storehpd"
10055  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,Yv,x,*f,r")
10056	(vec_select:DF
10057	  (match_operand:V2DF 1 "nonimmediate_operand" " v,0, v,o,o,o")
10058	  (parallel [(const_int 1)])))]
10059  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10060  "@
10061   %vmovhpd\t{%1, %0|%0, %1}
10062   unpckhpd\t%0, %0
10063   vunpckhpd\t{%d1, %0|%0, %d1}
10064   #
10065   #
10066   #"
10067  [(set_attr "isa" "*,noavx,avx,*,*,*")
10068   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,fmov,imov")
10069   (set (attr "prefix_data16")
10070     (if_then_else
10071       (and (eq_attr "alternative" "0")
10072	    (not (match_test "TARGET_AVX")))
10073       (const_string "1")
10074       (const_string "*")))
10075   (set_attr "prefix" "maybe_vex,orig,maybe_evex,*,*,*")
10076   (set_attr "mode" "V1DF,V1DF,V2DF,DF,DF,DF")])
10077
10078(define_split
10079  [(set (match_operand:DF 0 "register_operand")
10080	(vec_select:DF
10081	  (match_operand:V2DF 1 "memory_operand")
10082	  (parallel [(const_int 1)])))]
10083  "TARGET_SSE2 && reload_completed"
10084  [(set (match_dup 0) (match_dup 1))]
10085  "operands[1] = adjust_address (operands[1], DFmode, 8);")
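;; When the source vector is in memory, its high double is just the DFmode
;; word at offset 8, so the vec_select above splits into a scalar load.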
10086
10087(define_insn "*vec_extractv2df_1_sse"
10088  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10089	(vec_select:DF
10090	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,o")
10091	  (parallel [(const_int 1)])))]
10092  "!TARGET_SSE2 && TARGET_SSE
10093   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10094  "@
10095   movhps\t{%1, %0|%0, %1}
10096   movhlps\t{%1, %0|%0, %1}
10097   movlps\t{%H1, %0|%0, %H1}"
10098  [(set_attr "type" "ssemov")
10099   (set_attr "mode" "V2SF,V4SF,V2SF")])
10100
10101;; Avoid combining registers from different units in a single alternative;
10102;; see the comment above the inline_secondary_memory_needed function in i386.c.
10103(define_insn "sse2_storelpd"
10104  [(set (match_operand:DF 0 "nonimmediate_operand"     "=m,x,x,*f,r")
10105	(vec_select:DF
10106	  (match_operand:V2DF 1 "nonimmediate_operand" " v,x,m,m,m")
10107	  (parallel [(const_int 0)])))]
10108  "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10109  "@
10110   %vmovlpd\t{%1, %0|%0, %1}
10111   #
10112   #
10113   #
10114   #"
10115  [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
10116   (set (attr "prefix_data16")
10117     (if_then_else (eq_attr "alternative" "0")
10118		   (const_string "1")
10119		   (const_string "*")))
10120   (set_attr "prefix" "maybe_vex")
10121   (set_attr "mode" "V1DF,DF,DF,DF,DF")])
10122
10123(define_split
10124  [(set (match_operand:DF 0 "register_operand")
10125	(vec_select:DF
10126	  (match_operand:V2DF 1 "nonimmediate_operand")
10127	  (parallel [(const_int 0)])))]
10128  "TARGET_SSE2 && reload_completed"
10129  [(set (match_dup 0) (match_dup 1))]
10130  "operands[1] = gen_lowpart (DFmode, operands[1]);")
10131
10132(define_insn "*vec_extractv2df_0_sse"
10133  [(set (match_operand:DF 0 "nonimmediate_operand" "=m,x,x")
10134	(vec_select:DF
10135	  (match_operand:V2DF 1 "nonimmediate_operand" "x,x,m")
10136	  (parallel [(const_int 0)])))]
10137  "!TARGET_SSE2 && TARGET_SSE
10138   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
10139  "@
10140   movlps\t{%1, %0|%0, %1}
10141   movaps\t{%1, %0|%0, %1}
10142   movlps\t{%1, %0|%0, %q1}"
10143  [(set_attr "type" "ssemov")
10144   (set_attr "mode" "V2SF,V4SF,V2SF")])
10145
10146(define_expand "sse2_loadhpd_exp"
10147  [(set (match_operand:V2DF 0 "nonimmediate_operand")
10148	(vec_concat:V2DF
10149	  (vec_select:DF
10150	    (match_operand:V2DF 1 "nonimmediate_operand")
10151	    (parallel [(const_int 0)]))
10152	  (match_operand:DF 2 "nonimmediate_operand")))]
10153  "TARGET_SSE2"
10154{
10155  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10156
10157  emit_insn (gen_sse2_loadhpd (dst, operands[1], operands[2]));
10158
10159  /* Fix up the destination if needed.  */
10160  if (dst != operands[0])
10161    emit_move_insn (operands[0], dst);
10162
10163  DONE;
10164})
10165
10166;; Avoid combining registers from different units in a single alternative;
10167;; see the comment above the inline_secondary_memory_needed function in i386.c.
10168(define_insn "sse2_loadhpd"
10169  [(set (match_operand:V2DF 0 "nonimmediate_operand"
10170	  "=x,v,x,v ,o,o ,o")
10171	(vec_concat:V2DF
10172	  (vec_select:DF
10173	    (match_operand:V2DF 1 "nonimmediate_operand"
10174	  " 0,v,0,v ,0,0 ,0")
10175	    (parallel [(const_int 0)]))
10176	  (match_operand:DF 2 "nonimmediate_operand"
10177	  " m,m,x,Yv,x,*f,r")))]
10178  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10179  "@
10180   movhpd\t{%2, %0|%0, %2}
10181   vmovhpd\t{%2, %1, %0|%0, %1, %2}
10182   unpcklpd\t{%2, %0|%0, %2}
10183   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10184   #
10185   #
10186   #"
10187  [(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
10188   (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
10189   (set (attr "prefix_data16")
10190     (if_then_else (eq_attr "alternative" "0")
10191		   (const_string "1")
10192		   (const_string "*")))
10193   (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,*,*,*")
10194   (set_attr "mode" "V1DF,V1DF,V2DF,V2DF,DF,DF,DF")])
10195
10196(define_split
10197  [(set (match_operand:V2DF 0 "memory_operand")
10198	(vec_concat:V2DF
10199	  (vec_select:DF (match_dup 0) (parallel [(const_int 0)]))
10200	  (match_operand:DF 1 "register_operand")))]
10201  "TARGET_SSE2 && reload_completed"
10202  [(set (match_dup 0) (match_dup 1))]
10203  "operands[0] = adjust_address (operands[0], DFmode, 8);")
10204
10205(define_expand "sse2_loadlpd_exp"
10206  [(set (match_operand:V2DF 0 "nonimmediate_operand")
10207	(vec_concat:V2DF
10208	  (match_operand:DF 2 "nonimmediate_operand")
10209	  (vec_select:DF
10210	    (match_operand:V2DF 1 "nonimmediate_operand")
10211	    (parallel [(const_int 1)]))))]
10212  "TARGET_SSE2"
10213{
10214  rtx dst = ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);
10215
10216  emit_insn (gen_sse2_loadlpd (dst, operands[1], operands[2]));
10217
10218  /* Fix up the destination if needed.  */
10219  if (dst != operands[0])
10220    emit_move_insn (operands[0], dst);
10221
10222  DONE;
10223})
10224
10225;; Avoid combining registers from different units in a single alternative;
10226;; see the comment above the inline_secondary_memory_needed function in i386.c.
10227(define_insn "sse2_loadlpd"
10228  [(set (match_operand:V2DF 0 "nonimmediate_operand"
10229	  "=v,x,v,x,v,x,x,v,m,m ,m")
10230	(vec_concat:V2DF
10231	  (match_operand:DF 2 "nonimmediate_operand"
10232	  "vm,m,m,x,v,0,0,v,x,*f,r")
10233	  (vec_select:DF
10234	    (match_operand:V2DF 1 "nonimm_or_0_operand"
10235	  " C,0,v,0,v,x,o,o,0,0 ,0")
10236	    (parallel [(const_int 1)]))))]
10237  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
10238  "@
10239   %vmovq\t{%2, %0|%0, %2}
10240   movlpd\t{%2, %0|%0, %2}
10241   vmovlpd\t{%2, %1, %0|%0, %1, %2}
10242   movsd\t{%2, %0|%0, %2}
10243   vmovsd\t{%2, %1, %0|%0, %1, %2}
10244   shufpd\t{$2, %1, %0|%0, %1, 2}
10245   movhpd\t{%H1, %0|%0, %H1}
10246   vmovhpd\t{%H1, %2, %0|%0, %2, %H1}
10247   #
10248   #
10249   #"
10250  [(set_attr "isa" "*,noavx,avx,noavx,avx,noavx,noavx,avx,*,*,*")
10251   (set (attr "type")
10252     (cond [(eq_attr "alternative" "5")
10253	      (const_string "sselog")
10254	    (eq_attr "alternative" "9")
10255	      (const_string "fmov")
10256	    (eq_attr "alternative" "10")
10257	      (const_string "imov")
10258	   ]
10259	   (const_string "ssemov")))
10260   (set (attr "prefix_data16")
10261     (if_then_else (eq_attr "alternative" "1,6")
10262		   (const_string "1")
10263		   (const_string "*")))
10264   (set (attr "length_immediate")
10265     (if_then_else (eq_attr "alternative" "5")
10266		   (const_string "1")
10267		   (const_string "*")))
10268   (set (attr "prefix")
10269     (cond [(eq_attr "alternative" "0")
10270	      (const_string "maybe_vex")
10271	    (eq_attr "alternative" "1,3,5,6")
10272	      (const_string "orig")
10273	    (eq_attr "alternative" "2,4,7")
10274	      (const_string "maybe_evex")
10275	   ]
10276	   (const_string "*")))
10277   (set_attr "mode" "DF,V1DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,DF,DF,DF")])
10278
10279(define_split
10280  [(set (match_operand:V2DF 0 "memory_operand")
10281	(vec_concat:V2DF
10282	  (match_operand:DF 1 "register_operand")
10283	  (vec_select:DF (match_dup 0) (parallel [(const_int 1)]))))]
10284  "TARGET_SSE2 && reload_completed"
10285  [(set (match_dup 0) (match_dup 1))]
10286  "operands[0] = adjust_address (operands[0], DFmode, 0);")
10287
10288(define_insn "sse2_movsd"
10289  [(set (match_operand:V2DF 0 "nonimmediate_operand"   "=x,v,x,v,m,x,x,v,o")
10290	(vec_merge:V2DF
10291	  (match_operand:V2DF 2 "nonimmediate_operand" " x,v,m,m,v,0,0,v,0")
10292	  (match_operand:V2DF 1 "nonimmediate_operand" " 0,v,0,v,0,x,o,o,v")
10293	  (const_int 1)))]
10294  "TARGET_SSE2"
10295  "@
10296   movsd\t{%2, %0|%0, %2}
10297   vmovsd\t{%2, %1, %0|%0, %1, %2}
10298   movlpd\t{%2, %0|%0, %q2}
10299   vmovlpd\t{%2, %1, %0|%0, %1, %q2}
10300   %vmovlpd\t{%2, %0|%q0, %2}
10301   shufpd\t{$2, %1, %0|%0, %1, 2}
10302   movhps\t{%H1, %0|%0, %H1}
10303   vmovhps\t{%H1, %2, %0|%0, %2, %H1}
10304   %vmovhps\t{%1, %H0|%H0, %1}"
10305  [(set_attr "isa" "noavx,avx,noavx,avx,*,noavx,noavx,avx,*")
10306   (set (attr "type")
10307     (if_then_else
10308       (eq_attr "alternative" "5")
10309       (const_string "sselog")
10310       (const_string "ssemov")))
10311   (set (attr "prefix_data16")
10312     (if_then_else
10313       (and (eq_attr "alternative" "2,4")
10314	    (not (match_test "TARGET_AVX")))
10315       (const_string "1")
10316       (const_string "*")))
10317   (set (attr "length_immediate")
10318     (if_then_else (eq_attr "alternative" "5")
10319		   (const_string "1")
10320		   (const_string "*")))
10321   (set (attr "prefix")
10322     (cond [(eq_attr "alternative" "1,3,7")
10323	      (const_string "maybe_evex")
10324	    (eq_attr "alternative" "4,8")
10325	      (const_string "maybe_vex")
10326	   ]
10327	   (const_string "orig")))
10328   (set_attr "mode" "DF,DF,V1DF,V1DF,V1DF,V2DF,V1DF,V1DF,V1DF")])
10329
10330(define_insn "vec_dupv2df<mask_name>"
10331  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v")
10332	(vec_duplicate:V2DF
10333	  (match_operand:DF 1 "nonimmediate_operand" " 0,xm,vm")))]
10334  "TARGET_SSE2 && <mask_avx512vl_condition>"
10335  "@
10336   unpcklpd\t%0, %0
10337   %vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
10338   vmovddup\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
10339  [(set_attr "isa" "noavx,sse3,avx512vl")
10340   (set_attr "type" "sselog1")
10341   (set_attr "prefix" "orig,maybe_vex,evex")
10342   (set_attr "mode" "V2DF,DF,DF")])
10343
10344(define_insn "vec_concatv2df"
10345  [(set (match_operand:V2DF 0 "register_operand"     "=x,x,v,x,v,x,x, v,x,x")
10346	(vec_concat:V2DF
10347	  (match_operand:DF 1 "nonimmediate_operand" " 0,x,v,m,m,0,x,vm,0,0")
10348	  (match_operand:DF 2 "nonimm_or_0_operand"  " x,x,v,1,1,m,m, C,x,m")))]
10349  "TARGET_SSE
10350   && (!(MEM_P (operands[1]) && MEM_P (operands[2]))
10351       || (TARGET_SSE3 && rtx_equal_p (operands[1], operands[2])))"
10352  "@
10353   unpcklpd\t{%2, %0|%0, %2}
10354   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10355   vunpcklpd\t{%2, %1, %0|%0, %1, %2}
10356   %vmovddup\t{%1, %0|%0, %1}
10357   vmovddup\t{%1, %0|%0, %1}
10358   movhpd\t{%2, %0|%0, %2}
10359   vmovhpd\t{%2, %1, %0|%0, %1, %2}
10360   %vmovq\t{%1, %0|%0, %1}
10361   movlhps\t{%2, %0|%0, %2}
10362   movhps\t{%2, %0|%0, %2}"
10363  [(set (attr "isa")
10364     (cond [(eq_attr "alternative" "0,5")
10365	      (const_string "sse2_noavx")
10366	    (eq_attr "alternative" "1,6")
10367	      (const_string "avx")
10368	    (eq_attr "alternative" "2,4")
10369	      (const_string "avx512vl")
10370	    (eq_attr "alternative" "3")
10371	      (const_string "sse3")
10372	    (eq_attr "alternative" "7")
10373	      (const_string "sse2")
10374	   ]
10375	   (const_string "noavx")))
10376   (set (attr "type")
10377     (if_then_else
10378       (eq_attr "alternative" "0,1,2,3,4")
10379       (const_string "sselog")
10380       (const_string "ssemov")))
10381   (set (attr "prefix_data16")
10382	(if_then_else (eq_attr "alternative" "5")
10383		      (const_string "1")
10384		      (const_string "*")))
10385   (set (attr "prefix")
10386     (cond [(eq_attr "alternative" "1,6")
10387	      (const_string "vex")
10388	    (eq_attr "alternative" "2,4")
10389	      (const_string "evex")
10390	    (eq_attr "alternative" "3,7")
10391	      (const_string "maybe_vex")
10392	   ]
10393	   (const_string "orig")))
10394   (set_attr "mode" "V2DF,V2DF,V2DF, DF, DF, V1DF,V1DF,DF,V4SF,V2SF")])
10395
10396;; vmovq also clears the higher bits.
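;; (Illustrative note: building e.g. a V4DF value { x, 0.0, 0.0, 0.0 } from a
;; scalar therefore needs only this single vmovq, since the VEX/EVEX-encoded
;; write to the low xmm register zeroes the rest of the ymm/zmm destination.)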
10397(define_insn "vec_set<mode>_0"
10398  [(set (match_operand:VF2_512_256 0 "register_operand" "=v")
10399	(vec_merge:VF2_512_256
10400	  (vec_duplicate:VF2_512_256
10401	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm"))
10402	  (match_operand:VF2_512_256 1 "const0_operand" "C")
10403	  (const_int 1)))]
10404  "TARGET_AVX"
10405  "vmovq\t{%2, %x0|%x0, %2}"
10406  [(set_attr "type" "ssemov")
10407   (set_attr "prefix" "maybe_evex")
10408   (set_attr "mode" "DF")])
10409
10410;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10411;;
10412;; Parallel integer down-conversion operations
10413;;
10414;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
10415
10416(define_mode_iterator PMOV_DST_MODE_1 [V16QI V16HI V8SI V8HI])
10417(define_mode_attr pmov_src_mode
10418  [(V16QI "V16SI") (V16HI "V16SI") (V8SI "V8DI") (V8HI "V8DI")])
10419(define_mode_attr pmov_src_lower
10420  [(V16QI "v16si") (V16HI "v16si") (V8SI "v8di") (V8HI "v8di")])
10421(define_mode_attr pmov_suff_1
10422  [(V16QI "db") (V16HI "dw") (V8SI "qd") (V8HI "qw")])
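
;; Illustrative note: each destination mode in PMOV_DST_MODE_1 is paired with
;; the 512-bit source it is truncated from, e.g. V16QI comes from V16SI, so
;; the plain, signed-saturating and unsigned-saturating forms of that
;; truncation are emitted as vpmovdb, vpmovsdb and vpmovusdb respectively.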
10423
10424(define_insn "*avx512f_<code><pmov_src_lower><mode>2"
10425  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10426	(any_truncate:PMOV_DST_MODE_1
10427	  (match_operand:<pmov_src_mode> 1 "register_operand" "v,v")))]
10428  "TARGET_AVX512F"
10429  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0|%0, %1}"
10430  [(set_attr "type" "ssemov")
10431   (set_attr "memory" "none,store")
10432   (set_attr "prefix" "evex")
10433   (set_attr "mode" "<sseinsnmode>")])
10434
10435(define_insn "avx512f_<code><pmov_src_lower><mode>2_mask"
10436  [(set (match_operand:PMOV_DST_MODE_1 0 "nonimmediate_operand" "=v,m")
10437    (vec_merge:PMOV_DST_MODE_1
10438      (any_truncate:PMOV_DST_MODE_1
10439        (match_operand:<pmov_src_mode> 1 "register_operand" "v,v"))
10440      (match_operand:PMOV_DST_MODE_1 2 "nonimm_or_0_operand" "0C,0")
10441      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10442  "TARGET_AVX512F"
10443  "vpmov<trunsuffix><pmov_suff_1>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10444  [(set_attr "type" "ssemov")
10445   (set_attr "memory" "none,store")
10446   (set_attr "prefix" "evex")
10447   (set_attr "mode" "<sseinsnmode>")])
10448
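;; The masked-store form reuses the insn pattern above with a memory
;; destination; via match_dup, operand 0 also serves as the merge value, so
;; elements whose mask bit is clear are left unchanged in memory.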
10449(define_expand "avx512f_<code><pmov_src_lower><mode>2_mask_store"
10450  [(set (match_operand:PMOV_DST_MODE_1 0 "memory_operand")
10451    (vec_merge:PMOV_DST_MODE_1
10452      (any_truncate:PMOV_DST_MODE_1
10453        (match_operand:<pmov_src_mode> 1 "register_operand"))
10454      (match_dup 0)
10455      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10456  "TARGET_AVX512F")
10457
10458(define_insn "avx512bw_<code>v32hiv32qi2"
10459  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10460	(any_truncate:V32QI
10461	    (match_operand:V32HI 1 "register_operand" "v,v")))]
10462  "TARGET_AVX512BW"
10463  "vpmov<trunsuffix>wb\t{%1, %0|%0, %1}"
10464  [(set_attr "type" "ssemov")
10465   (set_attr "memory" "none,store")
10466   (set_attr "prefix" "evex")
10467   (set_attr "mode" "XI")])
10468
10469(define_insn "avx512bw_<code>v32hiv32qi2_mask"
10470  [(set (match_operand:V32QI 0 "nonimmediate_operand" "=v,m")
10471    (vec_merge:V32QI
10472      (any_truncate:V32QI
10473        (match_operand:V32HI 1 "register_operand" "v,v"))
10474      (match_operand:V32QI 2 "nonimm_or_0_operand" "0C,0")
10475      (match_operand:SI 3 "register_operand" "Yk,Yk")))]
10476  "TARGET_AVX512BW"
10477  "vpmov<trunsuffix>wb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10478  [(set_attr "type" "ssemov")
10479   (set_attr "memory" "none,store")
10480   (set_attr "prefix" "evex")
10481   (set_attr "mode" "XI")])
10482
10483(define_expand "avx512bw_<code>v32hiv32qi2_mask_store"
10484  [(set (match_operand:V32QI 0 "nonimmediate_operand")
10485    (vec_merge:V32QI
10486      (any_truncate:V32QI
10487        (match_operand:V32HI 1 "register_operand"))
10488      (match_dup 0)
10489      (match_operand:SI 2 "register_operand")))]
10490  "TARGET_AVX512BW")
10491
10492(define_mode_iterator PMOV_DST_MODE_2
10493  [V4SI V8HI (V16QI "TARGET_AVX512BW")])
10494(define_mode_attr pmov_suff_2
10495  [(V16QI "wb") (V8HI "dw") (V4SI "qd")])
10496
10497(define_insn "*avx512vl_<code><ssedoublemodelower><mode>2"
10498  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10499	(any_truncate:PMOV_DST_MODE_2
10500	    (match_operand:<ssedoublemode> 1 "register_operand" "v,v")))]
10501  "TARGET_AVX512VL"
10502  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0|%0, %1}"
10503  [(set_attr "type" "ssemov")
10504   (set_attr "memory" "none,store")
10505   (set_attr "prefix" "evex")
10506   (set_attr "mode" "<sseinsnmode>")])
10507
10508(define_insn "<avx512>_<code><ssedoublemodelower><mode>2_mask"
10509  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand" "=v,m")
10510    (vec_merge:PMOV_DST_MODE_2
10511      (any_truncate:PMOV_DST_MODE_2
10512        (match_operand:<ssedoublemode> 1 "register_operand" "v,v"))
10513      (match_operand:PMOV_DST_MODE_2 2 "nonimm_or_0_operand" "0C,0")
10514      (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")))]
10515  "TARGET_AVX512VL"
10516  "vpmov<trunsuffix><pmov_suff_2>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10517  [(set_attr "type" "ssemov")
10518   (set_attr "memory" "none,store")
10519   (set_attr "prefix" "evex")
10520   (set_attr "mode" "<sseinsnmode>")])
10521
10522(define_expand "<avx512>_<code><ssedoublemodelower><mode>2_mask_store"
10523  [(set (match_operand:PMOV_DST_MODE_2 0 "nonimmediate_operand")
10524    (vec_merge:PMOV_DST_MODE_2
10525      (any_truncate:PMOV_DST_MODE_2
10526        (match_operand:<ssedoublemode> 1 "register_operand"))
10527      (match_dup 0)
10528      (match_operand:<avx512fmaskmode> 2 "register_operand")))]
10529  "TARGET_AVX512VL")
10530
10531(define_mode_iterator PMOV_SRC_MODE_3 [V4DI V2DI V8SI V4SI (V8HI "TARGET_AVX512BW")])
10532(define_mode_attr pmov_dst_3
10533  [(V4DI "V4QI") (V2DI "V2QI") (V8SI "V8QI") (V4SI "V4QI") (V8HI "V8QI")])
10534(define_mode_attr pmov_dst_zeroed_3
10535  [(V4DI "V12QI") (V2DI "V14QI") (V8SI "V8QI") (V4SI "V12QI") (V8HI "V8QI")])
10536(define_mode_attr pmov_suff_3
10537  [(V4DI "qb") (V2DI "qb") (V8SI "db") (V4SI "db") (V8HI "wb")])
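
;; Illustrative note: these truncations produce results narrower than an xmm
;; register (2, 4 or 8 bytes), so the patterns below describe the instruction
;; as a vec_concat of the truncated value with the matching number of zero
;; bytes from pmov_dst_zeroed_3, e.g. a V2DI -> V2QI truncation is padded
;; with 14 zero bytes.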
10538
10539(define_insn "*avx512vl_<code><mode>v<ssescalarnum>qi2"
10540  [(set (match_operand:V16QI 0 "register_operand" "=v")
10541    (vec_concat:V16QI
10542      (any_truncate:<pmov_dst_3>
10543	      (match_operand:PMOV_SRC_MODE_3 1 "register_operand" "v"))
10544      (match_operand:<pmov_dst_zeroed_3> 2 "const0_operand")))]
10545  "TARGET_AVX512VL"
10546  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%0, %1}"
10547  [(set_attr "type" "ssemov")
10548   (set_attr "prefix" "evex")
10549   (set_attr "mode" "TI")])
10550
10551(define_insn "*avx512vl_<code>v2div2qi2_store"
10552  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10553    (vec_concat:V16QI
10554      (any_truncate:V2QI
10555	      (match_operand:V2DI 1 "register_operand" "v"))
10556      (vec_select:V14QI
10557        (match_dup 0)
10558        (parallel [(const_int 2) (const_int 3)
10559                   (const_int 4) (const_int 5)
10560                   (const_int 6) (const_int 7)
10561                   (const_int 8) (const_int 9)
10562                   (const_int 10) (const_int 11)
10563                   (const_int 12) (const_int 13)
10564                   (const_int 14) (const_int 15)]))))]
10565  "TARGET_AVX512VL"
10566  "vpmov<trunsuffix>qb\t{%1, %0|%w0, %1}"
10567  [(set_attr "type" "ssemov")
10568   (set_attr "memory" "store")
10569   (set_attr "prefix" "evex")
10570   (set_attr "mode" "TI")])
10571
10572(define_insn "avx512vl_<code>v2div2qi2_mask"
10573  [(set (match_operand:V16QI 0 "register_operand" "=v")
10574    (vec_concat:V16QI
10575      (vec_merge:V2QI
10576        (any_truncate:V2QI
10577          (match_operand:V2DI 1 "register_operand" "v"))
10578        (vec_select:V2QI
10579          (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10580          (parallel [(const_int 0) (const_int 1)]))
10581        (match_operand:QI 3 "register_operand" "Yk"))
10582      (const_vector:V14QI [(const_int 0) (const_int 0)
10583                           (const_int 0) (const_int 0)
10584                           (const_int 0) (const_int 0)
10585                           (const_int 0) (const_int 0)
10586                           (const_int 0) (const_int 0)
10587                           (const_int 0) (const_int 0)
10588                           (const_int 0) (const_int 0)])))]
10589  "TARGET_AVX512VL"
10590  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10591  [(set_attr "type" "ssemov")
10592   (set_attr "prefix" "evex")
10593   (set_attr "mode" "TI")])
10594
10595(define_insn "*avx512vl_<code>v2div2qi2_mask_1"
10596  [(set (match_operand:V16QI 0 "register_operand" "=v")
10597    (vec_concat:V16QI
10598      (vec_merge:V2QI
10599	(any_truncate:V2QI
10600	  (match_operand:V2DI 1 "register_operand" "v"))
10601	(const_vector:V2QI [(const_int 0) (const_int 0)])
10602	(match_operand:QI 2 "register_operand" "Yk"))
10603      (const_vector:V14QI [(const_int 0) (const_int 0)
10604			   (const_int 0) (const_int 0)
10605			   (const_int 0) (const_int 0)
10606			   (const_int 0) (const_int 0)
10607			   (const_int 0) (const_int 0)
10608			   (const_int 0) (const_int 0)
10609			   (const_int 0) (const_int 0)])))]
10610  "TARGET_AVX512VL"
10611  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10612  [(set_attr "type" "ssemov")
10613   (set_attr "prefix" "evex")
10614   (set_attr "mode" "TI")])
10615
10616(define_insn "avx512vl_<code>v2div2qi2_mask_store"
10617  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10618    (vec_concat:V16QI
10619      (vec_merge:V2QI
10620        (any_truncate:V2QI
10621          (match_operand:V2DI 1 "register_operand" "v"))
10622        (vec_select:V2QI
10623          (match_dup 0)
10624          (parallel [(const_int 0) (const_int 1)]))
10625        (match_operand:QI 2 "register_operand" "Yk"))
10626      (vec_select:V14QI
10627        (match_dup 0)
10628        (parallel [(const_int 2) (const_int 3)
10629                   (const_int 4) (const_int 5)
10630                   (const_int 6) (const_int 7)
10631                   (const_int 8) (const_int 9)
10632                   (const_int 10) (const_int 11)
10633                   (const_int 12) (const_int 13)
10634                   (const_int 14) (const_int 15)]))))]
10635  "TARGET_AVX512VL"
10636  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%w0%{%2%}, %1}"
10637  [(set_attr "type" "ssemov")
10638   (set_attr "memory" "store")
10639   (set_attr "prefix" "evex")
10640   (set_attr "mode" "TI")])
10641
10642(define_insn "*avx512vl_<code><mode>v4qi2_store"
10643  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10644    (vec_concat:V16QI
10645      (any_truncate:V4QI
10646	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10647      (vec_select:V12QI
10648        (match_dup 0)
10649        (parallel [(const_int 4) (const_int 5)
10650                   (const_int 6) (const_int 7)
10651                   (const_int 8) (const_int 9)
10652                   (const_int 10) (const_int 11)
10653                   (const_int 12) (const_int 13)
10654                   (const_int 14) (const_int 15)]))))]
10655  "TARGET_AVX512VL"
10656  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%k0, %1}"
10657  [(set_attr "type" "ssemov")
10658   (set_attr "memory" "store")
10659   (set_attr "prefix" "evex")
10660   (set_attr "mode" "TI")])
10661
10662(define_insn "avx512vl_<code><mode>v4qi2_mask"
10663  [(set (match_operand:V16QI 0 "register_operand" "=v")
10664    (vec_concat:V16QI
10665      (vec_merge:V4QI
10666        (any_truncate:V4QI
10667          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10668        (vec_select:V4QI
10669          (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10670          (parallel [(const_int 0) (const_int 1)
10671                     (const_int 2) (const_int 3)]))
10672        (match_operand:QI 3 "register_operand" "Yk"))
10673      (const_vector:V12QI [(const_int 0) (const_int 0)
10674                           (const_int 0) (const_int 0)
10675                           (const_int 0) (const_int 0)
10676                           (const_int 0) (const_int 0)
10677                           (const_int 0) (const_int 0)
10678                           (const_int 0) (const_int 0)])))]
10679  "TARGET_AVX512VL"
10680  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10681  [(set_attr "type" "ssemov")
10682   (set_attr "prefix" "evex")
10683   (set_attr "mode" "TI")])
10684
10685(define_insn "*avx512vl_<code><mode>v4qi2_mask_1"
10686  [(set (match_operand:V16QI 0 "register_operand" "=v")
10687    (vec_concat:V16QI
10688      (vec_merge:V4QI
10689	(any_truncate:V4QI
10690	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10691	(const_vector:V4QI [(const_int 0) (const_int 0)
10692			    (const_int 0) (const_int 0)])
10693	(match_operand:QI 2 "register_operand" "Yk"))
10694      (const_vector:V12QI [(const_int 0) (const_int 0)
10695			   (const_int 0) (const_int 0)
10696			   (const_int 0) (const_int 0)
10697			   (const_int 0) (const_int 0)
10698			   (const_int 0) (const_int 0)
10699			   (const_int 0) (const_int 0)])))]
10700  "TARGET_AVX512VL"
10701  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10702  [(set_attr "type" "ssemov")
10703   (set_attr "prefix" "evex")
10704   (set_attr "mode" "TI")])
10705
10706(define_insn "avx512vl_<code><mode>v4qi2_mask_store"
10707  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10708    (vec_concat:V16QI
10709      (vec_merge:V4QI
10710        (any_truncate:V4QI
10711          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10712        (vec_select:V4QI
10713          (match_dup 0)
10714          (parallel [(const_int 0) (const_int 1)
10715                     (const_int 2) (const_int 3)]))
10716        (match_operand:QI 2 "register_operand" "Yk"))
10717      (vec_select:V12QI
10718        (match_dup 0)
10719        (parallel [(const_int 4) (const_int 5)
10720                   (const_int 6) (const_int 7)
10721                   (const_int 8) (const_int 9)
10722                   (const_int 10) (const_int 11)
10723                   (const_int 12) (const_int 13)
10724                   (const_int 14) (const_int 15)]))))]
10725  "TARGET_AVX512VL"
10726  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%k0%{%2%}, %1}"
10727  [(set_attr "type" "ssemov")
10728   (set_attr "memory" "store")
10729   (set_attr "prefix" "evex")
10730   (set_attr "mode" "TI")])
10731
10732(define_mode_iterator VI2_128_BW_4_256
10733  [(V8HI "TARGET_AVX512BW") V8SI])
10734
10735(define_insn "*avx512vl_<code><mode>v8qi2_store"
10736  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10737    (vec_concat:V16QI
10738      (any_truncate:V8QI
10739	      (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10740      (vec_select:V8QI
10741        (match_dup 0)
10742        (parallel [(const_int 8) (const_int 9)
10743                   (const_int 10) (const_int 11)
10744                   (const_int 12) (const_int 13)
10745                   (const_int 14) (const_int 15)]))))]
10746  "TARGET_AVX512VL"
10747  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0|%q0, %1}"
10748  [(set_attr "type" "ssemov")
10749   (set_attr "memory" "store")
10750   (set_attr "prefix" "evex")
10751   (set_attr "mode" "TI")])
10752
10753(define_insn "avx512vl_<code><mode>v8qi2_mask"
10754  [(set (match_operand:V16QI 0 "register_operand" "=v")
10755    (vec_concat:V16QI
10756      (vec_merge:V8QI
10757        (any_truncate:V8QI
10758          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10759        (vec_select:V8QI
10760          (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
10761          (parallel [(const_int 0) (const_int 1)
10762                     (const_int 2) (const_int 3)
10763                     (const_int 4) (const_int 5)
10764                     (const_int 6) (const_int 7)]))
10765        (match_operand:QI 3 "register_operand" "Yk"))
10766      (const_vector:V8QI [(const_int 0) (const_int 0)
10767                          (const_int 0) (const_int 0)
10768                          (const_int 0) (const_int 0)
10769                          (const_int 0) (const_int 0)])))]
10770  "TARGET_AVX512VL"
10771  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10772  [(set_attr "type" "ssemov")
10773   (set_attr "prefix" "evex")
10774   (set_attr "mode" "TI")])
10775
10776(define_insn "*avx512vl_<code><mode>v8qi2_mask_1"
10777  [(set (match_operand:V16QI 0 "register_operand" "=v")
10778    (vec_concat:V16QI
10779      (vec_merge:V8QI
10780	(any_truncate:V8QI
10781	  (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10782	(const_vector:V8QI [(const_int 0) (const_int 0)
10783			    (const_int 0) (const_int 0)
10784			    (const_int 0) (const_int 0)
10785			    (const_int 0) (const_int 0)])
10786	(match_operand:QI 2 "register_operand" "Yk"))
10787      (const_vector:V8QI [(const_int 0) (const_int 0)
10788			  (const_int 0) (const_int 0)
10789			  (const_int 0) (const_int 0)
10790			  (const_int 0) (const_int 0)])))]
10791  "TARGET_AVX512VL"
10792  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10793  [(set_attr "type" "ssemov")
10794   (set_attr "prefix" "evex")
10795   (set_attr "mode" "TI")])
10796
10797(define_insn "avx512vl_<code><mode>v8qi2_mask_store"
10798  [(set (match_operand:V16QI 0 "memory_operand" "=m")
10799    (vec_concat:V16QI
10800      (vec_merge:V8QI
10801        (any_truncate:V8QI
10802          (match_operand:VI2_128_BW_4_256 1 "register_operand" "v"))
10803        (vec_select:V8QI
10804          (match_dup 0)
10805          (parallel [(const_int 0) (const_int 1)
10806                     (const_int 2) (const_int 3)
10807                     (const_int 4) (const_int 5)
10808                     (const_int 6) (const_int 7)]))
10809        (match_operand:QI 2 "register_operand" "Yk"))
10810      (vec_select:V8QI
10811        (match_dup 0)
10812        (parallel [(const_int 8) (const_int 9)
10813                   (const_int 10) (const_int 11)
10814                   (const_int 12) (const_int 13)
10815                   (const_int 14) (const_int 15)]))))]
10816  "TARGET_AVX512VL"
10817  "vpmov<trunsuffix><pmov_suff_3>\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
10818  [(set_attr "type" "ssemov")
10819   (set_attr "memory" "store")
10820   (set_attr "prefix" "evex")
10821   (set_attr "mode" "TI")])
10822
10823(define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI])
10824(define_mode_attr pmov_dst_4
10825  [(V4DI "V4HI") (V2DI "V2HI") (V4SI "V4HI")])
10826(define_mode_attr pmov_dst_zeroed_4
10827  [(V4DI "V4HI") (V2DI "V6HI") (V4SI "V4HI")])
10828(define_mode_attr pmov_suff_4
10829  [(V4DI "qw") (V2DI "qw") (V4SI "dw")])
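
;; Illustrative note: as with the byte-sized truncations above, the word-sized
;; results fill only part of an xmm register, so pmov_dst_zeroed_4 supplies
;; the zero tail, e.g. a V2DI -> V2HI truncation is padded with 6 zero
;; halfwords to form the V8HI destination.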
10830
10831(define_insn "*avx512vl_<code><mode>v<ssescalarnum>hi2"
10832  [(set (match_operand:V8HI 0 "register_operand" "=v")
10833    (vec_concat:V8HI
10834      (any_truncate:<pmov_dst_4>
10835	      (match_operand:PMOV_SRC_MODE_4 1 "register_operand" "v"))
10836      (match_operand:<pmov_dst_zeroed_4> 2 "const0_operand")))]
10837  "TARGET_AVX512VL"
10838  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10839  [(set_attr "type" "ssemov")
10840   (set_attr "prefix" "evex")
10841   (set_attr "mode" "TI")])
10842
10843(define_insn "*avx512vl_<code><mode>v4hi2_store"
10844  [(set (match_operand:V8HI 0 "memory_operand" "=m")
10845    (vec_concat:V8HI
10846      (any_truncate:V4HI
10847	      (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10848      (vec_select:V4HI
10849        (match_dup 0)
10850        (parallel [(const_int 4) (const_int 5)
10851                   (const_int 6) (const_int 7)]))))]
10852  "TARGET_AVX512VL"
10853  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0|%0, %1}"
10854  [(set_attr "type" "ssemov")
10855   (set_attr "memory" "store")
10856   (set_attr "prefix" "evex")
10857   (set_attr "mode" "TI")])
10858
10859(define_insn "avx512vl_<code><mode>v4hi2_mask"
10860  [(set (match_operand:V8HI 0 "register_operand" "=v")
10861    (vec_concat:V8HI
10862      (vec_merge:V4HI
10863        (any_truncate:V4HI
10864          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10865        (vec_select:V4HI
10866          (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10867          (parallel [(const_int 0) (const_int 1)
10868                     (const_int 2) (const_int 3)]))
10869        (match_operand:QI 3 "register_operand" "Yk"))
10870      (const_vector:V4HI [(const_int 0) (const_int 0)
10871                          (const_int 0) (const_int 0)])))]
10872  "TARGET_AVX512VL"
10873  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10874  [(set_attr "type" "ssemov")
10875   (set_attr "prefix" "evex")
10876   (set_attr "mode" "TI")])
10877
10878(define_insn "*avx512vl_<code><mode>v4hi2_mask_1"
10879  [(set (match_operand:V8HI 0 "register_operand" "=v")
10880    (vec_concat:V8HI
10881      (vec_merge:V4HI
10882	(any_truncate:V4HI
10883	  (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10884	(const_vector:V4HI [(const_int 0) (const_int 0)
10885			    (const_int 0) (const_int 0)])
10886	(match_operand:QI 2 "register_operand" "Yk"))
10887      (const_vector:V4HI [(const_int 0) (const_int 0)
10888			  (const_int 0) (const_int 0)])))]
10889  "TARGET_AVX512VL"
10890  "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10891  [(set_attr "type" "ssemov")
10892   (set_attr "prefix" "evex")
10893   (set_attr "mode" "TI")])
10894
10895(define_insn "avx512vl_<code><mode>v4hi2_mask_store"
10896  [(set (match_operand:V8HI 0 "memory_operand" "=m")
10897    (vec_concat:V8HI
10898      (vec_merge:V4HI
10899        (any_truncate:V4HI
10900          (match_operand:VI4_128_8_256 1 "register_operand" "v"))
10901        (vec_select:V4HI
10902          (match_dup 0)
10903          (parallel [(const_int 0) (const_int 1)
10904                     (const_int 2) (const_int 3)]))
10905        (match_operand:QI 2 "register_operand" "Yk"))
10906      (vec_select:V4HI
10907        (match_dup 0)
10908        (parallel [(const_int 4) (const_int 5)
10909                   (const_int 6) (const_int 7)]))))]
10910  "TARGET_AVX512VL"
10911{
10912  if (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) == 4)
10913    return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %t1}";
10914  return "vpmov<trunsuffix><pmov_suff_4>\t{%1, %0%{%2%}|%0%{%2%}, %g1}";
10915}
10916  [(set_attr "type" "ssemov")
10917   (set_attr "memory" "store")
10918   (set_attr "prefix" "evex")
10919   (set_attr "mode" "TI")])
10920
10921(define_insn "*avx512vl_<code>v2div2hi2_store"
10922  [(set (match_operand:V8HI 0 "memory_operand" "=m")
10923    (vec_concat:V8HI
10924      (any_truncate:V2HI
10925	      (match_operand:V2DI 1 "register_operand" "v"))
10926      (vec_select:V6HI
10927        (match_dup 0)
10928        (parallel [(const_int 2) (const_int 3)
10929                   (const_int 4) (const_int 5)
10930                   (const_int 6) (const_int 7)]))))]
10931  "TARGET_AVX512VL"
10932  "vpmov<trunsuffix>qw\t{%1, %0|%0, %1}"
10933  [(set_attr "type" "ssemov")
10934   (set_attr "memory" "store")
10935   (set_attr "prefix" "evex")
10936   (set_attr "mode" "TI")])
10937
10938(define_insn "avx512vl_<code>v2div2hi2_mask"
10939  [(set (match_operand:V8HI 0 "register_operand" "=v")
10940    (vec_concat:V8HI
10941      (vec_merge:V2HI
10942        (any_truncate:V2HI
10943          (match_operand:V2DI 1 "register_operand" "v"))
10944        (vec_select:V2HI
10945          (match_operand:V8HI 2 "nonimm_or_0_operand" "0C")
10946          (parallel [(const_int 0) (const_int 1)]))
10947        (match_operand:QI 3 "register_operand" "Yk"))
10948      (const_vector:V6HI [(const_int 0) (const_int 0)
10949                          (const_int 0) (const_int 0)
10950                          (const_int 0) (const_int 0)])))]
10951  "TARGET_AVX512VL"
10952  "vpmov<trunsuffix>qw\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
10953  [(set_attr "type" "ssemov")
10954   (set_attr "prefix" "evex")
10955   (set_attr "mode" "TI")])
10956
10957(define_insn "*avx512vl_<code>v2div2hi2_mask_1"
10958  [(set (match_operand:V8HI 0 "register_operand" "=v")
10959    (vec_concat:V8HI
10960      (vec_merge:V2HI
10961	(any_truncate:V2HI
10962	  (match_operand:V2DI 1 "register_operand" "v"))
10963	(const_vector:V2HI [(const_int 0) (const_int 0)])
10964	(match_operand:QI 2 "register_operand" "Yk"))
10965      (const_vector:V6HI [(const_int 0) (const_int 0)
10966			  (const_int 0) (const_int 0)
10967			  (const_int 0) (const_int 0)])))]
10968  "TARGET_AVX512VL"
10969  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
10970  [(set_attr "type" "ssemov")
10971   (set_attr "prefix" "evex")
10972   (set_attr "mode" "TI")])
10973
10974(define_insn "avx512vl_<code>v2div2hi2_mask_store"
10975  [(set (match_operand:V8HI 0 "memory_operand" "=m")
10976    (vec_concat:V8HI
10977      (vec_merge:V2HI
10978        (any_truncate:V2HI
10979          (match_operand:V2DI 1 "register_operand" "v"))
10980        (vec_select:V2HI
10981          (match_dup 0)
10982          (parallel [(const_int 0) (const_int 1)]))
10983        (match_operand:QI 2 "register_operand" "Yk"))
10984      (vec_select:V6HI
10985        (match_dup 0)
10986        (parallel [(const_int 2) (const_int 3)
10987                   (const_int 4) (const_int 5)
10988                   (const_int 6) (const_int 7)]))))]
10989  "TARGET_AVX512VL"
10990  "vpmov<trunsuffix>qw\t{%1, %0%{%2%}|%0%{%2%}, %g1}"
10991  [(set_attr "type" "ssemov")
10992   (set_attr "memory" "store")
10993   (set_attr "prefix" "evex")
10994   (set_attr "mode" "TI")])
10995
10996(define_insn "*avx512vl_<code>v2div2si2"
10997  [(set (match_operand:V4SI 0 "register_operand" "=v")
10998    (vec_concat:V4SI
10999      (any_truncate:V2SI
11000	      (match_operand:V2DI 1 "register_operand" "v"))
11001      (match_operand:V2SI 2 "const0_operand")))]
11002  "TARGET_AVX512VL"
11003  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11004  [(set_attr "type" "ssemov")
11005   (set_attr "prefix" "evex")
11006   (set_attr "mode" "TI")])
11007
11008(define_insn "*avx512vl_<code>v2div2si2_store"
11009  [(set (match_operand:V4SI 0 "memory_operand" "=m")
11010    (vec_concat:V4SI
11011      (any_truncate:V2SI
11012	      (match_operand:V2DI 1 "register_operand" "v"))
11013      (vec_select:V2SI
11014        (match_dup 0)
11015        (parallel [(const_int 2) (const_int 3)]))))]
11016  "TARGET_AVX512VL"
11017  "vpmov<trunsuffix>qd\t{%1, %0|%0, %1}"
11018  [(set_attr "type" "ssemov")
11019   (set_attr "memory" "store")
11020   (set_attr "prefix" "evex")
11021   (set_attr "mode" "TI")])
11022
11023(define_insn "avx512vl_<code>v2div2si2_mask"
11024  [(set (match_operand:V4SI 0 "register_operand" "=v")
11025    (vec_concat:V4SI
11026      (vec_merge:V2SI
11027        (any_truncate:V2SI
11028          (match_operand:V2DI 1 "register_operand" "v"))
11029        (vec_select:V2SI
11030          (match_operand:V4SI 2 "nonimm_or_0_operand" "0C")
11031          (parallel [(const_int 0) (const_int 1)]))
11032        (match_operand:QI 3 "register_operand" "Yk"))
11033      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11034  "TARGET_AVX512VL"
11035  "vpmov<trunsuffix>qd\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11036  [(set_attr "type" "ssemov")
11037   (set_attr "prefix" "evex")
11038   (set_attr "mode" "TI")])
11039
11040(define_insn "*avx512vl_<code>v2div2si2_mask_1"
11041  [(set (match_operand:V4SI 0 "register_operand" "=v")
11042    (vec_concat:V4SI
11043      (vec_merge:V2SI
11044	(any_truncate:V2SI
11045	  (match_operand:V2DI 1 "register_operand" "v"))
11046	(const_vector:V2SI [(const_int 0) (const_int 0)])
11047	(match_operand:QI 2 "register_operand" "Yk"))
11048      (const_vector:V2SI [(const_int 0) (const_int 0)])))]
11049  "TARGET_AVX512VL"
11050  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11051  [(set_attr "type" "ssemov")
11052   (set_attr "prefix" "evex")
11053   (set_attr "mode" "TI")])
11054
11055(define_insn "avx512vl_<code>v2div2si2_mask_store"
11056  [(set (match_operand:V4SI 0 "memory_operand" "=m")
11057    (vec_concat:V4SI
11058      (vec_merge:V2SI
11059        (any_truncate:V2SI
11060          (match_operand:V2DI 1 "register_operand" "v"))
11061        (vec_select:V2SI
11062          (match_dup 0)
11063          (parallel [(const_int 0) (const_int 1)]))
11064        (match_operand:QI 2 "register_operand" "Yk"))
11065      (vec_select:V2SI
11066        (match_dup 0)
11067        (parallel [(const_int 2) (const_int 3)]))))]
11068  "TARGET_AVX512VL"
11069  "vpmov<trunsuffix>qd\t{%1, %0%{%2%}|%0%{%2%}, %t1}"
11070  [(set_attr "type" "ssemov")
11071   (set_attr "memory" "store")
11072   (set_attr "prefix" "evex")
11073   (set_attr "mode" "TI")])
11074
11075(define_insn "*avx512f_<code>v8div16qi2"
11076  [(set (match_operand:V16QI 0 "register_operand" "=v")
11077	(vec_concat:V16QI
11078	  (any_truncate:V8QI
11079	    (match_operand:V8DI 1 "register_operand" "v"))
11080	  (const_vector:V8QI [(const_int 0) (const_int 0)
11081			      (const_int 0) (const_int 0)
11082			      (const_int 0) (const_int 0)
11083			      (const_int 0) (const_int 0)])))]
11084  "TARGET_AVX512F"
11085  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11086  [(set_attr "type" "ssemov")
11087   (set_attr "prefix" "evex")
11088   (set_attr "mode" "TI")])
11089
11090(define_insn "*avx512f_<code>v8div16qi2_store"
11091  [(set (match_operand:V16QI 0 "memory_operand" "=m")
11092	(vec_concat:V16QI
11093	  (any_truncate:V8QI
11094	    (match_operand:V8DI 1 "register_operand" "v"))
11095	  (vec_select:V8QI
11096	    (match_dup 0)
11097	    (parallel [(const_int 8) (const_int 9)
11098		       (const_int 10) (const_int 11)
11099		       (const_int 12) (const_int 13)
11100		       (const_int 14) (const_int 15)]))))]
11101  "TARGET_AVX512F"
11102  "vpmov<trunsuffix>qb\t{%1, %0|%0, %1}"
11103  [(set_attr "type" "ssemov")
11104   (set_attr "memory" "store")
11105   (set_attr "prefix" "evex")
11106   (set_attr "mode" "TI")])
11107
11108(define_insn "avx512f_<code>v8div16qi2_mask"
11109  [(set (match_operand:V16QI 0 "register_operand" "=v")
11110    (vec_concat:V16QI
11111      (vec_merge:V8QI
11112        (any_truncate:V8QI
11113          (match_operand:V8DI 1 "register_operand" "v"))
11114        (vec_select:V8QI
11115          (match_operand:V16QI 2 "nonimm_or_0_operand" "0C")
11116          (parallel [(const_int 0) (const_int 1)
11117                     (const_int 2) (const_int 3)
11118                     (const_int 4) (const_int 5)
11119                     (const_int 6) (const_int 7)]))
11120        (match_operand:QI 3 "register_operand" "Yk"))
11121      (const_vector:V8QI [(const_int 0) (const_int 0)
11122                          (const_int 0) (const_int 0)
11123                          (const_int 0) (const_int 0)
11124                          (const_int 0) (const_int 0)])))]
11125  "TARGET_AVX512F"
11126  "vpmov<trunsuffix>qb\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
11127  [(set_attr "type" "ssemov")
11128   (set_attr "prefix" "evex")
11129   (set_attr "mode" "TI")])
11130
11131(define_insn "*avx512f_<code>v8div16qi2_mask_1"
11132  [(set (match_operand:V16QI 0 "register_operand" "=v")
11133    (vec_concat:V16QI
11134      (vec_merge:V8QI
11135	(any_truncate:V8QI
11136	  (match_operand:V8DI 1 "register_operand" "v"))
11137	(const_vector:V8QI [(const_int 0) (const_int 0)
11138			    (const_int 0) (const_int 0)
11139			    (const_int 0) (const_int 0)
11140			    (const_int 0) (const_int 0)])
11141	(match_operand:QI 2 "register_operand" "Yk"))
11142      (const_vector:V8QI [(const_int 0) (const_int 0)
11143			  (const_int 0) (const_int 0)
11144			  (const_int 0) (const_int 0)
11145			  (const_int 0) (const_int 0)])))]
11146  "TARGET_AVX512F"
11147  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}%{z%}|%0%{%2%}%{z%}, %1}"
11148  [(set_attr "type" "ssemov")
11149   (set_attr "prefix" "evex")
11150   (set_attr "mode" "TI")])
11151
11152(define_insn "avx512f_<code>v8div16qi2_mask_store"
11153  [(set (match_operand:V16QI 0 "memory_operand" "=m")
11154    (vec_concat:V16QI
11155      (vec_merge:V8QI
11156        (any_truncate:V8QI
11157          (match_operand:V8DI 1 "register_operand" "v"))
11158        (vec_select:V8QI
11159          (match_dup 0)
11160          (parallel [(const_int 0) (const_int 1)
11161                     (const_int 2) (const_int 3)
11162                     (const_int 4) (const_int 5)
11163                     (const_int 6) (const_int 7)]))
11164        (match_operand:QI 2 "register_operand" "Yk"))
11165      (vec_select:V8QI
11166        (match_dup 0)
11167        (parallel [(const_int 8) (const_int 9)
11168                   (const_int 10) (const_int 11)
11169                   (const_int 12) (const_int 13)
11170                   (const_int 14) (const_int 15)]))))]
11171  "TARGET_AVX512F"
11172  "vpmov<trunsuffix>qb\t{%1, %0%{%2%}|%q0%{%2%}, %1}"
11173  [(set_attr "type" "ssemov")
11174   (set_attr "memory" "store")
11175   (set_attr "prefix" "evex")
11176   (set_attr "mode" "TI")])
11177
11178;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11179;;
11180;; Parallel integral arithmetic
11181;;
11182;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
11183
11184(define_expand "neg<mode>2"
11185  [(set (match_operand:VI_AVX2 0 "register_operand")
11186	(minus:VI_AVX2
11187	  (match_dup 2)
11188	  (match_operand:VI_AVX2 1 "vector_operand")))]
11189  "TARGET_SSE2"
11190  "operands[2] = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));")
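
;; Illustrative note: vector negation is open-coded as a subtraction from an
;; all-zeros register, so e.g. negating a V4SI value ends up as psubd (or
;; vpsubd) with zero as the minuend.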
11191
11192(define_expand "<plusminus_insn><mode>3"
11193  [(set (match_operand:VI_AVX2 0 "register_operand")
11194	(plusminus:VI_AVX2
11195	  (match_operand:VI_AVX2 1 "vector_operand")
11196	  (match_operand:VI_AVX2 2 "vector_operand")))]
11197  "TARGET_SSE2"
11198  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11199
11200(define_expand "<plusminus_insn><mode>3_mask"
11201  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
11202	(vec_merge:VI48_AVX512VL
11203	  (plusminus:VI48_AVX512VL
11204	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
11205	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
11206	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
11207	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11208  "TARGET_AVX512F"
11209  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11210
11211(define_expand "<plusminus_insn><mode>3_mask"
11212  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
11213	(vec_merge:VI12_AVX512VL
11214	  (plusminus:VI12_AVX512VL
11215	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
11216	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
11217	  (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
11218	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
11219  "TARGET_AVX512BW"
11220  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11221
11222(define_insn "*<plusminus_insn><mode>3"
11223  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
11224	(plusminus:VI_AVX2
11225	  (match_operand:VI_AVX2 1 "vector_operand" "<comm>0,v")
11226	  (match_operand:VI_AVX2 2 "vector_operand" "xBm,vm")))]
11227  "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11228  "@
11229   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11230   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11231  [(set_attr "isa" "noavx,avx")
11232   (set_attr "type" "sseiadd")
11233   (set_attr "prefix_data16" "1,*")
11234   (set_attr "prefix" "orig,vex")
11235   (set_attr "mode" "<sseinsnmode>")])
11236
11237(define_insn "*sub<mode>3_bcst"
11238  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11239	(minus:VI48_AVX512VL
11240	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
11241	  (vec_duplicate:VI48_AVX512VL
11242	    (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
11243  "TARGET_AVX512F && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
11244  "vpsub<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
11245  [(set_attr "type" "sseiadd")
11246   (set_attr "prefix" "evex")
11247   (set_attr "mode" "<sseinsnmode>")])
11248
11249(define_insn "*add<mode>3_bcst"
11250  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11251	(plus:VI48_AVX512VL
11252	  (vec_duplicate:VI48_AVX512VL
11253	    (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
11254	  (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
11255  "TARGET_AVX512F && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)"
11256  "vpadd<ssemodesuffix>\t{%1<avx512bcst>, %2, %0|%0, %2, %1<avx512bcst>}"
11257  [(set_attr "type" "sseiadd")
11258   (set_attr "prefix" "evex")
11259   (set_attr "mode" "<sseinsnmode>")])
11260
11261(define_insn "*<plusminus_insn><mode>3_mask"
11262  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
11263	(vec_merge:VI48_AVX512VL
11264	  (plusminus:VI48_AVX512VL
11265	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11266	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
11267	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
11268	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11269  "TARGET_AVX512F && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11270  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11271  [(set_attr "type" "sseiadd")
11272   (set_attr "prefix" "evex")
11273   (set_attr "mode" "<sseinsnmode>")])
11274
11275(define_insn "*<plusminus_insn><mode>3_mask"
11276  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
11277	(vec_merge:VI12_AVX512VL
11278	  (plusminus:VI12_AVX512VL
11279	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "<comm>v")
11280	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm"))
11281	  (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand" "0C")
11282	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
11283  "TARGET_AVX512BW && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11284  "vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}"
11285  [(set_attr "type" "sseiadd")
11286   (set_attr "prefix" "evex")
11287   (set_attr "mode" "<sseinsnmode>")])
11288
11289(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
11290  [(set (match_operand:VI12_AVX2 0 "register_operand")
11291	(sat_plusminus:VI12_AVX2
11292	  (match_operand:VI12_AVX2 1 "vector_operand")
11293	  (match_operand:VI12_AVX2 2 "vector_operand")))]
11294  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11295  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
11296
11297(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
11298  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
11299	(sat_plusminus:VI12_AVX2
11300	  (match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
11301	  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
11302  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
11303   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
11304  "@
11305   p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
11306   vp<plusminus_mnemonic><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11307  [(set_attr "isa" "noavx,avx")
11308   (set_attr "type" "sseiadd")
11309   (set_attr "prefix_data16" "1,*")
11310   (set_attr "prefix" "orig,maybe_evex")
11311   (set_attr "mode" "TI")])
11312
11313(define_expand "mul<mode>3<mask_name>"
11314  [(set (match_operand:VI1_AVX512 0 "register_operand")
11315	(mult:VI1_AVX512 (match_operand:VI1_AVX512 1 "register_operand")
11316		       (match_operand:VI1_AVX512 2 "register_operand")))]
11317  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11318{
11319  ix86_expand_vecop_qihi (MULT, operands[0], operands[1], operands[2]);
11320  DONE;
11321})
11322
11323(define_expand "mul<mode>3<mask_name>"
11324  [(set (match_operand:VI2_AVX2 0 "register_operand")
11325	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand")
11326		       (match_operand:VI2_AVX2 2 "vector_operand")))]
11327  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11328  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11329
11330(define_insn "*mul<mode>3<mask_name>"
11331  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11332	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
11333		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
11334  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11335   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11336  "@
11337   pmullw\t{%2, %0|%0, %2}
11338   vpmullw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11339  [(set_attr "isa" "noavx,avx")
11340   (set_attr "type" "sseimul")
11341   (set_attr "prefix_data16" "1,*")
11342   (set_attr "prefix" "orig,vex")
11343   (set_attr "mode" "<sseinsnmode>")])
11344
11345(define_expand "<s>mul<mode>3_highpart<mask_name>"
11346  [(set (match_operand:VI2_AVX2 0 "register_operand")
11347	(truncate:VI2_AVX2
11348	  (lshiftrt:<ssedoublemode>
11349	    (mult:<ssedoublemode>
11350	      (any_extend:<ssedoublemode>
11351		(match_operand:VI2_AVX2 1 "vector_operand"))
11352	      (any_extend:<ssedoublemode>
11353		(match_operand:VI2_AVX2 2 "vector_operand")))
11354	    (const_int 16))))]
11355  "TARGET_SSE2
11356   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11357  "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
11358
11359(define_insn "*<s>mul<mode>3_highpart<mask_name>"
11360  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
11361	(truncate:VI2_AVX2
11362	  (lshiftrt:<ssedoublemode>
11363	    (mult:<ssedoublemode>
11364	      (any_extend:<ssedoublemode>
11365		(match_operand:VI2_AVX2 1 "vector_operand" "%0,v"))
11366	      (any_extend:<ssedoublemode>
11367		(match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))
11368	    (const_int 16))))]
11369  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11370   && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
11371  "@
11372   pmulh<u>w\t{%2, %0|%0, %2}
11373   vpmulh<u>w\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11374  [(set_attr "isa" "noavx,avx")
11375   (set_attr "type" "sseimul")
11376   (set_attr "prefix_data16" "1,*")
11377   (set_attr "prefix" "orig,vex")
11378   (set_attr "mode" "<sseinsnmode>")])
11379
11380(define_expand "vec_widen_umult_even_v16si<mask_name>"
11381  [(set (match_operand:V8DI 0 "register_operand")
11382        (mult:V8DI
11383          (zero_extend:V8DI
11384            (vec_select:V8SI
11385              (match_operand:V16SI 1 "nonimmediate_operand")
11386              (parallel [(const_int 0) (const_int 2)
11387                         (const_int 4) (const_int 6)
11388                         (const_int 8) (const_int 10)
11389                         (const_int 12) (const_int 14)])))
11390          (zero_extend:V8DI
11391            (vec_select:V8SI
11392              (match_operand:V16SI 2 "nonimmediate_operand")
11393              (parallel [(const_int 0) (const_int 2)
11394                         (const_int 4) (const_int 6)
11395                         (const_int 8) (const_int 10)
11396                         (const_int 12) (const_int 14)])))))]
11397  "TARGET_AVX512F"
11398  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11399
11400(define_insn "*vec_widen_umult_even_v16si<mask_name>"
11401  [(set (match_operand:V8DI 0 "register_operand" "=v")
11402        (mult:V8DI
11403          (zero_extend:V8DI
11404            (vec_select:V8SI
11405              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11406              (parallel [(const_int 0) (const_int 2)
11407                         (const_int 4) (const_int 6)
11408                         (const_int 8) (const_int 10)
11409                         (const_int 12) (const_int 14)])))
11410          (zero_extend:V8DI
11411            (vec_select:V8SI
11412              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11413              (parallel [(const_int 0) (const_int 2)
11414                         (const_int 4) (const_int 6)
11415                         (const_int 8) (const_int 10)
11416                         (const_int 12) (const_int 14)])))))]
11417  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11418  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11419  [(set_attr "type" "sseimul")
11420   (set_attr "prefix_extra" "1")
11421   (set_attr "prefix" "evex")
11422   (set_attr "mode" "XI")])
11423
11424(define_expand "vec_widen_umult_even_v8si<mask_name>"
11425  [(set (match_operand:V4DI 0 "register_operand")
11426	(mult:V4DI
11427	  (zero_extend:V4DI
11428	    (vec_select:V4SI
11429	      (match_operand:V8SI 1 "nonimmediate_operand")
11430	      (parallel [(const_int 0) (const_int 2)
11431			 (const_int 4) (const_int 6)])))
11432	  (zero_extend:V4DI
11433	    (vec_select:V4SI
11434	      (match_operand:V8SI 2 "nonimmediate_operand")
11435	      (parallel [(const_int 0) (const_int 2)
11436			 (const_int 4) (const_int 6)])))))]
11437  "TARGET_AVX2 && <mask_avx512vl_condition>"
11438  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11439
11440(define_insn "*vec_widen_umult_even_v8si<mask_name>"
11441  [(set (match_operand:V4DI 0 "register_operand" "=v")
11442	(mult:V4DI
11443	  (zero_extend:V4DI
11444	    (vec_select:V4SI
11445	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11446	      (parallel [(const_int 0) (const_int 2)
11447			 (const_int 4) (const_int 6)])))
11448	  (zero_extend:V4DI
11449	    (vec_select:V4SI
11450	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11451	      (parallel [(const_int 0) (const_int 2)
11452			 (const_int 4) (const_int 6)])))))]
11453  "TARGET_AVX2 && <mask_avx512vl_condition>
11454   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11455  "vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11456  [(set_attr "type" "sseimul")
11457   (set_attr "prefix" "maybe_evex")
11458   (set_attr "mode" "OI")])
11459
11460(define_expand "vec_widen_umult_even_v4si<mask_name>"
11461  [(set (match_operand:V2DI 0 "register_operand")
11462	(mult:V2DI
11463	  (zero_extend:V2DI
11464	    (vec_select:V2SI
11465	      (match_operand:V4SI 1 "vector_operand")
11466	      (parallel [(const_int 0) (const_int 2)])))
11467	  (zero_extend:V2DI
11468	    (vec_select:V2SI
11469	      (match_operand:V4SI 2 "vector_operand")
11470	      (parallel [(const_int 0) (const_int 2)])))))]
11471  "TARGET_SSE2 && <mask_avx512vl_condition>"
11472  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11473
11474(define_insn "*vec_widen_umult_even_v4si<mask_name>"
11475  [(set (match_operand:V2DI 0 "register_operand" "=x,v")
11476	(mult:V2DI
11477	  (zero_extend:V2DI
11478	    (vec_select:V2SI
11479	      (match_operand:V4SI 1 "vector_operand" "%0,v")
11480	      (parallel [(const_int 0) (const_int 2)])))
11481	  (zero_extend:V2DI
11482	    (vec_select:V2SI
11483	      (match_operand:V4SI 2 "vector_operand" "xBm,vm")
11484	      (parallel [(const_int 0) (const_int 2)])))))]
11485  "TARGET_SSE2 && <mask_avx512vl_condition>
11486   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11487  "@
11488   pmuludq\t{%2, %0|%0, %2}
11489   vpmuludq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11490  [(set_attr "isa" "noavx,avx")
11491   (set_attr "type" "sseimul")
11492   (set_attr "prefix_data16" "1,*")
11493   (set_attr "prefix" "orig,maybe_evex")
11494   (set_attr "mode" "TI")])
11495
11496(define_expand "vec_widen_smult_even_v16si<mask_name>"
11497  [(set (match_operand:V8DI 0 "register_operand")
11498        (mult:V8DI
11499          (sign_extend:V8DI
11500            (vec_select:V8SI
11501              (match_operand:V16SI 1 "nonimmediate_operand")
11502              (parallel [(const_int 0) (const_int 2)
11503                         (const_int 4) (const_int 6)
11504                         (const_int 8) (const_int 10)
11505                         (const_int 12) (const_int 14)])))
11506          (sign_extend:V8DI
11507            (vec_select:V8SI
11508              (match_operand:V16SI 2 "nonimmediate_operand")
11509              (parallel [(const_int 0) (const_int 2)
11510                         (const_int 4) (const_int 6)
11511                         (const_int 8) (const_int 10)
11512                         (const_int 12) (const_int 14)])))))]
11513  "TARGET_AVX512F"
11514  "ix86_fixup_binary_operands_no_copy (MULT, V16SImode, operands);")
11515
11516(define_insn "*vec_widen_smult_even_v16si<mask_name>"
11517  [(set (match_operand:V8DI 0 "register_operand" "=v")
11518        (mult:V8DI
11519          (sign_extend:V8DI
11520            (vec_select:V8SI
11521              (match_operand:V16SI 1 "nonimmediate_operand" "%v")
11522              (parallel [(const_int 0) (const_int 2)
11523                         (const_int 4) (const_int 6)
11524                         (const_int 8) (const_int 10)
11525                         (const_int 12) (const_int 14)])))
11526          (sign_extend:V8DI
11527            (vec_select:V8SI
11528              (match_operand:V16SI 2 "nonimmediate_operand" "vm")
11529              (parallel [(const_int 0) (const_int 2)
11530                         (const_int 4) (const_int 6)
11531                         (const_int 8) (const_int 10)
11532                         (const_int 12) (const_int 14)])))))]
11533  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11534  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11535  [(set_attr "type" "sseimul")
11536   (set_attr "prefix_extra" "1")
11537   (set_attr "prefix" "evex")
11538   (set_attr "mode" "XI")])
11539
11540(define_expand "vec_widen_smult_even_v8si<mask_name>"
11541  [(set (match_operand:V4DI 0 "register_operand")
11542	(mult:V4DI
11543	  (sign_extend:V4DI
11544	    (vec_select:V4SI
11545	      (match_operand:V8SI 1 "nonimmediate_operand")
11546	      (parallel [(const_int 0) (const_int 2)
11547			 (const_int 4) (const_int 6)])))
11548	  (sign_extend:V4DI
11549	    (vec_select:V4SI
11550	      (match_operand:V8SI 2 "nonimmediate_operand")
11551	      (parallel [(const_int 0) (const_int 2)
11552			 (const_int 4) (const_int 6)])))))]
11553  "TARGET_AVX2 && <mask_avx512vl_condition>"
11554  "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
11555
11556(define_insn "*vec_widen_smult_even_v8si<mask_name>"
11557  [(set (match_operand:V4DI 0 "register_operand" "=v")
11558	(mult:V4DI
11559	  (sign_extend:V4DI
11560	    (vec_select:V4SI
11561	      (match_operand:V8SI 1 "nonimmediate_operand" "%v")
11562	      (parallel [(const_int 0) (const_int 2)
11563			 (const_int 4) (const_int 6)])))
11564	  (sign_extend:V4DI
11565	    (vec_select:V4SI
11566	      (match_operand:V8SI 2 "nonimmediate_operand" "vm")
11567	      (parallel [(const_int 0) (const_int 2)
11568			 (const_int 4) (const_int 6)])))))]
11569  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11570  "vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11571  [(set_attr "type" "sseimul")
11572   (set_attr "prefix_extra" "1")
11573   (set_attr "prefix" "vex")
11574   (set_attr "mode" "OI")])
11575
11576(define_expand "sse4_1_mulv2siv2di3<mask_name>"
11577  [(set (match_operand:V2DI 0 "register_operand")
11578	(mult:V2DI
11579	  (sign_extend:V2DI
11580	    (vec_select:V2SI
11581	      (match_operand:V4SI 1 "vector_operand")
11582	      (parallel [(const_int 0) (const_int 2)])))
11583	  (sign_extend:V2DI
11584	    (vec_select:V2SI
11585	      (match_operand:V4SI 2 "vector_operand")
11586	      (parallel [(const_int 0) (const_int 2)])))))]
11587  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
11588  "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
11589
11590(define_insn "*sse4_1_mulv2siv2di3<mask_name>"
11591  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
11592	(mult:V2DI
11593	  (sign_extend:V2DI
11594	    (vec_select:V2SI
11595	      (match_operand:V4SI 1 "vector_operand" "%0,0,v")
11596	      (parallel [(const_int 0) (const_int 2)])))
11597	  (sign_extend:V2DI
11598	    (vec_select:V2SI
11599	      (match_operand:V4SI 2 "vector_operand" "YrBm,*xBm,vm")
11600	      (parallel [(const_int 0) (const_int 2)])))))]
11601  "TARGET_SSE4_1 && <mask_avx512vl_condition>
11602   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11603  "@
11604   pmuldq\t{%2, %0|%0, %2}
11605   pmuldq\t{%2, %0|%0, %2}
11606   vpmuldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11607  [(set_attr "isa" "noavx,noavx,avx")
11608   (set_attr "type" "sseimul")
11609   (set_attr "prefix_data16" "1,1,*")
11610   (set_attr "prefix_extra" "1")
11611   (set_attr "prefix" "orig,orig,vex")
11612   (set_attr "mode" "TI")])
11613
(define_insn "avx512bw_pmaddwd512<mode><mask_name>"
  [(set (match_operand:<sseunpackmode> 0 "register_operand" "=v")
	(unspec:<sseunpackmode>
	  [(match_operand:VI2_AVX2 1 "register_operand" "v")
	   (match_operand:VI2_AVX2 2 "nonimmediate_operand" "vm")]
	  UNSPEC_PMADDWD512))]
  "TARGET_AVX512BW && <mask_mode512bit_condition>"
  "vpmaddwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
  [(set_attr "type" "sseiadd")
   (set_attr "prefix" "evex")
   (set_attr "mode" "XI")])
11625
11626(define_expand "avx2_pmaddwd"
11627  [(set (match_operand:V8SI 0 "register_operand")
11628	(plus:V8SI
11629	  (mult:V8SI
11630	    (sign_extend:V8SI
11631	      (vec_select:V8HI
11632		(match_operand:V16HI 1 "nonimmediate_operand")
11633		(parallel [(const_int 0) (const_int 2)
11634			   (const_int 4) (const_int 6)
11635			   (const_int 8) (const_int 10)
11636			   (const_int 12) (const_int 14)])))
11637	    (sign_extend:V8SI
11638	      (vec_select:V8HI
11639		(match_operand:V16HI 2 "nonimmediate_operand")
11640		(parallel [(const_int 0) (const_int 2)
11641			   (const_int 4) (const_int 6)
11642			   (const_int 8) (const_int 10)
11643			   (const_int 12) (const_int 14)]))))
11644	  (mult:V8SI
11645	    (sign_extend:V8SI
11646	      (vec_select:V8HI (match_dup 1)
11647		(parallel [(const_int 1) (const_int 3)
11648			   (const_int 5) (const_int 7)
11649			   (const_int 9) (const_int 11)
11650			   (const_int 13) (const_int 15)])))
11651	    (sign_extend:V8SI
11652	      (vec_select:V8HI (match_dup 2)
11653		(parallel [(const_int 1) (const_int 3)
11654			   (const_int 5) (const_int 7)
11655			   (const_int 9) (const_int 11)
11656			   (const_int 13) (const_int 15)]))))))]
11657  "TARGET_AVX2"
11658  "ix86_fixup_binary_operands_no_copy (MULT, V16HImode, operands);")
11659
11660(define_insn "*avx2_pmaddwd"
11661  [(set (match_operand:V8SI 0 "register_operand" "=x,v")
11662	(plus:V8SI
11663	  (mult:V8SI
11664	    (sign_extend:V8SI
11665	      (vec_select:V8HI
11666		(match_operand:V16HI 1 "nonimmediate_operand" "%x,v")
11667		(parallel [(const_int 0) (const_int 2)
11668			   (const_int 4) (const_int 6)
11669			   (const_int 8) (const_int 10)
11670			   (const_int 12) (const_int 14)])))
11671	    (sign_extend:V8SI
11672	      (vec_select:V8HI
11673		(match_operand:V16HI 2 "nonimmediate_operand" "xm,vm")
11674		(parallel [(const_int 0) (const_int 2)
11675			   (const_int 4) (const_int 6)
11676			   (const_int 8) (const_int 10)
11677			   (const_int 12) (const_int 14)]))))
11678	  (mult:V8SI
11679	    (sign_extend:V8SI
11680	      (vec_select:V8HI (match_dup 1)
11681		(parallel [(const_int 1) (const_int 3)
11682			   (const_int 5) (const_int 7)
11683			   (const_int 9) (const_int 11)
11684			   (const_int 13) (const_int 15)])))
11685	    (sign_extend:V8SI
11686	      (vec_select:V8HI (match_dup 2)
11687		(parallel [(const_int 1) (const_int 3)
11688			   (const_int 5) (const_int 7)
11689			   (const_int 9) (const_int 11)
11690			   (const_int 13) (const_int 15)]))))))]
11691  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11692  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11693  [(set_attr "type" "sseiadd")
11694   (set_attr "isa" "*,avx512bw")
11695   (set_attr "prefix" "vex,evex")
11696   (set_attr "mode" "OI")])
11697
11698(define_expand "sse2_pmaddwd"
11699  [(set (match_operand:V4SI 0 "register_operand")
11700	(plus:V4SI
11701	  (mult:V4SI
11702	    (sign_extend:V4SI
11703	      (vec_select:V4HI
11704		(match_operand:V8HI 1 "vector_operand")
11705		(parallel [(const_int 0) (const_int 2)
11706			   (const_int 4) (const_int 6)])))
11707	    (sign_extend:V4SI
11708	      (vec_select:V4HI
11709		(match_operand:V8HI 2 "vector_operand")
11710		(parallel [(const_int 0) (const_int 2)
11711			   (const_int 4) (const_int 6)]))))
11712	  (mult:V4SI
11713	    (sign_extend:V4SI
11714	      (vec_select:V4HI (match_dup 1)
11715		(parallel [(const_int 1) (const_int 3)
11716			   (const_int 5) (const_int 7)])))
11717	    (sign_extend:V4SI
11718	      (vec_select:V4HI (match_dup 2)
11719		(parallel [(const_int 1) (const_int 3)
11720			   (const_int 5) (const_int 7)]))))))]
11721  "TARGET_SSE2"
11722  "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
11723
11724(define_insn "*sse2_pmaddwd"
11725  [(set (match_operand:V4SI 0 "register_operand" "=x,x,v")
11726	(plus:V4SI
11727	  (mult:V4SI
11728	    (sign_extend:V4SI
11729	      (vec_select:V4HI
11730		(match_operand:V8HI 1 "vector_operand" "%0,x,v")
11731		(parallel [(const_int 0) (const_int 2)
11732			   (const_int 4) (const_int 6)])))
11733	    (sign_extend:V4SI
11734	      (vec_select:V4HI
11735		(match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")
11736		(parallel [(const_int 0) (const_int 2)
11737			   (const_int 4) (const_int 6)]))))
11738	  (mult:V4SI
11739	    (sign_extend:V4SI
11740	      (vec_select:V4HI (match_dup 1)
11741		(parallel [(const_int 1) (const_int 3)
11742			   (const_int 5) (const_int 7)])))
11743	    (sign_extend:V4SI
11744	      (vec_select:V4HI (match_dup 2)
11745		(parallel [(const_int 1) (const_int 3)
11746			   (const_int 5) (const_int 7)]))))))]
11747  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11748  "@
11749   pmaddwd\t{%2, %0|%0, %2}
11750   vpmaddwd\t{%2, %1, %0|%0, %1, %2}
11751   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
11752  [(set_attr "isa" "noavx,avx,avx512bw")
11753   (set_attr "type" "sseiadd")
11754   (set_attr "atom_unit" "simul")
11755   (set_attr "prefix_data16" "1,*,*")
11756   (set_attr "prefix" "orig,vex,evex")
11757   (set_attr "mode" "TI")])
11758
11759(define_insn "avx512dq_mul<mode>3<mask_name>"
11760  [(set (match_operand:VI8 0 "register_operand" "=v")
11761	(mult:VI8
11762	  (match_operand:VI8 1 "register_operand" "v")
11763	  (match_operand:VI8 2 "nonimmediate_operand" "vm")))]
11764  "TARGET_AVX512DQ && <mask_mode512bit_condition>"
11765  "vpmullq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11766  [(set_attr "type" "sseimul")
11767   (set_attr "prefix" "evex")
11768   (set_attr "mode" "<sseinsnmode>")])
11769
11770(define_expand "mul<mode>3<mask_name>"
11771  [(set (match_operand:VI4_AVX512F 0 "register_operand")
11772	(mult:VI4_AVX512F
11773	  (match_operand:VI4_AVX512F 1 "general_vector_operand")
11774	  (match_operand:VI4_AVX512F 2 "general_vector_operand")))]
11775  "TARGET_SSE2 && <mask_mode512bit_condition>"
11776{
11777  if (TARGET_SSE4_1)
11778    {
11779      if (!vector_operand (operands[1], <MODE>mode))
11780	operands[1] = force_reg (<MODE>mode, operands[1]);
11781      if (!vector_operand (operands[2], <MODE>mode))
11782	operands[2] = force_reg (<MODE>mode, operands[2]);
11783      ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
11784    }
11785  else
11786    {
11787      ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
11788      DONE;
11789    }
11790})
11791
11792(define_insn "*<sse4_1_avx2>_mul<mode>3<mask_name>"
11793  [(set (match_operand:VI4_AVX512F 0 "register_operand" "=Yr,*x,v")
11794	(mult:VI4_AVX512F
11795	  (match_operand:VI4_AVX512F 1 "vector_operand" "%0,0,v")
11796	  (match_operand:VI4_AVX512F 2 "vector_operand" "YrBm,*xBm,vm")))]
11797  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
11798   && <mask_mode512bit_condition>"
11799  "@
11800   pmulld\t{%2, %0|%0, %2}
11801   pmulld\t{%2, %0|%0, %2}
11802   vpmulld\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11803  [(set_attr "isa" "noavx,noavx,avx")
11804   (set_attr "type" "sseimul")
11805   (set_attr "prefix_extra" "1")
11806   (set_attr "prefix" "<mask_prefix4>")
11807   (set_attr "btver2_decode" "vector,vector,vector")
11808   (set_attr "mode" "<sseinsnmode>")])
11809
11810(define_expand "mul<mode>3"
11811  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
11812	(mult:VI8_AVX2_AVX512F
11813	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
11814	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
11815  "TARGET_SSE2"
11816{
11817  ix86_expand_sse2_mulvxdi3 (operands[0], operands[1], operands[2]);
11818  DONE;
11819})
11820
11821(define_expand "vec_widen_<s>mult_hi_<mode>"
11822  [(match_operand:<sseunpackmode> 0 "register_operand")
11823   (any_extend:<sseunpackmode>
11824     (match_operand:VI124_AVX2 1 "register_operand"))
11825   (match_operand:VI124_AVX2 2 "register_operand")]
11826  "TARGET_SSE2"
11827{
11828  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11829			      <u_bool>, true);
11830  DONE;
11831})
11832
11833(define_expand "vec_widen_<s>mult_lo_<mode>"
11834  [(match_operand:<sseunpackmode> 0 "register_operand")
11835   (any_extend:<sseunpackmode>
11836     (match_operand:VI124_AVX2 1 "register_operand"))
11837   (match_operand:VI124_AVX2 2 "register_operand")]
11838  "TARGET_SSE2"
11839{
11840  ix86_expand_mul_widen_hilo (operands[0], operands[1], operands[2],
11841			      <u_bool>, false);
11842  DONE;
11843})
11844
11845;; Most widen_<s>mult_even_<mode> can be handled directly from other
11846;; named patterns, but signed V4SI needs special help for plain SSE2.
11847(define_expand "vec_widen_smult_even_v4si"
11848  [(match_operand:V2DI 0 "register_operand")
11849   (match_operand:V4SI 1 "vector_operand")
11850   (match_operand:V4SI 2 "vector_operand")]
11851  "TARGET_SSE2"
11852{
11853  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11854				 false, false);
11855  DONE;
11856})
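;; As a rough scalar sketch of what the expander above produces (the names
;; below are purely illustrative, not part of the port), the even-lane
;; signed widening multiply is:
;;
;;   /* dst is V2DI, src1/src2 are V4SI.  */
;;   for (int i = 0; i < 2; i++)
;;     dst[i] = (int64_t) src1[2 * i] * (int64_t) src2[2 * i];
;;
;; SSE4.1 provides this directly as pmuldq; plain SSE2 only has the
;; unsigned pmuludq, so ix86_expand_mul_widen_evenodd has to emulate the
;; signed product (roughly: an unsigned multiply plus sign corrections).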
11857
11858(define_expand "vec_widen_<s>mult_odd_<mode>"
11859  [(match_operand:<sseunpackmode> 0 "register_operand")
11860   (any_extend:<sseunpackmode>
11861     (match_operand:VI4_AVX512F 1 "general_vector_operand"))
11862   (match_operand:VI4_AVX512F 2 "general_vector_operand")]
11863  "TARGET_SSE2"
11864{
11865  ix86_expand_mul_widen_evenodd (operands[0], operands[1], operands[2],
11866				 <u_bool>, true);
11867  DONE;
11868})
11869
11870(define_mode_attr SDOT_PMADD_SUF
11871  [(V32HI "512v32hi") (V16HI "") (V8HI "")])
11872
11873(define_expand "sdot_prod<mode>"
11874  [(match_operand:<sseunpackmode> 0 "register_operand")
11875   (match_operand:VI2_AVX2 1 "register_operand")
11876   (match_operand:VI2_AVX2 2 "register_operand")
11877   (match_operand:<sseunpackmode> 3 "register_operand")]
11878  "TARGET_SSE2"
11879{
11880  rtx t = gen_reg_rtx (<sseunpackmode>mode);
11881  emit_insn (gen_<sse2_avx2>_pmaddwd<SDOT_PMADD_SUF> (t, operands[1], operands[2]));
11882  emit_insn (gen_rtx_SET (operands[0],
11883			  gen_rtx_PLUS (<sseunpackmode>mode,
11884					operands[3], t)));
11885  DONE;
11886})
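;; Illustrative scalar view of the sdot_prod expansion above (ignoring
;; overflow corner cases of pmaddwd): each SImode result lane is the
;; accumulator lane plus the sum of two adjacent signed 16-bit products.
;;
;;   /* a/b are the HImode input vectors, acc/dst the SImode vectors.  */
;;   for (int i = 0; i < n; i++)
;;     dst[i] = acc[i] + (int32_t) a[2 * i] * b[2 * i]
;;                     + (int32_t) a[2 * i + 1] * b[2 * i + 1];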
11887
11888;; Normally we use widen_mul_even/odd, but combine can't quite get it all
11889;; back together when madd is available.
11890(define_expand "sdot_prodv4si"
11891  [(match_operand:V2DI 0 "register_operand")
11892   (match_operand:V4SI 1 "register_operand")
11893   (match_operand:V4SI 2 "register_operand")
11894   (match_operand:V2DI 3 "register_operand")]
11895  "TARGET_XOP"
11896{
11897  rtx t = gen_reg_rtx (V2DImode);
11898  emit_insn (gen_xop_pmacsdqh (t, operands[1], operands[2], operands[3]));
11899  emit_insn (gen_xop_pmacsdql (operands[0], operands[1], operands[2], t));
11900  DONE;
11901})
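;; For reference, the XOP sequence above folds the whole dot product into
;; two multiply-accumulate steps: vpmacsdqh handles the odd (high) dword of
;; each qword pair and vpmacsdql the even (low) dword, so the net effect is
;; roughly
;;
;;   dst[i] = acc[i] + (int64_t) a[2 * i] * b[2 * i]
;;                   + (int64_t) a[2 * i + 1] * b[2 * i + 1];
;;
;; for i = 0, 1.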
11902
11903(define_expand "uavg<mode>3_ceil"
11904  [(set (match_operand:VI12_AVX2 0 "register_operand")
11905	(truncate:VI12_AVX2
11906	  (lshiftrt:<ssedoublemode>
11907	    (plus:<ssedoublemode>
11908	      (plus:<ssedoublemode>
11909		(zero_extend:<ssedoublemode>
11910		  (match_operand:VI12_AVX2 1 "vector_operand"))
11911		(zero_extend:<ssedoublemode>
11912		  (match_operand:VI12_AVX2 2 "vector_operand")))
11913	      (match_dup 3))
11914	    (const_int 1))))]
11915  "TARGET_SSE2"
11916{
11917  operands[3] = CONST1_RTX(<ssedoublemode>mode);
11918  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
11919})
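;; The double-width RTL above is just the usual rounding-average identity
;;
;;   dst[i] = (src1[i] + src2[i] + 1) >> 1;
;;
;; computed without intermediate overflow, which is exactly what
;; pavgb/pavgw implement.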
11920
11921(define_expand "usadv16qi"
11922  [(match_operand:V4SI 0 "register_operand")
11923   (match_operand:V16QI 1 "register_operand")
11924   (match_operand:V16QI 2 "vector_operand")
11925   (match_operand:V4SI 3 "vector_operand")]
11926  "TARGET_SSE2"
11927{
11928  rtx t1 = gen_reg_rtx (V2DImode);
11929  rtx t2 = gen_reg_rtx (V4SImode);
11930  emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
11931  convert_move (t2, t1, 0);
11932  emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
11933  DONE;
11934})
11935
11936(define_expand "usadv32qi"
11937  [(match_operand:V8SI 0 "register_operand")
11938   (match_operand:V32QI 1 "register_operand")
11939   (match_operand:V32QI 2 "nonimmediate_operand")
11940   (match_operand:V8SI 3 "nonimmediate_operand")]
11941  "TARGET_AVX2"
11942{
11943  rtx t1 = gen_reg_rtx (V4DImode);
11944  rtx t2 = gen_reg_rtx (V8SImode);
11945  emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
11946  convert_move (t2, t1, 0);
11947  emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
11948  DONE;
11949})
11950
11951(define_expand "usadv64qi"
11952  [(match_operand:V16SI 0 "register_operand")
11953   (match_operand:V64QI 1 "register_operand")
11954   (match_operand:V64QI 2 "nonimmediate_operand")
11955   (match_operand:V16SI 3 "nonimmediate_operand")]
11956  "TARGET_AVX512BW"
11957{
11958  rtx t1 = gen_reg_rtx (V8DImode);
11959  rtx t2 = gen_reg_rtx (V16SImode);
11960  emit_insn (gen_avx512f_psadbw (t1, operands[1], operands[2]));
11961  convert_move (t2, t1, 0);
11962  emit_insn (gen_addv16si3 (operands[0], t2, operands[3]));
11963  DONE;
11964})
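;; A sketch of what the three usad expanders above compute (illustrative
;; scalar code only; "nbytes" stands for the QImode element count):
;;
;;   for (int g = 0; g < nbytes / 8; g++)
;;     {
;;       uint16_t s = 0;
;;       for (int j = 0; j < 8; j++)
;;         s += abs (a[8 * g + j] - b[8 * g + j]);
;;       sad[g] = s;	/* psadbw: low 16 bits of each DImode lane.  */
;;     }
;;
;; The psadbw result is then moved into an equally sized SImode vector and
;; added to the accumulator in operand 3.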
11965
11966(define_insn "<mask_codefor>ashr<mode>3<mask_name>"
11967  [(set (match_operand:VI248_AVX512BW_1 0 "register_operand" "=v,v")
11968	(ashiftrt:VI248_AVX512BW_1
11969	  (match_operand:VI248_AVX512BW_1 1 "nonimmediate_operand" "v,vm")
11970	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
11971  "TARGET_AVX512VL"
11972  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
11973  [(set_attr "type" "sseishft")
11974   (set (attr "length_immediate")
11975     (if_then_else (match_operand 2 "const_int_operand")
11976       (const_string "1")
11977       (const_string "0")))
11978   (set_attr "mode" "<sseinsnmode>")])
11979
11980(define_insn "ashr<mode>3"
11981  [(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
11982	(ashiftrt:VI24_AVX2
11983	  (match_operand:VI24_AVX2 1 "register_operand" "0,x")
11984	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
11985  "TARGET_SSE2"
11986  "@
11987   psra<ssemodesuffix>\t{%2, %0|%0, %2}
11988   vpsra<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
11989  [(set_attr "isa" "noavx,avx")
11990   (set_attr "type" "sseishft")
11991   (set (attr "length_immediate")
11992     (if_then_else (match_operand 2 "const_int_operand")
11993       (const_string "1")
11994       (const_string "0")))
11995   (set_attr "prefix_data16" "1,*")
11996   (set_attr "prefix" "orig,vex")
11997   (set_attr "mode" "<sseinsnmode>")])
11998
11999(define_insn "ashr<mode>3<mask_name>"
12000  [(set (match_operand:VI248_AVX512BW_AVX512VL 0 "register_operand" "=v,v")
12001	(ashiftrt:VI248_AVX512BW_AVX512VL
12002	  (match_operand:VI248_AVX512BW_AVX512VL 1 "nonimmediate_operand" "v,vm")
12003	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12004  "TARGET_AVX512F"
12005  "vpsra<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12006  [(set_attr "type" "sseishft")
12007   (set (attr "length_immediate")
12008     (if_then_else (match_operand 2 "const_int_operand")
12009       (const_string "1")
12010       (const_string "0")))
12011   (set_attr "mode" "<sseinsnmode>")])
12012
12013(define_insn "<mask_codefor><shift_insn><mode>3<mask_name>"
12014  [(set (match_operand:VI248_AVX512BW_2 0 "register_operand" "=v,v")
12015	(any_lshift:VI248_AVX512BW_2
12016	  (match_operand:VI248_AVX512BW_2 1 "nonimmediate_operand" "v,vm")
12017	  (match_operand:DI 2 "nonmemory_operand" "v,N")))]
12018  "TARGET_AVX512VL"
12019  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12020  [(set_attr "type" "sseishft")
12021   (set (attr "length_immediate")
12022     (if_then_else (match_operand 2 "const_int_operand")
12023       (const_string "1")
12024       (const_string "0")))
12025   (set_attr "mode" "<sseinsnmode>")])
12026
12027(define_insn "<shift_insn><mode>3"
12028  [(set (match_operand:VI248_AVX2 0 "register_operand" "=x,x")
12029	(any_lshift:VI248_AVX2
12030	  (match_operand:VI248_AVX2 1 "register_operand" "0,x")
12031	  (match_operand:DI 2 "nonmemory_operand" "xN,xN")))]
12032  "TARGET_SSE2"
12033  "@
12034   p<vshift><ssemodesuffix>\t{%2, %0|%0, %2}
12035   vp<vshift><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12036  [(set_attr "isa" "noavx,avx")
12037   (set_attr "type" "sseishft")
12038   (set (attr "length_immediate")
12039     (if_then_else (match_operand 2 "const_int_operand")
12040       (const_string "1")
12041       (const_string "0")))
12042   (set_attr "prefix_data16" "1,*")
12043   (set_attr "prefix" "orig,vex")
12044   (set_attr "mode" "<sseinsnmode>")])
12045
12046(define_insn "<shift_insn><mode>3<mask_name>"
12047  [(set (match_operand:VI248_AVX512BW 0 "register_operand" "=v,v")
12048	(any_lshift:VI248_AVX512BW
12049	  (match_operand:VI248_AVX512BW 1 "nonimmediate_operand" "v,m")
12050	  (match_operand:DI 2 "nonmemory_operand" "vN,N")))]
12051  "TARGET_AVX512F"
12052  "vp<vshift><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12053  [(set_attr "type" "sseishft")
12054   (set (attr "length_immediate")
12055     (if_then_else (match_operand 2 "const_int_operand")
12056       (const_string "1")
12057       (const_string "0")))
12058   (set_attr "mode" "<sseinsnmode>")])
12059
12061(define_expand "vec_shl_<mode>"
12062  [(set (match_dup 3)
12063	(ashift:V1TI
12064	 (match_operand:V_128 1 "register_operand")
12065	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12066   (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12067  "TARGET_SSE2"
12068{
12069  operands[1] = gen_lowpart (V1TImode, operands[1]);
12070  operands[3] = gen_reg_rtx (V1TImode);
12071  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12072})
12073
12074(define_expand "vec_shr_<mode>"
12075  [(set (match_dup 3)
12076	(lshiftrt:V1TI
12077	 (match_operand:V_128 1 "register_operand")
12078	 (match_operand:SI 2 "const_0_to_255_mul_8_operand")))
12079   (set (match_operand:V_128 0 "register_operand") (match_dup 4))]
12080  "TARGET_SSE2"
12081{
12082  operands[1] = gen_lowpart (V1TImode, operands[1]);
12083  operands[3] = gen_reg_rtx (V1TImode);
12084  operands[4] = gen_lowpart (<MODE>mode, operands[3]);
12085})
12086
12087(define_insn "avx512bw_<shift_insn><mode>3"
12088  [(set (match_operand:VIMAX_AVX512VL 0 "register_operand" "=v")
12089	(any_lshift:VIMAX_AVX512VL
12090	 (match_operand:VIMAX_AVX512VL 1 "nonimmediate_operand" "vm")
12091	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
12092  "TARGET_AVX512BW"
12093{
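  /* Operand 2 is a bit count (a multiple of 8); the byte-shift
     instructions take the amount in bytes.  */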
12094  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12095  return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12096}
12097  [(set_attr "type" "sseishft")
12098   (set_attr "length_immediate" "1")
12099   (set_attr "prefix" "maybe_evex")
12100   (set_attr "mode" "<sseinsnmode>")])
12101
12102(define_insn "<sse2_avx2>_<shift_insn><mode>3"
12103  [(set (match_operand:VIMAX_AVX2 0 "register_operand" "=x,v")
12104	(any_lshift:VIMAX_AVX2
12105	 (match_operand:VIMAX_AVX2 1 "register_operand" "0,v")
12106	 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n,n")))]
12107  "TARGET_SSE2"
12108{
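  /* As above, convert the bit count in operand 2 to a byte count.  */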
12109  operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
12110
12111  switch (which_alternative)
12112    {
12113    case 0:
12114      return "p<vshift>dq\t{%2, %0|%0, %2}";
12115    case 1:
12116      return "vp<vshift>dq\t{%2, %1, %0|%0, %1, %2}";
12117    default:
12118      gcc_unreachable ();
12119    }
12120}
12121  [(set_attr "isa" "noavx,avx")
12122   (set_attr "type" "sseishft")
12123   (set_attr "length_immediate" "1")
12124   (set_attr "atom_unit" "sishuf")
12125   (set_attr "prefix_data16" "1,*")
12126   (set_attr "prefix" "orig,vex")
12127   (set_attr "mode" "<sseinsnmode>")])
12128
12129(define_insn "<avx512>_<rotate>v<mode><mask_name>"
12130  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12131	(any_rotate:VI48_AVX512VL
12132	  (match_operand:VI48_AVX512VL 1 "register_operand" "v")
12133	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12134  "TARGET_AVX512F"
12135  "vp<rotate>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12136  [(set_attr "prefix" "evex")
12137   (set_attr "mode" "<sseinsnmode>")])
12138
12139(define_insn "<avx512>_<rotate><mode><mask_name>"
12140  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12141	(any_rotate:VI48_AVX512VL
12142	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")
12143	  (match_operand:SI 2 "const_0_to_255_operand")))]
12144  "TARGET_AVX512F"
12145  "vp<rotate><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12146  [(set_attr "prefix" "evex")
12147   (set_attr "mode" "<sseinsnmode>")])
12148
12149(define_expand "<code><mode>3"
12150  [(set (match_operand:VI124_256_AVX512F_AVX512BW 0 "register_operand")
12151	(maxmin:VI124_256_AVX512F_AVX512BW
12152	  (match_operand:VI124_256_AVX512F_AVX512BW 1 "nonimmediate_operand")
12153	  (match_operand:VI124_256_AVX512F_AVX512BW 2 "nonimmediate_operand")))]
12154  "TARGET_AVX2"
12155  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12156
12157(define_insn "*avx2_<code><mode>3"
12158  [(set (match_operand:VI124_256 0 "register_operand" "=v")
12159	(maxmin:VI124_256
12160	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
12161	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
12162  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12163  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12164  [(set_attr "type" "sseiadd")
12165   (set_attr "prefix_extra" "1")
12166   (set_attr "prefix" "vex")
12167   (set_attr "mode" "OI")])
12168
12169(define_expand "<code><mode>3_mask"
12170  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12171	(vec_merge:VI48_AVX512VL
12172	  (maxmin:VI48_AVX512VL
12173	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12174	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12175	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12176	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12177  "TARGET_AVX512F"
12178  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
12179
12180(define_insn "*avx512f_<code><mode>3<mask_name>"
12181  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12182	(maxmin:VI48_AVX512VL
12183	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "%v")
12184	  (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")))]
12185  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12186  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12187  [(set_attr "type" "sseiadd")
12188   (set_attr "prefix_extra" "1")
12189   (set_attr "prefix" "maybe_evex")
12190   (set_attr "mode" "<sseinsnmode>")])
12191
12192(define_insn "<mask_codefor><code><mode>3<mask_name>"
12193  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
12194        (maxmin:VI12_AVX512VL
12195          (match_operand:VI12_AVX512VL 1 "register_operand" "v")
12196          (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")))]
12197  "TARGET_AVX512BW"
12198  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12199  [(set_attr "type" "sseiadd")
12200   (set_attr "prefix" "evex")
12201   (set_attr "mode" "<sseinsnmode>")])
12202
12203(define_expand "<code><mode>3"
12204  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand")
12205	(maxmin:VI8_AVX2_AVX512F
12206	  (match_operand:VI8_AVX2_AVX512F 1 "register_operand")
12207	  (match_operand:VI8_AVX2_AVX512F 2 "register_operand")))]
12208  "TARGET_SSE4_2"
12209{
12210  if (TARGET_AVX512F
12211      && (<MODE>mode == V8DImode || TARGET_AVX512VL))
12212    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
  else
12214    {
12215      enum rtx_code code;
12216      rtx xops[6];
12217      bool ok;
12218
12220      xops[0] = operands[0];
12221
12222      if (<CODE> == SMAX || <CODE> == UMAX)
12223	{
12224	  xops[1] = operands[1];
12225	  xops[2] = operands[2];
12226	}
12227      else
12228	{
12229	  xops[1] = operands[2];
12230	  xops[2] = operands[1];
12231	}
12232
12233      code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
12234
12235      xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
12236      xops[4] = operands[1];
12237      xops[5] = operands[2];
12238
12239      ok = ix86_expand_int_vcond (xops);
12240      gcc_assert (ok);
12241      DONE;
12242    }
12243})
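;; In other words, when no native 64-bit vector min/max is available the
;; expander above lowers to a compare-and-select, roughly
;;
;;   dst[i] = a[i] > b[i] ? a[i] : b[i];	/* GT or GTU as appropriate,
;;						   operands swapped for min.  */
;;
;; which ix86_expand_int_vcond turns into a compare-and-blend sequence
;; built around pcmpgtq.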
12244
12245(define_expand "<code><mode>3"
12246  [(set (match_operand:VI124_128 0 "register_operand")
12247	(smaxmin:VI124_128
12248	  (match_operand:VI124_128 1 "vector_operand")
12249	  (match_operand:VI124_128 2 "vector_operand")))]
12250  "TARGET_SSE2"
12251{
12252  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
12253    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12254  else
12255    {
12256      rtx xops[6];
12257      bool ok;
12258
12259      xops[0] = operands[0];
12260      operands[1] = force_reg (<MODE>mode, operands[1]);
12261      operands[2] = force_reg (<MODE>mode, operands[2]);
12262
12263      if (<CODE> == SMAX)
12264	{
12265	  xops[1] = operands[1];
12266	  xops[2] = operands[2];
12267	}
12268      else
12269	{
12270	  xops[1] = operands[2];
12271	  xops[2] = operands[1];
12272	}
12273
12274      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
12275      xops[4] = operands[1];
12276      xops[5] = operands[2];
12277
12278      ok = ix86_expand_int_vcond (xops);
12279      gcc_assert (ok);
12280      DONE;
12281    }
12282})
12283
12284(define_insn "*sse4_1_<code><mode>3<mask_name>"
12285  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
12286	(smaxmin:VI14_128
12287	  (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
12288	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12289  "TARGET_SSE4_1
12290   && <mask_mode512bit_condition>
12291   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12292  "@
12293   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12294   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12295   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12296  [(set_attr "isa" "noavx,noavx,avx")
12297   (set_attr "type" "sseiadd")
12298   (set_attr "prefix_extra" "1,1,*")
12299   (set_attr "prefix" "orig,orig,vex")
12300   (set_attr "mode" "TI")])
12301
12302(define_insn "*<code>v8hi3"
12303  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
12304	(smaxmin:V8HI
12305	  (match_operand:V8HI 1 "vector_operand" "%0,x,v")
12306	  (match_operand:V8HI 2 "vector_operand" "xBm,xm,vm")))]
12307  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12308  "@
12309   p<maxmin_int>w\t{%2, %0|%0, %2}
12310   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}
12311   vp<maxmin_int>w\t{%2, %1, %0|%0, %1, %2}"
12312  [(set_attr "isa" "noavx,avx,avx512bw")
12313   (set_attr "type" "sseiadd")
12314   (set_attr "prefix_data16" "1,*,*")
12315   (set_attr "prefix_extra" "*,1,1")
12316   (set_attr "prefix" "orig,vex,evex")
12317   (set_attr "mode" "TI")])
12318
12319(define_expand "<code><mode>3"
12320  [(set (match_operand:VI124_128 0 "register_operand")
12321	(umaxmin:VI124_128
12322	  (match_operand:VI124_128 1 "vector_operand")
12323	  (match_operand:VI124_128 2 "vector_operand")))]
12324  "TARGET_SSE2"
12325{
12326  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
12327    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
12328  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
12329    {
12330      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
12331      operands[1] = force_reg (<MODE>mode, operands[1]);
12332      if (rtx_equal_p (op3, op2))
12333	op3 = gen_reg_rtx (V8HImode);
12334      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
12335      emit_insn (gen_addv8hi3 (op0, op3, op2));
12336      DONE;
12337    }
12338  else
12339    {
12340      rtx xops[6];
12341      bool ok;
12342
12343      operands[1] = force_reg (<MODE>mode, operands[1]);
12344      operands[2] = force_reg (<MODE>mode, operands[2]);
12345
12346      xops[0] = operands[0];
12347
12348      if (<CODE> == UMAX)
12349	{
12350	  xops[1] = operands[1];
12351	  xops[2] = operands[2];
12352	}
12353      else
12354	{
12355	  xops[1] = operands[2];
12356	  xops[2] = operands[1];
12357	}
12358
12359      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
12360      xops[4] = operands[1];
12361      xops[5] = operands[2];
12362
12363      ok = ix86_expand_int_vcond (xops);
12364      gcc_assert (ok);
12365      DONE;
12366    }
12367})
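;; The V8HImode UMAX path above uses the saturating-subtract identity
;;
;;   umax (a, b) == ussub (a, b) + b	/* ussub (a, b) is 0 when a <= b.  */
;;
;; while the remaining pre-SSE4.1 cases fall back to compare-and-select
;; through ix86_expand_int_vcond.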
12368
12369(define_insn "*sse4_1_<code><mode>3<mask_name>"
12370  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
12371	(umaxmin:VI24_128
12372	  (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
12373	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
12374  "TARGET_SSE4_1
12375   && <mask_mode512bit_condition>
12376   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12377  "@
12378   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12379   p<maxmin_int><ssemodesuffix>\t{%2, %0|%0, %2}
12380   vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
12381  [(set_attr "isa" "noavx,noavx,avx")
12382   (set_attr "type" "sseiadd")
12383   (set_attr "prefix_extra" "1,1,*")
12384   (set_attr "prefix" "orig,orig,vex")
12385   (set_attr "mode" "TI")])
12386
12387(define_insn "*<code>v16qi3"
12388  [(set (match_operand:V16QI 0 "register_operand" "=x,x,v")
12389	(umaxmin:V16QI
12390	  (match_operand:V16QI 1 "vector_operand" "%0,x,v")
12391	  (match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")))]
12392  "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12393  "@
12394   p<maxmin_int>b\t{%2, %0|%0, %2}
12395   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}
12396   vp<maxmin_int>b\t{%2, %1, %0|%0, %1, %2}"
12397  [(set_attr "isa" "noavx,avx,avx512bw")
12398   (set_attr "type" "sseiadd")
12399   (set_attr "prefix_data16" "1,*,*")
12400   (set_attr "prefix_extra" "*,1,1")
12401   (set_attr "prefix" "orig,vex,evex")
12402   (set_attr "mode" "TI")])
12403
12404;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12405;;
12406;; Parallel integral comparisons
12407;;
12408;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12409
12410(define_expand "avx2_eq<mode>3"
12411  [(set (match_operand:VI_256 0 "register_operand")
12412	(eq:VI_256
12413	  (match_operand:VI_256 1 "nonimmediate_operand")
12414	  (match_operand:VI_256 2 "nonimmediate_operand")))]
12415  "TARGET_AVX2"
12416  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12417
12418(define_insn "*avx2_eq<mode>3"
12419  [(set (match_operand:VI_256 0 "register_operand" "=x")
12420	(eq:VI_256
12421	  (match_operand:VI_256 1 "nonimmediate_operand" "%x")
12422	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12423  "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12424  "vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12425  [(set_attr "type" "ssecmp")
12426   (set_attr "prefix_extra" "1")
12427   (set_attr "prefix" "vex")
12428   (set_attr "mode" "OI")])
12429
12430(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12431  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12432	(unspec:<avx512fmaskmode>
12433	  [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
12434	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")]
12435	  UNSPEC_MASKED_EQ))]
12436  "TARGET_AVX512BW"
12437  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12438
12439(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
12440  [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
12441	(unspec:<avx512fmaskmode>
12442	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
12443	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")]
12444	  UNSPEC_MASKED_EQ))]
12445  "TARGET_AVX512F"
12446  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12447
12448(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12449  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12450	(unspec:<avx512fmaskmode>
12451	  [(match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12452	   (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12453	  UNSPEC_MASKED_EQ))]
12454  "TARGET_AVX512BW && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12455  "@
12456   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12457   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12458  [(set_attr "type" "ssecmp")
12459   (set_attr "prefix_extra" "1")
12460   (set_attr "prefix" "evex")
12461   (set_attr "mode" "<sseinsnmode>")])
12462
12463(define_insn "<avx512>_eq<mode>3<mask_scalar_merge_name>_1"
12464  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k,k")
12465	(unspec:<avx512fmaskmode>
12466	  [(match_operand:VI48_AVX512VL 1 "nonimm_or_0_operand" "%v,v")
12467	   (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "vm,C")]
12468	  UNSPEC_MASKED_EQ))]
12469  "TARGET_AVX512F && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12470  "@
12471   vpcmpeq<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}
12472   vptestnm<ssemodesuffix>\t{%1, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %1}"
12473  [(set_attr "type" "ssecmp")
12474   (set_attr "prefix_extra" "1")
12475   (set_attr "prefix" "evex")
12476   (set_attr "mode" "<sseinsnmode>")])
12477
12478(define_insn "*sse4_1_eqv2di3"
12479  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12480	(eq:V2DI
12481	  (match_operand:V2DI 1 "vector_operand" "%0,0,x")
12482	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12483  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12484  "@
12485   pcmpeqq\t{%2, %0|%0, %2}
12486   pcmpeqq\t{%2, %0|%0, %2}
12487   vpcmpeqq\t{%2, %1, %0|%0, %1, %2}"
12488  [(set_attr "isa" "noavx,noavx,avx")
12489   (set_attr "type" "ssecmp")
12490   (set_attr "prefix_extra" "1")
12491   (set_attr "prefix" "orig,orig,vex")
12492   (set_attr "mode" "TI")])
12493
12494(define_insn "*sse2_eq<mode>3"
12495  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12496	(eq:VI124_128
12497	  (match_operand:VI124_128 1 "vector_operand" "%0,x")
12498	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12499  "TARGET_SSE2 && !TARGET_XOP
12500   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12501  "@
12502   pcmpeq<ssemodesuffix>\t{%2, %0|%0, %2}
12503   vpcmpeq<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12504  [(set_attr "isa" "noavx,avx")
12505   (set_attr "type" "ssecmp")
12506   (set_attr "prefix_data16" "1,*")
12507   (set_attr "prefix" "orig,vex")
12508   (set_attr "mode" "TI")])
12509
12510(define_expand "sse2_eq<mode>3"
12511  [(set (match_operand:VI124_128 0 "register_operand")
12512	(eq:VI124_128
12513	  (match_operand:VI124_128 1 "vector_operand")
12514	  (match_operand:VI124_128 2 "vector_operand")))]
  "TARGET_SSE2 && !TARGET_XOP"
12516  "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
12517
12518(define_expand "sse4_1_eqv2di3"
12519  [(set (match_operand:V2DI 0 "register_operand")
12520	(eq:V2DI
12521	  (match_operand:V2DI 1 "vector_operand")
12522	  (match_operand:V2DI 2 "vector_operand")))]
12523  "TARGET_SSE4_1"
12524  "ix86_fixup_binary_operands_no_copy (EQ, V2DImode, operands);")
12525
12526(define_insn "sse4_2_gtv2di3"
12527  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,x")
12528	(gt:V2DI
12529	  (match_operand:V2DI 1 "register_operand" "0,0,x")
12530	  (match_operand:V2DI 2 "vector_operand" "YrBm,*xBm,xm")))]
12531  "TARGET_SSE4_2"
12532  "@
12533   pcmpgtq\t{%2, %0|%0, %2}
12534   pcmpgtq\t{%2, %0|%0, %2}
12535   vpcmpgtq\t{%2, %1, %0|%0, %1, %2}"
12536  [(set_attr "isa" "noavx,noavx,avx")
12537   (set_attr "type" "ssecmp")
12538   (set_attr "prefix_extra" "1")
12539   (set_attr "prefix" "orig,orig,vex")
12540   (set_attr "mode" "TI")])
12541
12542(define_insn "avx2_gt<mode>3"
12543  [(set (match_operand:VI_256 0 "register_operand" "=x")
12544	(gt:VI_256
12545	  (match_operand:VI_256 1 "register_operand" "x")
12546	  (match_operand:VI_256 2 "nonimmediate_operand" "xm")))]
12547  "TARGET_AVX2"
12548  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12549  [(set_attr "type" "ssecmp")
12550   (set_attr "prefix_extra" "1")
12551   (set_attr "prefix" "vex")
12552   (set_attr "mode" "OI")])
12553
12554(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12555  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12556	(unspec:<avx512fmaskmode>
12557	  [(match_operand:VI48_AVX512VL 1 "register_operand" "v")
12558	   (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12559  "TARGET_AVX512F"
12560  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12561  [(set_attr "type" "ssecmp")
12562   (set_attr "prefix_extra" "1")
12563   (set_attr "prefix" "evex")
12564   (set_attr "mode" "<sseinsnmode>")])
12565
12566(define_insn "<avx512>_gt<mode>3<mask_scalar_merge_name>"
12567  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
12568	(unspec:<avx512fmaskmode>
12569	  [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
12570	   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")] UNSPEC_MASKED_GT))]
12571  "TARGET_AVX512BW"
12572  "vpcmpgt<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
12573  [(set_attr "type" "ssecmp")
12574   (set_attr "prefix_extra" "1")
12575   (set_attr "prefix" "evex")
12576   (set_attr "mode" "<sseinsnmode>")])
12577
12578(define_insn "sse2_gt<mode>3"
12579  [(set (match_operand:VI124_128 0 "register_operand" "=x,x")
12580	(gt:VI124_128
12581	  (match_operand:VI124_128 1 "register_operand" "0,x")
12582	  (match_operand:VI124_128 2 "vector_operand" "xBm,xm")))]
12583  "TARGET_SSE2 && !TARGET_XOP"
12584  "@
12585   pcmpgt<ssemodesuffix>\t{%2, %0|%0, %2}
12586   vpcmpgt<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
12587  [(set_attr "isa" "noavx,avx")
12588   (set_attr "type" "ssecmp")
12589   (set_attr "prefix_data16" "1,*")
12590   (set_attr "prefix" "orig,vex")
12591   (set_attr "mode" "TI")])
12592
12593(define_expand "vcond<V_512:mode><VI_AVX512BW:mode>"
12594  [(set (match_operand:V_512 0 "register_operand")
12595	(if_then_else:V_512
12596	  (match_operator 3 ""
12597	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12598	     (match_operand:VI_AVX512BW 5 "general_operand")])
12599	  (match_operand:V_512 1)
12600	  (match_operand:V_512 2)))]
12601  "TARGET_AVX512F
12602   && (GET_MODE_NUNITS (<V_512:MODE>mode)
12603       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12604{
12605  bool ok = ix86_expand_int_vcond (operands);
12606  gcc_assert (ok);
12607  DONE;
12608})
12609
12610(define_expand "vcond<V_256:mode><VI_256:mode>"
12611  [(set (match_operand:V_256 0 "register_operand")
12612	(if_then_else:V_256
12613	  (match_operator 3 ""
12614	    [(match_operand:VI_256 4 "nonimmediate_operand")
12615	     (match_operand:VI_256 5 "general_operand")])
12616	  (match_operand:V_256 1)
12617	  (match_operand:V_256 2)))]
12618  "TARGET_AVX2
12619   && (GET_MODE_NUNITS (<V_256:MODE>mode)
12620       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12621{
12622  bool ok = ix86_expand_int_vcond (operands);
12623  gcc_assert (ok);
12624  DONE;
12625})
12626
12627(define_expand "vcond<V_128:mode><VI124_128:mode>"
12628  [(set (match_operand:V_128 0 "register_operand")
12629	(if_then_else:V_128
12630	  (match_operator 3 ""
12631	    [(match_operand:VI124_128 4 "vector_operand")
12632	     (match_operand:VI124_128 5 "general_operand")])
12633	  (match_operand:V_128 1)
12634	  (match_operand:V_128 2)))]
12635  "TARGET_SSE2
12636   && (GET_MODE_NUNITS (<V_128:MODE>mode)
12637       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12638{
12639  bool ok = ix86_expand_int_vcond (operands);
12640  gcc_assert (ok);
12641  DONE;
12642})
12643
12644(define_expand "vcond<VI8F_128:mode>v2di"
12645  [(set (match_operand:VI8F_128 0 "register_operand")
12646	(if_then_else:VI8F_128
12647	  (match_operator 3 ""
12648	    [(match_operand:V2DI 4 "vector_operand")
12649	     (match_operand:V2DI 5 "general_operand")])
12650	  (match_operand:VI8F_128 1)
12651	  (match_operand:VI8F_128 2)))]
12652  "TARGET_SSE4_2"
12653{
12654  bool ok = ix86_expand_int_vcond (operands);
12655  gcc_assert (ok);
12656  DONE;
12657})
12658
12659(define_expand "vcondu<V_512:mode><VI_AVX512BW:mode>"
12660  [(set (match_operand:V_512 0 "register_operand")
12661	(if_then_else:V_512
12662	  (match_operator 3 ""
12663	    [(match_operand:VI_AVX512BW 4 "nonimmediate_operand")
12664	     (match_operand:VI_AVX512BW 5 "nonimmediate_operand")])
12665	  (match_operand:V_512 1 "general_operand")
12666	  (match_operand:V_512 2 "general_operand")))]
12667  "TARGET_AVX512F
12668   && (GET_MODE_NUNITS (<V_512:MODE>mode)
12669       == GET_MODE_NUNITS (<VI_AVX512BW:MODE>mode))"
12670{
12671  bool ok = ix86_expand_int_vcond (operands);
12672  gcc_assert (ok);
12673  DONE;
12674})
12675
12676(define_expand "vcondu<V_256:mode><VI_256:mode>"
12677  [(set (match_operand:V_256 0 "register_operand")
12678	(if_then_else:V_256
12679	  (match_operator 3 ""
12680	    [(match_operand:VI_256 4 "nonimmediate_operand")
12681	     (match_operand:VI_256 5 "nonimmediate_operand")])
12682	  (match_operand:V_256 1 "general_operand")
12683	  (match_operand:V_256 2 "general_operand")))]
12684  "TARGET_AVX2
12685   && (GET_MODE_NUNITS (<V_256:MODE>mode)
12686       == GET_MODE_NUNITS (<VI_256:MODE>mode))"
12687{
12688  bool ok = ix86_expand_int_vcond (operands);
12689  gcc_assert (ok);
12690  DONE;
12691})
12692
12693(define_expand "vcondu<V_128:mode><VI124_128:mode>"
12694  [(set (match_operand:V_128 0 "register_operand")
12695	(if_then_else:V_128
12696	  (match_operator 3 ""
12697	    [(match_operand:VI124_128 4 "vector_operand")
12698	     (match_operand:VI124_128 5 "vector_operand")])
12699	  (match_operand:V_128 1 "general_operand")
12700	  (match_operand:V_128 2 "general_operand")))]
12701  "TARGET_SSE2
12702   && (GET_MODE_NUNITS (<V_128:MODE>mode)
12703       == GET_MODE_NUNITS (<VI124_128:MODE>mode))"
12704{
12705  bool ok = ix86_expand_int_vcond (operands);
12706  gcc_assert (ok);
12707  DONE;
12708})
12709
12710(define_expand "vcondu<VI8F_128:mode>v2di"
12711  [(set (match_operand:VI8F_128 0 "register_operand")
12712	(if_then_else:VI8F_128
12713	  (match_operator 3 ""
12714	    [(match_operand:V2DI 4 "vector_operand")
12715	     (match_operand:V2DI 5 "vector_operand")])
12716	  (match_operand:VI8F_128 1 "general_operand")
12717	  (match_operand:VI8F_128 2 "general_operand")))]
12718  "TARGET_SSE4_2"
12719{
12720  bool ok = ix86_expand_int_vcond (operands);
12721  gcc_assert (ok);
12722  DONE;
12723})
12724
12725(define_expand "vcondeq<VI8F_128:mode>v2di"
12726  [(set (match_operand:VI8F_128 0 "register_operand")
12727	(if_then_else:VI8F_128
12728	  (match_operator 3 ""
12729	    [(match_operand:V2DI 4 "vector_operand")
12730	     (match_operand:V2DI 5 "general_operand")])
12731	  (match_operand:VI8F_128 1)
12732	  (match_operand:VI8F_128 2)))]
12733  "TARGET_SSE4_1"
12734{
12735  bool ok = ix86_expand_int_vcond (operands);
12736  gcc_assert (ok);
12737  DONE;
12738})
12739
12740(define_mode_iterator VEC_PERM_AVX2
12741  [V16QI V8HI V4SI V2DI V4SF V2DF
12742   (V32QI "TARGET_AVX2") (V16HI "TARGET_AVX2")
12743   (V8SI "TARGET_AVX2") (V4DI "TARGET_AVX2")
12744   (V8SF "TARGET_AVX2") (V4DF "TARGET_AVX2")
12745   (V16SF "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
12746   (V16SI "TARGET_AVX512F") (V8DI "TARGET_AVX512F")
12747   (V32HI "TARGET_AVX512BW") (V64QI "TARGET_AVX512VBMI")])
12748
12749(define_expand "vec_perm<mode>"
12750  [(match_operand:VEC_PERM_AVX2 0 "register_operand")
12751   (match_operand:VEC_PERM_AVX2 1 "register_operand")
12752   (match_operand:VEC_PERM_AVX2 2 "register_operand")
12753   (match_operand:<sseintvecmode> 3 "register_operand")]
12754  "TARGET_SSSE3 || TARGET_AVX || TARGET_XOP"
12755{
12756  ix86_expand_vec_perm (operands);
12757  DONE;
12758})
12759
12760;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12761;;
12762;; Parallel bitwise logical operations
12763;;
12764;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
12765
12766(define_expand "one_cmpl<mode>2"
12767  [(set (match_operand:VI 0 "register_operand")
12768	(xor:VI (match_operand:VI 1 "vector_operand")
12769		(match_dup 2)))]
12770  "TARGET_SSE"
12771{
12772  if (!TARGET_AVX512F)
12773    operands[2] = force_reg (<MODE>mode, CONSTM1_RTX (<MODE>mode));
12774  else
12775    operands[2] = CONSTM1_RTX (<MODE>mode);
12776})
12777
12778(define_insn "<mask_codefor>one_cmpl<mode>2<mask_name>"
12779  [(set (match_operand:VI 0 "register_operand" "=v,v")
12780	(xor:VI (match_operand:VI 1 "nonimmediate_operand" "v,m")
12781		(match_operand:VI 2 "vector_all_ones_operand" "BC,BC")))]
12782  "TARGET_AVX512F
12783   && (!<mask_applied>
12784       || <ssescalarmode>mode == SImode
12785       || <ssescalarmode>mode == DImode)"
12786{
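  /* vpternlog with immediate 0x55 computes the bitwise complement of its
     third source (operand 1 here).  Without AVX512VL only the 512-bit form
     exists, so the %g modifiers widen the registers to their zmm names.  */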
12787  if (TARGET_AVX512VL)
12788    return "vpternlog<ternlogsuffix>\t{$0x55, %1, %0, %0<mask_operand3>|%0<mask_operand3>, %0, %1, 0x55}";
12789  else
12790    return "vpternlog<ternlogsuffix>\t{$0x55, %g1, %g0, %g0<mask_operand3>|%g0<mask_operand3>, %g0, %g1, 0x55}";
12791}
12792  [(set_attr "type" "sselog")
12793   (set_attr "prefix" "evex")
12794   (set (attr "mode")
12795        (if_then_else (match_test "TARGET_AVX512VL")
12796		      (const_string "<sseinsnmode>")
12797		      (const_string "XI")))
12798   (set (attr "enabled")
12799	(if_then_else (eq_attr "alternative" "1")
12800		      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
12801		      (const_int 1)))])
12802
12803(define_expand "<sse2_avx2>_andnot<mode>3"
12804  [(set (match_operand:VI_AVX2 0 "register_operand")
12805	(and:VI_AVX2
12806	  (not:VI_AVX2 (match_operand:VI_AVX2 1 "register_operand"))
12807	  (match_operand:VI_AVX2 2 "vector_operand")))]
12808  "TARGET_SSE2")
12809
12810(define_expand "<sse2_avx2>_andnot<mode>3_mask"
12811  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
12812	(vec_merge:VI48_AVX512VL
12813	  (and:VI48_AVX512VL
12814	    (not:VI48_AVX512VL
12815	      (match_operand:VI48_AVX512VL 1 "register_operand"))
12816	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand"))
12817	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand")
12818	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12819  "TARGET_AVX512F")
12820
12821(define_expand "<sse2_avx2>_andnot<mode>3_mask"
12822  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
12823	(vec_merge:VI12_AVX512VL
12824	  (and:VI12_AVX512VL
12825	    (not:VI12_AVX512VL
12826	      (match_operand:VI12_AVX512VL 1 "register_operand"))
12827	    (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
12828	  (match_operand:VI12_AVX512VL 3 "nonimm_or_0_operand")
12829	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
12830  "TARGET_AVX512BW")
12831
12832(define_insn "*andnot<mode>3"
12833  [(set (match_operand:VI 0 "register_operand" "=x,x,v")
12834	(and:VI
12835	  (not:VI (match_operand:VI 1 "register_operand" "0,x,v"))
12836	  (match_operand:VI 2 "vector_operand" "xBm,xm,vm")))]
12837  "TARGET_SSE"
12838{
12839  char buf[64];
12840  const char *ops;
12841  const char *tmp;
12842  const char *ssesuffix;
12843
12844  switch (get_attr_mode (insn))
12845    {
12846    case MODE_XI:
12847      gcc_assert (TARGET_AVX512F);
12848      /* FALLTHRU */
12849    case MODE_OI:
12850      gcc_assert (TARGET_AVX2);
12851      /* FALLTHRU */
12852    case MODE_TI:
12853      gcc_assert (TARGET_SSE2);
12854      tmp = "pandn";
12855      switch (<MODE>mode)
12856	{
12857	case E_V64QImode:
12858	case E_V32HImode:
12859	  /* There is no vpandnb or vpandnw instruction, nor vpandn for
12860	     512-bit vectors. Use vpandnq instead.  */
12861	  ssesuffix = "q";
12862	  break;
12863	case E_V16SImode:
12864	case E_V8DImode:
12865	  ssesuffix = "<ssemodesuffix>";
12866	  break;
12867	case E_V8SImode:
12868	case E_V4DImode:
12869	case E_V4SImode:
12870	case E_V2DImode:
12871	  ssesuffix = (TARGET_AVX512VL && which_alternative == 2
12872		       ? "<ssemodesuffix>" : "");
12873	  break;
12874	default:
12875	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
12876	}
12877      break;
12878
12879    case MODE_V16SF:
12880      gcc_assert (TARGET_AVX512F);
12881      /* FALLTHRU */
12882    case MODE_V8SF:
12883      gcc_assert (TARGET_AVX);
12884      /* FALLTHRU */
12885    case MODE_V4SF:
12886      gcc_assert (TARGET_SSE);
12887      tmp = "andn";
12888      ssesuffix = "ps";
12889      break;
12890
12891    default:
12892      gcc_unreachable ();
12893    }
12894
12895  switch (which_alternative)
12896    {
12897    case 0:
12898      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
12899      break;
12900    case 1:
12901    case 2:
12902      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
12903      break;
12904    default:
12905      gcc_unreachable ();
12906    }
12907
12908  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
12909  output_asm_insn (buf, operands);
12910  return "";
12911}
12912  [(set_attr "isa" "noavx,avx,avx")
12913   (set_attr "type" "sselog")
12914   (set (attr "prefix_data16")
12915     (if_then_else
12916       (and (eq_attr "alternative" "0")
12917	    (eq_attr "mode" "TI"))
12918       (const_string "1")
12919       (const_string "*")))
12920   (set_attr "prefix" "orig,vex,evex")
12921   (set (attr "mode")
12922	(cond [(match_test "TARGET_AVX2")
12923		 (const_string "<sseinsnmode>")
12924	       (match_test "TARGET_AVX")
12925		 (if_then_else
12926		   (match_test "<MODE_SIZE> > 16")
12927		   (const_string "V8SF")
12928		   (const_string "<sseinsnmode>"))
12929	       (ior (not (match_test "TARGET_SSE2"))
12930		    (match_test "optimize_function_for_size_p (cfun)"))
12931		 (const_string "V4SF")
12932	      ]
12933	      (const_string "<sseinsnmode>")))])
12934
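;; The _bcst variants use the EVEX embedded-broadcast form: the scalar memory
;; operand is replicated to every element by the hardware, with the
;; <avx512bcst> attribute supplying the {1to<n>} qualifier in the printed
;; operand.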
12935(define_insn "*andnot<mode>3_bcst"
12936  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12937	(and:VI48_AVX512VL
12938	  (not:VI48_AVX512VL
12939	     (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12940	  (vec_duplicate:VI48_AVX512VL
12941	    (match_operand:<ssescalarmode> 2 "memory_operand" "m"))))]
12942  "TARGET_AVX512F"
12943  "vpandn<ssemodesuffix>\t{%2<avx512bcst>, %1, %0|%0, %1, %2<avx512bcst>}"
12944  [(set_attr "type" "sselog")
12945   (set_attr "prefix" "evex")
12946   (set_attr "mode" "<sseinsnmode>")])
12947
12948(define_insn "*andnot<mode>3_mask"
12949  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
12950	(vec_merge:VI48_AVX512VL
12951	  (and:VI48_AVX512VL
12952	    (not:VI48_AVX512VL
12953	      (match_operand:VI48_AVX512VL 1 "register_operand" "v"))
12954	    (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm"))
12955	  (match_operand:VI48_AVX512VL 3 "nonimm_or_0_operand" "0C")
12956	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
12957  "TARGET_AVX512F"
12958  "vpandn<ssemodesuffix>\t{%2, %1, %0%{%4%}%N3|%0%{%4%}%N3, %1, %2}";
12959  [(set_attr "type" "sselog")
12960   (set_attr "prefix" "evex")
12961   (set_attr "mode" "<sseinsnmode>")])
12962
12963(define_expand "<code><mode>3"
12964  [(set (match_operand:VI 0 "register_operand")
12965	(any_logic:VI
12966	  (match_operand:VI 1 "nonimmediate_or_const_vector_operand")
12967	  (match_operand:VI 2 "nonimmediate_or_const_vector_operand")))]
12968  "TARGET_SSE"
12969{
12970  ix86_expand_vector_logical_operator (<CODE>, <MODE>mode, operands);
12971  DONE;
12972})
12973
12974(define_insn "<mask_codefor><code><mode>3<mask_name>"
12975  [(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,x,v")
12976	(any_logic:VI48_AVX_AVX512F
12977	  (match_operand:VI48_AVX_AVX512F 1 "vector_operand" "%0,x,v")
12978	  (match_operand:VI48_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
12979  "TARGET_SSE && <mask_mode512bit_condition>
12980   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
12981{
12982  char buf[64];
12983  const char *ops;
12984  const char *tmp;
12985  const char *ssesuffix;
12986
12987  switch (get_attr_mode (insn))
12988    {
12989    case MODE_XI:
12990      gcc_assert (TARGET_AVX512F);
12991      /* FALLTHRU */
12992    case MODE_OI:
12993      gcc_assert (TARGET_AVX2);
12994      /* FALLTHRU */
12995    case MODE_TI:
12996      gcc_assert (TARGET_SSE2);
12997      tmp = "p<logic>";
12998      switch (<MODE>mode)
12999	{
13000	case E_V16SImode:
13001	case E_V8DImode:
13002	  ssesuffix = "<ssemodesuffix>";
13003	  break;
13004	case E_V8SImode:
13005	case E_V4DImode:
13006	case E_V4SImode:
13007	case E_V2DImode:
13008	  ssesuffix = (TARGET_AVX512VL
13009		       && (<mask_applied> || which_alternative == 2)
13010		       ? "<ssemodesuffix>" : "");
13011	  break;
13012	default:
13013	  gcc_unreachable ();
13014	}
13015      break;
13016
13017    case MODE_V8SF:
13018      gcc_assert (TARGET_AVX);
13019      /* FALLTHRU */
13020    case MODE_V4SF:
13021      gcc_assert (TARGET_SSE);
13022      tmp = "<logic>";
13023      ssesuffix = "ps";
13024      break;
13025
13026    default:
13027      gcc_unreachable ();
13028    }
13029
13030  switch (which_alternative)
13031    {
13032    case 0:
13033      if (<mask_applied>)
13034        ops = "v%s%s\t{%%2, %%0, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%0, %%2}";
13035      else
13036        ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13037      break;
13038    case 1:
13039    case 2:
13040      ops = "v%s%s\t{%%2, %%1, %%0<mask_operand3_1>|%%0<mask_operand3_1>, %%1, %%2}";
13041      break;
13042    default:
13043      gcc_unreachable ();
13044    }
13045
13046  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13047  output_asm_insn (buf, operands);
13048  return "";
13049}
13050  [(set_attr "isa" "noavx,avx,avx")
13051   (set_attr "type" "sselog")
13052   (set (attr "prefix_data16")
13053     (if_then_else
13054       (and (eq_attr "alternative" "0")
13055	    (eq_attr "mode" "TI"))
13056       (const_string "1")
13057       (const_string "*")))
13058   (set_attr "prefix" "<mask_prefix3>,evex")
13059   (set (attr "mode")
13060	(cond [(match_test "TARGET_AVX2")
13061		 (const_string "<sseinsnmode>")
13062	       (match_test "TARGET_AVX")
13063		 (if_then_else
13064		   (match_test "<MODE_SIZE> > 16")
13065		   (const_string "V8SF")
13066		   (const_string "<sseinsnmode>"))
13067	       (ior (not (match_test "TARGET_SSE2"))
13068		    (match_test "optimize_function_for_size_p (cfun)"))
13069		 (const_string "V4SF")
13070	      ]
13071	      (const_string "<sseinsnmode>")))])
13072
13073(define_insn "*<code><mode>3"
13074  [(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,x,v")
13075	(any_logic:VI12_AVX_AVX512F
13076	  (match_operand:VI12_AVX_AVX512F 1 "vector_operand" "%0,x,v")
13077	  (match_operand:VI12_AVX_AVX512F 2 "vector_operand" "xBm,xm,vm")))]
13078  "TARGET_SSE && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
13079{
13080  char buf[64];
13081  const char *ops;
13082  const char *tmp;
13083  const char *ssesuffix;
13084
13085  switch (get_attr_mode (insn))
13086    {
13087    case MODE_XI:
13088      gcc_assert (TARGET_AVX512F);
13089      /* FALLTHRU */
13090    case MODE_OI:
13091      gcc_assert (TARGET_AVX2);
13092      /* FALLTHRU */
13093    case MODE_TI:
13094      gcc_assert (TARGET_SSE2);
13095      tmp = "p<logic>";
13096      switch (<MODE>mode)
13097	{
13098	case E_V64QImode:
13099	case E_V32HImode:
13100	  ssesuffix = "q";
13101	  break;
13102	case E_V32QImode:
13103	case E_V16HImode:
13104	case E_V16QImode:
13105	case E_V8HImode:
13106	  ssesuffix = TARGET_AVX512VL && which_alternative == 2 ? "q" : "";
13107	  break;
13108	default:
13109	  gcc_unreachable ();
13110	}
13111      break;
13112
13113    case MODE_V8SF:
13114      gcc_assert (TARGET_AVX);
13115      /* FALLTHRU */
13116    case MODE_V4SF:
13117      gcc_assert (TARGET_SSE);
13118      tmp = "<logic>";
13119      ssesuffix = "ps";
13120      break;
13121
13122    default:
13123      gcc_unreachable ();
13124    }
13125
13126  switch (which_alternative)
13127    {
13128    case 0:
13129      ops = "%s%s\t{%%2, %%0|%%0, %%2}";
13130      break;
13131    case 1:
13132    case 2:
13133      ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
13134      break;
13135    default:
13136      gcc_unreachable ();
13137    }
13138
13139  snprintf (buf, sizeof (buf), ops, tmp, ssesuffix);
13140  output_asm_insn (buf, operands);
13141  return "";
13142}
13143  [(set_attr "isa" "noavx,avx,avx")
13144   (set_attr "type" "sselog")
13145   (set (attr "prefix_data16")
13146     (if_then_else
13147       (and (eq_attr "alternative" "0")
13148	    (eq_attr "mode" "TI"))
13149       (const_string "1")
13150       (const_string "*")))
13151   (set_attr "prefix" "orig,vex,evex")
13152   (set (attr "mode")
13153	(cond [(match_test "TARGET_AVX2")
13154		 (const_string "<sseinsnmode>")
13155	       (match_test "TARGET_AVX")
13156		 (if_then_else
13157		   (match_test "<MODE_SIZE> > 16")
13158		   (const_string "V8SF")
13159		   (const_string "<sseinsnmode>"))
13160	       (ior (not (match_test "TARGET_SSE2"))
13161		    (match_test "optimize_function_for_size_p (cfun)"))
13162		 (const_string "V4SF")
13163	      ]
13164	      (const_string "<sseinsnmode>")))])
13165
13166(define_insn "*<code><mode>3_bcst"
13167  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
13168	(any_logic:VI48_AVX512VL
13169	  (vec_duplicate:VI48_AVX512VL
13170	    (match_operand:<ssescalarmode> 1 "memory_operand" "m"))
13171	  (match_operand:VI48_AVX512VL 2 "register_operand" "v")))]
13172  "TARGET_AVX512F && <mask_avx512vl_condition>"
13173  "vp<logic><ssemodesuffix>\t{%1<avx512bcst>, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1<avx512bcst>}"
13174  [(set_attr "type" "sseiadd")
13175   (set_attr "prefix" "evex")
13176   (set_attr "mode" "<sseinsnmode>")])
13177
13178(define_mode_iterator VI1248_AVX512VLBW
13179  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX512VL && TARGET_AVX512BW")
13180   (V16QI "TARGET_AVX512VL && TARGET_AVX512BW")
13181   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512VL && TARGET_AVX512BW")
13182   (V8HI "TARGET_AVX512VL && TARGET_AVX512BW")
13183   V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
13184   V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
13185
13186(define_mode_iterator AVX512ZEXTMASK
13187  [(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
13188
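;; vptestm sets mask bit I when element I of the bitwise AND of its two
;; source operands is nonzero; vptestnm sets it when that AND is zero.  The
;; *_zext patterns describe the mask result zero-extended into a wider mask
;; mode, and the *_zext_mask forms additionally AND it with a writemask
;; register.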
13189(define_insn "<avx512>_testm<mode>3<mask_scalar_merge_name>"
13190  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13191	(unspec:<avx512fmaskmode>
13192	 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13193	  (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13194	 UNSPEC_TESTM))]
13195  "TARGET_AVX512F"
13196  "vptestm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13197  [(set_attr "prefix" "evex")
13198   (set_attr "mode"  "<sseinsnmode>")])
13199
13200(define_insn "<avx512>_testnm<mode>3<mask_scalar_merge_name>"
13201  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
13202	(unspec:<avx512fmaskmode>
13203	 [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13204	  (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13205	 UNSPEC_TESTNM))]
13206  "TARGET_AVX512F"
13207  "vptestnm<ssemodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
13208  [(set_attr "prefix" "evex")
13209   (set_attr "mode"  "<sseinsnmode>")])
13210
13211(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext"
13212  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13213	(zero_extend:AVX512ZEXTMASK
13214	  (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13215	   [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13216	    (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13217	   UNSPEC_TESTM)))]
13218  "TARGET_AVX512BW
13219   && (<AVX512ZEXTMASK:MODE_SIZE>
13220       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13221  "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13222  [(set_attr "prefix" "evex")
13223   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
13224
13225(define_insn "*<avx512>_testm<VI1248_AVX512VLBW:mode>3_zext_mask"
13226  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13227	(zero_extend:AVX512ZEXTMASK
13228	  (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13229	    (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13230	     [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13231	      (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13232	     UNSPEC_TESTM)
13233	    (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13234  "TARGET_AVX512BW
13235   && (<AVX512ZEXTMASK:MODE_SIZE>
13236       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13237  "vptestm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13238  [(set_attr "prefix" "evex")
13239   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
13240
13241(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext"
13242  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13243	(zero_extend:AVX512ZEXTMASK
13244	  (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13245	   [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13246	    (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13247	   UNSPEC_TESTNM)))]
13248  "TARGET_AVX512BW
13249   && (<AVX512ZEXTMASK:MODE_SIZE>
13250       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13251  "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
13252  [(set_attr "prefix" "evex")
13253   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
13254
13255(define_insn "*<avx512>_testnm<VI1248_AVX512VLBW:mode>3_zext_mask"
13256  [(set (match_operand:AVX512ZEXTMASK 0 "register_operand" "=k")
13257	(zero_extend:AVX512ZEXTMASK
13258	  (and:<VI1248_AVX512VLBW:avx512fmaskmode>
13259	    (unspec:<VI1248_AVX512VLBW:avx512fmaskmode>
13260	     [(match_operand:VI1248_AVX512VLBW 1 "register_operand" "v")
13261	      (match_operand:VI1248_AVX512VLBW 2 "nonimmediate_operand" "vm")]
13262	     UNSPEC_TESTNM)
13263	    (match_operand:<VI1248_AVX512VLBW:avx512fmaskmode> 3 "register_operand" "Yk"))))]
13264  "TARGET_AVX512BW
13265   && (<AVX512ZEXTMASK:MODE_SIZE>
13266       > GET_MODE_SIZE (<VI1248_AVX512VLBW:avx512fmaskmode>mode))"
13267  "vptestnm<VI1248_AVX512VLBW:ssemodesuffix>\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}"
13268  [(set_attr "prefix" "evex")
13269   (set_attr "mode"  "<VI1248_AVX512VLBW:sseinsnmode>")])
13270
13271;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13272;;
13273;; Parallel integral element swizzling
13274;;
13275;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
13276
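;; vec_pack_trunc is implemented by reinterpreting each wide-element input as
;; a vector of narrow elements and keeping only the even-indexed ones: on a
;; little-endian target the truncated low half of every wide element sits at
;; an even narrow index.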
13277(define_expand "vec_pack_trunc_<mode>"
13278  [(match_operand:<ssepackmode> 0 "register_operand")
13279   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 1 "register_operand")
13280   (match_operand:VI248_AVX2_8_AVX512F_24_AVX512BW 2 "register_operand")]
13281  "TARGET_SSE2"
13282{
13283  rtx op1 = gen_lowpart (<ssepackmode>mode, operands[1]);
13284  rtx op2 = gen_lowpart (<ssepackmode>mode, operands[2]);
13285  ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
13286  DONE;
13287})
13288
13289(define_expand "vec_pack_trunc_qi"
13290  [(set (match_operand:HI 0 "register_operand")
13291	(ior:HI (ashift:HI (zero_extend:HI (match_operand:QI 2 "register_operand"))
13292                           (const_int 8))
13293		(zero_extend:HI (match_operand:QI 1 "register_operand"))))]
13294  "TARGET_AVX512F")
13295
13296(define_expand "vec_pack_trunc_<mode>"
13297  [(set (match_operand:<DOUBLEMASKMODE> 0 "register_operand")
13298	(ior:<DOUBLEMASKMODE>
13299	  (ashift:<DOUBLEMASKMODE>
13300	    (zero_extend:<DOUBLEMASKMODE>
13301	      (match_operand:SWI24 2 "register_operand"))
13302	    (match_dup 3))
13303	  (zero_extend:<DOUBLEMASKMODE>
13304	    (match_operand:SWI24 1 "register_operand"))))]
13305  "TARGET_AVX512BW"
13306{
13307  operands[3] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
13308})
13309
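;; Pack two scalar-bool masks that each hold nunits/2 significant bits into
;; one QImode mask: mask off the low half of operand 1, shift operand 2 into
;; the upper half and IOR the two.  The byte form of kshift requires
;; AVX512DQ, so without it the shift is performed in HImode instead.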
13310(define_expand "vec_pack_sbool_trunc_qi"
13311  [(match_operand:QI 0 "register_operand")
13312   (match_operand:QI 1 "register_operand")
13313   (match_operand:QI 2 "register_operand")
13314   (match_operand:QI 3 "const_int_operand")]
13315  "TARGET_AVX512F"
13316{
13317  HOST_WIDE_INT nunits = INTVAL (operands[3]);
13318  rtx mask, tem1, tem2;
13319  if (nunits != 8 && nunits != 4)
13320    FAIL;
13321  mask = gen_reg_rtx (QImode);
13322  emit_move_insn (mask, GEN_INT ((1 << (nunits / 2)) - 1));
13323  tem1 = gen_reg_rtx (QImode);
13324  emit_insn (gen_kandqi (tem1, operands[1], mask));
13325  if (TARGET_AVX512DQ)
13326    {
13327      tem2 = gen_reg_rtx (QImode);
13328      emit_insn (gen_kashiftqi (tem2, operands[2],
13329				GEN_INT (nunits / 2)));
13330    }
13331  else
13332    {
13333      tem2 = gen_reg_rtx (HImode);
13334      emit_insn (gen_kashifthi (tem2, lowpart_subreg (HImode, operands[2],
13335						      QImode),
13336				GEN_INT (nunits / 2)));
13337      tem2 = lowpart_subreg (QImode, tem2, HImode);
13338    }
13339  emit_insn (gen_kiorqi (operands[0], tem1, tem2));
13340  DONE;
13341})
13342
13343(define_insn "<sse2_avx2>_packsswb<mask_name>"
13344  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13345	(vec_concat:VI1_AVX512
13346	  (ss_truncate:<ssehalfvecmode>
13347	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13348	  (ss_truncate:<ssehalfvecmode>
13349	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13350  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13351  "@
13352   packsswb\t{%2, %0|%0, %2}
13353   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13354   vpacksswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13355  [(set_attr "isa" "noavx,avx,avx512bw")
13356   (set_attr "type" "sselog")
13357   (set_attr "prefix_data16" "1,*,*")
13358   (set_attr "prefix" "orig,<mask_prefix>,evex")
13359   (set_attr "mode" "<sseinsnmode>")])
13360
13361(define_insn "<sse2_avx2>_packssdw<mask_name>"
13362  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
13363	(vec_concat:VI2_AVX2
13364	  (ss_truncate:<ssehalfvecmode>
13365	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13366	  (ss_truncate:<ssehalfvecmode>
13367	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13368  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13369  "@
13370   packssdw\t{%2, %0|%0, %2}
13371   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13372   vpackssdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13373  [(set_attr "isa" "noavx,avx,avx512bw")
13374   (set_attr "type" "sselog")
13375   (set_attr "prefix_data16" "1,*,*")
13376   (set_attr "prefix" "orig,<mask_prefix>,evex")
13377   (set_attr "mode" "<sseinsnmode>")])
13378
13379(define_insn "<sse2_avx2>_packuswb<mask_name>"
13380  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
13381	(vec_concat:VI1_AVX512
13382	  (us_truncate:<ssehalfvecmode>
13383	    (match_operand:<sseunpackmode> 1 "register_operand" "0,x,v"))
13384	  (us_truncate:<ssehalfvecmode>
13385	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,xm,vm"))))]
13386  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
13387  "@
13388   packuswb\t{%2, %0|%0, %2}
13389   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
13390   vpackuswb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13391  [(set_attr "isa" "noavx,avx,avx512bw")
13392   (set_attr "type" "sselog")
13393   (set_attr "prefix_data16" "1,*,*")
13394   (set_attr "prefix" "orig,<mask_prefix>,evex")
13395   (set_attr "mode" "<sseinsnmode>")])
13396
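;; The 256-bit and 512-bit punpck instructions interleave within each 128-bit
;; lane only, which is why the selection indices below come in lane-sized
;; groups instead of describing a single full-width interleave.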
13397(define_insn "avx512bw_interleave_highv64qi<mask_name>"
13398  [(set (match_operand:V64QI 0 "register_operand" "=v")
13399	(vec_select:V64QI
13400	  (vec_concat:V128QI
13401	    (match_operand:V64QI 1 "register_operand" "v")
13402	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13403	  (parallel [(const_int 8)  (const_int 72)
13404		     (const_int 9)  (const_int 73)
13405		     (const_int 10) (const_int 74)
13406		     (const_int 11) (const_int 75)
13407		     (const_int 12) (const_int 76)
13408		     (const_int 13) (const_int 77)
13409		     (const_int 14) (const_int 78)
13410		     (const_int 15) (const_int 79)
13411		     (const_int 24) (const_int 88)
13412		     (const_int 25) (const_int 89)
13413		     (const_int 26) (const_int 90)
13414		     (const_int 27) (const_int 91)
13415		     (const_int 28) (const_int 92)
13416		     (const_int 29) (const_int 93)
13417		     (const_int 30) (const_int 94)
13418		     (const_int 31) (const_int 95)
13419		     (const_int 40) (const_int 104)
13420		     (const_int 41) (const_int 105)
13421		     (const_int 42) (const_int 106)
13422		     (const_int 43) (const_int 107)
13423		     (const_int 44) (const_int 108)
13424		     (const_int 45) (const_int 109)
13425		     (const_int 46) (const_int 110)
13426		     (const_int 47) (const_int 111)
13427		     (const_int 56) (const_int 120)
13428		     (const_int 57) (const_int 121)
13429		     (const_int 58) (const_int 122)
13430		     (const_int 59) (const_int 123)
13431		     (const_int 60) (const_int 124)
13432		     (const_int 61) (const_int 125)
13433		     (const_int 62) (const_int 126)
13434		     (const_int 63) (const_int 127)])))]
13435  "TARGET_AVX512BW"
13436  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13437  [(set_attr "type" "sselog")
13438   (set_attr "prefix" "evex")
13439   (set_attr "mode" "XI")])
13440
13441(define_insn "avx2_interleave_highv32qi<mask_name>"
13442  [(set (match_operand:V32QI 0 "register_operand" "=v")
13443	(vec_select:V32QI
13444	  (vec_concat:V64QI
13445	    (match_operand:V32QI 1 "register_operand" "v")
13446	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13447	  (parallel [(const_int 8)  (const_int 40)
13448		     (const_int 9)  (const_int 41)
13449		     (const_int 10) (const_int 42)
13450		     (const_int 11) (const_int 43)
13451		     (const_int 12) (const_int 44)
13452		     (const_int 13) (const_int 45)
13453		     (const_int 14) (const_int 46)
13454		     (const_int 15) (const_int 47)
13455		     (const_int 24) (const_int 56)
13456		     (const_int 25) (const_int 57)
13457		     (const_int 26) (const_int 58)
13458		     (const_int 27) (const_int 59)
13459		     (const_int 28) (const_int 60)
13460		     (const_int 29) (const_int 61)
13461		     (const_int 30) (const_int 62)
13462		     (const_int 31) (const_int 63)])))]
13463  "TARGET_AVX2 && <mask_avx512vl_condition>"
13464  "vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13465  [(set_attr "type" "sselog")
13466   (set_attr "prefix" "<mask_prefix>")
13467   (set_attr "mode" "OI")])
13468
13469(define_insn "vec_interleave_highv16qi<mask_name>"
13470  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13471	(vec_select:V16QI
13472	  (vec_concat:V32QI
13473	    (match_operand:V16QI 1 "register_operand" "0,v")
13474	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13475	  (parallel [(const_int 8)  (const_int 24)
13476		     (const_int 9)  (const_int 25)
13477		     (const_int 10) (const_int 26)
13478		     (const_int 11) (const_int 27)
13479		     (const_int 12) (const_int 28)
13480		     (const_int 13) (const_int 29)
13481		     (const_int 14) (const_int 30)
13482		     (const_int 15) (const_int 31)])))]
13483  "TARGET_SSE2 && <mask_avx512vl_condition>"
13484  "@
13485   punpckhbw\t{%2, %0|%0, %2}
13486   vpunpckhbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13487  [(set_attr "isa" "noavx,avx")
13488   (set_attr "type" "sselog")
13489   (set_attr "prefix_data16" "1,*")
13490   (set_attr "prefix" "orig,<mask_prefix>")
13491   (set_attr "mode" "TI")])
13492
13493(define_insn "avx512bw_interleave_lowv64qi<mask_name>"
13494  [(set (match_operand:V64QI 0 "register_operand" "=v")
13495	(vec_select:V64QI
13496	  (vec_concat:V128QI
13497	    (match_operand:V64QI 1 "register_operand" "v")
13498	    (match_operand:V64QI 2 "nonimmediate_operand" "vm"))
13499	  (parallel [(const_int 0) (const_int 64)
13500		     (const_int 1) (const_int 65)
13501		     (const_int 2) (const_int 66)
13502		     (const_int 3) (const_int 67)
13503		     (const_int 4) (const_int 68)
13504		     (const_int 5) (const_int 69)
13505		     (const_int 6) (const_int 70)
13506		     (const_int 7) (const_int 71)
13507		     (const_int 16) (const_int 80)
13508		     (const_int 17) (const_int 81)
13509		     (const_int 18) (const_int 82)
13510		     (const_int 19) (const_int 83)
13511		     (const_int 20) (const_int 84)
13512		     (const_int 21) (const_int 85)
13513		     (const_int 22) (const_int 86)
13514		     (const_int 23) (const_int 87)
13515		     (const_int 32) (const_int 96)
13516		     (const_int 33) (const_int 97)
13517		     (const_int 34) (const_int 98)
13518		     (const_int 35) (const_int 99)
13519		     (const_int 36) (const_int 100)
13520		     (const_int 37) (const_int 101)
13521		     (const_int 38) (const_int 102)
13522		     (const_int 39) (const_int 103)
13523		     (const_int 48) (const_int 112)
13524		     (const_int 49) (const_int 113)
13525		     (const_int 50) (const_int 114)
13526		     (const_int 51) (const_int 115)
13527		     (const_int 52) (const_int 116)
13528		     (const_int 53) (const_int 117)
13529		     (const_int 54) (const_int 118)
13530		     (const_int 55) (const_int 119)])))]
13531  "TARGET_AVX512BW"
13532  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13533  [(set_attr "type" "sselog")
13534   (set_attr "prefix" "evex")
13535   (set_attr "mode" "XI")])
13536
13537(define_insn "avx2_interleave_lowv32qi<mask_name>"
13538  [(set (match_operand:V32QI 0 "register_operand" "=v")
13539	(vec_select:V32QI
13540	  (vec_concat:V64QI
13541	    (match_operand:V32QI 1 "register_operand" "v")
13542	    (match_operand:V32QI 2 "nonimmediate_operand" "vm"))
13543	  (parallel [(const_int 0) (const_int 32)
13544		     (const_int 1) (const_int 33)
13545		     (const_int 2) (const_int 34)
13546		     (const_int 3) (const_int 35)
13547		     (const_int 4) (const_int 36)
13548		     (const_int 5) (const_int 37)
13549		     (const_int 6) (const_int 38)
13550		     (const_int 7) (const_int 39)
13551		     (const_int 16) (const_int 48)
13552		     (const_int 17) (const_int 49)
13553		     (const_int 18) (const_int 50)
13554		     (const_int 19) (const_int 51)
13555		     (const_int 20) (const_int 52)
13556		     (const_int 21) (const_int 53)
13557		     (const_int 22) (const_int 54)
13558		     (const_int 23) (const_int 55)])))]
13559  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13560  "vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13561  [(set_attr "type" "sselog")
13562   (set_attr "prefix" "maybe_vex")
13563   (set_attr "mode" "OI")])
13564
13565(define_insn "vec_interleave_lowv16qi<mask_name>"
13566  [(set (match_operand:V16QI 0 "register_operand" "=x,v")
13567	(vec_select:V16QI
13568	  (vec_concat:V32QI
13569	    (match_operand:V16QI 1 "register_operand" "0,v")
13570	    (match_operand:V16QI 2 "vector_operand" "xBm,vm"))
13571	  (parallel [(const_int 0) (const_int 16)
13572		     (const_int 1) (const_int 17)
13573		     (const_int 2) (const_int 18)
13574		     (const_int 3) (const_int 19)
13575		     (const_int 4) (const_int 20)
13576		     (const_int 5) (const_int 21)
13577		     (const_int 6) (const_int 22)
13578		     (const_int 7) (const_int 23)])))]
13579  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13580  "@
13581   punpcklbw\t{%2, %0|%0, %2}
13582   vpunpcklbw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13583  [(set_attr "isa" "noavx,avx")
13584   (set_attr "type" "sselog")
13585   (set_attr "prefix_data16" "1,*")
13586   (set_attr "prefix" "orig,vex")
13587   (set_attr "mode" "TI")])
13588
13589(define_insn "avx512bw_interleave_highv32hi<mask_name>"
13590  [(set (match_operand:V32HI 0 "register_operand" "=v")
13591	(vec_select:V32HI
13592	  (vec_concat:V64HI
13593	    (match_operand:V32HI 1 "register_operand" "v")
13594	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13595	  (parallel [(const_int 4) (const_int 36)
13596		     (const_int 5) (const_int 37)
13597		     (const_int 6) (const_int 38)
13598		     (const_int 7) (const_int 39)
13599		     (const_int 12) (const_int 44)
13600		     (const_int 13) (const_int 45)
13601		     (const_int 14) (const_int 46)
13602		     (const_int 15) (const_int 47)
13603		     (const_int 20) (const_int 52)
13604		     (const_int 21) (const_int 53)
13605		     (const_int 22) (const_int 54)
13606		     (const_int 23) (const_int 55)
13607		     (const_int 28) (const_int 60)
13608		     (const_int 29) (const_int 61)
13609		     (const_int 30) (const_int 62)
13610		     (const_int 31) (const_int 63)])))]
13611  "TARGET_AVX512BW"
13612  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13613  [(set_attr "type" "sselog")
13614   (set_attr "prefix" "evex")
13615   (set_attr "mode" "XI")])
13616
13617(define_insn "avx2_interleave_highv16hi<mask_name>"
13618  [(set (match_operand:V16HI 0 "register_operand" "=v")
13619	(vec_select:V16HI
13620	  (vec_concat:V32HI
13621	    (match_operand:V16HI 1 "register_operand" "v")
13622	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13623	  (parallel [(const_int 4) (const_int 20)
13624		     (const_int 5) (const_int 21)
13625		     (const_int 6) (const_int 22)
13626		     (const_int 7) (const_int 23)
13627		     (const_int 12) (const_int 28)
13628		     (const_int 13) (const_int 29)
13629		     (const_int 14) (const_int 30)
13630		     (const_int 15) (const_int 31)])))]
13631  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13632  "vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13633  [(set_attr "type" "sselog")
13634   (set_attr "prefix" "maybe_evex")
13635   (set_attr "mode" "OI")])
13636
13637(define_insn "vec_interleave_highv8hi<mask_name>"
13638  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13639	(vec_select:V8HI
13640	  (vec_concat:V16HI
13641	    (match_operand:V8HI 1 "register_operand" "0,v")
13642	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13643	  (parallel [(const_int 4) (const_int 12)
13644		     (const_int 5) (const_int 13)
13645		     (const_int 6) (const_int 14)
13646		     (const_int 7) (const_int 15)])))]
13647  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13648  "@
13649   punpckhwd\t{%2, %0|%0, %2}
13650   vpunpckhwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13651  [(set_attr "isa" "noavx,avx")
13652   (set_attr "type" "sselog")
13653   (set_attr "prefix_data16" "1,*")
13654   (set_attr "prefix" "orig,maybe_vex")
13655   (set_attr "mode" "TI")])
13656
13657(define_insn "<mask_codefor>avx512bw_interleave_lowv32hi<mask_name>"
13658  [(set (match_operand:V32HI 0 "register_operand" "=v")
13659	(vec_select:V32HI
13660	  (vec_concat:V64HI
13661	    (match_operand:V32HI 1 "register_operand" "v")
13662	    (match_operand:V32HI 2 "nonimmediate_operand" "vm"))
13663	  (parallel [(const_int 0) (const_int 32)
13664		     (const_int 1) (const_int 33)
13665		     (const_int 2) (const_int 34)
13666		     (const_int 3) (const_int 35)
13667		     (const_int 8) (const_int 40)
13668		     (const_int 9) (const_int 41)
13669		     (const_int 10) (const_int 42)
13670		     (const_int 11) (const_int 43)
13671		     (const_int 16) (const_int 48)
13672		     (const_int 17) (const_int 49)
13673		     (const_int 18) (const_int 50)
13674		     (const_int 19) (const_int 51)
13675		     (const_int 24) (const_int 56)
13676		     (const_int 25) (const_int 57)
13677		     (const_int 26) (const_int 58)
13678		     (const_int 27) (const_int 59)])))]
13679  "TARGET_AVX512BW"
13680  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13681  [(set_attr "type" "sselog")
13682   (set_attr "prefix" "evex")
13683   (set_attr "mode" "XI")])
13684
13685(define_insn "avx2_interleave_lowv16hi<mask_name>"
13686  [(set (match_operand:V16HI 0 "register_operand" "=v")
13687	(vec_select:V16HI
13688	  (vec_concat:V32HI
13689	    (match_operand:V16HI 1 "register_operand" "v")
13690	    (match_operand:V16HI 2 "nonimmediate_operand" "vm"))
13691	  (parallel [(const_int 0) (const_int 16)
13692		     (const_int 1) (const_int 17)
13693		     (const_int 2) (const_int 18)
13694		     (const_int 3) (const_int 19)
13695		     (const_int 8) (const_int 24)
13696		     (const_int 9) (const_int 25)
13697		     (const_int 10) (const_int 26)
13698		     (const_int 11) (const_int 27)])))]
13699  "TARGET_AVX2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13700  "vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13701  [(set_attr "type" "sselog")
13702   (set_attr "prefix" "maybe_evex")
13703   (set_attr "mode" "OI")])
13704
13705(define_insn "vec_interleave_lowv8hi<mask_name>"
13706  [(set (match_operand:V8HI 0 "register_operand" "=x,v")
13707	(vec_select:V8HI
13708	  (vec_concat:V16HI
13709	    (match_operand:V8HI 1 "register_operand" "0,v")
13710	    (match_operand:V8HI 2 "vector_operand" "xBm,vm"))
13711	  (parallel [(const_int 0) (const_int 8)
13712		     (const_int 1) (const_int 9)
13713		     (const_int 2) (const_int 10)
13714		     (const_int 3) (const_int 11)])))]
13715  "TARGET_SSE2 && <mask_avx512vl_condition> && <mask_avx512bw_condition>"
13716  "@
13717   punpcklwd\t{%2, %0|%0, %2}
13718   vpunpcklwd\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13719  [(set_attr "isa" "noavx,avx")
13720   (set_attr "type" "sselog")
13721   (set_attr "prefix_data16" "1,*")
13722   (set_attr "prefix" "orig,maybe_evex")
13723   (set_attr "mode" "TI")])
13724
13725(define_insn "avx2_interleave_highv8si<mask_name>"
13726  [(set (match_operand:V8SI 0 "register_operand" "=v")
13727	(vec_select:V8SI
13728	  (vec_concat:V16SI
13729	    (match_operand:V8SI 1 "register_operand" "v")
13730	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13731	  (parallel [(const_int 2) (const_int 10)
13732		     (const_int 3) (const_int 11)
13733		     (const_int 6) (const_int 14)
13734		     (const_int 7) (const_int 15)])))]
13735  "TARGET_AVX2 && <mask_avx512vl_condition>"
13736  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13737  [(set_attr "type" "sselog")
13738   (set_attr "prefix" "maybe_evex")
13739   (set_attr "mode" "OI")])
13740
13741(define_insn "<mask_codefor>avx512f_interleave_highv16si<mask_name>"
13742  [(set (match_operand:V16SI 0 "register_operand" "=v")
13743	(vec_select:V16SI
13744	  (vec_concat:V32SI
13745	    (match_operand:V16SI 1 "register_operand" "v")
13746	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13747	  (parallel [(const_int 2) (const_int 18)
13748		     (const_int 3) (const_int 19)
13749		     (const_int 6) (const_int 22)
13750		     (const_int 7) (const_int 23)
13751		     (const_int 10) (const_int 26)
13752		     (const_int 11) (const_int 27)
13753		     (const_int 14) (const_int 30)
13754		     (const_int 15) (const_int 31)])))]
13755  "TARGET_AVX512F"
13756  "vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13757  [(set_attr "type" "sselog")
13758   (set_attr "prefix" "evex")
13759   (set_attr "mode" "XI")])
13760
13761
13762(define_insn "vec_interleave_highv4si<mask_name>"
13763  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13764	(vec_select:V4SI
13765	  (vec_concat:V8SI
13766	    (match_operand:V4SI 1 "register_operand" "0,v")
13767	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13768	  (parallel [(const_int 2) (const_int 6)
13769		     (const_int 3) (const_int 7)])))]
13770  "TARGET_SSE2 && <mask_avx512vl_condition>"
13771  "@
13772   punpckhdq\t{%2, %0|%0, %2}
13773   vpunpckhdq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13774  [(set_attr "isa" "noavx,avx")
13775   (set_attr "type" "sselog")
13776   (set_attr "prefix_data16" "1,*")
13777   (set_attr "prefix" "orig,maybe_vex")
13778   (set_attr "mode" "TI")])
13779
13780(define_insn "avx2_interleave_lowv8si<mask_name>"
13781  [(set (match_operand:V8SI 0 "register_operand" "=v")
13782	(vec_select:V8SI
13783	  (vec_concat:V16SI
13784	    (match_operand:V8SI 1 "register_operand" "v")
13785	    (match_operand:V8SI 2 "nonimmediate_operand" "vm"))
13786	  (parallel [(const_int 0) (const_int 8)
13787		     (const_int 1) (const_int 9)
13788		     (const_int 4) (const_int 12)
13789		     (const_int 5) (const_int 13)])))]
13790  "TARGET_AVX2 && <mask_avx512vl_condition>"
13791  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13792  [(set_attr "type" "sselog")
13793   (set_attr "prefix" "maybe_evex")
13794   (set_attr "mode" "OI")])
13795
13796(define_insn "<mask_codefor>avx512f_interleave_lowv16si<mask_name>"
13797  [(set (match_operand:V16SI 0 "register_operand" "=v")
13798	(vec_select:V16SI
13799	  (vec_concat:V32SI
13800	    (match_operand:V16SI 1 "register_operand" "v")
13801	    (match_operand:V16SI 2 "nonimmediate_operand" "vm"))
13802	  (parallel [(const_int 0) (const_int 16)
13803		     (const_int 1) (const_int 17)
13804		     (const_int 4) (const_int 20)
13805		     (const_int 5) (const_int 21)
13806		     (const_int 8) (const_int 24)
13807		     (const_int 9) (const_int 25)
13808		     (const_int 12) (const_int 28)
13809		     (const_int 13) (const_int 29)])))]
13810  "TARGET_AVX512F"
13811  "vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13812  [(set_attr "type" "sselog")
13813   (set_attr "prefix" "evex")
13814   (set_attr "mode" "XI")])
13815
13816(define_insn "vec_interleave_lowv4si<mask_name>"
13817  [(set (match_operand:V4SI 0 "register_operand" "=x,v")
13818	(vec_select:V4SI
13819	  (vec_concat:V8SI
13820	    (match_operand:V4SI 1 "register_operand" "0,v")
13821	    (match_operand:V4SI 2 "vector_operand" "xBm,vm"))
13822	  (parallel [(const_int 0) (const_int 4)
13823		     (const_int 1) (const_int 5)])))]
13824  "TARGET_SSE2 && <mask_avx512vl_condition>"
13825  "@
13826   punpckldq\t{%2, %0|%0, %2}
13827   vpunpckldq\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
13828  [(set_attr "isa" "noavx,avx")
13829   (set_attr "type" "sselog")
13830   (set_attr "prefix_data16" "1,*")
13831   (set_attr "prefix" "orig,vex")
13832   (set_attr "mode" "TI")])
13833
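;; A true full-width interleave of 256-bit integer vectors is built from the
;; lane-wise interleaves above plus a vperm2i128: selector 0x31 collects the
;; high 128-bit lane of each intermediate result (giving the high interleave)
;; and selector 0x20 collects the low lanes (giving the low interleave).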
13834(define_expand "vec_interleave_high<mode>"
13835  [(match_operand:VI_256 0 "register_operand")
13836   (match_operand:VI_256 1 "register_operand")
13837   (match_operand:VI_256 2 "nonimmediate_operand")]
13838 "TARGET_AVX2"
13839{
13840  rtx t1 = gen_reg_rtx (<MODE>mode);
13841  rtx t2 = gen_reg_rtx (<MODE>mode);
13842  rtx t3 = gen_reg_rtx (V4DImode);
13843  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13844  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13845  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13846				gen_lowpart (V4DImode, t2),
13847				GEN_INT (1 + (3 << 4))));
13848  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13849  DONE;
13850})
13851
13852(define_expand "vec_interleave_low<mode>"
13853  [(match_operand:VI_256 0 "register_operand")
13854   (match_operand:VI_256 1 "register_operand")
13855   (match_operand:VI_256 2 "nonimmediate_operand")]
13856 "TARGET_AVX2"
13857{
13858  rtx t1 = gen_reg_rtx (<MODE>mode);
13859  rtx t2 = gen_reg_rtx (<MODE>mode);
13860  rtx t3 = gen_reg_rtx (V4DImode);
13861  emit_insn (gen_avx2_interleave_low<mode> (t1, operands[1], operands[2]));
13862  emit_insn (gen_avx2_interleave_high<mode> (t2, operands[1], operands[2]));
13863  emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, t1),
13864				gen_lowpart (V4DImode, t2),
13865				GEN_INT (0 + (2 << 4))));
13866  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, t3));
13867  DONE;
13868})
13869
13870;; Modes handled by pinsr patterns.
13871(define_mode_iterator PINSR_MODE
13872  [(V16QI "TARGET_SSE4_1") V8HI
13873   (V4SI "TARGET_SSE4_1")
13874   (V2DI "TARGET_SSE4_1 && TARGET_64BIT")])
13875
13876(define_mode_attr sse2p4_1
13877  [(V16QI "sse4_1") (V8HI "sse2")
13878   (V4SI "sse4_1") (V2DI "sse4_1")])
13879
13880(define_mode_attr pinsr_evex_isa
13881  [(V16QI "avx512bw") (V8HI "avx512bw")
13882   (V4SI "avx512dq") (V2DI "avx512dq")])
13883
13884;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
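;; The inserted element position is encoded as a one-hot vec_merge selector;
;; the output code turns it back into the index pinsr expects via exact_log2.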
13885(define_insn "<sse2p4_1>_pinsr<ssemodesuffix>"
13886  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v")
13887	(vec_merge:PINSR_MODE
13888	  (vec_duplicate:PINSR_MODE
13889	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,r,m,r,m"))
13890	  (match_operand:PINSR_MODE 1 "register_operand" "0,0,x,x,v,v")
13891	  (match_operand:SI 3 "const_int_operand")))]
13892  "TARGET_SSE2
13893   && ((unsigned) exact_log2 (INTVAL (operands[3]))
13894       < GET_MODE_NUNITS (<MODE>mode))"
13895{
13896  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
13897
13898  switch (which_alternative)
13899    {
13900    case 0:
13901      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13902	return "pinsr<ssemodesuffix>\t{%3, %k2, %0|%0, %k2, %3}";
13903      /* FALLTHRU */
13904    case 1:
13905      return "pinsr<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}";
13906    case 2:
13907    case 4:
13908      if (GET_MODE_SIZE (<ssescalarmode>mode) < GET_MODE_SIZE (SImode))
13909	return "vpinsr<ssemodesuffix>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
13910      /* FALLTHRU */
13911    case 3:
13912    case 5:
13913      return "vpinsr<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
13914    default:
13915      gcc_unreachable ();
13916    }
13917}
13918  [(set_attr "isa" "noavx,noavx,avx,avx,<pinsr_evex_isa>,<pinsr_evex_isa>")
13919   (set_attr "type" "sselog")
13920   (set (attr "prefix_rex")
13921     (if_then_else
13922       (and (not (match_test "TARGET_AVX"))
13923	    (eq (const_string "<MODE>mode") (const_string "V2DImode")))
13924       (const_string "1")
13925       (const_string "*")))
13926   (set (attr "prefix_data16")
13927     (if_then_else
13928       (and (not (match_test "TARGET_AVX"))
13929	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13930       (const_string "1")
13931       (const_string "*")))
13932   (set (attr "prefix_extra")
13933     (if_then_else
13934       (and (not (match_test "TARGET_AVX"))
13935	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
13936       (const_string "*")
13937       (const_string "1")))
13938   (set_attr "length_immediate" "1")
13939   (set_attr "prefix" "orig,orig,vex,vex,evex,evex")
13940   (set_attr "mode" "TI")])
13941
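;; vinsert<shuffletype><extract_suf> is modelled as a vec_merge whose
;; selector has a clear bit for every element taken from the inserted
;; 128-bit operand; the expander builds that selector from the insertion
;; position and the *_1 insn converts it back to the immediate.  When the
;; merge source is zero, inserting at position 0 degenerates to a plain
;; 128-bit move (the *_0 pattern).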
13942(define_expand "<extract_type>_vinsert<shuffletype><extract_suf>_mask"
13943  [(match_operand:AVX512_VEC 0 "register_operand")
13944   (match_operand:AVX512_VEC 1 "register_operand")
13945   (match_operand:<ssequartermode> 2 "nonimmediate_operand")
13946   (match_operand:SI 3 "const_0_to_3_operand")
13947   (match_operand:AVX512_VEC 4 "register_operand")
13948   (match_operand:<avx512fmaskmode> 5 "register_operand")]
13949  "TARGET_AVX512F"
13950{
13951  int mask, selector;
13952  mask = INTVAL (operands[3]);
13953  selector = (GET_MODE_UNIT_SIZE (<MODE>mode) == 4
13954  	      ? 0xFFFF ^ (0x000F << mask * 4)
13955	      : 0xFF ^ (0x03 << mask * 2));
13956  emit_insn (gen_<extract_type>_vinsert<shuffletype><extract_suf>_1_mask
13957    (operands[0], operands[1], operands[2], GEN_INT (selector),
13958     operands[4], operands[5]));
13959  DONE;
13960})
13961
13962(define_insn "*<extract_type>_vinsert<shuffletype><extract_suf>_0"
13963  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v,x,Yv")
13964	(vec_merge:AVX512_VEC
13965	  (match_operand:AVX512_VEC 1 "reg_or_0_operand" "v,C,C")
13966	  (vec_duplicate:AVX512_VEC
13967		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm,xm,vm"))
13968	  (match_operand:SI 3 "const_int_operand" "n,n,n")))]
13969  "TARGET_AVX512F
13970   && (INTVAL (operands[3])
13971       == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))"
13972{
13973  if (which_alternative == 0)
13974    return "vinsert<shuffletype><extract_suf>\t{$0, %2, %1, %0|%0, %1, %2, 0}";
13975  switch (<MODE>mode)
13976    {
13977    case E_V8DFmode:
13978      if (misaligned_operand (operands[2], <ssequartermode>mode))
13979	return "vmovupd\t{%2, %x0|%x0, %2}";
13980      else
13981	return "vmovapd\t{%2, %x0|%x0, %2}";
13982    case E_V16SFmode:
13983      if (misaligned_operand (operands[2], <ssequartermode>mode))
13984	return "vmovups\t{%2, %x0|%x0, %2}";
13985      else
13986	return "vmovaps\t{%2, %x0|%x0, %2}";
13987    case E_V8DImode:
13988      if (misaligned_operand (operands[2], <ssequartermode>mode))
13989	return which_alternative == 2 ? "vmovdqu64\t{%2, %x0|%x0, %2}"
13990				      : "vmovdqu\t{%2, %x0|%x0, %2}";
13991      else
13992	return which_alternative == 2 ? "vmovdqa64\t{%2, %x0|%x0, %2}"
13993				      : "vmovdqa\t{%2, %x0|%x0, %2}";
13994    case E_V16SImode:
13995      if (misaligned_operand (operands[2], <ssequartermode>mode))
13996	return which_alternative == 2 ? "vmovdqu32\t{%2, %x0|%x0, %2}"
13997				      : "vmovdqu\t{%2, %x0|%x0, %2}";
13998      else
13999	return which_alternative == 2 ? "vmovdqa32\t{%2, %x0|%x0, %2}"
14000				      : "vmovdqa\t{%2, %x0|%x0, %2}";
14001    default:
14002      gcc_unreachable ();
14003    }
14004}
14005  [(set_attr "type" "sselog,ssemov,ssemov")
14006   (set_attr "length_immediate" "1,0,0")
14007   (set_attr "prefix" "evex,vex,evex")
14008   (set_attr "mode" "<sseinsnmode>,<ssequarterinsnmode>,<ssequarterinsnmode>")])
14009
14010(define_insn "<mask_codefor><extract_type>_vinsert<shuffletype><extract_suf>_1<mask_name>"
14011  [(set (match_operand:AVX512_VEC 0 "register_operand" "=v")
14012	(vec_merge:AVX512_VEC
14013	  (match_operand:AVX512_VEC 1 "register_operand" "v")
14014	  (vec_duplicate:AVX512_VEC
14015		(match_operand:<ssequartermode> 2 "nonimmediate_operand" "vm"))
14016	  (match_operand:SI 3 "const_int_operand" "n")))]
14017  "TARGET_AVX512F"
14018{
14019  int mask;
14020  int selector = INTVAL (operands[3]);
14021
14022  if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFFF0 : 0xFC))
14023    mask = 0;
14024  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xFF0F : 0xF3))
14025    mask = 1;
14026  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0xF0FF : 0xCF))
14027    mask = 2;
14028  else if (selector == (GET_MODE_UNIT_SIZE (<MODE>mode) == 4 ? 0x0FFF : 0x3F))
14029    mask = 3;
14030  else
14031    gcc_unreachable ();
14032
14033  operands[3] = GEN_INT (mask);
14034
14035  return "vinsert<shuffletype><extract_suf>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}";
14036}
14037  [(set_attr "type" "sselog")
14038   (set_attr "length_immediate" "1")
14039   (set_attr "prefix" "evex")
14040   (set_attr "mode" "<sseinsnmode>")])
14041
14042(define_expand "<extract_type_2>_vinsert<shuffletype><extract_suf_2>_mask"
14043  [(match_operand:AVX512_VEC_2 0 "register_operand")
14044   (match_operand:AVX512_VEC_2 1 "register_operand")
14045   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
14046   (match_operand:SI 3 "const_0_to_1_operand")
14047   (match_operand:AVX512_VEC_2 4 "register_operand")
14048   (match_operand:<avx512fmaskmode> 5 "register_operand")]
14049  "TARGET_AVX512F"
14050{
14051  int mask = INTVAL (operands[3]);
14052  if (mask == 0)
14053    emit_insn (gen_vec_set_lo_<mode>_mask (operands[0], operands[1],
14054					   operands[2], operands[4],
14055					   operands[5]));
14056  else
14057    emit_insn (gen_vec_set_hi_<mode>_mask (operands[0], operands[1],
14058					   operands[2], operands[4],
14059					   operands[5]));
14060  DONE;
14061})
14062
14063(define_insn "vec_set_lo_<mode><mask_name>"
14064  [(set (match_operand:V16FI 0 "register_operand" "=v")
14065	(vec_concat:V16FI
14066	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14067	  (vec_select:<ssehalfvecmode>
14068	    (match_operand:V16FI 1 "register_operand" "v")
14069	    (parallel [(const_int 8) (const_int 9)
14070		       (const_int 10) (const_int 11)
14071		       (const_int 12) (const_int 13)
14072		       (const_int 14) (const_int 15)]))))]
14073  "TARGET_AVX512DQ"
14074  "vinsert<shuffletype>32x8\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14075  [(set_attr "type" "sselog")
14076   (set_attr "length_immediate" "1")
14077   (set_attr "prefix" "evex")
14078   (set_attr "mode" "<sseinsnmode>")])
14079
14080(define_insn "vec_set_hi_<mode><mask_name>"
14081  [(set (match_operand:V16FI 0 "register_operand" "=v")
14082	(vec_concat:V16FI
14083	  (vec_select:<ssehalfvecmode>
14084	    (match_operand:V16FI 1 "register_operand" "v")
14085	    (parallel [(const_int 0) (const_int 1)
14086		       (const_int 2) (const_int 3)
14087		       (const_int 4) (const_int 5)
14088		       (const_int 6) (const_int 7)]))
14089	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14090  "TARGET_AVX512DQ"
14091  "vinsert<shuffletype>32x8\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14092  [(set_attr "type" "sselog")
14093   (set_attr "length_immediate" "1")
14094   (set_attr "prefix" "evex")
14095   (set_attr "mode" "<sseinsnmode>")])
14096
14097(define_insn "vec_set_lo_<mode><mask_name>"
14098  [(set (match_operand:V8FI 0 "register_operand" "=v")
14099	(vec_concat:V8FI
14100	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
14101	  (vec_select:<ssehalfvecmode>
14102	    (match_operand:V8FI 1 "register_operand" "v")
14103	    (parallel [(const_int 4) (const_int 5)
14104		       (const_int 6) (const_int 7)]))))]
14105  "TARGET_AVX512F"
14106  "vinsert<shuffletype>64x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}"
14107  [(set_attr "type" "sselog")
14108   (set_attr "length_immediate" "1")
14109   (set_attr "prefix" "evex")
14110   (set_attr "mode" "XI")])
14111
14112(define_insn "vec_set_hi_<mode><mask_name>"
14113  [(set (match_operand:V8FI 0 "register_operand" "=v")
14114	(vec_concat:V8FI
14115	  (vec_select:<ssehalfvecmode>
14116	    (match_operand:V8FI 1 "register_operand" "v")
14117	    (parallel [(const_int 0) (const_int 1)
14118		       (const_int 2) (const_int 3)]))
14119	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
14120  "TARGET_AVX512F"
14121  "vinsert<shuffletype>64x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}"
14122  [(set_attr "type" "sselog")
14123   (set_attr "length_immediate" "1")
14124   (set_attr "prefix" "evex")
14125   (set_attr "mode" "XI")])
14126
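;; The vshuf<shuffletype>32x4/64x2 patterns select whole 128-bit chunks: each
;; imm8 field picks one chunk, the low fields from the first source and the
;; high fields from the second.  The expanders below translate those fields
;; into element indices in the double-width concatenation, and the *_1 insns
;; fold the indices back into the immediate.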
14127(define_expand "avx512dq_shuf_<shuffletype>64x2_mask"
14128  [(match_operand:VI8F_256 0 "register_operand")
14129   (match_operand:VI8F_256 1 "register_operand")
14130   (match_operand:VI8F_256 2 "nonimmediate_operand")
14131   (match_operand:SI 3 "const_0_to_3_operand")
14132   (match_operand:VI8F_256 4 "register_operand")
14133   (match_operand:QI 5 "register_operand")]
14134  "TARGET_AVX512DQ"
14135{
14136  int mask = INTVAL (operands[3]);
14137  emit_insn (gen_avx512dq_shuf_<shuffletype>64x2_1_mask
14138      (operands[0], operands[1], operands[2],
14139       GEN_INT (((mask >> 0) & 1) * 2 + 0),
14140       GEN_INT (((mask >> 0) & 1) * 2 + 1),
14141       GEN_INT (((mask >> 1) & 1) * 2 + 4),
14142       GEN_INT (((mask >> 1) & 1) * 2 + 5),
14143       operands[4], operands[5]));
14144  DONE;
14145})
14146
14147(define_insn "<mask_codefor>avx512dq_shuf_<shuffletype>64x2_1<mask_name>"
14148  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
14149	(vec_select:VI8F_256
14150	  (vec_concat:<ssedoublemode>
14151	    (match_operand:VI8F_256 1 "register_operand" "v")
14152	    (match_operand:VI8F_256 2 "nonimmediate_operand" "vm"))
14153	  (parallel [(match_operand 3 "const_0_to_3_operand")
14154		     (match_operand 4 "const_0_to_3_operand")
14155		     (match_operand 5 "const_4_to_7_operand")
14156		     (match_operand 6 "const_4_to_7_operand")])))]
14157  "TARGET_AVX512VL
14158   && (INTVAL (operands[3]) & 1) == 0
14159   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14160   && (INTVAL (operands[5]) & 1) == 0
14161   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1"
14162{
14163  int mask;
14164  mask = INTVAL (operands[3]) / 2;
14165  mask |= (INTVAL (operands[5]) - 4) / 2 << 1;
14166  operands[3] = GEN_INT (mask);
14167  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand7>|%0<mask_operand7>, %1, %2, %3}";
14168}
14169  [(set_attr "type" "sselog")
14170   (set_attr "length_immediate" "1")
14171   (set_attr "prefix" "evex")
14172   (set_attr "mode" "XI")])
14173
14174(define_expand "avx512f_shuf_<shuffletype>64x2_mask"
14175  [(match_operand:V8FI 0 "register_operand")
14176   (match_operand:V8FI 1 "register_operand")
14177   (match_operand:V8FI 2 "nonimmediate_operand")
14178   (match_operand:SI 3 "const_0_to_255_operand")
14179   (match_operand:V8FI 4 "register_operand")
14180   (match_operand:QI 5 "register_operand")]
14181  "TARGET_AVX512F"
14182{
14183  int mask = INTVAL (operands[3]);
14184  emit_insn (gen_avx512f_shuf_<shuffletype>64x2_1_mask
14185      (operands[0], operands[1], operands[2],
14186       GEN_INT (((mask >> 0) & 3) * 2),
14187       GEN_INT (((mask >> 0) & 3) * 2 + 1),
14188       GEN_INT (((mask >> 2) & 3) * 2),
14189       GEN_INT (((mask >> 2) & 3) * 2 + 1),
14190       GEN_INT (((mask >> 4) & 3) * 2 + 8),
14191       GEN_INT (((mask >> 4) & 3) * 2 + 9),
14192       GEN_INT (((mask >> 6) & 3) * 2 + 8),
14193       GEN_INT (((mask >> 6) & 3) * 2 + 9),
14194       operands[4], operands[5]));
14195  DONE;
14196})
14197
14198(define_insn "avx512f_shuf_<shuffletype>64x2_1<mask_name>"
14199  [(set (match_operand:V8FI 0 "register_operand" "=v")
14200	(vec_select:V8FI
14201	  (vec_concat:<ssedoublemode>
14202	    (match_operand:V8FI 1 "register_operand" "v")
14203	    (match_operand:V8FI 2 "nonimmediate_operand" "vm"))
14204	  (parallel [(match_operand 3 "const_0_to_7_operand")
14205		     (match_operand 4 "const_0_to_7_operand")
14206		     (match_operand 5 "const_0_to_7_operand")
14207		     (match_operand 6 "const_0_to_7_operand")
14208		     (match_operand 7 "const_8_to_15_operand")
14209		     (match_operand 8 "const_8_to_15_operand")
14210		     (match_operand 9 "const_8_to_15_operand")
14211		     (match_operand 10 "const_8_to_15_operand")])))]
14212  "TARGET_AVX512F
14213   && (INTVAL (operands[3]) & 1) == 0
14214   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14215   && (INTVAL (operands[5]) & 1) == 0
14216   && INTVAL (operands[5]) == INTVAL (operands[6]) - 1
14217   && (INTVAL (operands[7]) & 1) == 0
14218   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14219   && (INTVAL (operands[9]) & 1) == 0
14220   && INTVAL (operands[9]) == INTVAL (operands[10]) - 1"
14221{
14222  int mask;
14223  mask = INTVAL (operands[3]) / 2;
14224  mask |= INTVAL (operands[5]) / 2 << 2;
14225  mask |= (INTVAL (operands[7]) - 8) / 2 << 4;
14226  mask |= (INTVAL (operands[9]) - 8) / 2 << 6;
14227  operands[3] = GEN_INT (mask);
14228
14229  return "vshuf<shuffletype>64x2\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14230}
14231  [(set_attr "type" "sselog")
14232   (set_attr "length_immediate" "1")
14233   (set_attr "prefix" "evex")
14234   (set_attr "mode" "<sseinsnmode>")])
14235
14236(define_expand "avx512vl_shuf_<shuffletype>32x4_mask"
14237  [(match_operand:VI4F_256 0 "register_operand")
14238   (match_operand:VI4F_256 1 "register_operand")
14239   (match_operand:VI4F_256 2 "nonimmediate_operand")
14240   (match_operand:SI 3 "const_0_to_3_operand")
14241   (match_operand:VI4F_256 4 "register_operand")
14242   (match_operand:QI 5 "register_operand")]
14243  "TARGET_AVX512VL"
14244{
14245  int mask = INTVAL (operands[3]);
14246  emit_insn (gen_avx512vl_shuf_<shuffletype>32x4_1_mask
14247      (operands[0], operands[1], operands[2],
14248       GEN_INT (((mask >> 0) & 1) * 4 + 0),
14249       GEN_INT (((mask >> 0) & 1) * 4 + 1),
14250       GEN_INT (((mask >> 0) & 1) * 4 + 2),
14251       GEN_INT (((mask >> 0) & 1) * 4 + 3),
14252       GEN_INT (((mask >> 1) & 1) * 4 + 8),
14253       GEN_INT (((mask >> 1) & 1) * 4 + 9),
14254       GEN_INT (((mask >> 1) & 1) * 4 + 10),
14255       GEN_INT (((mask >> 1) & 1) * 4 + 11),
14256       operands[4], operands[5]));
14257  DONE;
14258})
14259
14260(define_insn "avx512vl_shuf_<shuffletype>32x4_1<mask_name>"
14261  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
14262	(vec_select:VI4F_256
14263	  (vec_concat:<ssedoublemode>
14264	    (match_operand:VI4F_256 1 "register_operand" "v")
14265	    (match_operand:VI4F_256 2 "nonimmediate_operand" "vm"))
14266	  (parallel [(match_operand 3 "const_0_to_7_operand")
14267		     (match_operand 4 "const_0_to_7_operand")
14268		     (match_operand 5 "const_0_to_7_operand")
14269		     (match_operand 6 "const_0_to_7_operand")
14270		     (match_operand 7 "const_8_to_15_operand")
14271		     (match_operand 8 "const_8_to_15_operand")
14272		     (match_operand 9 "const_8_to_15_operand")
14273		     (match_operand 10 "const_8_to_15_operand")])))]
14274  "TARGET_AVX512VL
14275   && (INTVAL (operands[3]) & 3) == 0
14276   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14277   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14278   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14279   && (INTVAL (operands[7]) & 3) == 0
14280   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14281   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14282   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3"
14283{
14284  int mask;
14285  mask = INTVAL (operands[3]) / 4;
14286  mask |= (INTVAL (operands[7]) - 8) / 4 << 1;
14287  operands[3] = GEN_INT (mask);
14288
14289  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand11>|%0<mask_operand11>, %1, %2, %3}";
14290}
14291  [(set_attr "type" "sselog")
14292   (set_attr "length_immediate" "1")
14293   (set_attr "prefix" "evex")
14294   (set_attr "mode" "<sseinsnmode>")])
14295
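;; The 512-bit forms use all four 2-bit imm8 fields: fields 0 and 1
;; select 128-bit lanes of operand 1 (element indices 0-15), fields 2
;; and 3 select lanes of operand 2 (indices 16-31).  The expander turns
;; each field into four consecutive dword selectors and the insn below
;; reverses that mapping when reconstructing the immediate.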
14296(define_expand "avx512f_shuf_<shuffletype>32x4_mask"
14297  [(match_operand:V16FI 0 "register_operand")
14298   (match_operand:V16FI 1 "register_operand")
14299   (match_operand:V16FI 2 "nonimmediate_operand")
14300   (match_operand:SI 3 "const_0_to_255_operand")
14301   (match_operand:V16FI 4 "register_operand")
14302   (match_operand:HI 5 "register_operand")]
14303  "TARGET_AVX512F"
14304{
14305  int mask = INTVAL (operands[3]);
14306  emit_insn (gen_avx512f_shuf_<shuffletype>32x4_1_mask
14307      (operands[0], operands[1], operands[2],
14308       GEN_INT (((mask >> 0) & 3) * 4),
14309       GEN_INT (((mask >> 0) & 3) * 4 + 1),
14310       GEN_INT (((mask >> 0) & 3) * 4 + 2),
14311       GEN_INT (((mask >> 0) & 3) * 4 + 3),
14312       GEN_INT (((mask >> 2) & 3) * 4),
14313       GEN_INT (((mask >> 2) & 3) * 4 + 1),
14314       GEN_INT (((mask >> 2) & 3) * 4 + 2),
14315       GEN_INT (((mask >> 2) & 3) * 4 + 3),
14316       GEN_INT (((mask >> 4) & 3) * 4 + 16),
14317       GEN_INT (((mask >> 4) & 3) * 4 + 17),
14318       GEN_INT (((mask >> 4) & 3) * 4 + 18),
14319       GEN_INT (((mask >> 4) & 3) * 4 + 19),
14320       GEN_INT (((mask >> 6) & 3) * 4 + 16),
14321       GEN_INT (((mask >> 6) & 3) * 4 + 17),
14322       GEN_INT (((mask >> 6) & 3) * 4 + 18),
14323       GEN_INT (((mask >> 6) & 3) * 4 + 19),
14324       operands[4], operands[5]));
14325  DONE;
14326})
14327
14328(define_insn "avx512f_shuf_<shuffletype>32x4_1<mask_name>"
14329  [(set (match_operand:V16FI 0 "register_operand" "=v")
14330	(vec_select:V16FI
14331	  (vec_concat:<ssedoublemode>
14332	    (match_operand:V16FI 1 "register_operand" "v")
14333	    (match_operand:V16FI 2 "nonimmediate_operand" "vm"))
14334	  (parallel [(match_operand 3 "const_0_to_15_operand")
14335		     (match_operand 4 "const_0_to_15_operand")
14336		     (match_operand 5 "const_0_to_15_operand")
14337		     (match_operand 6 "const_0_to_15_operand")
14338		     (match_operand 7 "const_0_to_15_operand")
14339		     (match_operand 8 "const_0_to_15_operand")
14340		     (match_operand 9 "const_0_to_15_operand")
14341		     (match_operand 10 "const_0_to_15_operand")
14342		     (match_operand 11 "const_16_to_31_operand")
14343		     (match_operand 12 "const_16_to_31_operand")
14344		     (match_operand 13 "const_16_to_31_operand")
14345		     (match_operand 14 "const_16_to_31_operand")
14346		     (match_operand 15 "const_16_to_31_operand")
14347		     (match_operand 16 "const_16_to_31_operand")
14348		     (match_operand 17 "const_16_to_31_operand")
14349		     (match_operand 18 "const_16_to_31_operand")])))]
14350  "TARGET_AVX512F
14351   && (INTVAL (operands[3]) & 3) == 0
14352   && INTVAL (operands[3]) == INTVAL (operands[4]) - 1
14353   && INTVAL (operands[3]) == INTVAL (operands[5]) - 2
14354   && INTVAL (operands[3]) == INTVAL (operands[6]) - 3
14355   && (INTVAL (operands[7]) & 3) == 0
14356   && INTVAL (operands[7]) == INTVAL (operands[8]) - 1
14357   && INTVAL (operands[7]) == INTVAL (operands[9]) - 2
14358   && INTVAL (operands[7]) == INTVAL (operands[10]) - 3
14359   && (INTVAL (operands[11]) & 3) == 0
14360   && INTVAL (operands[11]) == INTVAL (operands[12]) - 1
14361   && INTVAL (operands[11]) == INTVAL (operands[13]) - 2
14362   && INTVAL (operands[11]) == INTVAL (operands[14]) - 3
14363   && (INTVAL (operands[15]) & 3) == 0
14364   && INTVAL (operands[15]) == INTVAL (operands[16]) - 1
14365   && INTVAL (operands[15]) == INTVAL (operands[17]) - 2
14366   && INTVAL (operands[15]) == INTVAL (operands[18]) - 3"
14367{
14368  int mask;
14369  mask = INTVAL (operands[3]) / 4;
14370  mask |= INTVAL (operands[7]) / 4 << 2;
14371  mask |= (INTVAL (operands[11]) - 16) / 4 << 4;
14372  mask |= (INTVAL (operands[15]) - 16) / 4 << 6;
14373  operands[3] = GEN_INT (mask);
14374
14375  return "vshuf<shuffletype>32x4\t{%3, %2, %1, %0<mask_operand19>|%0<mask_operand19>, %1, %2, %3}";
14376}
14377  [(set_attr "type" "sselog")
14378   (set_attr "length_immediate" "1")
14379   (set_attr "prefix" "evex")
14380   (set_attr "mode" "<sseinsnmode>")])
14381
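;; vpshufd applies the same four 2-bit dword selectors to every 128-bit
;; lane, so the expander replicates the decoded imm8 at offsets 0, 4, 8
;; and 12 and the insn condition checks that replication.  For example,
;; imm8 0x1b reverses the dwords of each lane, i.e. the selection
;; (3 2 1 0 7 6 5 4 11 10 9 8 15 14 13 12).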
14382(define_expand "avx512f_pshufdv3_mask"
14383  [(match_operand:V16SI 0 "register_operand")
14384   (match_operand:V16SI 1 "nonimmediate_operand")
14385   (match_operand:SI 2 "const_0_to_255_operand")
14386   (match_operand:V16SI 3 "register_operand")
14387   (match_operand:HI 4 "register_operand")]
14388  "TARGET_AVX512F"
14389{
14390  int mask = INTVAL (operands[2]);
14391  emit_insn (gen_avx512f_pshufd_1_mask (operands[0], operands[1],
14392				       GEN_INT ((mask >> 0) & 3),
14393				       GEN_INT ((mask >> 2) & 3),
14394				       GEN_INT ((mask >> 4) & 3),
14395				       GEN_INT ((mask >> 6) & 3),
14396				       GEN_INT (((mask >> 0) & 3) + 4),
14397				       GEN_INT (((mask >> 2) & 3) + 4),
14398				       GEN_INT (((mask >> 4) & 3) + 4),
14399				       GEN_INT (((mask >> 6) & 3) + 4),
14400				       GEN_INT (((mask >> 0) & 3) + 8),
14401				       GEN_INT (((mask >> 2) & 3) + 8),
14402				       GEN_INT (((mask >> 4) & 3) + 8),
14403				       GEN_INT (((mask >> 6) & 3) + 8),
14404				       GEN_INT (((mask >> 0) & 3) + 12),
14405				       GEN_INT (((mask >> 2) & 3) + 12),
14406				       GEN_INT (((mask >> 4) & 3) + 12),
14407				       GEN_INT (((mask >> 6) & 3) + 12),
14408				       operands[3], operands[4]));
14409  DONE;
14410})
14411
14412(define_insn "avx512f_pshufd_1<mask_name>"
14413  [(set (match_operand:V16SI 0 "register_operand" "=v")
14414	(vec_select:V16SI
14415	  (match_operand:V16SI 1 "nonimmediate_operand" "vm")
14416	  (parallel [(match_operand 2 "const_0_to_3_operand")
14417		     (match_operand 3 "const_0_to_3_operand")
14418		     (match_operand 4 "const_0_to_3_operand")
14419		     (match_operand 5 "const_0_to_3_operand")
14420		     (match_operand 6 "const_4_to_7_operand")
14421		     (match_operand 7 "const_4_to_7_operand")
14422		     (match_operand 8 "const_4_to_7_operand")
14423		     (match_operand 9 "const_4_to_7_operand")
14424		     (match_operand 10 "const_8_to_11_operand")
14425		     (match_operand 11 "const_8_to_11_operand")
14426		     (match_operand 12 "const_8_to_11_operand")
14427		     (match_operand 13 "const_8_to_11_operand")
14428		     (match_operand 14 "const_12_to_15_operand")
14429		     (match_operand 15 "const_12_to_15_operand")
14430		     (match_operand 16 "const_12_to_15_operand")
14431		     (match_operand 17 "const_12_to_15_operand")])))]
14432  "TARGET_AVX512F
14433   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14434   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14435   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14436   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])
14437   && INTVAL (operands[2]) + 8 == INTVAL (operands[10])
14438   && INTVAL (operands[3]) + 8 == INTVAL (operands[11])
14439   && INTVAL (operands[4]) + 8 == INTVAL (operands[12])
14440   && INTVAL (operands[5]) + 8 == INTVAL (operands[13])
14441   && INTVAL (operands[2]) + 12 == INTVAL (operands[14])
14442   && INTVAL (operands[3]) + 12 == INTVAL (operands[15])
14443   && INTVAL (operands[4]) + 12 == INTVAL (operands[16])
14444   && INTVAL (operands[5]) + 12 == INTVAL (operands[17])"
14445{
14446  int mask = 0;
14447  mask |= INTVAL (operands[2]) << 0;
14448  mask |= INTVAL (operands[3]) << 2;
14449  mask |= INTVAL (operands[4]) << 4;
14450  mask |= INTVAL (operands[5]) << 6;
14451  operands[2] = GEN_INT (mask);
14452
14453  return "vpshufd\t{%2, %1, %0<mask_operand18>|%0<mask_operand18>, %1, %2}";
14454}
14455  [(set_attr "type" "sselog1")
14456   (set_attr "prefix" "evex")
14457   (set_attr "length_immediate" "1")
14458   (set_attr "mode" "XI")])
14459
14460(define_expand "avx512vl_pshufdv3_mask"
14461  [(match_operand:V8SI 0 "register_operand")
14462   (match_operand:V8SI 1 "nonimmediate_operand")
14463   (match_operand:SI 2 "const_0_to_255_operand")
14464   (match_operand:V8SI 3 "register_operand")
14465   (match_operand:QI 4 "register_operand")]
14466  "TARGET_AVX512VL"
14467{
14468  int mask = INTVAL (operands[2]);
14469  emit_insn (gen_avx2_pshufd_1_mask (operands[0], operands[1],
14470				GEN_INT ((mask >> 0) & 3),
14471				GEN_INT ((mask >> 2) & 3),
14472				GEN_INT ((mask >> 4) & 3),
14473				GEN_INT ((mask >> 6) & 3),
14474				GEN_INT (((mask >> 0) & 3) + 4),
14475				GEN_INT (((mask >> 2) & 3) + 4),
14476				GEN_INT (((mask >> 4) & 3) + 4),
14477				GEN_INT (((mask >> 6) & 3) + 4),
14478                operands[3], operands[4]));
14479  DONE;
14480})
14481
14482(define_expand "avx2_pshufdv3"
14483  [(match_operand:V8SI 0 "register_operand")
14484   (match_operand:V8SI 1 "nonimmediate_operand")
14485   (match_operand:SI 2 "const_0_to_255_operand")]
14486  "TARGET_AVX2"
14487{
14488  int mask = INTVAL (operands[2]);
14489  emit_insn (gen_avx2_pshufd_1 (operands[0], operands[1],
14490				GEN_INT ((mask >> 0) & 3),
14491				GEN_INT ((mask >> 2) & 3),
14492				GEN_INT ((mask >> 4) & 3),
14493				GEN_INT ((mask >> 6) & 3),
14494				GEN_INT (((mask >> 0) & 3) + 4),
14495				GEN_INT (((mask >> 2) & 3) + 4),
14496				GEN_INT (((mask >> 4) & 3) + 4),
14497				GEN_INT (((mask >> 6) & 3) + 4)));
14498  DONE;
14499})
14500
14501(define_insn "avx2_pshufd_1<mask_name>"
14502  [(set (match_operand:V8SI 0 "register_operand" "=v")
14503	(vec_select:V8SI
14504	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")
14505	  (parallel [(match_operand 2 "const_0_to_3_operand")
14506		     (match_operand 3 "const_0_to_3_operand")
14507		     (match_operand 4 "const_0_to_3_operand")
14508		     (match_operand 5 "const_0_to_3_operand")
14509		     (match_operand 6 "const_4_to_7_operand")
14510		     (match_operand 7 "const_4_to_7_operand")
14511		     (match_operand 8 "const_4_to_7_operand")
14512		     (match_operand 9 "const_4_to_7_operand")])))]
14513  "TARGET_AVX2
14514   && <mask_avx512vl_condition>
14515   && INTVAL (operands[2]) + 4 == INTVAL (operands[6])
14516   && INTVAL (operands[3]) + 4 == INTVAL (operands[7])
14517   && INTVAL (operands[4]) + 4 == INTVAL (operands[8])
14518   && INTVAL (operands[5]) + 4 == INTVAL (operands[9])"
14519{
14520  int mask = 0;
14521  mask |= INTVAL (operands[2]) << 0;
14522  mask |= INTVAL (operands[3]) << 2;
14523  mask |= INTVAL (operands[4]) << 4;
14524  mask |= INTVAL (operands[5]) << 6;
14525  operands[2] = GEN_INT (mask);
14526
14527  return "vpshufd\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14528}
14529  [(set_attr "type" "sselog1")
14530   (set_attr "prefix" "maybe_evex")
14531   (set_attr "length_immediate" "1")
14532   (set_attr "mode" "OI")])
14533
14534(define_expand "avx512vl_pshufd_mask"
14535  [(match_operand:V4SI 0 "register_operand")
14536   (match_operand:V4SI 1 "nonimmediate_operand")
14537   (match_operand:SI 2 "const_0_to_255_operand")
14538   (match_operand:V4SI 3 "register_operand")
14539   (match_operand:QI 4 "register_operand")]
14540  "TARGET_AVX512VL"
14541{
14542  int mask = INTVAL (operands[2]);
14543  emit_insn (gen_sse2_pshufd_1_mask (operands[0], operands[1],
14544				GEN_INT ((mask >> 0) & 3),
14545				GEN_INT ((mask >> 2) & 3),
14546				GEN_INT ((mask >> 4) & 3),
14547				GEN_INT ((mask >> 6) & 3),
14548                operands[3], operands[4]));
14549  DONE;
14550})
14551
14552(define_expand "sse2_pshufd"
14553  [(match_operand:V4SI 0 "register_operand")
14554   (match_operand:V4SI 1 "vector_operand")
14555   (match_operand:SI 2 "const_int_operand")]
14556  "TARGET_SSE2"
14557{
14558  int mask = INTVAL (operands[2]);
14559  emit_insn (gen_sse2_pshufd_1 (operands[0], operands[1],
14560				GEN_INT ((mask >> 0) & 3),
14561				GEN_INT ((mask >> 2) & 3),
14562				GEN_INT ((mask >> 4) & 3),
14563				GEN_INT ((mask >> 6) & 3)));
14564  DONE;
14565})
14566
14567(define_insn "sse2_pshufd_1<mask_name>"
14568  [(set (match_operand:V4SI 0 "register_operand" "=v")
14569	(vec_select:V4SI
14570	  (match_operand:V4SI 1 "vector_operand" "vBm")
14571	  (parallel [(match_operand 2 "const_0_to_3_operand")
14572		     (match_operand 3 "const_0_to_3_operand")
14573		     (match_operand 4 "const_0_to_3_operand")
14574		     (match_operand 5 "const_0_to_3_operand")])))]
14575  "TARGET_SSE2 && <mask_avx512vl_condition>"
14576{
14577  int mask = 0;
14578  mask |= INTVAL (operands[2]) << 0;
14579  mask |= INTVAL (operands[3]) << 2;
14580  mask |= INTVAL (operands[4]) << 4;
14581  mask |= INTVAL (operands[5]) << 6;
14582  operands[2] = GEN_INT (mask);
14583
14584  return "%vpshufd\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14585}
14586  [(set_attr "type" "sselog1")
14587   (set_attr "prefix_data16" "1")
14588   (set_attr "prefix" "<mask_prefix2>")
14589   (set_attr "length_immediate" "1")
14590   (set_attr "mode" "TI")])
14591
14592(define_insn "<mask_codefor>avx512bw_pshuflwv32hi<mask_name>"
14593  [(set (match_operand:V32HI 0 "register_operand" "=v")
14594	(unspec:V32HI
14595	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14596	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
14597	  UNSPEC_PSHUFLW))]
14598  "TARGET_AVX512BW"
14599  "vpshuflw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14600  [(set_attr "type" "sselog")
14601   (set_attr "prefix" "evex")
14602   (set_attr "mode" "XI")])
14603
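;; vpshuflw shuffles only the low four words of each 128-bit lane; the
;; high words (const_int 4-7, plus 12-15 in the 256-bit form) pass
;; through unchanged.  Each 2-bit imm8 field picks one of the low words,
;; e.g. imm8 0x1b gives (3 2 1 0 4 5 6 7 11 10 9 8 12 13 14 15) for the
;; 256-bit form.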
14604(define_expand "avx512vl_pshuflwv3_mask"
14605  [(match_operand:V16HI 0 "register_operand")
14606   (match_operand:V16HI 1 "nonimmediate_operand")
14607   (match_operand:SI 2 "const_0_to_255_operand")
14608   (match_operand:V16HI 3 "register_operand")
14609   (match_operand:HI 4 "register_operand")]
14610  "TARGET_AVX512VL && TARGET_AVX512BW"
14611{
14612  int mask = INTVAL (operands[2]);
14613  emit_insn (gen_avx2_pshuflw_1_mask (operands[0], operands[1],
14614				 GEN_INT ((mask >> 0) & 3),
14615				 GEN_INT ((mask >> 2) & 3),
14616				 GEN_INT ((mask >> 4) & 3),
14617				 GEN_INT ((mask >> 6) & 3),
14618				 GEN_INT (((mask >> 0) & 3) + 8),
14619				 GEN_INT (((mask >> 2) & 3) + 8),
14620				 GEN_INT (((mask >> 4) & 3) + 8),
14621				 GEN_INT (((mask >> 6) & 3) + 8),
14622                 operands[3], operands[4]));
14623  DONE;
14624})
14625
14626(define_expand "avx2_pshuflwv3"
14627  [(match_operand:V16HI 0 "register_operand")
14628   (match_operand:V16HI 1 "nonimmediate_operand")
14629   (match_operand:SI 2 "const_0_to_255_operand")]
14630  "TARGET_AVX2"
14631{
14632  int mask = INTVAL (operands[2]);
14633  emit_insn (gen_avx2_pshuflw_1 (operands[0], operands[1],
14634				 GEN_INT ((mask >> 0) & 3),
14635				 GEN_INT ((mask >> 2) & 3),
14636				 GEN_INT ((mask >> 4) & 3),
14637				 GEN_INT ((mask >> 6) & 3),
14638				 GEN_INT (((mask >> 0) & 3) + 8),
14639				 GEN_INT (((mask >> 2) & 3) + 8),
14640				 GEN_INT (((mask >> 4) & 3) + 8),
14641				 GEN_INT (((mask >> 6) & 3) + 8)));
14642  DONE;
14643})
14644
14645(define_insn "avx2_pshuflw_1<mask_name>"
14646  [(set (match_operand:V16HI 0 "register_operand" "=v")
14647	(vec_select:V16HI
14648	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14649	  (parallel [(match_operand 2 "const_0_to_3_operand")
14650		     (match_operand 3 "const_0_to_3_operand")
14651		     (match_operand 4 "const_0_to_3_operand")
14652		     (match_operand 5 "const_0_to_3_operand")
14653		     (const_int 4)
14654		     (const_int 5)
14655		     (const_int 6)
14656		     (const_int 7)
14657		     (match_operand 6 "const_8_to_11_operand")
14658		     (match_operand 7 "const_8_to_11_operand")
14659		     (match_operand 8 "const_8_to_11_operand")
14660		     (match_operand 9 "const_8_to_11_operand")
14661		     (const_int 12)
14662		     (const_int 13)
14663		     (const_int 14)
14664		     (const_int 15)])))]
14665  "TARGET_AVX2
14666   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14667   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14668   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14669   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14670   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14671{
14672  int mask = 0;
14673  mask |= INTVAL (operands[2]) << 0;
14674  mask |= INTVAL (operands[3]) << 2;
14675  mask |= INTVAL (operands[4]) << 4;
14676  mask |= INTVAL (operands[5]) << 6;
14677  operands[2] = GEN_INT (mask);
14678
14679  return "vpshuflw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14680}
14681  [(set_attr "type" "sselog")
14682   (set_attr "prefix" "maybe_evex")
14683   (set_attr "length_immediate" "1")
14684   (set_attr "mode" "OI")])
14685
14686(define_expand "avx512vl_pshuflw_mask"
14687  [(match_operand:V8HI 0 "register_operand")
14688   (match_operand:V8HI 1 "nonimmediate_operand")
14689   (match_operand:SI 2 "const_0_to_255_operand")
14690   (match_operand:V8HI 3 "register_operand")
14691   (match_operand:QI 4 "register_operand")]
14692  "TARGET_AVX512VL && TARGET_AVX512BW"
14693{
14694  int mask = INTVAL (operands[2]);
14695  emit_insn (gen_sse2_pshuflw_1_mask (operands[0], operands[1],
14696				 GEN_INT ((mask >> 0) & 3),
14697				 GEN_INT ((mask >> 2) & 3),
14698				 GEN_INT ((mask >> 4) & 3),
14699				 GEN_INT ((mask >> 6) & 3),
14700                 operands[3], operands[4]));
14701  DONE;
14702})
14703
14704(define_expand "sse2_pshuflw"
14705  [(match_operand:V8HI 0 "register_operand")
14706   (match_operand:V8HI 1 "vector_operand")
14707   (match_operand:SI 2 "const_int_operand")]
14708  "TARGET_SSE2"
14709{
14710  int mask = INTVAL (operands[2]);
14711  emit_insn (gen_sse2_pshuflw_1 (operands[0], operands[1],
14712				 GEN_INT ((mask >> 0) & 3),
14713				 GEN_INT ((mask >> 2) & 3),
14714				 GEN_INT ((mask >> 4) & 3),
14715				 GEN_INT ((mask >> 6) & 3)));
14716  DONE;
14717})
14718
14719(define_insn "sse2_pshuflw_1<mask_name>"
14720  [(set (match_operand:V8HI 0 "register_operand" "=v")
14721	(vec_select:V8HI
14722	  (match_operand:V8HI 1 "vector_operand" "vBm")
14723	  (parallel [(match_operand 2 "const_0_to_3_operand")
14724		     (match_operand 3 "const_0_to_3_operand")
14725		     (match_operand 4 "const_0_to_3_operand")
14726		     (match_operand 5 "const_0_to_3_operand")
14727		     (const_int 4)
14728		     (const_int 5)
14729		     (const_int 6)
14730		     (const_int 7)])))]
14731  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14732{
14733  int mask = 0;
14734  mask |= INTVAL (operands[2]) << 0;
14735  mask |= INTVAL (operands[3]) << 2;
14736  mask |= INTVAL (operands[4]) << 4;
14737  mask |= INTVAL (operands[5]) << 6;
14738  operands[2] = GEN_INT (mask);
14739
14740  return "%vpshuflw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14741}
14742  [(set_attr "type" "sselog")
14743   (set_attr "prefix_data16" "0")
14744   (set_attr "prefix_rep" "1")
14745   (set_attr "prefix" "maybe_vex")
14746   (set_attr "length_immediate" "1")
14747   (set_attr "mode" "TI")])
14748
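;; vpshufhw is the mirror image of vpshuflw: the low four words of each
;; lane pass through (selectors 0-3 and 8-11) while the imm8 fields pick
;; among the high words, so the expanders add 4 (and 12 for the upper
;; lane) to the decoded fields and the insns subtract 4 again when
;; rebuilding the immediate.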
14749(define_expand "avx2_pshufhwv3"
14750  [(match_operand:V16HI 0 "register_operand")
14751   (match_operand:V16HI 1 "nonimmediate_operand")
14752   (match_operand:SI 2 "const_0_to_255_operand")]
14753  "TARGET_AVX2"
14754{
14755  int mask = INTVAL (operands[2]);
14756  emit_insn (gen_avx2_pshufhw_1 (operands[0], operands[1],
14757				 GEN_INT (((mask >> 0) & 3) + 4),
14758				 GEN_INT (((mask >> 2) & 3) + 4),
14759				 GEN_INT (((mask >> 4) & 3) + 4),
14760				 GEN_INT (((mask >> 6) & 3) + 4),
14761				 GEN_INT (((mask >> 0) & 3) + 12),
14762				 GEN_INT (((mask >> 2) & 3) + 12),
14763				 GEN_INT (((mask >> 4) & 3) + 12),
14764				 GEN_INT (((mask >> 6) & 3) + 12)));
14765  DONE;
14766})
14767
14768(define_insn "<mask_codefor>avx512bw_pshufhwv32hi<mask_name>"
14769  [(set (match_operand:V32HI 0 "register_operand" "=v")
14770	(unspec:V32HI
14771	  [(match_operand:V32HI 1 "nonimmediate_operand" "vm")
14772	   (match_operand:SI 2 "const_0_to_255_operand" "n")]
14773	  UNSPEC_PSHUFHW))]
14774  "TARGET_AVX512BW"
14775  "vpshufhw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
14776  [(set_attr "type" "sselog")
14777   (set_attr "prefix" "evex")
14778   (set_attr "mode" "XI")])
14779
14780(define_expand "avx512vl_pshufhwv3_mask"
14781  [(match_operand:V16HI 0 "register_operand")
14782   (match_operand:V16HI 1 "nonimmediate_operand")
14783   (match_operand:SI 2 "const_0_to_255_operand")
14784   (match_operand:V16HI 3 "register_operand")
14785   (match_operand:HI 4 "register_operand")]
14786  "TARGET_AVX512VL && TARGET_AVX512BW"
14787{
14788  int mask = INTVAL (operands[2]);
14789  emit_insn (gen_avx2_pshufhw_1_mask (operands[0], operands[1],
14790				 GEN_INT (((mask >> 0) & 3) + 4),
14791				 GEN_INT (((mask >> 2) & 3) + 4),
14792				 GEN_INT (((mask >> 4) & 3) + 4),
14793				 GEN_INT (((mask >> 6) & 3) + 4),
14794				 GEN_INT (((mask >> 0) & 3) + 12),
14795				 GEN_INT (((mask >> 2) & 3) + 12),
14796				 GEN_INT (((mask >> 4) & 3) + 12),
14797				 GEN_INT (((mask >> 6) & 3) + 12),
14798                 operands[3], operands[4]));
14799  DONE;
14800})
14801
14802(define_insn "avx2_pshufhw_1<mask_name>"
14803  [(set (match_operand:V16HI 0 "register_operand" "=v")
14804	(vec_select:V16HI
14805	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")
14806	  (parallel [(const_int 0)
14807		     (const_int 1)
14808		     (const_int 2)
14809		     (const_int 3)
14810		     (match_operand 2 "const_4_to_7_operand")
14811		     (match_operand 3 "const_4_to_7_operand")
14812		     (match_operand 4 "const_4_to_7_operand")
14813		     (match_operand 5 "const_4_to_7_operand")
14814		     (const_int 8)
14815		     (const_int 9)
14816		     (const_int 10)
14817		     (const_int 11)
14818		     (match_operand 6 "const_12_to_15_operand")
14819		     (match_operand 7 "const_12_to_15_operand")
14820		     (match_operand 8 "const_12_to_15_operand")
14821		     (match_operand 9 "const_12_to_15_operand")])))]
14822  "TARGET_AVX2
14823   && <mask_avx512bw_condition> && <mask_avx512vl_condition>
14824   && INTVAL (operands[2]) + 8 == INTVAL (operands[6])
14825   && INTVAL (operands[3]) + 8 == INTVAL (operands[7])
14826   && INTVAL (operands[4]) + 8 == INTVAL (operands[8])
14827   && INTVAL (operands[5]) + 8 == INTVAL (operands[9])"
14828{
14829  int mask = 0;
14830  mask |= (INTVAL (operands[2]) - 4) << 0;
14831  mask |= (INTVAL (operands[3]) - 4) << 2;
14832  mask |= (INTVAL (operands[4]) - 4) << 4;
14833  mask |= (INTVAL (operands[5]) - 4) << 6;
14834  operands[2] = GEN_INT (mask);
14835
14836  return "vpshufhw\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
14837}
14838  [(set_attr "type" "sselog")
14839   (set_attr "prefix" "maybe_evex")
14840   (set_attr "length_immediate" "1")
14841   (set_attr "mode" "OI")])
14842
14843(define_expand "avx512vl_pshufhw_mask"
14844  [(match_operand:V8HI 0 "register_operand")
14845   (match_operand:V8HI 1 "nonimmediate_operand")
14846   (match_operand:SI 2 "const_0_to_255_operand")
14847   (match_operand:V8HI 3 "register_operand")
14848   (match_operand:QI 4 "register_operand")]
14849  "TARGET_AVX512VL && TARGET_AVX512BW"
14850{
14851  int mask = INTVAL (operands[2]);
14852  emit_insn (gen_sse2_pshufhw_1_mask (operands[0], operands[1],
14853				 GEN_INT (((mask >> 0) & 3) + 4),
14854				 GEN_INT (((mask >> 2) & 3) + 4),
14855				 GEN_INT (((mask >> 4) & 3) + 4),
14856				 GEN_INT (((mask >> 6) & 3) + 4),
14857                 operands[3], operands[4]));
14858  DONE;
14859})
14860
14861(define_expand "sse2_pshufhw"
14862  [(match_operand:V8HI 0 "register_operand")
14863   (match_operand:V8HI 1 "vector_operand")
14864   (match_operand:SI 2 "const_int_operand")]
14865  "TARGET_SSE2"
14866{
14867  int mask = INTVAL (operands[2]);
14868  emit_insn (gen_sse2_pshufhw_1 (operands[0], operands[1],
14869				 GEN_INT (((mask >> 0) & 3) + 4),
14870				 GEN_INT (((mask >> 2) & 3) + 4),
14871				 GEN_INT (((mask >> 4) & 3) + 4),
14872				 GEN_INT (((mask >> 6) & 3) + 4)));
14873  DONE;
14874})
14875
14876(define_insn "sse2_pshufhw_1<mask_name>"
14877  [(set (match_operand:V8HI 0 "register_operand" "=v")
14878	(vec_select:V8HI
14879	  (match_operand:V8HI 1 "vector_operand" "vBm")
14880	  (parallel [(const_int 0)
14881		     (const_int 1)
14882		     (const_int 2)
14883		     (const_int 3)
14884		     (match_operand 2 "const_4_to_7_operand")
14885		     (match_operand 3 "const_4_to_7_operand")
14886		     (match_operand 4 "const_4_to_7_operand")
14887		     (match_operand 5 "const_4_to_7_operand")])))]
14888  "TARGET_SSE2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
14889{
14890  int mask = 0;
14891  mask |= (INTVAL (operands[2]) - 4) << 0;
14892  mask |= (INTVAL (operands[3]) - 4) << 2;
14893  mask |= (INTVAL (operands[4]) - 4) << 4;
14894  mask |= (INTVAL (operands[5]) - 4) << 6;
14895  operands[2] = GEN_INT (mask);
14896
14897  return "%vpshufhw\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
14898}
14899  [(set_attr "type" "sselog")
14900   (set_attr "prefix_rep" "1")
14901   (set_attr "prefix_data16" "0")
14902   (set_attr "prefix" "maybe_vex")
14903   (set_attr "length_immediate" "1")
14904   (set_attr "mode" "TI")])
14905
14906(define_expand "sse2_loadd"
14907  [(set (match_operand:V4SI 0 "register_operand")
14908	(vec_merge:V4SI
14909	  (vec_duplicate:V4SI
14910	    (match_operand:SI 1 "nonimmediate_operand"))
14911	  (match_dup 2)
14912	  (const_int 1)))]
14913  "TARGET_SSE"
14914  "operands[2] = CONST0_RTX (V4SImode);")
14915
14916(define_insn "sse2_loadld"
14917  [(set (match_operand:V4SI 0 "register_operand"       "=v,v,x,x,v")
14918	(vec_merge:V4SI
14919	  (vec_duplicate:V4SI
14920	    (match_operand:SI 2 "nonimmediate_operand" "m ,r ,m,x,v"))
14921	  (match_operand:V4SI 1 "reg_or_0_operand"     "C ,C ,C,0,v")
14922	  (const_int 1)))]
14923  "TARGET_SSE"
14924  "@
14925   %vmovd\t{%2, %0|%0, %2}
14926   %vmovd\t{%2, %0|%0, %2}
14927   movss\t{%2, %0|%0, %2}
14928   movss\t{%2, %0|%0, %2}
14929   vmovss\t{%2, %1, %0|%0, %1, %2}"
14930  [(set_attr "isa" "sse2,sse2,noavx,noavx,avx")
14931   (set_attr "type" "ssemov")
14932   (set_attr "prefix" "maybe_vex,maybe_vex,orig,orig,maybe_evex")
14933   (set_attr "mode" "TI,TI,V4SF,SF,SF")
14934   (set (attr "preferred_for_speed")
14935     (cond [(eq_attr "alternative" "1")
14936	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
14937	   ]
14938	   (symbol_ref "true")))])
14939
;; QImode and HImode elements are handled by the pextr patterns.
14941(define_mode_iterator PEXTR_MODE12
14942  [(V16QI "TARGET_SSE4_1") V8HI])
14943
14944(define_insn "*vec_extract<mode>"
14945  [(set (match_operand:<ssescalarmode> 0 "register_sse4nonimm_operand" "=r,m,r,m")
14946	(vec_select:<ssescalarmode>
14947	  (match_operand:PEXTR_MODE12 1 "register_operand" "x,x,v,v")
14948	  (parallel
14949	    [(match_operand:SI 2 "const_0_to_<ssescalarnummask>_operand")])))]
14950  "TARGET_SSE2"
14951  "@
14952   %vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14953   %vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}
14954   vpextr<ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14955   vpextr<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
14956  [(set_attr "isa" "*,sse4,avx512bw,avx512bw")
14957   (set_attr "type" "sselog1")
14958   (set_attr "prefix_data16" "1")
14959   (set (attr "prefix_extra")
14960     (if_then_else
14961       (and (eq_attr "alternative" "0,2")
14962	    (eq (const_string "<MODE>mode") (const_string "V8HImode")))
14963       (const_string "*")
14964       (const_string "1")))
14965   (set_attr "length_immediate" "1")
14966   (set_attr "prefix" "maybe_vex,maybe_vex,evex,evex")
14967   (set_attr "mode" "TI")])
14968
14969(define_insn "*vec_extract<PEXTR_MODE12:mode>_zext"
14970  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
14971	(zero_extend:SWI48
14972	  (vec_select:<PEXTR_MODE12:ssescalarmode>
14973	    (match_operand:PEXTR_MODE12 1 "register_operand" "x,v")
14974	    (parallel
14975	      [(match_operand:SI 2
14976		"const_0_to_<PEXTR_MODE12:ssescalarnummask>_operand")]))))]
14977  "TARGET_SSE2"
14978  "@
14979   %vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}
14980   vpextr<PEXTR_MODE12:ssemodesuffix>\t{%2, %1, %k0|%k0, %1, %2}"
14981  [(set_attr "isa" "*,avx512bw")
14982   (set_attr "type" "sselog1")
14983   (set_attr "prefix_data16" "1")
14984   (set (attr "prefix_extra")
14985     (if_then_else
14986       (eq (const_string "<PEXTR_MODE12:MODE>mode") (const_string "V8HImode"))
14987       (const_string "*")
14988       (const_string "1")))
14989   (set_attr "length_immediate" "1")
14990   (set_attr "prefix" "maybe_vex")
14991   (set_attr "mode" "TI")])
14992
14993(define_insn "*vec_extractv16qi_zext"
14994  [(set (match_operand:HI 0 "register_operand" "=r,r")
14995	(zero_extend:HI
14996	  (vec_select:QI
14997	    (match_operand:V16QI 1 "register_operand" "x,v")
14998	    (parallel
14999	      [(match_operand:SI 2 "const_0_to_15_operand")]))))]
15000  "TARGET_SSE4_1"
15001  "@
15002   %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
15003   vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
15004  [(set_attr "isa" "*,avx512bw")
15005   (set_attr "type" "sselog1")
15006   (set_attr "prefix_data16" "1")
15007   (set_attr "prefix_extra" "1")
15008   (set_attr "length_immediate" "1")
15009   (set_attr "prefix" "maybe_vex")
15010   (set_attr "mode" "TI")])
15011
15012(define_insn "*vec_extract<mode>_mem"
15013  [(set (match_operand:<ssescalarmode> 0 "register_operand" "=r")
15014	(vec_select:<ssescalarmode>
15015	  (match_operand:VI12_128 1 "memory_operand" "o")
15016	  (parallel
15017	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15018  "TARGET_SSE"
15019  "#")
15020
15021(define_insn "*vec_extract<ssevecmodelower>_0"
15022  [(set (match_operand:SWI48 0 "nonimmediate_operand"	       "=r,r,v ,m")
15023	(vec_select:SWI48
15024	  (match_operand:<ssevecmode> 1 "nonimmediate_operand" "m ,v,vm,v")
15025	  (parallel [(const_int 0)])))]
15026  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15027  "#"
15028  [(set_attr "isa" "*,sse2,*,*")
15029   (set (attr "preferred_for_speed")
15030     (cond [(eq_attr "alternative" "1")
15031	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15032	   ]
15033	   (symbol_ref "true")))])
15034
15035(define_insn "*vec_extractv2di_0_sse"
15036  [(set (match_operand:DI 0 "nonimmediate_operand"     "=r,x ,m")
15037	(vec_select:DI
15038	  (match_operand:V2DI 1 "nonimmediate_operand" " x,xm,x")
15039	  (parallel [(const_int 0)])))]
15040  "TARGET_SSE && !TARGET_64BIT
15041   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15042  "#"
15043  [(set_attr "isa" "sse4,*,*")
15044   (set (attr "preferred_for_speed")
15045     (cond [(eq_attr "alternative" "0")
15046	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15047	   ]
15048	   (symbol_ref "true")))])
15049
15050(define_split
15051  [(set (match_operand:DI 0 "general_reg_operand")
15052	(vec_select:DI
15053	  (match_operand:V2DI 1 "register_operand")
15054	  (parallel [(const_int 0)])))]
15055  "TARGET_SSE4_1 && !TARGET_64BIT
15056   && reload_completed"
15057  [(set (match_dup 2) (match_dup 4))
15058   (set (match_dup 3)
15059	(vec_select:SI
15060	  (match_dup 5)
15061	  (parallel [(const_int 1)])))]
15062{
15063  operands[4] = gen_lowpart (SImode, operands[1]);
15064  operands[5] = gen_lowpart (V4SImode, operands[1]);
15065  split_double_mode (DImode, &operands[0], 1, &operands[2], &operands[3]);
15066})
15067
15068(define_split
15069  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15070	(vec_select:SWI48x
15071	  (match_operand:<ssevecmode> 1 "register_operand")
15072	  (parallel [(const_int 0)])))]
15073  "TARGET_SSE && reload_completed"
15074  [(set (match_dup 0) (match_dup 1))]
15075  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);")
15076
15077(define_insn "*vec_extractv4si_0_zext_sse4"
15078  [(set (match_operand:DI 0 "register_operand" "=r,x,v")
15079	(zero_extend:DI
15080	  (vec_select:SI
15081	    (match_operand:V4SI 1 "register_operand" "v,x,v")
15082	    (parallel [(const_int 0)]))))]
15083  "TARGET_SSE4_1"
15084  "#"
15085  [(set_attr "isa" "x64,*,avx512f")
15086   (set (attr "preferred_for_speed")
15087     (cond [(eq_attr "alternative" "0")
15088	      (symbol_ref "TARGET_INTER_UNIT_MOVES_FROM_VEC")
15089	   ]
15090	   (symbol_ref "true")))])
15091
15092(define_insn "*vec_extractv4si_0_zext"
15093  [(set (match_operand:DI 0 "register_operand" "=r")
15094	(zero_extend:DI
15095	  (vec_select:SI
15096	    (match_operand:V4SI 1 "register_operand" "x")
15097	    (parallel [(const_int 0)]))))]
15098  "TARGET_64BIT && TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_FROM_VEC"
15099  "#")
15100
15101(define_split
15102  [(set (match_operand:DI 0 "register_operand")
15103	(zero_extend:DI
15104	  (vec_select:SI
15105	    (match_operand:V4SI 1 "register_operand")
15106	    (parallel [(const_int 0)]))))]
15107  "TARGET_SSE2 && reload_completed"
15108  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15109  "operands[1] = gen_lowpart (SImode, operands[1]);")
15110
15111(define_insn "*vec_extractv4si"
15112  [(set (match_operand:SI 0 "nonimmediate_operand" "=rm,rm,Yr,*x,x,Yv")
15113	(vec_select:SI
15114	  (match_operand:V4SI 1 "register_operand" "x,v,0,0,x,v")
15115	  (parallel [(match_operand:SI 2 "const_0_to_3_operand")])))]
15116  "TARGET_SSE4_1"
15117{
15118  switch (which_alternative)
15119    {
15120    case 0:
15121    case 1:
15122      return "%vpextrd\t{%2, %1, %0|%0, %1, %2}";
15123
15124    case 2:
15125    case 3:
15126      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15127      return "psrldq\t{%2, %0|%0, %2}";
15128
15129    case 4:
15130    case 5:
15131      operands[2] = GEN_INT (INTVAL (operands[2]) * 4);
15132      return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
15133
15134    default:
15135      gcc_unreachable ();
15136    }
15137}
15138  [(set_attr "isa" "*,avx512dq,noavx,noavx,avx,avx512bw")
15139   (set_attr "type" "sselog1,sselog1,sseishft1,sseishft1,sseishft1,sseishft1")
15140   (set (attr "prefix_extra")
15141     (if_then_else (eq_attr "alternative" "0,1")
15142		   (const_string "1")
15143		   (const_string "*")))
15144   (set_attr "length_immediate" "1")
15145   (set_attr "prefix" "maybe_vex,evex,orig,orig,vex,evex")
15146   (set_attr "mode" "TI")])
15147
15148(define_insn "*vec_extractv4si_zext"
15149  [(set (match_operand:DI 0 "register_operand" "=r,r")
15150	(zero_extend:DI
15151	  (vec_select:SI
15152	    (match_operand:V4SI 1 "register_operand" "x,v")
15153	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15154  "TARGET_64BIT && TARGET_SSE4_1"
15155  "%vpextrd\t{%2, %1, %k0|%k0, %1, %2}"
15156  [(set_attr "isa" "*,avx512dq")
15157   (set_attr "type" "sselog1")
15158   (set_attr "prefix_extra" "1")
15159   (set_attr "length_immediate" "1")
15160   (set_attr "prefix" "maybe_vex")
15161   (set_attr "mode" "TI")])
15162
15163(define_insn "*vec_extractv4si_mem"
15164  [(set (match_operand:SI 0 "register_operand" "=x,r")
15165	(vec_select:SI
15166	  (match_operand:V4SI 1 "memory_operand" "o,o")
15167	  (parallel [(match_operand 2 "const_0_to_3_operand")])))]
15168  "TARGET_SSE"
15169  "#")
15170
15171(define_insn_and_split "*vec_extractv4si_zext_mem"
15172  [(set (match_operand:DI 0 "register_operand" "=x,r")
15173	(zero_extend:DI
15174	  (vec_select:SI
15175	    (match_operand:V4SI 1 "memory_operand" "o,o")
15176	    (parallel [(match_operand:SI 2 "const_0_to_3_operand")]))))]
15177  "TARGET_64BIT && TARGET_SSE"
15178  "#"
15179  "&& reload_completed"
15180  [(set (match_dup 0) (zero_extend:DI (match_dup 1)))]
15181{
15182  operands[1] = adjust_address (operands[1], SImode, INTVAL (operands[2]) * 4);
15183})
15184
15185(define_insn "*vec_extractv2di_1"
15186  [(set (match_operand:DI 0 "nonimmediate_operand"     "=rm,rm,m,x,x,Yv,x,v,r")
15187	(vec_select:DI
15188	  (match_operand:V2DI 1 "nonimmediate_operand"  "x ,v ,v,0,x, v,x,o,o")
15189	  (parallel [(const_int 1)])))]
15190  "TARGET_SSE && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
15191  "@
15192   %vpextrq\t{$1, %1, %0|%0, %1, 1}
15193   vpextrq\t{$1, %1, %0|%0, %1, 1}
15194   %vmovhps\t{%1, %0|%0, %1}
15195   psrldq\t{$8, %0|%0, 8}
15196   vpsrldq\t{$8, %1, %0|%0, %1, 8}
15197   vpsrldq\t{$8, %1, %0|%0, %1, 8}
15198   movhlps\t{%1, %0|%0, %1}
15199   #
15200   #"
15201  [(set (attr "isa")
15202     (cond [(eq_attr "alternative" "0")
15203	      (const_string "x64_sse4")
15204	    (eq_attr "alternative" "1")
15205	      (const_string "x64_avx512dq")
15206	    (eq_attr "alternative" "3")
15207	      (const_string "sse2_noavx")
15208	    (eq_attr "alternative" "4")
15209	      (const_string "avx")
15210	    (eq_attr "alternative" "5")
15211	      (const_string "avx512bw")
15212	    (eq_attr "alternative" "6")
15213	      (const_string "noavx")
15214	    (eq_attr "alternative" "8")
15215	      (const_string "x64")
15216	   ]
15217	   (const_string "*")))
15218   (set (attr "type")
15219     (cond [(eq_attr "alternative" "2,6,7")
15220	      (const_string "ssemov")
15221	    (eq_attr "alternative" "3,4,5")
15222	      (const_string "sseishft1")
15223	    (eq_attr "alternative" "8")
15224	      (const_string "imov")
15225	   ]
15226	   (const_string "sselog1")))
15227   (set (attr "length_immediate")
15228     (if_then_else (eq_attr "alternative" "0,1,3,4,5")
15229		   (const_string "1")
15230		   (const_string "*")))
15231   (set (attr "prefix_rex")
15232     (if_then_else (eq_attr "alternative" "0,1")
15233		   (const_string "1")
15234		   (const_string "*")))
15235   (set (attr "prefix_extra")
15236     (if_then_else (eq_attr "alternative" "0,1")
15237		   (const_string "1")
15238		   (const_string "*")))
15239   (set_attr "prefix" "maybe_vex,evex,maybe_vex,orig,vex,evex,orig,*,*")
15240   (set_attr "mode" "TI,TI,V2SF,TI,TI,TI,V4SF,DI,DI")])
15241
15242(define_split
15243  [(set (match_operand:<ssescalarmode> 0 "register_operand")
15244	(vec_select:<ssescalarmode>
15245	  (match_operand:VI_128 1 "memory_operand")
15246	  (parallel
15247	    [(match_operand 2 "const_0_to_<ssescalarnummask>_operand")])))]
15248  "TARGET_SSE && reload_completed"
15249  [(set (match_dup 0) (match_dup 1))]
15250{
15251  int offs = INTVAL (operands[2]) * GET_MODE_SIZE (<ssescalarmode>mode);
15252
15253  operands[1] = adjust_address (operands[1], <ssescalarmode>mode, offs);
15254})
15255
15256(define_insn "*vec_extractv2ti"
15257  [(set (match_operand:TI 0 "nonimmediate_operand" "=xm,vm")
15258	(vec_select:TI
15259	  (match_operand:V2TI 1 "register_operand" "x,v")
15260	  (parallel
15261	    [(match_operand:SI 2 "const_0_to_1_operand")])))]
15262  "TARGET_AVX"
15263  "@
15264   vextract%~128\t{%2, %1, %0|%0, %1, %2}
15265   vextracti32x4\t{%2, %g1, %0|%0, %g1, %2}"
15266  [(set_attr "type" "sselog")
15267   (set_attr "prefix_extra" "1")
15268   (set_attr "length_immediate" "1")
15269   (set_attr "prefix" "vex,evex")
15270   (set_attr "mode" "OI")])
15271
15272(define_insn "*vec_extractv4ti"
15273  [(set (match_operand:TI 0 "nonimmediate_operand" "=vm")
15274	(vec_select:TI
15275	  (match_operand:V4TI 1 "register_operand" "v")
15276	  (parallel
15277	    [(match_operand:SI 2 "const_0_to_3_operand")])))]
15278  "TARGET_AVX512F"
15279  "vextracti32x4\t{%2, %1, %0|%0, %1, %2}"
15280  [(set_attr "type" "sselog")
15281   (set_attr "prefix_extra" "1")
15282   (set_attr "length_immediate" "1")
15283   (set_attr "prefix" "evex")
15284   (set_attr "mode" "XI")])
15285
15286(define_mode_iterator VEXTRACTI128_MODE
15287  [(V4TI "TARGET_AVX512F") V2TI])
15288
15289(define_split
15290  [(set (match_operand:TI 0 "nonimmediate_operand")
15291	(vec_select:TI
15292	  (match_operand:VEXTRACTI128_MODE 1 "register_operand")
15293	  (parallel [(const_int 0)])))]
15294  "TARGET_AVX
15295   && reload_completed
15296   && (TARGET_AVX512VL || !EXT_REX_SSE_REG_P (operands[1]))"
15297  [(set (match_dup 0) (match_dup 1))]
15298  "operands[1] = gen_lowpart (TImode, operands[1]);")
15299
15300;; Turn SImode or DImode extraction from arbitrary SSE/AVX/AVX512F
15301;; vector modes into vec_extract*.
15302(define_split
15303  [(set (match_operand:SWI48x 0 "nonimmediate_operand")
15304	(subreg:SWI48x (match_operand 1 "register_operand") 0))]
15305  "can_create_pseudo_p ()
15306   && REG_P (operands[1])
15307   && VECTOR_MODE_P (GET_MODE (operands[1]))
15308   && ((TARGET_SSE && GET_MODE_SIZE (GET_MODE (operands[1])) == 16)
15309       || (TARGET_AVX && GET_MODE_SIZE (GET_MODE (operands[1])) == 32)
15310       || (TARGET_AVX512F && GET_MODE_SIZE (GET_MODE (operands[1])) == 64))
15311   && (<MODE>mode == SImode || TARGET_64BIT || MEM_P (operands[0]))"
15312  [(set (match_dup 0) (vec_select:SWI48x (match_dup 1)
15313					 (parallel [(const_int 0)])))]
15314{
15315  rtx tmp;
15316
15317  switch (GET_MODE_SIZE (GET_MODE (operands[1])))
15318    {
15319    case 64:
15320      if (<MODE>mode == SImode)
15321	{
15322	  tmp = gen_reg_rtx (V8SImode);
15323	  emit_insn (gen_vec_extract_lo_v16si (tmp,
15324					       gen_lowpart (V16SImode,
15325							    operands[1])));
15326	}
15327      else
15328	{
15329	  tmp = gen_reg_rtx (V4DImode);
15330	  emit_insn (gen_vec_extract_lo_v8di (tmp,
15331					      gen_lowpart (V8DImode,
15332							   operands[1])));
15333	}
15334      operands[1] = tmp;
15335      /* FALLTHRU */
15336    case 32:
15337      tmp = gen_reg_rtx (<ssevecmode>mode);
15338      if (<MODE>mode == SImode)
15339	emit_insn (gen_vec_extract_lo_v8si (tmp, gen_lowpart (V8SImode,
15340							      operands[1])));
15341      else
15342	emit_insn (gen_vec_extract_lo_v4di (tmp, gen_lowpart (V4DImode,
15343							      operands[1])));
15344      operands[1] = tmp;
15345      break;
15346    case 16:
15347      operands[1] = gen_lowpart (<ssevecmode>mode, operands[1]);
15348      break;
15349    }
15350})
15351
15352(define_insn "*vec_concatv2si_sse4_1"
15353  [(set (match_operand:V2SI 0 "register_operand"
15354	  "=Yr,*x, x, v,Yr,*x, v, v, *y,*y")
15355	(vec_concat:V2SI
15356	  (match_operand:SI 1 "nonimmediate_operand"
15357	  "  0, 0, x,Yv, 0, 0,Yv,rm,  0,rm")
15358	  (match_operand:SI 2 "nonimm_or_0_operand"
15359	  " rm,rm,rm,rm,Yr,*x,Yv, C,*ym, C")))]
15360  "TARGET_SSE4_1 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15361  "@
15362   pinsrd\t{$1, %2, %0|%0, %2, 1}
15363   pinsrd\t{$1, %2, %0|%0, %2, 1}
15364   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15365   vpinsrd\t{$1, %2, %1, %0|%0, %1, %2, 1}
15366   punpckldq\t{%2, %0|%0, %2}
15367   punpckldq\t{%2, %0|%0, %2}
15368   vpunpckldq\t{%2, %1, %0|%0, %1, %2}
15369   %vmovd\t{%1, %0|%0, %1}
15370   punpckldq\t{%2, %0|%0, %2}
15371   movd\t{%1, %0|%0, %1}"
15372  [(set_attr "isa" "noavx,noavx,avx,avx512dq,noavx,noavx,avx,*,*,*")
15373   (set (attr "mmx_isa")
15374     (if_then_else (eq_attr "alternative" "8,9")
15375		   (const_string "native")
15376		   (const_string "*")))
15377   (set (attr "type")
15378     (cond [(eq_attr "alternative" "7")
15379	      (const_string "ssemov")
15380	    (eq_attr "alternative" "8")
15381	      (const_string "mmxcvt")
15382	    (eq_attr "alternative" "9")
15383	      (const_string "mmxmov")
15384	   ]
15385	   (const_string "sselog")))
15386   (set (attr "prefix_extra")
15387     (if_then_else (eq_attr "alternative" "0,1,2,3")
15388		   (const_string "1")
15389		   (const_string "*")))
15390   (set (attr "length_immediate")
15391     (if_then_else (eq_attr "alternative" "0,1,2,3")
15392		   (const_string "1")
15393		   (const_string "*")))
15394   (set_attr "prefix" "orig,orig,vex,evex,orig,orig,maybe_evex,maybe_vex,orig,orig")
15395   (set_attr "mode" "TI,TI,TI,TI,TI,TI,TI,TI,DI,DI")])
15396
15397;; ??? In theory we can match memory for the MMX alternative, but allowing
15398;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
15399;; alternatives pretty much forces the MMX alternative to be chosen.
15400(define_insn "*vec_concatv2si"
15401  [(set (match_operand:V2SI 0 "register_operand"     "=x,x ,x,x,*y,*y")
15402	(vec_concat:V2SI
15403	  (match_operand:SI 1 "nonimmediate_operand" " 0,rm,0,m, 0,rm")
15404	  (match_operand:SI 2 "reg_or_0_operand"     " x,C ,x,C,*y,C")))]
15405  "TARGET_SSE && !TARGET_SSE4_1"
15406  "@
15407   punpckldq\t{%2, %0|%0, %2}
15408   movd\t{%1, %0|%0, %1}
15409   unpcklps\t{%2, %0|%0, %2}
15410   movss\t{%1, %0|%0, %1}
15411   punpckldq\t{%2, %0|%0, %2}
15412   movd\t{%1, %0|%0, %1}"
15413  [(set_attr "isa" "sse2,sse2,*,*,*,*")
15414   (set_attr "mmx_isa" "*,*,*,*,native,native")
15415   (set_attr "type" "sselog,ssemov,sselog,ssemov,mmxcvt,mmxmov")
15416   (set_attr "mode" "TI,TI,V4SF,SF,DI,DI")])
15417
15418(define_insn "*vec_concatv4si"
15419  [(set (match_operand:V4SI 0 "register_operand"       "=x,v,x,x,v")
15420	(vec_concat:V4SI
15421	  (match_operand:V2SI 1 "register_operand"     " 0,v,0,0,v")
15422	  (match_operand:V2SI 2 "nonimmediate_operand" " x,v,x,m,m")))]
15423  "TARGET_SSE"
15424  "@
15425   punpcklqdq\t{%2, %0|%0, %2}
15426   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15427   movlhps\t{%2, %0|%0, %2}
15428   movhps\t{%2, %0|%0, %q2}
15429   vmovhps\t{%2, %1, %0|%0, %1, %q2}"
15430  [(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
15431   (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
15432   (set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
15433   (set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
15434
15435(define_insn "*vec_concatv4si_0"
15436  [(set (match_operand:V4SI 0 "register_operand"       "=v,x")
15437	(vec_concat:V4SI
15438	  (match_operand:V2SI 1 "nonimmediate_operand" "vm,?!*y")
15439	  (match_operand:V2SI 2 "const0_operand"       " C,C")))]
15440  "TARGET_SSE2"
15441  "@
15442   %vmovq\t{%1, %0|%0, %1}
15443   movq2dq\t{%1, %0|%0, %1}"
15444  [(set_attr "mmx_isa" "*,native")
15445   (set_attr "type" "ssemov")
15446   (set_attr "prefix" "maybe_vex,orig")
15447   (set_attr "mode" "TI")])
15448
15449(define_insn "vec_concatv2di"
15450  [(set (match_operand:V2DI 0 "register_operand"
15451	  "=Yr,*x,x ,v ,x,v ,x,x,v")
15452	(vec_concat:V2DI
15453	  (match_operand:DI 1 "register_operand"
15454	  "  0, 0,x ,Yv,0,Yv,0,0,v")
15455	  (match_operand:DI 2 "nonimmediate_operand"
15456	  " rm,rm,rm,rm,x,Yv,x,m,m")))]
15457  "TARGET_SSE"
15458  "@
15459   pinsrq\t{$1, %2, %0|%0, %2, 1}
15460   pinsrq\t{$1, %2, %0|%0, %2, 1}
15461   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15462   vpinsrq\t{$1, %2, %1, %0|%0, %1, %2, 1}
15463   punpcklqdq\t{%2, %0|%0, %2}
15464   vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
15465   movlhps\t{%2, %0|%0, %2}
15466   movhps\t{%2, %0|%0, %2}
15467   vmovhps\t{%2, %1, %0|%0, %1, %2}"
15468  [(set (attr "isa")
15469     (cond [(eq_attr "alternative" "0,1")
15470	      (const_string "x64_sse4_noavx")
15471	    (eq_attr "alternative" "2")
15472	      (const_string "x64_avx")
15473	    (eq_attr "alternative" "3")
15474	      (const_string "x64_avx512dq")
15475	    (eq_attr "alternative" "4")
15476	      (const_string "sse2_noavx")
15477	    (eq_attr "alternative" "5,8")
15478	      (const_string "avx")
15479	   ]
15480	   (const_string "noavx")))
15481   (set (attr "type")
15482     (if_then_else
15483       (eq_attr "alternative" "0,1,2,3,4,5")
15484       (const_string "sselog")
15485       (const_string "ssemov")))
15486   (set (attr "prefix_rex")
15487     (if_then_else (eq_attr "alternative" "0,1,2,3")
15488		   (const_string "1")
15489		   (const_string "*")))
15490   (set (attr "prefix_extra")
15491     (if_then_else (eq_attr "alternative" "0,1,2,3")
15492		   (const_string "1")
15493		   (const_string "*")))
15494   (set (attr "length_immediate")
15495     (if_then_else (eq_attr "alternative" "0,1,2,3")
15496		   (const_string "1")
15497		   (const_string "*")))
15498   (set (attr "prefix")
15499     (cond [(eq_attr "alternative" "2")
15500	      (const_string "vex")
15501	    (eq_attr "alternative" "3")
15502	      (const_string "evex")
15503	    (eq_attr "alternative" "5,8")
15504	      (const_string "maybe_evex")
15505	   ]
15506	   (const_string "orig")))
15507   (set_attr "mode" "TI,TI,TI,TI,TI,TI,V4SF,V2SF,V2SF")])
15508
15509(define_insn "*vec_concatv2di_0"
15510  [(set (match_operand:V2DI 0 "register_operand"     "=v,v ,x")
15511	(vec_concat:V2DI
15512	  (match_operand:DI 1 "nonimmediate_operand" " r,vm,?!*y")
15513	  (match_operand:DI 2 "const0_operand"       " C,C ,C")))]
15514  "TARGET_SSE2"
15515  "@
15516   * return HAVE_AS_IX86_INTERUNIT_MOVQ ? \"%vmovq\t{%1, %0|%0, %1}\" : \"%vmovd\t{%1, %0|%0, %1}\";
15517   %vmovq\t{%1, %0|%0, %1}
15518   movq2dq\t{%1, %0|%0, %1}"
15519  [(set_attr "isa" "x64,*,*")
15520   (set_attr "mmx_isa" "*,*,native")
15521   (set_attr "type" "ssemov")
15522   (set_attr "prefix_rex" "1,*,*")
15523   (set_attr "prefix" "maybe_vex,maybe_vex,orig")
15524   (set_attr "mode" "TI")
15525   (set (attr "preferred_for_speed")
15526     (cond [(eq_attr "alternative" "0")
15527	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15528	   ]
15529	   (symbol_ref "true")))])
15530
;; vmovq also clears the higher bits of the destination.
15532(define_insn "vec_set<mode>_0"
15533  [(set (match_operand:VI8_AVX_AVX512F 0 "register_operand" "=v,v")
15534	(vec_merge:VI8_AVX_AVX512F
15535	  (vec_duplicate:VI8_AVX_AVX512F
15536	    (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,vm"))
15537	  (match_operand:VI8_AVX_AVX512F 1 "const0_operand" "C,C")
15538	  (const_int 1)))]
15539  "TARGET_AVX"
15540  "vmovq\t{%2, %x0|%x0, %2}"
15541  [(set_attr "isa" "x64,*")
15542   (set_attr "type" "ssemov")
15543   (set_attr "prefix_rex" "1,*")
15544   (set_attr "prefix" "maybe_evex")
15545   (set_attr "mode" "TI")
15546   (set (attr "preferred_for_speed")
15547     (cond [(eq_attr "alternative" "0")
15548	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
15549	   ]
15550	   (symbol_ref "true")))])
15551
15552(define_expand "vec_unpacks_lo_<mode>"
15553  [(match_operand:<sseunpackmode> 0 "register_operand")
15554   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15555  "TARGET_SSE2"
15556  "ix86_expand_sse_unpack (operands[0], operands[1], false, false); DONE;")
15557
15558(define_expand "vec_unpacks_hi_<mode>"
15559  [(match_operand:<sseunpackmode> 0 "register_operand")
15560   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15561  "TARGET_SSE2"
15562  "ix86_expand_sse_unpack (operands[0], operands[1], false, true); DONE;")
15563
15564(define_expand "vec_unpacku_lo_<mode>"
15565  [(match_operand:<sseunpackmode> 0 "register_operand")
15566   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15567  "TARGET_SSE2"
15568  "ix86_expand_sse_unpack (operands[0], operands[1], true, false); DONE;")
15569
15570(define_expand "vec_unpacks_sbool_lo_qi"
15571  [(match_operand:QI 0 "register_operand")
15572   (match_operand:QI 1 "register_operand")
15573   (match_operand:QI 2 "const_int_operand")]
15574  "TARGET_AVX512F"
15575{
15576  if (INTVAL (operands[2]) != 8 && INTVAL (operands[2]) != 4)
15577    FAIL;
15578  emit_move_insn (operands[0], operands[1]);
15579  DONE;
15580})
15581
15582(define_expand "vec_unpacks_lo_hi"
15583  [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15584        (match_operand:HI 1 "register_operand"))]
15585  "TARGET_AVX512F")
15586
15587(define_expand "vec_unpacks_lo_si"
15588  [(set (match_operand:HI 0 "register_operand")
15589        (subreg:HI (match_operand:SI 1 "register_operand") 0))]
15590  "TARGET_AVX512F")
15591
15592(define_expand "vec_unpacks_lo_di"
15593  [(set (match_operand:SI 0 "register_operand")
15594        (subreg:SI (match_operand:DI 1 "register_operand") 0))]
15595  "TARGET_AVX512BW")
15596
15597(define_expand "vec_unpacku_hi_<mode>"
15598  [(match_operand:<sseunpackmode> 0 "register_operand")
15599   (match_operand:VI124_AVX2_24_AVX512F_1_AVX512BW 1 "register_operand")]
15600  "TARGET_SSE2"
15601  "ix86_expand_sse_unpack (operands[0], operands[1], true, true); DONE;")
15602
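;; The high half of a 4- or 8-bit mask is obtained by shifting the mask
;; register right by half the element count; without AVX512DQ there is
;; no QImode kshift, so the mask is first widened to HImode.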
15603(define_expand "vec_unpacks_sbool_hi_qi"
15604  [(match_operand:QI 0 "register_operand")
15605   (match_operand:QI 1 "register_operand")
15606   (match_operand:QI 2 "const_int_operand")]
15607  "TARGET_AVX512F"
15608{
15609  HOST_WIDE_INT nunits = INTVAL (operands[2]);
15610  if (nunits != 8 && nunits != 4)
15611    FAIL;
15612  if (TARGET_AVX512DQ)
15613    emit_insn (gen_klshiftrtqi (operands[0], operands[1],
15614				GEN_INT (nunits / 2)));
15615  else
15616    {
15617      rtx tem = gen_reg_rtx (HImode);
15618      emit_insn (gen_klshiftrthi (tem, lowpart_subreg (HImode, operands[1],
15619						       QImode),
15620				  GEN_INT (nunits / 2)));
15621      emit_move_insn (operands[0], lowpart_subreg (QImode, tem, HImode));
15622    }
15623  DONE;
15624})
15625
15626(define_expand "vec_unpacks_hi_hi"
15627  [(parallel
15628     [(set (subreg:HI (match_operand:QI 0 "register_operand") 0)
15629	   (lshiftrt:HI (match_operand:HI 1 "register_operand")
15630			(const_int 8)))
15631      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15632  "TARGET_AVX512F")
15633
15634(define_expand "vec_unpacks_hi_<mode>"
15635  [(parallel
15636     [(set (subreg:SWI48x
15637	     (match_operand:<HALFMASKMODE> 0 "register_operand") 0)
15638	   (lshiftrt:SWI48x (match_operand:SWI48x 1 "register_operand")
15639			    (match_dup 2)))
15640      (unspec [(const_int 0)] UNSPEC_MASKOP)])]
15641  "TARGET_AVX512BW"
15642  "operands[2] = GEN_INT (GET_MODE_BITSIZE (<HALFMASKMODE>mode));")
15643
15644;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15645;;
15646;; Miscellaneous
15647;;
15648;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
15649
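;; pavgb/pavgw compute the unsigned rounding average (a + b + 1) >> 1,
;; which the widened RTL below spells out explicitly.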
15650(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
15651  [(set (match_operand:VI12_AVX2 0 "register_operand")
15652	(truncate:VI12_AVX2
15653	  (lshiftrt:<ssedoublemode>
15654	    (plus:<ssedoublemode>
15655	      (plus:<ssedoublemode>
15656		(zero_extend:<ssedoublemode>
15657		  (match_operand:VI12_AVX2 1 "vector_operand"))
15658		(zero_extend:<ssedoublemode>
15659		  (match_operand:VI12_AVX2 2 "vector_operand")))
15660	      (match_dup <mask_expand_op3>))
15661	    (const_int 1))))]
15662  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
15663{
15664  operands[<mask_expand_op3>] = CONST1_RTX(<ssedoublemode>mode);
15665  ix86_fixup_binary_operands_no_copy (PLUS, <MODE>mode, operands);
15666})
15667
15668(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
15669  [(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
15670	(truncate:VI12_AVX2
15671	  (lshiftrt:<ssedoublemode>
15672	    (plus:<ssedoublemode>
15673	      (plus:<ssedoublemode>
15674		(zero_extend:<ssedoublemode>
15675		  (match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
15676		(zero_extend:<ssedoublemode>
15677		  (match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
15678	      (match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
15679	    (const_int 1))))]
15680  "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
15681   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
15682  "@
15683   pavg<ssemodesuffix>\t{%2, %0|%0, %2}
15684   vpavg<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
15685  [(set_attr "isa" "noavx,avx")
15686   (set_attr "type" "sseiadd")
15687   (set_attr "prefix_data16" "1,*")
15688   (set_attr "prefix" "orig,<mask_prefix>")
15689   (set_attr "mode" "<sseinsnmode>")])
15690
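;; psadbw sums the absolute differences of the unsigned byte elements,
;; producing one 16-bit sum per 64-bit result element (zero-extended).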
15691;; The correct representation for this is absolutely enormous, and
15692;; surely not generally useful.
15693(define_insn "<sse2_avx2>_psadbw"
15694  [(set (match_operand:VI8_AVX2_AVX512BW 0 "register_operand" "=x,v")
15695	(unspec:VI8_AVX2_AVX512BW
15696	  [(match_operand:<ssebytemode> 1 "register_operand" "0,v")
15697	   (match_operand:<ssebytemode> 2 "vector_operand" "xBm,vm")]
15698	  UNSPEC_PSADBW))]
15699  "TARGET_SSE2"
15700  "@
15701   psadbw\t{%2, %0|%0, %2}
15702   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
15703  [(set_attr "isa" "noavx,avx")
15704   (set_attr "type" "sseiadd")
15705   (set_attr "atom_unit" "simul")
15706   (set_attr "prefix_data16" "1,*")
15707   (set_attr "prefix" "orig,maybe_evex")
15708   (set_attr "mode" "<sseinsnmode>")])
15709
15710(define_insn "<sse>_movmsk<ssemodesuffix><avxsizesuffix>"
15711  [(set (match_operand:SI 0 "register_operand" "=r")
15712	(unspec:SI
15713	  [(match_operand:VF_128_256 1 "register_operand" "x")]
15714	  UNSPEC_MOVMSK))]
15715  "TARGET_SSE"
15716  "%vmovmsk<ssemodesuffix>\t{%1, %0|%0, %1}"
15717  [(set_attr "type" "ssemov")
15718   (set_attr "prefix" "maybe_vex")
15719   (set_attr "mode" "<MODE>")])
15720
15721(define_insn "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext"
15722  [(set (match_operand:DI 0 "register_operand" "=r")
15723	(any_extend:DI
15724	  (unspec:SI
15725	    [(match_operand:VF_128_256 1 "register_operand" "x")]
15726	    UNSPEC_MOVMSK)))]
15727  "TARGET_64BIT && TARGET_SSE"
15728  "%vmovmsk<ssemodesuffix>\t{%1, %k0|%k0, %1}"
15729  [(set_attr "type" "ssemov")
15730   (set_attr "prefix" "maybe_vex")
15731   (set_attr "mode" "<MODE>")])
15732
15733(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt"
15734  [(set (match_operand:SI 0 "register_operand" "=r")
15735	(unspec:SI
15736	  [(lt:VF_128_256
15737	     (match_operand:<sseintvecmode> 1 "register_operand" "x")
15738	     (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15739	  UNSPEC_MOVMSK))]
15740  "TARGET_SSE"
15741  "#"
15742  "&& reload_completed"
15743  [(set (match_dup 0)
15744	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15745  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15746  [(set_attr "type" "ssemov")
15747   (set_attr "prefix" "maybe_vex")
15748   (set_attr "mode" "<MODE>")])
15749
15750(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
15751  [(set (match_operand:DI 0 "register_operand" "=r")
15752	(any_extend:DI
15753	  (unspec:SI
15754	    [(lt:VF_128_256
15755	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
15756	       (match_operand:<sseintvecmode> 2 "const0_operand" "C"))]
15757	    UNSPEC_MOVMSK)))]
15758  "TARGET_64BIT && TARGET_SSE"
15759  "#"
15760  "&& reload_completed"
15761  [(set (match_dup 0)
15762	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15763  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15764  [(set_attr "type" "ssemov")
15765   (set_attr "prefix" "maybe_vex")
15766   (set_attr "mode" "<MODE>")])
15767
15768(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
15769  [(set (match_operand:SI 0 "register_operand" "=r")
15770	(unspec:SI
15771	  [(subreg:VF_128_256
15772	     (ashiftrt:<sseintvecmode>
15773	       (match_operand:<sseintvecmode> 1 "register_operand" "x")
15774	       (match_operand:QI 2 "const_int_operand" "n")) 0)]
15775	  UNSPEC_MOVMSK))]
15776  "TARGET_SSE"
15777  "#"
15778  "&& reload_completed"
15779  [(set (match_dup 0)
15780	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15781  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15782  [(set_attr "type" "ssemov")
15783   (set_attr "prefix" "maybe_vex")
15784   (set_attr "mode" "<MODE>")])
15785
15786(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_shift"
15787  [(set (match_operand:DI 0 "register_operand" "=r")
15788	(any_extend:DI
15789	  (unspec:SI
15790	    [(subreg:VF_128_256
15791	       (ashiftrt:<sseintvecmode>
15792		 (match_operand:<sseintvecmode> 1 "register_operand" "x")
		 (match_operand:QI 2 "const_int_operand" "n")) 0)]
15794	    UNSPEC_MOVMSK)))]
15795  "TARGET_64BIT && TARGET_SSE"
15796  "#"
15797  "&& reload_completed"
15798  [(set (match_dup 0)
15799	(any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15800  "operands[1] = gen_lowpart (<MODE>mode, operands[1]);"
15801  [(set_attr "type" "ssemov")
15802   (set_attr "prefix" "maybe_vex")
15803   (set_attr "mode" "<MODE>")])
15804
15805(define_insn "<sse2_avx2>_pmovmskb"
15806  [(set (match_operand:SI 0 "register_operand" "=r")
15807	(unspec:SI
15808	  [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15809	  UNSPEC_MOVMSK))]
15810  "TARGET_SSE2"
15811  "%vpmovmskb\t{%1, %0|%0, %1}"
15812  [(set_attr "type" "ssemov")
15813   (set (attr "prefix_data16")
15814     (if_then_else
15815       (match_test "TARGET_AVX")
15816     (const_string "*")
15817     (const_string "1")))
15818   (set_attr "prefix" "maybe_vex")
15819   (set_attr "mode" "SI")])
15820
15821(define_insn "*<sse2_avx2>_pmovmskb_zext"
15822  [(set (match_operand:DI 0 "register_operand" "=r")
15823	(zero_extend:DI
15824	  (unspec:SI
15825	    [(match_operand:VI1_AVX2 1 "register_operand" "x")]
15826	    UNSPEC_MOVMSK)))]
15827  "TARGET_64BIT && TARGET_SSE2"
15828  "%vpmovmskb\t{%1, %k0|%k0, %1}"
15829  [(set_attr "type" "ssemov")
15830   (set (attr "prefix_data16")
15831     (if_then_else
15832       (match_test "TARGET_AVX")
15833     (const_string "*")
15834     (const_string "1")))
15835   (set_attr "prefix" "maybe_vex")
15836   (set_attr "mode" "SI")])
15837
15838(define_insn "*sse2_pmovmskb_ext"
15839  [(set (match_operand:DI 0 "register_operand" "=r")
15840	(sign_extend:DI
15841	  (unspec:SI
15842	    [(match_operand:V16QI 1 "register_operand" "x")]
15843	    UNSPEC_MOVMSK)))]
15844  "TARGET_64BIT && TARGET_SSE2"
15845  "%vpmovmskb\t{%1, %k0|%k0, %1}"
15846  [(set_attr "type" "ssemov")
15847   (set (attr "prefix_data16")
15848     (if_then_else
15849       (match_test "TARGET_AVX")
15850     (const_string "*")
15851     (const_string "1")))
15852   (set_attr "prefix" "maybe_vex")
15853   (set_attr "mode" "SI")])
15854
15855(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt"
15856  [(set (match_operand:SI 0 "register_operand" "=r")
15857	(unspec:SI
15858	  [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15859			(match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15860	  UNSPEC_MOVMSK))]
15861  "TARGET_SSE2"
15862  "#"
15863  ""
15864  [(set (match_dup 0)
15865	(unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))]
15866  ""
15867  [(set_attr "type" "ssemov")
15868   (set (attr "prefix_data16")
15869     (if_then_else
15870       (match_test "TARGET_AVX")
15871     (const_string "*")
15872     (const_string "1")))
15873   (set_attr "prefix" "maybe_vex")
15874   (set_attr "mode" "SI")])
15875
15876(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
15877  [(set (match_operand:DI 0 "register_operand" "=r")
15878	(zero_extend:DI
15879	  (unspec:SI
15880	    [(lt:VI1_AVX2 (match_operand:VI1_AVX2 1 "register_operand" "x")
15881			  (match_operand:VI1_AVX2 2 "const0_operand" "C"))]
15882	    UNSPEC_MOVMSK)))]
15883  "TARGET_64BIT && TARGET_SSE2"
15884  "#"
15885  ""
15886  [(set (match_dup 0)
15887	(zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15888  ""
15889  [(set_attr "type" "ssemov")
15890   (set (attr "prefix_data16")
15891     (if_then_else
15892       (match_test "TARGET_AVX")
15893     (const_string "*")
15894     (const_string "1")))
15895   (set_attr "prefix" "maybe_vex")
15896   (set_attr "mode" "SI")])
15897
15898(define_insn_and_split "*sse2_pmovmskb_ext_lt"
15899  [(set (match_operand:DI 0 "register_operand" "=r")
15900	(sign_extend:DI
15901	  (unspec:SI
15902	    [(lt:V16QI (match_operand:V16QI 1 "register_operand" "x")
15903		       (match_operand:V16QI 2 "const0_operand" "C"))]
15904	    UNSPEC_MOVMSK)))]
15905  "TARGET_64BIT && TARGET_SSE2"
15906  "#"
15907  ""
15908  [(set (match_dup 0)
15909	(sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))]
15910  ""
15911  [(set_attr "type" "ssemov")
15912   (set (attr "prefix_data16")
15913     (if_then_else
15914       (match_test "TARGET_AVX")
15915     (const_string "*")
15916     (const_string "1")))
15917   (set_attr "prefix" "maybe_vex")
15918   (set_attr "mode" "SI")])
15919
15920(define_expand "sse2_maskmovdqu"
15921  [(set (match_operand:V16QI 0 "memory_operand")
15922	(unspec:V16QI [(match_operand:V16QI 1 "register_operand")
15923		       (match_operand:V16QI 2 "register_operand")
15924		       (match_dup 0)]
15925		      UNSPEC_MASKMOV))]
15926  "TARGET_SSE2")
15927
15928(define_insn "*sse2_maskmovdqu"
15929  [(set (mem:V16QI (match_operand:P 0 "register_operand" "D"))
15930	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
15931		       (match_operand:V16QI 2 "register_operand" "x")
15932		       (mem:V16QI (match_dup 0))]
15933		      UNSPEC_MASKMOV))]
15934  "TARGET_SSE2"
15935{
15936  /* We can't use %^ here due to ASM_OUTPUT_OPCODE processing
15937     that requires %v to be at the beginning of the opcode name.  */
15938  if (Pmode != word_mode)
15939    fputs ("\taddr32", asm_out_file);
15940  return "%vmaskmovdqu\t{%2, %1|%1, %2}";
15941}
15942  [(set_attr "type" "ssemov")
15943   (set_attr "prefix_data16" "1")
15944   (set (attr "length_address")
15945     (symbol_ref ("Pmode != word_mode")))
15946   ;; The implicit %rdi operand confuses default length_vex computation.
15947   (set (attr "length_vex")
15948     (symbol_ref ("3 + REX_SSE_REGNO_P (REGNO (operands[2]))")))
15949   (set_attr "prefix" "maybe_vex")
15950   (set_attr "znver1_decode" "vector")
15951   (set_attr "mode" "TI")])
15952
15953(define_insn "sse_ldmxcsr"
15954  [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
15955		    UNSPECV_LDMXCSR)]
15956  "TARGET_SSE"
15957  "%vldmxcsr\t%0"
15958  [(set_attr "type" "sse")
15959   (set_attr "atom_sse_attr" "mxcsr")
15960   (set_attr "prefix" "maybe_vex")
15961   (set_attr "memory" "load")])
15962
15963(define_insn "sse_stmxcsr"
15964  [(set (match_operand:SI 0 "memory_operand" "=m")
15965	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
15966  "TARGET_SSE"
15967  "%vstmxcsr\t%0"
15968  [(set_attr "type" "sse")
15969   (set_attr "atom_sse_attr" "mxcsr")
15970   (set_attr "prefix" "maybe_vex")
15971   (set_attr "memory" "store")])
15972
15973(define_insn "sse2_clflush"
15974  [(unspec_volatile [(match_operand 0 "address_operand" "p")]
15975		    UNSPECV_CLFLUSH)]
15976  "TARGET_SSE2"
15977  "clflush\t%a0"
15978  [(set_attr "type" "sse")
15979   (set_attr "atom_sse_attr" "fence")
15980   (set_attr "memory" "unknown")])
15981
;; As per the AMD and Intel ISA manuals, the first operand is the
;; extensions value and goes in %ecx, while the second operand is the
;; hints value and goes in %eax.
15985(define_insn "sse3_mwait"
15986  [(unspec_volatile [(match_operand:SI 0 "register_operand" "c")
15987		     (match_operand:SI 1 "register_operand" "a")]
15988		    UNSPECV_MWAIT)]
15989  "TARGET_SSE3"
15990;; 64bit version is "mwait %rax,%rcx". But only lower 32bits are used.
15991;; Since 32bit register operands are implicitly zero extended to 64bit,
15992;; we only need to set up 32bit registers.
15993  "mwait"
15994  [(set_attr "length" "3")])
15995
15996(define_insn "@sse3_monitor_<mode>"
15997  [(unspec_volatile [(match_operand:P 0 "register_operand" "a")
15998		     (match_operand:SI 1 "register_operand" "c")
15999		     (match_operand:SI 2 "register_operand" "d")]
16000		    UNSPECV_MONITOR)]
16001  "TARGET_SSE3"
16002;; 64bit version is "monitor %rax,%rcx,%rdx". But only lower 32bits in
16003;; RCX and RDX are used.  Since 32bit register operands are implicitly
16004;; zero extended to 64bit, we only need to set up 32bit registers.
16005  "%^monitor"
16006  [(set (attr "length")
16007     (symbol_ref ("(Pmode != word_mode) + 3")))])
16008
16009;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16010;;
16011;; SSSE3 instructions
16012;;
16013;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16014
16015(define_code_iterator ssse3_plusminus [plus ss_plus minus ss_minus])
16016
16017(define_insn "avx2_ph<plusminus_mnemonic>wv16hi3"
16018  [(set (match_operand:V16HI 0 "register_operand" "=x")
16019	(vec_concat:V16HI
16020	  (vec_concat:V8HI
16021	    (vec_concat:V4HI
16022	      (vec_concat:V2HI
16023		(ssse3_plusminus:HI
16024		  (vec_select:HI
16025		    (match_operand:V16HI 1 "register_operand" "x")
16026		    (parallel [(const_int 0)]))
16027		  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
16028		(ssse3_plusminus:HI
16029		  (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
16030		  (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
16031	      (vec_concat:V2HI
16032		(ssse3_plusminus:HI
16033		  (vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
16034		  (vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
16035		(ssse3_plusminus:HI
16036		  (vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
16037		  (vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
16038	    (vec_concat:V4HI
16039	      (vec_concat:V2HI
16040		(ssse3_plusminus:HI
16041		  (vec_select:HI
16042		    (match_operand:V16HI 2 "nonimmediate_operand" "xm")
16043		    (parallel [(const_int 0)]))
16044		  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
16045		(ssse3_plusminus:HI
16046		  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
16047		  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
16048	      (vec_concat:V2HI
16049		(ssse3_plusminus:HI
16050		  (vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
16051		  (vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
16052		(ssse3_plusminus:HI
16053		  (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
16054		  (vec_select:HI (match_dup 2) (parallel [(const_int 7)]))))))
16055	  (vec_concat:V8HI
16056	    (vec_concat:V4HI
16057	      (vec_concat:V2HI
16058		(ssse3_plusminus:HI
16059		  (vec_select:HI (match_dup 1) (parallel [(const_int 8)]))
16060		  (vec_select:HI (match_dup 1) (parallel [(const_int 9)])))
16061		(ssse3_plusminus:HI
16062		  (vec_select:HI (match_dup 1) (parallel [(const_int 10)]))
16063		  (vec_select:HI (match_dup 1) (parallel [(const_int 11)]))))
16064	      (vec_concat:V2HI
16065		(ssse3_plusminus:HI
16066		  (vec_select:HI (match_dup 1) (parallel [(const_int 12)]))
16067		  (vec_select:HI (match_dup 1) (parallel [(const_int 13)])))
16068		(ssse3_plusminus:HI
16069		  (vec_select:HI (match_dup 1) (parallel [(const_int 14)]))
16070		  (vec_select:HI (match_dup 1) (parallel [(const_int 15)])))))
16071	    (vec_concat:V4HI
16072	      (vec_concat:V2HI
16073		(ssse3_plusminus:HI
16074		  (vec_select:HI (match_dup 2) (parallel [(const_int 8)]))
16075		  (vec_select:HI (match_dup 2) (parallel [(const_int 9)])))
16076		(ssse3_plusminus:HI
16077		  (vec_select:HI (match_dup 2) (parallel [(const_int 10)]))
16078		  (vec_select:HI (match_dup 2) (parallel [(const_int 11)]))))
16079	      (vec_concat:V2HI
16080		(ssse3_plusminus:HI
16081		  (vec_select:HI (match_dup 2) (parallel [(const_int 12)]))
16082		  (vec_select:HI (match_dup 2) (parallel [(const_int 13)])))
16083		(ssse3_plusminus:HI
16084		  (vec_select:HI (match_dup 2) (parallel [(const_int 14)]))
16085		  (vec_select:HI (match_dup 2) (parallel [(const_int 15)]))))))))]
16086  "TARGET_AVX2"
16087  "vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16088  [(set_attr "type" "sseiadd")
16089   (set_attr "prefix_extra" "1")
16090   (set_attr "prefix" "vex")
16091   (set_attr "mode" "OI")])
16092
16093(define_insn "ssse3_ph<plusminus_mnemonic>wv8hi3"
16094  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
16095	(vec_concat:V8HI
16096	  (vec_concat:V4HI
16097	    (vec_concat:V2HI
16098	      (ssse3_plusminus:HI
16099		(vec_select:HI
16100		  (match_operand:V8HI 1 "register_operand" "0,x")
16101		  (parallel [(const_int 0)]))
16102		(vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
16103	      (ssse3_plusminus:HI
16104		(vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
16105		(vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
16106	    (vec_concat:V2HI
16107	      (ssse3_plusminus:HI
16108		(vec_select:HI (match_dup 1) (parallel [(const_int 4)]))
16109		(vec_select:HI (match_dup 1) (parallel [(const_int 5)])))
16110	      (ssse3_plusminus:HI
16111		(vec_select:HI (match_dup 1) (parallel [(const_int 6)]))
16112		(vec_select:HI (match_dup 1) (parallel [(const_int 7)])))))
16113	  (vec_concat:V4HI
16114	    (vec_concat:V2HI
16115	      (ssse3_plusminus:HI
16116		(vec_select:HI
16117		  (match_operand:V8HI 2 "vector_operand" "xBm,xm")
16118		  (parallel [(const_int 0)]))
16119		(vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
16120	      (ssse3_plusminus:HI
16121		(vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
16122		(vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))
16123	    (vec_concat:V2HI
16124	      (ssse3_plusminus:HI
16125		(vec_select:HI (match_dup 2) (parallel [(const_int 4)]))
16126		(vec_select:HI (match_dup 2) (parallel [(const_int 5)])))
16127	      (ssse3_plusminus:HI
16128		(vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
16129		(vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
16130  "TARGET_SSSE3"
16131  "@
16132   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16133   vph<plusminus_mnemonic>w\t{%2, %1, %0|%0, %1, %2}"
16134  [(set_attr "isa" "noavx,avx")
16135   (set_attr "type" "sseiadd")
16136   (set_attr "atom_unit" "complex")
16137   (set_attr "prefix_data16" "1,*")
16138   (set_attr "prefix_extra" "1")
16139   (set_attr "prefix" "orig,vex")
16140   (set_attr "mode" "TI")])
16141
16142(define_insn_and_split "ssse3_ph<plusminus_mnemonic>wv4hi3"
16143  [(set (match_operand:V4HI 0 "register_operand" "=y,x,x")
16144	(vec_concat:V4HI
16145	  (vec_concat:V2HI
16146	    (ssse3_plusminus:HI
16147	      (vec_select:HI
16148		(match_operand:V4HI 1 "register_operand" "0,0,x")
16149		(parallel [(const_int 0)]))
16150	      (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
16151	    (ssse3_plusminus:HI
16152	      (vec_select:HI (match_dup 1) (parallel [(const_int 2)]))
16153	      (vec_select:HI (match_dup 1) (parallel [(const_int 3)]))))
16154	  (vec_concat:V2HI
16155	    (ssse3_plusminus:HI
16156	      (vec_select:HI
16157		(match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,x")
16158		(parallel [(const_int 0)]))
16159	      (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
16160	    (ssse3_plusminus:HI
16161	      (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
16162	      (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
16163  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16164  "@
16165   ph<plusminus_mnemonic>w\t{%2, %0|%0, %2}
16166   #
16167   #"
16168  "TARGET_SSSE3 && reload_completed
16169   && SSE_REGNO_P (REGNO (operands[0]))"
16170  [(const_int 0)]
16171{
16172  /* Generate SSE version of the operation.  */
16173  rtx op0 = lowpart_subreg (V8HImode, operands[0],
16174			    GET_MODE (operands[0]));
16175  rtx op1 = lowpart_subreg (V8HImode, operands[1],
16176			    GET_MODE (operands[1]));
16177  rtx op2 = lowpart_subreg (V8HImode, operands[2],
16178			    GET_MODE (operands[2]));
16179  emit_insn (gen_ssse3_ph<plusminus_mnemonic>wv8hi3 (op0, op1, op2));
16180  ix86_move_vector_high_sse_to_mmx (op0);
16181  DONE;
16182}
16183  [(set_attr "mmx_isa" "native,sse_noavx,avx")
16184   (set_attr "type" "sseiadd")
16185   (set_attr "atom_unit" "complex")
16186   (set_attr "prefix_extra" "1")
16187   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16188   (set_attr "mode" "DI,TI,TI")])
16189
16190(define_insn "avx2_ph<plusminus_mnemonic>dv8si3"
16191  [(set (match_operand:V8SI 0 "register_operand" "=x")
16192	(vec_concat:V8SI
16193	  (vec_concat:V4SI
16194	    (vec_concat:V2SI
16195	      (plusminus:SI
16196		(vec_select:SI
16197		  (match_operand:V8SI 1 "register_operand" "x")
16198		  (parallel [(const_int 0)]))
16199		(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
16200	      (plusminus:SI
16201		(vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
16202		(vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
16203	    (vec_concat:V2SI
16204	      (plusminus:SI
16205		(vec_select:SI
16206		  (match_operand:V8SI 2 "nonimmediate_operand" "xm")
16207		  (parallel [(const_int 0)]))
16208		(vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
16209	      (plusminus:SI
16210		(vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
16211		(vec_select:SI (match_dup 2) (parallel [(const_int 3)])))))
16212	  (vec_concat:V4SI
16213	    (vec_concat:V2SI
16214	      (plusminus:SI
16215		(vec_select:SI (match_dup 1) (parallel [(const_int 4)]))
16216		(vec_select:SI (match_dup 1) (parallel [(const_int 5)])))
16217	      (plusminus:SI
16218		(vec_select:SI (match_dup 1) (parallel [(const_int 6)]))
16219		(vec_select:SI (match_dup 1) (parallel [(const_int 7)]))))
16220	    (vec_concat:V2SI
16221	      (plusminus:SI
16222		(vec_select:SI (match_dup 2) (parallel [(const_int 4)]))
16223		(vec_select:SI (match_dup 2) (parallel [(const_int 5)])))
16224	      (plusminus:SI
16225		(vec_select:SI (match_dup 2) (parallel [(const_int 6)]))
16226		(vec_select:SI (match_dup 2) (parallel [(const_int 7)])))))))]
16227  "TARGET_AVX2"
16228  "vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16229  [(set_attr "type" "sseiadd")
16230   (set_attr "prefix_extra" "1")
16231   (set_attr "prefix" "vex")
16232   (set_attr "mode" "OI")])
16233
16234(define_insn "ssse3_ph<plusminus_mnemonic>dv4si3"
16235  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
16236	(vec_concat:V4SI
16237	  (vec_concat:V2SI
16238	    (plusminus:SI
16239	      (vec_select:SI
16240		(match_operand:V4SI 1 "register_operand" "0,x")
16241		(parallel [(const_int 0)]))
16242	      (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
16243	    (plusminus:SI
16244	      (vec_select:SI (match_dup 1) (parallel [(const_int 2)]))
16245	      (vec_select:SI (match_dup 1) (parallel [(const_int 3)]))))
16246	  (vec_concat:V2SI
16247	    (plusminus:SI
16248	      (vec_select:SI
16249		(match_operand:V4SI 2 "vector_operand" "xBm,xm")
16250		(parallel [(const_int 0)]))
16251	      (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))
16252	    (plusminus:SI
16253	      (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
16254	      (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
16255  "TARGET_SSSE3"
16256  "@
16257   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16258   vph<plusminus_mnemonic>d\t{%2, %1, %0|%0, %1, %2}"
16259  [(set_attr "isa" "noavx,avx")
16260   (set_attr "type" "sseiadd")
16261   (set_attr "atom_unit" "complex")
16262   (set_attr "prefix_data16" "1,*")
16263   (set_attr "prefix_extra" "1")
16264   (set_attr "prefix" "orig,vex")
16265   (set_attr "mode" "TI")])
16266
16267(define_insn_and_split "ssse3_ph<plusminus_mnemonic>dv2si3"
16268  [(set (match_operand:V2SI 0 "register_operand" "=y,x,x")
16269	(vec_concat:V2SI
16270	  (plusminus:SI
16271	    (vec_select:SI
16272	      (match_operand:V2SI 1 "register_operand" "0,0,x")
16273	      (parallel [(const_int 0)]))
16274	    (vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
16275	  (plusminus:SI
16276	    (vec_select:SI
16277	      (match_operand:V2SI 2 "register_mmxmem_operand" "ym,x,x")
16278	      (parallel [(const_int 0)]))
16279	    (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
16280  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16281  "@
16282   ph<plusminus_mnemonic>d\t{%2, %0|%0, %2}
16283   #
16284   #"
16285  "TARGET_SSSE3 && reload_completed
16286   && SSE_REGNO_P (REGNO (operands[0]))"
16287  [(const_int 0)]
16288{
16289  /* Generate SSE version of the operation.  */
16290  rtx op0 = lowpart_subreg (V4SImode, operands[0],
16291			    GET_MODE (operands[0]));
16292  rtx op1 = lowpart_subreg (V4SImode, operands[1],
16293			    GET_MODE (operands[1]));
16294  rtx op2 = lowpart_subreg (V4SImode, operands[2],
16295			    GET_MODE (operands[2]));
16296  emit_insn (gen_ssse3_ph<plusminus_mnemonic>dv4si3 (op0, op1, op2));
16297  ix86_move_vector_high_sse_to_mmx (op0);
16298  DONE;
16299}
16300  [(set_attr "mmx_isa" "native,sse_noavx,avx")
16301   (set_attr "type" "sseiadd")
16302   (set_attr "atom_unit" "complex")
16303   (set_attr "prefix_extra" "1")
16304   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16305   (set_attr "mode" "DI,TI,TI")])
16306
16307(define_insn "avx2_pmaddubsw256"
16308  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
16309	(ss_plus:V16HI
16310	  (mult:V16HI
16311	    (zero_extend:V16HI
16312	      (vec_select:V16QI
16313		(match_operand:V32QI 1 "register_operand" "x,v")
16314		(parallel [(const_int 0) (const_int 2)
16315			   (const_int 4) (const_int 6)
16316			   (const_int 8) (const_int 10)
16317			   (const_int 12) (const_int 14)
16318			   (const_int 16) (const_int 18)
16319			   (const_int 20) (const_int 22)
16320			   (const_int 24) (const_int 26)
16321			   (const_int 28) (const_int 30)])))
16322	    (sign_extend:V16HI
16323	      (vec_select:V16QI
16324		(match_operand:V32QI 2 "nonimmediate_operand" "xm,vm")
16325		(parallel [(const_int 0) (const_int 2)
16326			   (const_int 4) (const_int 6)
16327			   (const_int 8) (const_int 10)
16328			   (const_int 12) (const_int 14)
16329			   (const_int 16) (const_int 18)
16330			   (const_int 20) (const_int 22)
16331			   (const_int 24) (const_int 26)
16332			   (const_int 28) (const_int 30)]))))
16333	  (mult:V16HI
16334	    (zero_extend:V16HI
16335	      (vec_select:V16QI (match_dup 1)
16336		(parallel [(const_int 1) (const_int 3)
16337			   (const_int 5) (const_int 7)
16338			   (const_int 9) (const_int 11)
16339			   (const_int 13) (const_int 15)
16340			   (const_int 17) (const_int 19)
16341			   (const_int 21) (const_int 23)
16342			   (const_int 25) (const_int 27)
16343			   (const_int 29) (const_int 31)])))
16344	    (sign_extend:V16HI
16345	      (vec_select:V16QI (match_dup 2)
16346		(parallel [(const_int 1) (const_int 3)
16347			   (const_int 5) (const_int 7)
16348			   (const_int 9) (const_int 11)
16349			   (const_int 13) (const_int 15)
16350			   (const_int 17) (const_int 19)
16351			   (const_int 21) (const_int 23)
16352			   (const_int 25) (const_int 27)
16353			   (const_int 29) (const_int 31)]))))))]
16354  "TARGET_AVX2"
16355  "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16356  [(set_attr "isa" "*,avx512bw")
16357   (set_attr "type" "sseiadd")
16358   (set_attr "prefix_extra" "1")
16359   (set_attr "prefix" "vex,evex")
16360   (set_attr "mode" "OI")])
16361
16362;; The correct representation for this is absolutely enormous, and
16363;; surely not generally useful.
16364(define_insn "avx512bw_pmaddubsw512<mode><mask_name>"
16365  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
	(unspec:VI2_AVX512VL
	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")]
	  UNSPEC_PMADDUBSW512))]
  "TARGET_AVX512BW"
  "vpmaddubsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16372  [(set_attr "type" "sseiadd")
16373   (set_attr "prefix" "evex")
16374   (set_attr "mode" "XI")])
16375
16376(define_insn "avx512bw_umulhrswv32hi3<mask_name>"
16377  [(set (match_operand:V32HI 0 "register_operand" "=v")
16378	(truncate:V32HI
16379	  (lshiftrt:V32SI
16380	    (plus:V32SI
16381	      (lshiftrt:V32SI
16382		(mult:V32SI
16383		  (sign_extend:V32SI
16384		    (match_operand:V32HI 1 "nonimmediate_operand" "%v"))
16385		  (sign_extend:V32SI
16386		    (match_operand:V32HI 2 "nonimmediate_operand" "vm")))
16387		(const_int 14))
16388	      (const_vector:V32HI [(const_int 1) (const_int 1)
16389				   (const_int 1) (const_int 1)
16390				   (const_int 1) (const_int 1)
16391				   (const_int 1) (const_int 1)
16392				   (const_int 1) (const_int 1)
16393				   (const_int 1) (const_int 1)
16394				   (const_int 1) (const_int 1)
16395				   (const_int 1) (const_int 1)
16396				   (const_int 1) (const_int 1)
16397				   (const_int 1) (const_int 1)
16398				   (const_int 1) (const_int 1)
16399				   (const_int 1) (const_int 1)
16400				   (const_int 1) (const_int 1)
16401				   (const_int 1) (const_int 1)
16402				   (const_int 1) (const_int 1)
16403				   (const_int 1) (const_int 1)]))
16404	    (const_int 1))))]
16405  "TARGET_AVX512BW"
16406  "vpmulhrsw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16407  [(set_attr "type" "sseimul")
16408   (set_attr "prefix" "evex")
16409   (set_attr "mode" "XI")])
16410
16411(define_insn "ssse3_pmaddubsw128"
16412  [(set (match_operand:V8HI 0 "register_operand" "=x,x,v")
16413	(ss_plus:V8HI
16414	  (mult:V8HI
16415	    (zero_extend:V8HI
16416	      (vec_select:V8QI
16417		(match_operand:V16QI 1 "register_operand" "0,x,v")
16418		(parallel [(const_int 0) (const_int 2)
16419			   (const_int 4) (const_int 6)
16420			   (const_int 8) (const_int 10)
16421			   (const_int 12) (const_int 14)])))
16422	    (sign_extend:V8HI
16423	      (vec_select:V8QI
16424		(match_operand:V16QI 2 "vector_operand" "xBm,xm,vm")
16425		(parallel [(const_int 0) (const_int 2)
16426			   (const_int 4) (const_int 6)
16427			   (const_int 8) (const_int 10)
16428			   (const_int 12) (const_int 14)]))))
16429	  (mult:V8HI
16430	    (zero_extend:V8HI
16431	      (vec_select:V8QI (match_dup 1)
16432		(parallel [(const_int 1) (const_int 3)
16433			   (const_int 5) (const_int 7)
16434			   (const_int 9) (const_int 11)
16435			   (const_int 13) (const_int 15)])))
16436	    (sign_extend:V8HI
16437	      (vec_select:V8QI (match_dup 2)
16438		(parallel [(const_int 1) (const_int 3)
16439			   (const_int 5) (const_int 7)
16440			   (const_int 9) (const_int 11)
16441			   (const_int 13) (const_int 15)]))))))]
16442  "TARGET_SSSE3"
16443  "@
16444   pmaddubsw\t{%2, %0|%0, %2}
16445   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}
16446   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16447  [(set_attr "isa" "noavx,avx,avx512bw")
16448   (set_attr "type" "sseiadd")
16449   (set_attr "atom_unit" "simul")
16450   (set_attr "prefix_data16" "1,*,*")
16451   (set_attr "prefix_extra" "1")
16452   (set_attr "prefix" "orig,vex,evex")
16453   (set_attr "mode" "TI")])
16454
16455(define_insn "ssse3_pmaddubsw"
16456  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16457	(ss_plus:V4HI
16458	  (mult:V4HI
16459	    (zero_extend:V4HI
16460	      (vec_select:V4QI
16461		(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16462		(parallel [(const_int 0) (const_int 2)
16463			   (const_int 4) (const_int 6)])))
16464	    (sign_extend:V4HI
16465	      (vec_select:V4QI
16466		(match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")
16467		(parallel [(const_int 0) (const_int 2)
16468			   (const_int 4) (const_int 6)]))))
16469	  (mult:V4HI
16470	    (zero_extend:V4HI
16471	      (vec_select:V4QI (match_dup 1)
16472		(parallel [(const_int 1) (const_int 3)
16473			   (const_int 5) (const_int 7)])))
16474	    (sign_extend:V4HI
16475	      (vec_select:V4QI (match_dup 2)
16476		(parallel [(const_int 1) (const_int 3)
16477			   (const_int 5) (const_int 7)]))))))]
16478  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16479  "@
16480   pmaddubsw\t{%2, %0|%0, %2}
16481   pmaddubsw\t{%2, %0|%0, %2}
16482   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
16483  [(set_attr "isa" "*,noavx,avx")
16484   (set_attr "mmx_isa" "native,*,*")
16485   (set_attr "type" "sseiadd")
16486   (set_attr "atom_unit" "simul")
16487   (set_attr "prefix_extra" "1")
16488   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16489   (set_attr "mode" "DI,TI,TI")])
16490
16491(define_mode_iterator PMULHRSW
16492  [V8HI (V16HI "TARGET_AVX2")])
16493
16494(define_expand "<ssse3_avx2>_pmulhrsw<mode>3_mask"
16495  [(set (match_operand:PMULHRSW 0 "register_operand")
16496	(vec_merge:PMULHRSW
16497	  (truncate:PMULHRSW
16498	    (lshiftrt:<ssedoublemode>
16499	      (plus:<ssedoublemode>
16500	        (lshiftrt:<ssedoublemode>
16501		  (mult:<ssedoublemode>
16502		    (sign_extend:<ssedoublemode>
16503		      (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16504		    (sign_extend:<ssedoublemode>
16505		      (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16506		  (const_int 14))
16507	        (match_dup 5))
16508	      (const_int 1)))
16509	  (match_operand:PMULHRSW 3 "register_operand")
16510	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
16511  "TARGET_AVX512BW && TARGET_AVX512VL"
16512{
16513  operands[5] = CONST1_RTX(<MODE>mode);
16514  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16515})
16516
16517(define_expand "<ssse3_avx2>_pmulhrsw<mode>3"
16518  [(set (match_operand:PMULHRSW 0 "register_operand")
16519	(truncate:PMULHRSW
16520	  (lshiftrt:<ssedoublemode>
16521	    (plus:<ssedoublemode>
16522	      (lshiftrt:<ssedoublemode>
16523		(mult:<ssedoublemode>
16524		  (sign_extend:<ssedoublemode>
16525		    (match_operand:PMULHRSW 1 "nonimmediate_operand"))
16526		  (sign_extend:<ssedoublemode>
16527		    (match_operand:PMULHRSW 2 "nonimmediate_operand")))
16528		(const_int 14))
16529	      (match_dup 3))
16530	    (const_int 1))))]
16531  "TARGET_SSSE3"
16532{
16533  operands[3] = CONST1_RTX(<MODE>mode);
16534  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16535})
16536
16537(define_expand "smulhrs<mode>3"
16538  [(set (match_operand:VI2_AVX2 0 "register_operand")
16539	(truncate:VI2_AVX2
16540	  (lshiftrt:<ssedoublemode>
16541	    (plus:<ssedoublemode>
16542	      (lshiftrt:<ssedoublemode>
16543		(mult:<ssedoublemode>
16544		  (sign_extend:<ssedoublemode>
16545		    (match_operand:VI2_AVX2 1 "nonimmediate_operand"))
16546		  (sign_extend:<ssedoublemode>
16547		    (match_operand:VI2_AVX2 2 "nonimmediate_operand")))
16548		(const_int 14))
16549	      (match_dup 3))
16550	    (const_int 1))))]
16551  "TARGET_SSSE3"
16552{
16553  operands[3] = CONST1_RTX(<MODE>mode);
16554  ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
16555})
16556
16557(define_insn "*<ssse3_avx2>_pmulhrsw<mode>3<mask_name>"
16558  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,x,v")
16559	(truncate:VI2_AVX2
16560	  (lshiftrt:<ssedoublemode>
16561	    (plus:<ssedoublemode>
16562	      (lshiftrt:<ssedoublemode>
16563		(mult:<ssedoublemode>
16564		  (sign_extend:<ssedoublemode>
16565		    (match_operand:VI2_AVX2 1 "vector_operand" "%0,x,v"))
16566		  (sign_extend:<ssedoublemode>
16567		    (match_operand:VI2_AVX2 2 "vector_operand" "xBm,xm,vm")))
16568		(const_int 14))
16569	      (match_operand:VI2_AVX2 3 "const1_operand"))
16570	    (const_int 1))))]
16571  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
16572   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16573  "@
16574   pmulhrsw\t{%2, %0|%0, %2}
16575   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}
16576   vpmulhrsw\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
16577  [(set_attr "isa" "noavx,avx,avx512bw")
16578   (set_attr "type" "sseimul")
16579   (set_attr "prefix_data16" "1,*,*")
16580   (set_attr "prefix_extra" "1")
16581   (set_attr "prefix" "orig,maybe_evex,evex")
16582   (set_attr "mode" "<sseinsnmode>")])
16583
16584(define_expand "smulhrsv4hi3"
16585  [(set (match_operand:V4HI 0 "register_operand")
16586	(truncate:V4HI
16587	  (lshiftrt:V4SI
16588	    (plus:V4SI
16589	      (lshiftrt:V4SI
16590		(mult:V4SI
16591		  (sign_extend:V4SI
16592		    (match_operand:V4HI 1 "register_operand"))
16593		  (sign_extend:V4SI
16594		    (match_operand:V4HI 2 "register_operand")))
16595		(const_int 14))
16596	      (match_dup 3))
16597	    (const_int 1))))]
16598  "TARGET_MMX_WITH_SSE && TARGET_SSSE3"
16599{
16600  operands[3] = CONST1_RTX(V4HImode);
16601  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16602})
16603
16604(define_expand "ssse3_pmulhrswv4hi3"
16605  [(set (match_operand:V4HI 0 "register_operand")
16606	(truncate:V4HI
16607	  (lshiftrt:V4SI
16608	    (plus:V4SI
16609	      (lshiftrt:V4SI
16610		(mult:V4SI
16611		  (sign_extend:V4SI
16612		    (match_operand:V4HI 1 "register_mmxmem_operand"))
16613		  (sign_extend:V4SI
16614		    (match_operand:V4HI 2 "register_mmxmem_operand")))
16615		(const_int 14))
16616	      (match_dup 3))
16617	    (const_int 1))))]
16618  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16619{
16620  operands[3] = CONST1_RTX(V4HImode);
16621  ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);
16622})
16623
16624(define_insn "*ssse3_pmulhrswv4hi3"
16625  [(set (match_operand:V4HI 0 "register_operand" "=y,x,Yv")
16626	(truncate:V4HI
16627	  (lshiftrt:V4SI
16628	    (plus:V4SI
16629	      (lshiftrt:V4SI
16630		(mult:V4SI
16631		  (sign_extend:V4SI
16632		    (match_operand:V4HI 1 "register_mmxmem_operand" "%0,0,Yv"))
16633		  (sign_extend:V4SI
16634		    (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yv")))
16635		(const_int 14))
16636	      (match_operand:V4HI 3 "const1_operand"))
16637	    (const_int 1))))]
16638  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
16639   && TARGET_SSSE3
16640   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
16641  "@
16642   pmulhrsw\t{%2, %0|%0, %2}
16643   pmulhrsw\t{%2, %0|%0, %2}
16644   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
16645  [(set_attr "isa" "*,noavx,avx")
16646   (set_attr "mmx_isa" "native,*,*")
16647   (set_attr "type" "sseimul")
16648   (set_attr "prefix_extra" "1")
16649   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16650   (set_attr "mode" "DI,TI,TI")])
16651
16652(define_insn "<ssse3_avx2>_pshufb<mode>3<mask_name>"
16653  [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
16654	(unspec:VI1_AVX512
16655	  [(match_operand:VI1_AVX512 1 "register_operand" "0,x,v")
16656	   (match_operand:VI1_AVX512 2 "vector_operand" "xBm,xm,vm")]
16657	  UNSPEC_PSHUFB))]
16658  "TARGET_SSSE3 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
16659  "@
16660   pshufb\t{%2, %0|%0, %2}
16661   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
16662   vpshufb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
16663  [(set_attr "isa" "noavx,avx,avx512bw")
16664   (set_attr "type" "sselog1")
16665   (set_attr "prefix_data16" "1,*,*")
16666   (set_attr "prefix_extra" "1")
16667   (set_attr "prefix" "orig,maybe_evex,evex")
16668   (set_attr "btver2_decode" "vector")
16669   (set_attr "mode" "<sseinsnmode>")])
16670
16671(define_insn_and_split "ssse3_pshufbv8qi3"
16672  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
16673	(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,0,Yv")
16674		      (match_operand:V8QI 2 "register_mmxmem_operand" "ym,x,Yv")]
16675		     UNSPEC_PSHUFB))
16676   (clobber (match_scratch:V4SI 3 "=X,&x,&Yv"))]
16677  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16678  "@
16679   pshufb\t{%2, %0|%0, %2}
16680   #
16681   #"
16682  "TARGET_SSSE3 && reload_completed
16683   && SSE_REGNO_P (REGNO (operands[0]))"
16684  [(set (match_dup 3) (match_dup 5))
16685   (set (match_dup 3)
16686	(and:V4SI (match_dup 3) (match_dup 2)))
16687   (set (match_dup 0)
16688	(unspec:V16QI [(match_dup 1) (match_dup 4)] UNSPEC_PSHUFB))]
16689{
  /* Emulate the MMX version of pshufb with the SSE version by masking
     out bit 3 of each shuffle control byte (see the note after this
     pattern).  */
16692  operands[0] = lowpart_subreg (V16QImode, operands[0],
16693				GET_MODE (operands[0]));
16694  operands[1] = lowpart_subreg (V16QImode, operands[1],
16695				GET_MODE (operands[1]));
16696  operands[2] = lowpart_subreg (V4SImode, operands[2],
16697				GET_MODE (operands[2]));
16698  operands[4] = lowpart_subreg (V16QImode, operands[3],
16699				GET_MODE (operands[3]));
16700  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
16701			 GEN_INT (0xf7f7f7f7),
16702			 GEN_INT (0xf7f7f7f7),
16703			 GEN_INT (0xf7f7f7f7));
16704  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
16705  operands[5] = force_const_mem (V4SImode, vec_const);
16706}
16707  [(set_attr "mmx_isa" "native,sse_noavx,avx")
16708   (set_attr "prefix_extra" "1")
16709   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16710   (set_attr "mode" "DI,TI,TI")])
16711
16712(define_insn "<ssse3_avx2>_psign<mode>3"
16713  [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
16714	(unspec:VI124_AVX2
16715	  [(match_operand:VI124_AVX2 1 "register_operand" "0,x")
16716	   (match_operand:VI124_AVX2 2 "vector_operand" "xBm,xm")]
16717	  UNSPEC_PSIGN))]
16718  "TARGET_SSSE3"
16719  "@
16720   psign<ssemodesuffix>\t{%2, %0|%0, %2}
16721   vpsign<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
16722  [(set_attr "isa" "noavx,avx")
16723   (set_attr "type" "sselog1")
16724   (set_attr "prefix_data16" "1,*")
16725   (set_attr "prefix_extra" "1")
16726   (set_attr "prefix" "orig,vex")
16727   (set_attr "mode" "<sseinsnmode>")])
16728
16729(define_insn "ssse3_psign<mode>3"
16730  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x")
16731	(unspec:MMXMODEI
16732	  [(match_operand:MMXMODEI 1 "register_operand" "0,0,x")
16733	   (match_operand:MMXMODEI 2 "register_mmxmem_operand" "ym,x,x")]
16734	  UNSPEC_PSIGN))]
16735  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16736  "@
16737   psign<mmxvecsize>\t{%2, %0|%0, %2}
16738   psign<mmxvecsize>\t{%2, %0|%0, %2}
16739   vpsign<mmxvecsize>\t{%2, %1, %0|%0, %1, %2}"
16740  [(set_attr "isa" "*,noavx,avx")
16741   (set_attr "mmx_isa" "native,*,*")
16742   (set_attr "type" "sselog1")
16743   (set_attr "prefix_extra" "1")
16744   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16745   (set_attr "mode" "DI,TI,TI")])
16746
16747(define_insn "<ssse3_avx2>_palignr<mode>_mask"
16748  [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
16749        (vec_merge:VI1_AVX512
16750	  (unspec:VI1_AVX512
16751	    [(match_operand:VI1_AVX512 1 "register_operand" "v")
16752	     (match_operand:VI1_AVX512 2 "nonimmediate_operand" "vm")
16753	     (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
16754	    UNSPEC_PALIGNR)
	  (match_operand:VI1_AVX512 4 "nonimm_or_0_operand" "0C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
16757  "TARGET_AVX512BW && (<MODE_SIZE> == 64 || TARGET_AVX512VL)"
16758{
16759  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16760  return "vpalignr\t{%3, %2, %1, %0%{%5%}%N4|%0%{%5%}%N4, %1, %2, %3}";
16761}
16762  [(set_attr "type" "sseishft")
16763   (set_attr "atom_unit" "sishuf")
16764   (set_attr "prefix_extra" "1")
16765   (set_attr "length_immediate" "1")
16766   (set_attr "prefix" "evex")
16767   (set_attr "mode" "<sseinsnmode>")])
16768
16769(define_insn "<ssse3_avx2>_palignr<mode>"
16770  [(set (match_operand:SSESCALARMODE 0 "register_operand" "=x,x,v")
16771	(unspec:SSESCALARMODE
16772	  [(match_operand:SSESCALARMODE 1 "register_operand" "0,x,v")
16773	   (match_operand:SSESCALARMODE 2 "vector_operand" "xBm,xm,vm")
16774	   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16775	  UNSPEC_PALIGNR))]
16776  "TARGET_SSSE3"
16777{
16778  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16779
16780  switch (which_alternative)
16781    {
16782    case 0:
16783      return "palignr\t{%3, %2, %0|%0, %2, %3}";
16784    case 1:
16785    case 2:
16786      return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
16787    default:
16788      gcc_unreachable ();
16789    }
16790}
16791  [(set_attr "isa" "noavx,avx,avx512bw")
16792   (set_attr "type" "sseishft")
16793   (set_attr "atom_unit" "sishuf")
16794   (set_attr "prefix_data16" "1,*,*")
16795   (set_attr "prefix_extra" "1")
16796   (set_attr "length_immediate" "1")
16797   (set_attr "prefix" "orig,vex,evex")
16798   (set_attr "mode" "<sseinsnmode>")])
16799
16800(define_insn_and_split "ssse3_palignrdi"
16801  [(set (match_operand:DI 0 "register_operand" "=y,x,Yv")
16802	(unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yv")
16803		    (match_operand:DI 2 "register_mmxmem_operand" "ym,x,Yv")
16804		    (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
16805		   UNSPEC_PALIGNR))]
16806  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16807{
16808  switch (which_alternative)
16809    {
16810    case 0:
16811      operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
16812      return "palignr\t{%3, %2, %0|%0, %2, %3}";
16813    case 1:
16814    case 2:
16815      return "#";
16816    default:
16817      gcc_unreachable ();
16818    }
16819}
16820  "TARGET_SSSE3 && reload_completed
16821   && SSE_REGNO_P (REGNO (operands[0]))"
16822  [(set (match_dup 0)
16823	(lshiftrt:V1TI (match_dup 0) (match_dup 3)))]
16824{
16825  /* Emulate MMX palignrdi with SSE psrldq.  */
16826  rtx op0 = lowpart_subreg (V2DImode, operands[0],
16827			    GET_MODE (operands[0]));
16828  if (TARGET_AVX)
16829    emit_insn (gen_vec_concatv2di (op0, operands[2], operands[1]));
16830  else
16831    {
16832      /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
16833      emit_insn (gen_vec_concatv2di (op0, operands[1], operands[2]));
16834      /* Swap bits 0:63 with bits 64:127.  */
16835      rtx mask = gen_rtx_PARALLEL (VOIDmode,
16836				   gen_rtvec (4, GEN_INT (2),
16837					      GEN_INT (3),
16838					      GEN_INT (0),
16839					      GEN_INT (1)));
16840      rtx op1 = lowpart_subreg (V4SImode, op0, GET_MODE (op0));
16841      rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
16842      emit_insn (gen_rtx_SET (op1, op2));
16843    }
16844  operands[0] = lowpart_subreg (V1TImode, op0, GET_MODE (op0));
16845}
16846  [(set_attr "mmx_isa" "native,sse_noavx,avx")
16847   (set_attr "type" "sseishft")
16848   (set_attr "atom_unit" "sishuf")
16849   (set_attr "prefix_extra" "1")
16850   (set_attr "length_immediate" "1")
16851   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16852   (set_attr "mode" "DI,TI,TI")])
16853
;; Mode iterator to handle the absence of the V2DI and V4DI modes for
;; the abs instruction on pre-AVX512 targets.
16856(define_mode_iterator VI1248_AVX512VL_AVX512BW
16857  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
16858   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
16859   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
16860   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
16861
16862(define_insn "*abs<mode>2"
16863  [(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand" "=v")
16864	(abs:VI1248_AVX512VL_AVX512BW
16865	  (match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand" "vBm")))]
16866  "TARGET_SSSE3"
16867  "%vpabs<ssemodesuffix>\t{%1, %0|%0, %1}"
16868  [(set_attr "type" "sselog1")
16869   (set_attr "prefix_data16" "1")
16870   (set_attr "prefix_extra" "1")
16871   (set_attr "prefix" "maybe_vex")
16872   (set_attr "mode" "<sseinsnmode>")])
16873
16874(define_insn "abs<mode>2_mask"
16875  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
16876	(vec_merge:VI48_AVX512VL
16877	  (abs:VI48_AVX512VL
16878	    (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm"))
16879	  (match_operand:VI48_AVX512VL 2 "nonimm_or_0_operand" "0C")
16880	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16881  "TARGET_AVX512F"
16882  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16883  [(set_attr "type" "sselog1")
16884   (set_attr "prefix" "evex")
16885   (set_attr "mode" "<sseinsnmode>")])
16886
16887(define_insn "abs<mode>2_mask"
16888  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
16889	(vec_merge:VI12_AVX512VL
16890	  (abs:VI12_AVX512VL
16891	    (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm"))
16892	  (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
16893	  (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
16894  "TARGET_AVX512BW"
16895  "vpabs<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
16896  [(set_attr "type" "sselog1")
16897   (set_attr "prefix" "evex")
16898   (set_attr "mode" "<sseinsnmode>")])
16899
16900(define_expand "abs<mode>2"
16901  [(set (match_operand:VI_AVX2 0 "register_operand")
16902	(abs:VI_AVX2
16903	  (match_operand:VI_AVX2 1 "vector_operand")))]
16904  "TARGET_SSE2"
16905{
16906  if (!TARGET_SSSE3
16907      || ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
16908	  && !TARGET_AVX512VL))
16909    {
16910      ix86_expand_sse2_abs (operands[0], operands[1]);
16911      DONE;
16912    }
16913})
16914
16915(define_insn "ssse3_abs<mode>2"
16916  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yv")
16917	(abs:MMXMODEI
16918	  (match_operand:MMXMODEI 1 "register_mmxmem_operand" "ym,Yv")))]
16919  "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSSE3"
16920  "@
16921   pabs<mmxvecsize>\t{%1, %0|%0, %1}
16922   %vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
16923  [(set_attr "mmx_isa" "native,*")
16924   (set_attr "type" "sselog1")
16925   (set_attr "prefix_rep" "0")
16926   (set_attr "prefix_extra" "1")
16927   (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
16928   (set_attr "mode" "DI,TI")])
16929
16930(define_expand "abs<mode>2"
16931  [(set (match_operand:MMXMODEI 0 "register_operand")
16932	(abs:MMXMODEI
16933	  (match_operand:MMXMODEI 1 "register_operand")))]
16934  "TARGET_MMX_WITH_SSE && TARGET_SSSE3")
16935
16936;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16937;;
16938;; AMD SSE4A instructions
16939;;
16940;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
16941
16942(define_insn "sse4a_movnt<mode>"
16943  [(set (match_operand:MODEF 0 "memory_operand" "=m")
16944	(unspec:MODEF
16945	  [(match_operand:MODEF 1 "register_operand" "x")]
16946	  UNSPEC_MOVNT))]
16947  "TARGET_SSE4A"
16948  "movnt<ssemodesuffix>\t{%1, %0|%0, %1}"
16949  [(set_attr "type" "ssemov")
16950   (set_attr "mode" "<MODE>")])
16951
16952(define_insn "sse4a_vmmovnt<mode>"
16953  [(set (match_operand:<ssescalarmode> 0 "memory_operand" "=m")
16954	(unspec:<ssescalarmode>
16955	  [(vec_select:<ssescalarmode>
16956	     (match_operand:VF_128 1 "register_operand" "x")
16957	     (parallel [(const_int 0)]))]
16958	  UNSPEC_MOVNT))]
16959  "TARGET_SSE4A"
16960  "movnt<ssescalarmodesuffix>\t{%1, %0|%0, %1}"
16961  [(set_attr "type" "ssemov")
16962   (set_attr "mode" "<ssescalarmode>")])
16963
16964(define_insn "sse4a_extrqi"
16965  [(set (match_operand:V2DI 0 "register_operand" "=x")
16966	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16967		      (match_operand 2 "const_0_to_255_operand")
16968		      (match_operand 3 "const_0_to_255_operand")]
16969		     UNSPEC_EXTRQI))]
16970  "TARGET_SSE4A"
16971  "extrq\t{%3, %2, %0|%0, %2, %3}"
16972  [(set_attr "type" "sse")
16973   (set_attr "prefix_data16" "1")
16974   (set_attr "length_immediate" "2")
16975   (set_attr "mode" "TI")])
16976
16977(define_insn "sse4a_extrq"
16978  [(set (match_operand:V2DI 0 "register_operand" "=x")
16979	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16980		      (match_operand:V16QI 2 "register_operand" "x")]
16981		     UNSPEC_EXTRQ))]
16982  "TARGET_SSE4A"
16983  "extrq\t{%2, %0|%0, %2}"
16984  [(set_attr "type" "sse")
16985   (set_attr "prefix_data16" "1")
16986   (set_attr "mode" "TI")])
16987
16988(define_insn "sse4a_insertqi"
16989  [(set (match_operand:V2DI 0 "register_operand" "=x")
16990	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
16991		      (match_operand:V2DI 2 "register_operand" "x")
16992		      (match_operand 3 "const_0_to_255_operand")
16993		      (match_operand 4 "const_0_to_255_operand")]
16994		     UNSPEC_INSERTQI))]
16995  "TARGET_SSE4A"
16996  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
16997  [(set_attr "type" "sseins")
16998   (set_attr "prefix_data16" "0")
16999   (set_attr "prefix_rep" "1")
17000   (set_attr "length_immediate" "2")
17001   (set_attr "mode" "TI")])
17002
17003(define_insn "sse4a_insertq"
17004  [(set (match_operand:V2DI 0 "register_operand" "=x")
17005	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
17006		      (match_operand:V2DI 2 "register_operand" "x")]
17007		     UNSPEC_INSERTQ))]
17008  "TARGET_SSE4A"
17009  "insertq\t{%2, %0|%0, %2}"
17010  [(set_attr "type" "sseins")
17011   (set_attr "prefix_data16" "0")
17012   (set_attr "prefix_rep" "1")
17013   (set_attr "mode" "TI")])
17014
17015;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17016;;
17017;; Intel SSE4.1 instructions
17018;;
17019;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
17020
17021;; Mapping of immediate bits for blend instructions
17022(define_mode_attr blendbits
17023  [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
17024
17025(define_insn "<sse4_1>_blend<ssemodesuffix><avxsizesuffix>"
17026  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17027	(vec_merge:VF_128_256
17028	  (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17029	  (match_operand:VF_128_256 1 "register_operand" "0,0,x")
17030	  (match_operand:SI 3 "const_0_to_<blendbits>_operand")))]
17031  "TARGET_SSE4_1"
17032  "@
17033   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17034   blend<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17035   vblend<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17036  [(set_attr "isa" "noavx,noavx,avx")
17037   (set_attr "type" "ssemov")
17038   (set_attr "length_immediate" "1")
17039   (set_attr "prefix_data16" "1,1,*")
17040   (set_attr "prefix_extra" "1")
17041   (set_attr "prefix" "orig,orig,vex")
17042   (set_attr "mode" "<MODE>")])
17043
17044(define_insn "<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>"
17045  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17046	(unspec:VF_128_256
17047	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17048	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17049	   (match_operand:VF_128_256 3 "register_operand" "Yz,Yz,x")]
17050	  UNSPEC_BLENDV))]
17051  "TARGET_SSE4_1"
17052  "@
17053   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17054   blendv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17055   vblendv<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17056  [(set_attr "isa" "noavx,noavx,avx")
17057   (set_attr "type" "ssemov")
17058   (set_attr "length_immediate" "1")
17059   (set_attr "prefix_data16" "1,1,*")
17060   (set_attr "prefix_extra" "1")
17061   (set_attr "prefix" "orig,orig,vex")
17062   (set_attr "btver2_decode" "vector,vector,vector") 
17063   (set_attr "mode" "<MODE>")])
17064
;; Also define scalar versions.  These are used for conditional moves.
;; Using subregs into vector modes causes register allocation lossage.
;; These patterns do not allow memory operands because the native
;; instructions read the full 128 bits.
17069
17070(define_insn "sse4_1_blendv<ssemodesuffix>"
17071  [(set (match_operand:MODEF 0 "register_operand" "=Yr,*x,x")
17072	(unspec:MODEF
17073	  [(match_operand:MODEF 1 "register_operand" "0,0,x")
17074	   (match_operand:MODEF 2 "register_operand" "Yr,*x,x")
17075	   (match_operand:MODEF 3 "register_operand" "Yz,Yz,x")]
17076	  UNSPEC_BLENDV))]
17077  "TARGET_SSE4_1"
17078{
17079  if (get_attr_mode (insn) == MODE_V4SF)
17080    return (which_alternative == 2
17081	    ? "vblendvps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17082	    : "blendvps\t{%3, %2, %0|%0, %2, %3}");
17083  else
17084    return (which_alternative == 2
17085	    ? "vblendv<ssevecmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17086	    : "blendv<ssevecmodesuffix>\t{%3, %2, %0|%0, %2, %3}");
17087}
17088  [(set_attr "isa" "noavx,noavx,avx")
17089   (set_attr "type" "ssemov")
17090   (set_attr "length_immediate" "1")
17091   (set_attr "prefix_data16" "1,1,*")
17092   (set_attr "prefix_extra" "1")
17093   (set_attr "prefix" "orig,orig,vex")
17094   (set_attr "btver2_decode" "vector,vector,vector") 
17095   (set (attr "mode")
17096	(cond [(match_test "TARGET_AVX")
17097		 (const_string "<ssevecmode>")
17098	       (match_test "optimize_function_for_size_p (cfun)")
17099		 (const_string "V4SF")
17100	       (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
17101		 (const_string "V4SF")
17102	      ]
17103	      (const_string "<ssevecmode>")))])
17104
17105(define_insn_and_split "*<sse4_1>_blendv<ssemodesuffix><avxsizesuffix>_lt"
17106  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17107	(unspec:VF_128_256
17108	  [(match_operand:VF_128_256 1 "register_operand" "0,0,x")
17109	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17110	   (lt:VF_128_256
17111	     (match_operand:<sseintvecmode> 3 "register_operand" "Yz,Yz,x")
17112	     (match_operand:<sseintvecmode> 4 "const0_operand" "C,C,C"))]
17113	  UNSPEC_BLENDV))]
17114  "TARGET_SSE4_1"
17115  "#"
17116  "&& reload_completed"
17117  [(set (match_dup 0)
17118	(unspec:VF_128_256
17119	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17120  "operands[3] = gen_lowpart (<MODE>mode, operands[3]);"
17121  [(set_attr "isa" "noavx,noavx,avx")
17122   (set_attr "type" "ssemov")
17123   (set_attr "length_immediate" "1")
17124   (set_attr "prefix_data16" "1,1,*")
17125   (set_attr "prefix_extra" "1")
17126   (set_attr "prefix" "orig,orig,vex")
17127   (set_attr "btver2_decode" "vector,vector,vector") 
17128   (set_attr "mode" "<MODE>")])
17129
17130(define_mode_attr ssefltmodesuffix
17131  [(V2DI "pd") (V4DI "pd") (V4SI "ps") (V8SI "ps")])
17132
17133(define_mode_attr ssefltvecmode
17134  [(V2DI "V2DF") (V4DI "V4DF") (V4SI "V4SF") (V8SI "V8SF")])
17135
17136(define_insn_and_split "*<sse4_1>_blendv<ssefltmodesuffix><avxsizesuffix>_ltint"
17137  [(set (match_operand:<ssebytemode> 0 "register_operand" "=Yr,*x,x")
17138	(unspec:<ssebytemode>
17139	  [(match_operand:<ssebytemode> 1 "register_operand" "0,0,x")
17140	   (match_operand:<ssebytemode> 2 "vector_operand" "YrBm,*xBm,xm")
17141	   (subreg:<ssebytemode>
17142	     (lt:VI48_AVX
17143	       (match_operand:VI48_AVX 3 "register_operand" "Yz,Yz,x")
17144	       (match_operand:VI48_AVX 4 "const0_operand" "C,C,C")) 0)]
17145	  UNSPEC_BLENDV))]
17146  "TARGET_SSE4_1"
17147  "#"
17148  "&& reload_completed"
17149  [(set (match_dup 0)
17150	(unspec:<ssefltvecmode>
17151	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17152{
17153  operands[0] = gen_lowpart (<ssefltvecmode>mode, operands[0]);
17154  operands[1] = gen_lowpart (<ssefltvecmode>mode, operands[1]);
17155  operands[2] = gen_lowpart (<ssefltvecmode>mode, operands[2]);
17156  operands[3] = gen_lowpart (<ssefltvecmode>mode, operands[3]);
17157}
17158  [(set_attr "isa" "noavx,noavx,avx")
17159   (set_attr "type" "ssemov")
17160   (set_attr "length_immediate" "1")
17161   (set_attr "prefix_data16" "1,1,*")
17162   (set_attr "prefix_extra" "1")
17163   (set_attr "prefix" "orig,orig,vex")
17164   (set_attr "btver2_decode" "vector,vector,vector") 
17165   (set_attr "mode" "<ssefltvecmode>")])
17166
17167(define_insn "<sse4_1>_dp<ssemodesuffix><avxsizesuffix>"
17168  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17169	(unspec:VF_128_256
17170	  [(match_operand:VF_128_256 1 "vector_operand" "%0,0,x")
17171	   (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm")
17172	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17173	  UNSPEC_DP))]
17174  "TARGET_SSE4_1"
17175  "@
17176   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17177   dp<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
17178   vdp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17179  [(set_attr "isa" "noavx,noavx,avx")
17180   (set_attr "type" "ssemul")
17181   (set_attr "length_immediate" "1")
17182   (set_attr "prefix_data16" "1,1,*")
17183   (set_attr "prefix_extra" "1")
17184   (set_attr "prefix" "orig,orig,vex")
17185   (set_attr "btver2_decode" "vector,vector,vector")
17186   (set_attr "znver1_decode" "vector,vector,vector")
17187   (set_attr "mode" "<MODE>")])
17188
;; Mode attribute used by the `vmovntdqa' pattern.
17190(define_mode_attr vi8_sse4_1_avx2_avx512
17191   [(V2DI "sse4_1") (V4DI "avx2") (V8DI "avx512f")])
17192
17193(define_insn "<vi8_sse4_1_avx2_avx512>_movntdqa"
17194  [(set (match_operand:VI8_AVX2_AVX512F 0 "register_operand" "=Yr,*x,v")
17195	(unspec:VI8_AVX2_AVX512F [(match_operand:VI8_AVX2_AVX512F 1 "memory_operand" "m,m,m")]
17196		     UNSPEC_MOVNTDQA))]
17197  "TARGET_SSE4_1"
17198  "%vmovntdqa\t{%1, %0|%0, %1}"
17199  [(set_attr "isa" "noavx,noavx,avx")
17200   (set_attr "type" "ssemov")
17201   (set_attr "prefix_extra" "1,1,*")
17202   (set_attr "prefix" "orig,orig,maybe_evex")
17203   (set_attr "mode" "<sseinsnmode>")])
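
;; At the source level the non-temporal load above corresponds to the
;; streaming-load intrinsics, e.g. (illustration only, user code assumed):
;;
;;   #include <smmintrin.h>
;;   __m128i stream_load (__m128i *p)
;;   {
;;     return _mm_stream_load_si128 (p);   /* movntdqa */
;;   }
;;
;; The UNSPEC keeps the optimizers from turning this into an ordinary
;; vector load, since movntdqa carries a non-temporal hint.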
17204
17205(define_insn "<sse4_1_avx2>_mpsadbw"
17206  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17207	(unspec:VI1_AVX2
17208	  [(match_operand:VI1_AVX2 1 "register_operand" "0,0,x")
17209	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17210	   (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")]
17211	  UNSPEC_MPSADBW))]
17212  "TARGET_SSE4_1"
17213  "@
17214   mpsadbw\t{%3, %2, %0|%0, %2, %3}
17215   mpsadbw\t{%3, %2, %0|%0, %2, %3}
17216   vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17217  [(set_attr "isa" "noavx,noavx,avx")
17218   (set_attr "type" "sselog1")
17219   (set_attr "length_immediate" "1")
17220   (set_attr "prefix_extra" "1")
17221   (set_attr "prefix" "orig,orig,vex")
17222   (set_attr "btver2_decode" "vector,vector,vector")
17223   (set_attr "znver1_decode" "vector,vector,vector")
17224   (set_attr "mode" "<sseinsnmode>")])
17225
17226(define_insn "<sse4_1_avx2>_packusdw<mask_name>"
17227  [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,x,v")
17228	(vec_concat:VI2_AVX2
17229	  (us_truncate:<ssehalfvecmode>
17230	    (match_operand:<sseunpackmode> 1 "register_operand" "0,0,x,v"))
17231	  (us_truncate:<ssehalfvecmode>
17232	    (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,xm,vm"))))]
17233  "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
17234  "@
17235   packusdw\t{%2, %0|%0, %2}
17236   packusdw\t{%2, %0|%0, %2}
17237   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
17238   vpackusdw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
17239  [(set_attr "isa" "noavx,noavx,avx,avx512bw")
17240   (set_attr "type" "sselog")
17241   (set_attr "prefix_extra" "1")
17242   (set_attr "prefix" "orig,orig,<mask_prefix>,evex")
17243   (set_attr "mode" "<sseinsnmode>")])
17244
17245(define_insn "<sse4_1_avx2>_pblendvb"
17246  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17247	(unspec:VI1_AVX2
17248	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
17249	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17250	   (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")]
17251	  UNSPEC_BLENDV))]
17252  "TARGET_SSE4_1"
17253  "@
17254   pblendvb\t{%3, %2, %0|%0, %2, %3}
17255   pblendvb\t{%3, %2, %0|%0, %2, %3}
17256   vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17257  [(set_attr "isa" "noavx,noavx,avx")
17258   (set_attr "type" "ssemov")
17259   (set_attr "prefix_extra" "1")
17260   (set_attr "length_immediate" "*,*,1")
17261   (set_attr "prefix" "orig,orig,vex")
17262   (set_attr "btver2_decode" "vector,vector,vector")
17263   (set_attr "mode" "<sseinsnmode>")])
17264
17265(define_insn_and_split "*<sse4_1_avx2>_pblendvb_lt"
17266  [(set (match_operand:VI1_AVX2 0 "register_operand" "=Yr,*x,x")
17267	(unspec:VI1_AVX2
17268	  [(match_operand:VI1_AVX2 1 "register_operand"  "0,0,x")
17269	   (match_operand:VI1_AVX2 2 "vector_operand" "YrBm,*xBm,xm")
17270	   (lt:VI1_AVX2 (match_operand:VI1_AVX2 3 "register_operand" "Yz,Yz,x")
17271			(match_operand:VI1_AVX2 4 "const0_operand" "C,C,C"))]
17272	  UNSPEC_BLENDV))]
17273  "TARGET_SSE4_1"
17274  "#"
17275  ""
17276  [(set (match_dup 0)
17277	(unspec:VI1_AVX2
17278	 [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_BLENDV))]
17279  ""
17280  [(set_attr "isa" "noavx,noavx,avx")
17281   (set_attr "type" "ssemov")
17282   (set_attr "prefix_extra" "1")
17283   (set_attr "length_immediate" "*,*,1")
17284   (set_attr "prefix" "orig,orig,vex")
17285   (set_attr "btver2_decode" "vector,vector,vector")
17286   (set_attr "mode" "<sseinsnmode>")])
17287
17288(define_insn "sse4_1_pblendw"
17289  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17290	(vec_merge:V8HI
17291	  (match_operand:V8HI 2 "vector_operand" "YrBm,*xBm,xm")
17292	  (match_operand:V8HI 1 "register_operand" "0,0,x")
17293	  (match_operand:SI 3 "const_0_to_255_operand" "n,n,n")))]
17294  "TARGET_SSE4_1"
17295  "@
17296   pblendw\t{%3, %2, %0|%0, %2, %3}
17297   pblendw\t{%3, %2, %0|%0, %2, %3}
17298   vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17299  [(set_attr "isa" "noavx,noavx,avx")
17300   (set_attr "type" "ssemov")
17301   (set_attr "prefix_extra" "1")
17302   (set_attr "length_immediate" "1")
17303   (set_attr "prefix" "orig,orig,vex")
17304   (set_attr "mode" "TI")])
17305
;; The builtin takes an 8-bit immediate that applies to both 128-bit lanes.
;; Expand it into the 16-bit mask that the V16HI vec_merge requires.
17307(define_expand "avx2_pblendw"
17308  [(set (match_operand:V16HI 0 "register_operand")
17309	(vec_merge:V16HI
17310	  (match_operand:V16HI 2 "nonimmediate_operand")
17311	  (match_operand:V16HI 1 "register_operand")
17312	  (match_operand:SI 3 "const_0_to_255_operand")))]
17313  "TARGET_AVX2"
17314{
17315  HOST_WIDE_INT val = INTVAL (operands[3]) & 0xff;
17316  operands[3] = GEN_INT (val << 8 | val);
17317})
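
;; Worked example (illustration only): a builtin immediate of 0xa5 becomes
;; the RTL mask 0xa5a5, i.e. the same per-word selection repeated for both
;; 128-bit lanes, matching what vpblendw does in hardware; the *avx2_pblendw
;; insn below masks the value back to 8 bits when printing the instruction.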
17318
17319(define_insn "*avx2_pblendw"
17320  [(set (match_operand:V16HI 0 "register_operand" "=x")
17321	(vec_merge:V16HI
17322	  (match_operand:V16HI 2 "nonimmediate_operand" "xm")
17323	  (match_operand:V16HI 1 "register_operand" "x")
17324	  (match_operand:SI 3 "avx2_pblendw_operand" "n")))]
17325  "TARGET_AVX2"
17326{
17327  operands[3] = GEN_INT (INTVAL (operands[3]) & 0xff);
17328  return "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
17329}
17330  [(set_attr "type" "ssemov")
17331   (set_attr "prefix_extra" "1")
17332   (set_attr "length_immediate" "1")
17333   (set_attr "prefix" "vex")
17334   (set_attr "mode" "OI")])
17335
17336(define_insn "avx2_pblendd<mode>"
17337  [(set (match_operand:VI4_AVX2 0 "register_operand" "=x")
17338	(vec_merge:VI4_AVX2
17339	  (match_operand:VI4_AVX2 2 "nonimmediate_operand" "xm")
17340	  (match_operand:VI4_AVX2 1 "register_operand" "x")
17341	  (match_operand:SI 3 "const_0_to_255_operand" "n")))]
17342  "TARGET_AVX2"
17343  "vpblendd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
17344  [(set_attr "type" "ssemov")
17345   (set_attr "prefix_extra" "1")
17346   (set_attr "length_immediate" "1")
17347   (set_attr "prefix" "vex")
17348   (set_attr "mode" "<sseinsnmode>")])
17349
17350(define_insn "sse4_1_phminposuw"
17351  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x")
17352	(unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")]
17353		     UNSPEC_PHMINPOSUW))]
17354  "TARGET_SSE4_1"
17355  "%vphminposuw\t{%1, %0|%0, %1}"
17356  [(set_attr "isa" "noavx,noavx,avx")
17357   (set_attr "type" "sselog1")
17358   (set_attr "prefix_extra" "1")
17359   (set_attr "prefix" "orig,orig,vex")
17360   (set_attr "mode" "TI")])
17361
17362(define_insn "avx2_<code>v16qiv16hi2<mask_name>"
17363  [(set (match_operand:V16HI 0 "register_operand" "=v")
17364	(any_extend:V16HI
17365	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17366  "TARGET_AVX2 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17367  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17368  [(set_attr "type" "ssemov")
17369   (set_attr "prefix_extra" "1")
17370   (set_attr "prefix" "maybe_evex")
17371   (set_attr "mode" "OI")])
17372
17373(define_insn "avx512bw_<code>v32qiv32hi2<mask_name>"
17374  [(set (match_operand:V32HI 0 "register_operand" "=v")
17375	(any_extend:V32HI
17376	  (match_operand:V32QI 1 "nonimmediate_operand" "vm")))]
17377  "TARGET_AVX512BW"
17378  "vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17379  [(set_attr "type" "ssemov")
17380   (set_attr "prefix_extra" "1")
17381   (set_attr "prefix" "evex")
17382   (set_attr "mode" "XI")])
17383
17384(define_insn "sse4_1_<code>v8qiv8hi2<mask_name>"
17385  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17386	(any_extend:V8HI
17387	  (vec_select:V8QI
17388	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17389	    (parallel [(const_int 0) (const_int 1)
17390		       (const_int 2) (const_int 3)
17391		       (const_int 4) (const_int 5)
17392		       (const_int 6) (const_int 7)]))))]
17393  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17394  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17395  [(set_attr "isa" "noavx,noavx,avx")
17396   (set_attr "type" "ssemov")
17397   (set_attr "prefix_extra" "1")
17398   (set_attr "prefix" "orig,orig,maybe_evex")
17399   (set_attr "mode" "TI")])
17400
17401(define_insn "*sse4_1_<code>v8qiv8hi2<mask_name>_1"
17402  [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,v")
17403	(any_extend:V8HI
17404	  (match_operand:V8QI 1 "memory_operand" "m,m,m")))]
17405  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>"
17406  "%vpmov<extsuffix>bw\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17407  [(set_attr "isa" "noavx,noavx,avx")
17408   (set_attr "type" "ssemov")
17409   (set_attr "prefix_extra" "1")
17410   (set_attr "prefix" "orig,orig,maybe_evex")
17411   (set_attr "mode" "TI")])
17412
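;; The "_2" splitter below (and the analogous ones later in this file)
;; matches a form that can arise when the 64-bit source is first loaded as
;; the low half of a vector (a DImode load wrapped in vec_concat/subreg and
;; then vec_select'ed back down), and rewrites it before reload into the
;; plain V8QImode memory extension handled by the "_1" pattern above.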
17413(define_insn_and_split "*sse4_1_<code>v8qiv8hi2<mask_name>_2"
17414  [(set (match_operand:V8HI 0 "register_operand")
17415	(any_extend:V8HI
17416	  (vec_select:V8QI
17417	    (subreg:V16QI
17418	      (vec_concat:V2DI
17419	        (match_operand:DI 1 "memory_operand")
17420		(const_int 0)) 0)
17421	    (parallel [(const_int 0) (const_int 1)
17422		       (const_int 2) (const_int 3)
17423		       (const_int 4) (const_int 5)
17424		       (const_int 6) (const_int 7)]))))]
17425  "TARGET_SSE4_1 && <mask_avx512bw_condition> && <mask_avx512vl_condition>
17426   && ix86_pre_reload_split ()"
17427  "#"
17428  "&& 1"
17429  [(set (match_dup 0)
17430	(any_extend:V8HI (match_dup 1)))]
17431  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17432
17433(define_insn "<mask_codefor>avx512f_<code>v16qiv16si2<mask_name>"
17434  [(set (match_operand:V16SI 0 "register_operand" "=v")
17435	(any_extend:V16SI
17436	  (match_operand:V16QI 1 "nonimmediate_operand" "vm")))]
17437  "TARGET_AVX512F"
17438  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17439  [(set_attr "type" "ssemov")
17440   (set_attr "prefix" "evex")
17441   (set_attr "mode" "XI")])
17442
17443(define_insn "avx2_<code>v8qiv8si2<mask_name>"
17444  [(set (match_operand:V8SI 0 "register_operand" "=v")
17445	(any_extend:V8SI
17446	  (vec_select:V8QI
17447	    (match_operand:V16QI 1 "register_operand" "v")
17448	    (parallel [(const_int 0) (const_int 1)
17449		       (const_int 2) (const_int 3)
17450		       (const_int 4) (const_int 5)
17451		       (const_int 6) (const_int 7)]))))]
17452  "TARGET_AVX2 && <mask_avx512vl_condition>"
17453  "vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17454  [(set_attr "type" "ssemov")
17455   (set_attr "prefix_extra" "1")
17456   (set_attr "prefix" "maybe_evex")
17457   (set_attr "mode" "OI")])
17458
17459(define_insn "*avx2_<code>v8qiv8si2<mask_name>_1"
17460  [(set (match_operand:V8SI 0 "register_operand" "=v")
17461	(any_extend:V8SI
17462	  (match_operand:V8QI 1 "memory_operand" "m")))]
17463  "TARGET_AVX2 && <mask_avx512vl_condition>"
17464  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17465  [(set_attr "type" "ssemov")
17466   (set_attr "prefix_extra" "1")
17467   (set_attr "prefix" "maybe_evex")
17468   (set_attr "mode" "OI")])
17469
17470(define_insn_and_split "*avx2_<code>v8qiv8si2<mask_name>_2"
17471  [(set (match_operand:V8SI 0 "register_operand")
17472	(any_extend:V8SI
17473	  (vec_select:V8QI
17474	    (subreg:V16QI
17475	      (vec_concat:V2DI
17476	        (match_operand:DI 1 "memory_operand")
17477		(const_int 0)) 0)
17478	    (parallel [(const_int 0) (const_int 1)
17479		       (const_int 2) (const_int 3)
17480		       (const_int 4) (const_int 5)
17481		       (const_int 6) (const_int 7)]))))]
17482  "TARGET_AVX2 && <mask_avx512vl_condition>
17483   && ix86_pre_reload_split ()"
17484  "#"
17485  "&& 1"
17486  [(set (match_dup 0)
17487	(any_extend:V8SI (match_dup 1)))]
17488  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17489
17490(define_insn "sse4_1_<code>v4qiv4si2<mask_name>"
17491  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17492	(any_extend:V4SI
17493	  (vec_select:V4QI
17494	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17495	    (parallel [(const_int 0) (const_int 1)
17496		       (const_int 2) (const_int 3)]))))]
17497  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17498  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17499  [(set_attr "isa" "noavx,noavx,avx")
17500   (set_attr "type" "ssemov")
17501   (set_attr "prefix_extra" "1")
17502   (set_attr "prefix" "orig,orig,maybe_evex")
17503   (set_attr "mode" "TI")])
17504
17505(define_insn "*sse4_1_<code>v4qiv4si2<mask_name>_1"
17506  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17507	(any_extend:V4SI
17508	  (match_operand:V4QI 1 "memory_operand" "m,m,m")))]
17509  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17510  "%vpmov<extsuffix>bd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17511  [(set_attr "isa" "noavx,noavx,avx")
17512   (set_attr "type" "ssemov")
17513   (set_attr "prefix_extra" "1")
17514   (set_attr "prefix" "orig,orig,maybe_evex")
17515   (set_attr "mode" "TI")])
17516
17517(define_insn_and_split "*sse4_1_<code>v4qiv4si2<mask_name>_2"
17518  [(set (match_operand:V4SI 0 "register_operand")
17519	(any_extend:V4SI
17520	  (vec_select:V4QI
17521	    (subreg:V16QI
17522	      (vec_merge:V4SI
17523	        (vec_duplicate:V4SI
17524		  (match_operand:SI 1 "memory_operand"))
17525		(const_vector:V4SI
17526		   [(const_int 0) (const_int 0)
17527		    (const_int 0) (const_int 0)])
17528		(const_int 1)) 0)
17529	    (parallel [(const_int 0) (const_int 1)
17530		       (const_int 2) (const_int 3)]))))]
17531  "TARGET_SSE4_1 && <mask_avx512vl_condition>
17532   && ix86_pre_reload_split ()"
17533  "#"
17534  "&& 1"
17535  [(set (match_dup 0)
17536	(any_extend:V4SI (match_dup 1)))]
17537  "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17538
17539(define_insn "avx512f_<code>v16hiv16si2<mask_name>"
17540  [(set (match_operand:V16SI 0 "register_operand" "=v")
17541	(any_extend:V16SI
17542	  (match_operand:V16HI 1 "nonimmediate_operand" "vm")))]
17543  "TARGET_AVX512F"
17544  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17545  [(set_attr "type" "ssemov")
17546   (set_attr "prefix" "evex")
17547   (set_attr "mode" "XI")])
17548
17549(define_insn "avx2_<code>v8hiv8si2<mask_name>"
17550  [(set (match_operand:V8SI 0 "register_operand" "=v")
17551	(any_extend:V8SI
17552	    (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17553  "TARGET_AVX2 && <mask_avx512vl_condition>"
17554  "vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17555  [(set_attr "type" "ssemov")
17556   (set_attr "prefix_extra" "1")
17557   (set_attr "prefix" "maybe_evex")
17558   (set_attr "mode" "OI")])
17559
17560(define_insn "sse4_1_<code>v4hiv4si2<mask_name>"
17561  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17562	(any_extend:V4SI
17563	  (vec_select:V4HI
17564	    (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17565	    (parallel [(const_int 0) (const_int 1)
17566		       (const_int 2) (const_int 3)]))))]
17567  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17568  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17569  [(set_attr "isa" "noavx,noavx,avx")
17570   (set_attr "type" "ssemov")
17571   (set_attr "prefix_extra" "1")
17572   (set_attr "prefix" "orig,orig,maybe_evex")
17573   (set_attr "mode" "TI")])
17574
17575(define_insn "*sse4_1_<code>v4hiv4si2<mask_name>_1"
17576  [(set (match_operand:V4SI 0 "register_operand" "=Yr,*x,v")
17577	(any_extend:V4SI
17578	  (match_operand:V4HI 1 "memory_operand" "m,m,m")))]
17579  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17580  "%vpmov<extsuffix>wd\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17581  [(set_attr "isa" "noavx,noavx,avx")
17582   (set_attr "type" "ssemov")
17583   (set_attr "prefix_extra" "1")
17584   (set_attr "prefix" "orig,orig,maybe_evex")
17585   (set_attr "mode" "TI")])
17586
17587(define_insn_and_split "*sse4_1_<code>v4hiv4si2<mask_name>_2"
17588  [(set (match_operand:V4SI 0 "register_operand")
17589	(any_extend:V4SI
17590	  (vec_select:V4HI
17591	    (subreg:V8HI
17592	      (vec_concat:V2DI
17593		(match_operand:DI 1 "memory_operand")
17594		(const_int 0)) 0)
17595	    (parallel [(const_int 0) (const_int 1)
17596		       (const_int 2) (const_int 3)]))))]
17597  "TARGET_SSE4_1 && <mask_avx512vl_condition>
17598   && ix86_pre_reload_split ()"
17599  "#"
17600  "&& 1"
17601  [(set (match_dup 0)
17602	(any_extend:V4SI (match_dup 1)))]
17603  "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17604
17605(define_insn "avx512f_<code>v8qiv8di2<mask_name>"
17606  [(set (match_operand:V8DI 0 "register_operand" "=v")
17607	(any_extend:V8DI
17608	  (vec_select:V8QI
17609	    (match_operand:V16QI 1 "register_operand" "v")
17610	    (parallel [(const_int 0) (const_int 1)
17611		       (const_int 2) (const_int 3)
17612		       (const_int 4) (const_int 5)
17613		       (const_int 6) (const_int 7)]))))]
17614  "TARGET_AVX512F"
17615  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17616  [(set_attr "type" "ssemov")
17617   (set_attr "prefix" "evex")
17618   (set_attr "mode" "XI")])
17619
17620(define_insn "*avx512f_<code>v8qiv8di2<mask_name>_1"
17621  [(set (match_operand:V8DI 0 "register_operand" "=v")
17622	(any_extend:V8DI
17623	  (match_operand:V8QI 1 "memory_operand" "m")))]
17624  "TARGET_AVX512F"
17625  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17626  [(set_attr "type" "ssemov")
17627   (set_attr "prefix" "evex")
17628   (set_attr "mode" "XI")])
17629
17630(define_insn_and_split "*avx512f_<code>v8qiv8di2<mask_name>_2"
17631  [(set (match_operand:V8DI 0 "register_operand")
17632	(any_extend:V8DI
17633	  (vec_select:V8QI
17634	    (subreg:V16QI
17635	      (vec_concat:V2DI
17636	        (match_operand:DI 1 "memory_operand")
17637		(const_int 0)) 0)
17638	    (parallel [(const_int 0) (const_int 1)
17639		       (const_int 2) (const_int 3)
17640		       (const_int 4) (const_int 5)
17641		       (const_int 6) (const_int 7)]))))]
17642  "TARGET_AVX512F && ix86_pre_reload_split ()"
17643  "#"
17644  "&& 1"
17645  [(set (match_dup 0)
17646	(any_extend:V8DI (match_dup 1)))]
17647  "operands[1] = adjust_address_nv (operands[1], V8QImode, 0);")
17648
17649(define_insn "avx2_<code>v4qiv4di2<mask_name>"
17650  [(set (match_operand:V4DI 0 "register_operand" "=v")
17651	(any_extend:V4DI
17652	  (vec_select:V4QI
17653	    (match_operand:V16QI 1 "register_operand" "v")
17654	    (parallel [(const_int 0) (const_int 1)
17655		       (const_int 2) (const_int 3)]))))]
17656  "TARGET_AVX2 && <mask_avx512vl_condition>"
17657  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17658  [(set_attr "type" "ssemov")
17659   (set_attr "prefix_extra" "1")
17660   (set_attr "prefix" "maybe_evex")
17661   (set_attr "mode" "OI")])
17662
17663(define_insn "*avx2_<code>v4qiv4di2<mask_name>_1"
17664  [(set (match_operand:V4DI 0 "register_operand" "=v")
17665	(any_extend:V4DI
17666	  (match_operand:V4QI 1 "memory_operand" "m")))]
17667  "TARGET_AVX2 && <mask_avx512vl_condition>"
17668  "vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17669  [(set_attr "type" "ssemov")
17670   (set_attr "prefix_extra" "1")
17671   (set_attr "prefix" "maybe_evex")
17672   (set_attr "mode" "OI")])
17673
17674(define_insn_and_split "*avx2_<code>v4qiv4di2<mask_name>_2"
17675  [(set (match_operand:V4DI 0 "register_operand")
17676	(any_extend:V4DI
17677	  (vec_select:V4QI
17678	    (subreg:V16QI
17679	      (vec_merge:V4SI
17680	        (vec_duplicate:V4SI
17681		  (match_operand:SI 1 "memory_operand"))
17682		(const_vector:V4SI
17683		   [(const_int 0) (const_int 0)
17684		    (const_int 0) (const_int 0)])
17685		(const_int 1)) 0)
17686	    (parallel [(const_int 0) (const_int 1)
17687		       (const_int 2) (const_int 3)]))))]
17688  "TARGET_AVX2 && <mask_avx512vl_condition>
17689   && ix86_pre_reload_split ()"
17690  "#"
17691  "&& 1"
17692  [(set (match_dup 0)
17693	(any_extend:V4DI (match_dup 1)))]
17694  "operands[1] = adjust_address_nv (operands[1], V4QImode, 0);")
17695
17696(define_insn "sse4_1_<code>v2qiv2di2<mask_name>"
17697  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17698	(any_extend:V2DI
17699	  (vec_select:V2QI
17700	    (match_operand:V16QI 1 "register_operand" "Yr,*x,v")
17701	    (parallel [(const_int 0) (const_int 1)]))))]
17702  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17703  "%vpmov<extsuffix>bq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17704  [(set_attr "isa" "noavx,noavx,avx")
17705   (set_attr "type" "ssemov")
17706   (set_attr "prefix_extra" "1")
17707   (set_attr "prefix" "orig,orig,maybe_evex")
17708   (set_attr "mode" "TI")])
17709
17710(define_insn "avx512f_<code>v8hiv8di2<mask_name>"
17711  [(set (match_operand:V8DI 0 "register_operand" "=v")
17712	(any_extend:V8DI
17713	  (match_operand:V8HI 1 "nonimmediate_operand" "vm")))]
17714  "TARGET_AVX512F"
17715  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17716  [(set_attr "type" "ssemov")
17717   (set_attr "prefix" "evex")
17718   (set_attr "mode" "XI")])
17719
17720(define_insn "avx2_<code>v4hiv4di2<mask_name>"
17721  [(set (match_operand:V4DI 0 "register_operand" "=v")
17722	(any_extend:V4DI
17723	  (vec_select:V4HI
17724	    (match_operand:V8HI 1 "register_operand" "v")
17725	    (parallel [(const_int 0) (const_int 1)
17726		       (const_int 2) (const_int 3)]))))]
17727  "TARGET_AVX2 && <mask_avx512vl_condition>"
17728  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17729  [(set_attr "type" "ssemov")
17730   (set_attr "prefix_extra" "1")
17731   (set_attr "prefix" "maybe_evex")
17732   (set_attr "mode" "OI")])
17733
17734(define_insn "*avx2_<code>v4hiv4di2<mask_name>_1"
17735  [(set (match_operand:V4DI 0 "register_operand" "=v")
17736	(any_extend:V4DI
17737	  (match_operand:V4HI 1 "memory_operand" "m")))]
17738  "TARGET_AVX2 && <mask_avx512vl_condition>"
17739  "vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17740  [(set_attr "type" "ssemov")
17741   (set_attr "prefix_extra" "1")
17742   (set_attr "prefix" "maybe_evex")
17743   (set_attr "mode" "OI")])
17744
17745(define_insn_and_split "*avx2_<code>v4hiv4di2<mask_name>_2"
17746  [(set (match_operand:V4DI 0 "register_operand")
17747	(any_extend:V4DI
17748	  (vec_select:V4HI
17749	    (subreg:V8HI
17750	      (vec_concat:V2DI
17751		(match_operand:DI 1 "memory_operand")
17752		(const_int 0)) 0)
17753	    (parallel [(const_int 0) (const_int 1)
17754		       (const_int 2) (const_int 3)]))))]
17755  "TARGET_AVX2 && <mask_avx512vl_condition>
17756   && ix86_pre_reload_split ()"
17757  "#"
17758  "&& 1"
17759  [(set (match_dup 0)
17760	(any_extend:V4DI (match_dup 1)))]
17761  "operands[1] = adjust_address_nv (operands[1], V4HImode, 0);")
17762
17763(define_insn "sse4_1_<code>v2hiv2di2<mask_name>"
17764  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17765	(any_extend:V2DI
17766	  (vec_select:V2HI
17767	    (match_operand:V8HI 1 "register_operand" "Yr,*x,v")
17768	    (parallel [(const_int 0) (const_int 1)]))))]
17769  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17770  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17771  [(set_attr "isa" "noavx,noavx,avx")
17772   (set_attr "type" "ssemov")
17773   (set_attr "prefix_extra" "1")
17774   (set_attr "prefix" "orig,orig,maybe_evex")
17775   (set_attr "mode" "TI")])
17776
17777(define_insn "*sse4_1_<code>v2hiv2di2<mask_name>_1"
17778  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17779	(any_extend:V2DI
17780	  (match_operand:V2HI 1 "memory_operand" "m,m,m")))]
17781  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17782  "%vpmov<extsuffix>wq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17783  [(set_attr "isa" "noavx,noavx,avx")
17784   (set_attr "type" "ssemov")
17785   (set_attr "prefix_extra" "1")
17786   (set_attr "prefix" "orig,orig,maybe_evex")
17787   (set_attr "mode" "TI")])
17788
17789(define_insn_and_split "*sse4_1_<code>v2hiv2di2<mask_name>_2"
17790  [(set (match_operand:V2DI 0 "register_operand")
17791	(any_extend:V2DI
17792	  (vec_select:V2HI
17793	    (subreg:V8HI
17794	      (vec_merge:V4SI
17795	        (vec_duplicate:V4SI
17796		  (match_operand:SI 1 "memory_operand"))
17797		(const_vector:V4SI
17798		   [(const_int 0) (const_int 0)
17799		    (const_int 0) (const_int 0)])
17800		(const_int 1)) 0)
17801	    (parallel [(const_int 0) (const_int 1)]))))]
17802  "TARGET_SSE4_1 && <mask_avx512vl_condition>
17803   && ix86_pre_reload_split ()"
17804  "#"
17805  "&& 1"
17806  [(set (match_dup 0)
17807	(any_extend:V2DI (match_dup 1)))]
17808  "operands[1] = adjust_address_nv (operands[1], V2HImode, 0);")
17809
17810(define_insn "avx512f_<code>v8siv8di2<mask_name>"
17811  [(set (match_operand:V8DI 0 "register_operand" "=v")
17812	(any_extend:V8DI
17813	  (match_operand:V8SI 1 "nonimmediate_operand" "vm")))]
17814  "TARGET_AVX512F"
17815  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17816  [(set_attr "type" "ssemov")
17817   (set_attr "prefix" "evex")
17818   (set_attr "mode" "XI")])
17819
17820(define_insn "avx2_<code>v4siv4di2<mask_name>"
17821  [(set (match_operand:V4DI 0 "register_operand" "=v")
17822	(any_extend:V4DI
17823	    (match_operand:V4SI 1 "nonimmediate_operand" "vm")))]
17824  "TARGET_AVX2 && <mask_avx512vl_condition>"
17825  "vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17826  [(set_attr "type" "ssemov")
17827   (set_attr "prefix" "maybe_evex")
17828   (set_attr "prefix_extra" "1")
17829   (set_attr "mode" "OI")])
17830
17831(define_insn "sse4_1_<code>v2siv2di2<mask_name>"
17832  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17833	(any_extend:V2DI
17834	  (vec_select:V2SI
17835	    (match_operand:V4SI 1 "register_operand" "Yr,*x,v")
17836	    (parallel [(const_int 0) (const_int 1)]))))]
17837  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17838  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17839  [(set_attr "isa" "noavx,noavx,avx")
17840   (set_attr "type" "ssemov")
17841   (set_attr "prefix_extra" "1")
17842   (set_attr "prefix" "orig,orig,maybe_evex")
17843   (set_attr "mode" "TI")])
17844
17845(define_insn "*sse4_1_<code>v2siv2di2<mask_name>_1"
17846  [(set (match_operand:V2DI 0 "register_operand" "=Yr,*x,v")
17847	(any_extend:V2DI
17848	  (match_operand:V2SI 1 "memory_operand" "m,m,m")))]
17849  "TARGET_SSE4_1 && <mask_avx512vl_condition>"
17850  "%vpmov<extsuffix>dq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
17851  [(set_attr "isa" "noavx,noavx,avx")
17852   (set_attr "type" "ssemov")
17853   (set_attr "prefix_extra" "1")
17854   (set_attr "prefix" "orig,orig,maybe_evex")
17855   (set_attr "mode" "TI")])
17856
17857(define_insn_and_split "*sse4_1_<code>v2siv2di2<mask_name>_2"
17858  [(set (match_operand:V2DI 0 "register_operand")
17859	(any_extend:V2DI
17860	  (vec_select:V2SI
17861	    (subreg:V4SI
17862	      (vec_concat:V2DI
17863		(match_operand:DI 1 "memory_operand")
17864		(const_int 0)) 0)
17865	    (parallel [(const_int 0) (const_int 1)]))))]
17866  "TARGET_SSE4_1 && <mask_avx512vl_condition>
17867   && ix86_pre_reload_split ()"
17868  "#"
17869  "&& 1"
17870  [(set (match_dup 0)
17871	(any_extend:V2DI (match_dup 1)))]
17872  "operands[1] = adjust_address_nv (operands[1], V2SImode, 0);")
17873
;; vtestps/vtestpd set FLAGS_REG much like comiss and ucomiss do,
;; but they are not really compare instructions.
17876(define_insn "avx_vtest<ssemodesuffix><avxsizesuffix>"
17877  [(set (reg:CC FLAGS_REG)
17878	(unspec:CC [(match_operand:VF_128_256 0 "register_operand" "x")
17879		    (match_operand:VF_128_256 1 "nonimmediate_operand" "xm")]
17880		   UNSPEC_VTESTP))]
17881  "TARGET_AVX"
17882  "vtest<ssemodesuffix>\t{%1, %0|%0, %1}"
17883  [(set_attr "type" "ssecomi")
17884   (set_attr "prefix_extra" "1")
17885   (set_attr "prefix" "vex")
17886   (set_attr "mode" "<MODE>")])
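
;; The CC result is typically consumed through the ZF/CF-style intrinsics,
;; e.g. (user code assumed, not from this file):
;;
;;   #include <immintrin.h>
;;   int vtest_zf (__m256 a, __m256 b)
;;   {
;;     return _mm256_testz_ps (a, b);   /* vtestps + sete: ZF set iff
;;					    (sign(a) & sign(b)) == 0 */
;;   }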
17887
;; ptest sets FLAGS_REG much like comiss and ucomiss do, but it is not
;; really a compare instruction.
17890(define_insn "<sse4_1>_ptest<mode>"
17891  [(set (reg:CC FLAGS_REG)
17892	(unspec:CC [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x")
17893		    (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")]
17894		   UNSPEC_PTEST))]
17895  "TARGET_SSE4_1"
17896  "%vptest\t{%1, %0|%0, %1}"
17897  [(set_attr "isa" "noavx,noavx,avx")
17898   (set_attr "type" "ssecomi")
17899   (set_attr "prefix_extra" "1")
17900   (set_attr "prefix" "orig,orig,vex")
17901   (set (attr "btver2_decode")
17902     (if_then_else
17903       (match_test "<sseinsnmode>mode==OImode")
17904     (const_string "vector")
17905     (const_string "*")))
17906   (set_attr "mode" "<sseinsnmode>")])
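
;; Only FLAGS_REG is written; both vector operands are pure inputs.  A
;; typical consumer (user code assumed, not from this file):
;;
;;   #include <smmintrin.h>
;;   int is_all_zero (__m128i x)
;;   {
;;     return _mm_testz_si128 (x, x);   /* ptest + sete: ZF */
;;   }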
17907
17908(define_insn "ptesttf2"
17909  [(set (reg:CC FLAGS_REG)
17910	(unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x")
17911		    (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")]
17912		   UNSPEC_PTEST))]
17913  "TARGET_SSE4_1"
17914  "%vptest\t{%1, %0|%0, %1}"
17915  [(set_attr "isa" "noavx,noavx,avx")
17916   (set_attr "type" "ssecomi")
17917   (set_attr "prefix_extra" "1")
17918   (set_attr "prefix" "orig,orig,vex")
17919   (set_attr "mode" "TI")])
17920
17921(define_expand "nearbyint<mode>2"
17922  [(set (match_operand:VF 0 "register_operand")
17923	(unspec:VF
17924	  [(match_operand:VF 1 "vector_operand")
17925	   (match_dup 2)]
17926	  UNSPEC_ROUND))]
17927  "TARGET_SSE4_1"
17928  "operands[2] = GEN_INT (ROUND_MXCSR | ROUND_NO_EXC);")
17929
17930(define_expand "rint<mode>2"
17931  [(set (match_operand:VF 0 "register_operand")
17932	(unspec:VF
17933	  [(match_operand:VF 1 "vector_operand")
17934	   (match_dup 2)]
17935	  UNSPEC_ROUND))]
17936  "TARGET_SSE4_1"
17937  "operands[2] = GEN_INT (ROUND_MXCSR);")
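
;; The round{ps,pd,ss,sd} immediate used by the expanders above encodes the
;; rounding control: bits 0-1 select a fixed rounding mode, bit 2
;; (ROUND_MXCSR) means "use the current MXCSR rounding mode instead", and
;; bit 3 (ROUND_NO_EXC) suppresses the precision (inexact) exception.  At
;; the intrinsic level this corresponds to, e.g. (illustration only):
;;
;;   #include <smmintrin.h>
;;   __m128 nearbyint_ps (__m128 x)   /* never raises inexact */
;;   {
;;     return _mm_round_ps (x, _MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC);
;;   }
;;   __m128 rint_ps (__m128 x)        /* may raise inexact */
;;   {
;;     return _mm_round_ps (x, _MM_FROUND_CUR_DIRECTION);
;;   }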
17938
17939(define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>"
17940  [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x")
17941	(unspec:VF_128_256
17942	  [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm")
17943	   (match_operand:SI 2 "const_0_to_15_operand" "n,n,n")]
17944	  UNSPEC_ROUND))]
17945  "TARGET_SSE4_1"
17946  "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
17947  [(set_attr "isa" "noavx,noavx,avx")
17948   (set_attr "type" "ssecvt")
17949   (set_attr "prefix_data16" "1,1,*")
17950   (set_attr "prefix_extra" "1")
17951   (set_attr "length_immediate" "1")
17952   (set_attr "prefix" "orig,orig,vex")
17953   (set_attr "mode" "<MODE>")])
17954
17955(define_expand "<sse4_1>_round<ssemodesuffix>_sfix<avxsizesuffix>"
17956  [(match_operand:<sseintvecmode> 0 "register_operand")
17957   (match_operand:VF1_128_256 1 "vector_operand")
17958   (match_operand:SI 2 "const_0_to_15_operand")]
17959  "TARGET_SSE4_1"
17960{
17961  rtx tmp = gen_reg_rtx (<MODE>mode);
17962
17963  emit_insn
17964    (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp, operands[1],
17965						       operands[2]));
17966  emit_insn
17967    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
17968  DONE;
17969})
17970
17971(define_expand "avx512f_round<castmode>512"
17972  [(match_operand:VF_512 0 "register_operand")
17973   (match_operand:VF_512 1 "nonimmediate_operand")
17974   (match_operand:SI 2 "const_0_to_15_operand")]
17975  "TARGET_AVX512F"
17976{
17977  emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
17978  DONE;
17979})
17980
17981(define_expand "avx512f_roundps512_sfix"
17982  [(match_operand:V16SI 0 "register_operand")
17983   (match_operand:V16SF 1 "nonimmediate_operand")
17984   (match_operand:SI 2 "const_0_to_15_operand")]
17985  "TARGET_AVX512F"
17986{
17987  rtx tmp = gen_reg_rtx (V16SFmode);
17988  emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
17989  emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
17990  DONE;
17991})
17992
17993(define_expand "<sse4_1>_round<ssemodesuffix>_vec_pack_sfix<avxsizesuffix>"
17994  [(match_operand:<ssepackfltmode> 0 "register_operand")
17995   (match_operand:VF2 1 "vector_operand")
17996   (match_operand:VF2 2 "vector_operand")
17997   (match_operand:SI 3 "const_0_to_15_operand")]
17998  "TARGET_SSE4_1"
17999{
18000  rtx tmp0, tmp1;
18001
18002  if (<MODE>mode == V2DFmode
18003      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18004    {
18005      rtx tmp2 = gen_reg_rtx (V4DFmode);
18006
18007      tmp0 = gen_reg_rtx (V4DFmode);
18008      tmp1 = force_reg (V2DFmode, operands[1]);
18009
18010      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18011      emit_insn (gen_avx_roundpd256 (tmp2, tmp0, operands[3]));
18012      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18013    }
18014  else
18015    {
18016      tmp0 = gen_reg_rtx (<MODE>mode);
18017      tmp1 = gen_reg_rtx (<MODE>mode);
18018
18019      emit_insn
18020       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp0, operands[1],
18021							  operands[3]));
18022      emit_insn
18023       (gen_<sse4_1>_round<ssemodesuffix><avxsizesuffix> (tmp1, operands[2],
18024							  operands[3]));
18025      emit_insn
18026       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18027    }
18028  DONE;
18029})
18030
18031(define_insn "sse4_1_round<ssescalarmodesuffix>"
18032  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18033	(vec_merge:VF_128
18034	  (unspec:VF_128
18035	    [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18036	     (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18037	    UNSPEC_ROUND)
18038	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18039	  (const_int 1)))]
18040  "TARGET_SSE4_1"
18041  "@
18042   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18043   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}
18044   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}
18045   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"
18046  [(set_attr "isa" "noavx,noavx,avx,avx512f")
18047   (set_attr "type" "ssecvt")
18048   (set_attr "length_immediate" "1")
18049   (set_attr "prefix_data16" "1,1,*,*")
18050   (set_attr "prefix_extra" "1")
18051   (set_attr "prefix" "orig,orig,vex,evex")
18052   (set_attr "mode" "<MODE>")])
18053
18054(define_insn "*sse4_1_round<ssescalarmodesuffix>"
18055  [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
18056	(vec_merge:VF_128
18057	  (vec_duplicate:VF_128
18058	    (unspec:<ssescalarmode>
18059	      [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm")
18060	       (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
18061	      UNSPEC_ROUND))
18062	  (match_operand:VF_128 1 "register_operand" "0,0,x,v")
18063	  (const_int 1)))]
18064  "TARGET_SSE4_1"
18065  "@
18066   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18067   round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
18068   vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
18069   vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18070  [(set_attr "isa" "noavx,noavx,avx,avx512f")
18071   (set_attr "type" "ssecvt")
18072   (set_attr "length_immediate" "1")
18073   (set_attr "prefix_data16" "1,1,*,*")
18074   (set_attr "prefix_extra" "1")
18075   (set_attr "prefix" "orig,orig,vex,evex")
18076   (set_attr "mode" "<MODE>")])
18077
18078(define_expand "round<mode>2"
18079  [(set (match_dup 3)
18080	(plus:VF
18081	  (match_operand:VF 1 "register_operand")
18082	  (match_dup 2)))
18083   (set (match_operand:VF 0 "register_operand")
18084	(unspec:VF
18085	  [(match_dup 3) (match_dup 4)]
18086	  UNSPEC_ROUND))]
18087  "TARGET_SSE4_1 && !flag_trapping_math"
18088{
18089  machine_mode scalar_mode;
18090  const struct real_format *fmt;
18091  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
18092  rtx half, vec_half;
18093
18094  scalar_mode = GET_MODE_INNER (<MODE>mode);
18095
  /* Load nextafter (0.5, 0.0), i.e. the largest value strictly below 0.5.  */
18097  fmt = REAL_MODE_FORMAT (scalar_mode);
18098  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, scalar_mode);
18099  real_arithmetic (&pred_half, MINUS_EXPR, &dconsthalf, &half_minus_pred_half);
18100  half = const_double_from_real_value (pred_half, scalar_mode);
18101
18102  vec_half = ix86_build_const_vector (<MODE>mode, true, half);
18103  vec_half = force_reg (<MODE>mode, vec_half);
18104
18105  operands[2] = gen_reg_rtx (<MODE>mode);
18106  emit_insn (gen_copysign<mode>3 (operands[2], vec_half, operands[1]));
18107
18108  operands[3] = gen_reg_rtx (<MODE>mode);
18109  operands[4] = GEN_INT (ROUND_TRUNC);
18110})
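
;; In scalar terms the expansion above computes (sketch only, assuming
;; round-half-away-from-zero semantics and !flag_trapping_math as the
;; condition requires):
;;
;;   double round_sketch (double x)
;;   {
;;     double pred_half = nextafter (0.5, 0.0);  /* largest value < 0.5 */
;;     return trunc (x + copysign (pred_half, x));
;;   }
;;
;; Using the predecessor of 0.5 rather than 0.5 itself prevents inputs just
;; below one half from being pushed up to the next integer by rounding in
;; the addition.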
18111
18112(define_expand "round<mode>2_sfix"
18113  [(match_operand:<sseintvecmode> 0 "register_operand")
18114   (match_operand:VF1 1 "register_operand")]
18115  "TARGET_SSE4_1 && !flag_trapping_math"
18116{
18117  rtx tmp = gen_reg_rtx (<MODE>mode);
18118
18119  emit_insn (gen_round<mode>2 (tmp, operands[1]));
18120
18121  emit_insn
18122    (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp));
18123  DONE;
18124})
18125
18126(define_expand "round<mode>2_vec_pack_sfix"
18127  [(match_operand:<ssepackfltmode> 0 "register_operand")
18128   (match_operand:VF2 1 "register_operand")
18129   (match_operand:VF2 2 "register_operand")]
18130  "TARGET_SSE4_1 && !flag_trapping_math"
18131{
18132  rtx tmp0, tmp1;
18133
18134  if (<MODE>mode == V2DFmode
18135      && TARGET_AVX && !TARGET_PREFER_AVX128 && optimize_insn_for_speed_p ())
18136    {
18137      rtx tmp2 = gen_reg_rtx (V4DFmode);
18138
18139      tmp0 = gen_reg_rtx (V4DFmode);
18140      tmp1 = force_reg (V2DFmode, operands[1]);
18141
18142      emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2]));
18143      emit_insn (gen_roundv4df2 (tmp2, tmp0));
18144      emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp2));
18145    }
18146  else
18147    {
18148      tmp0 = gen_reg_rtx (<MODE>mode);
18149      tmp1 = gen_reg_rtx (<MODE>mode);
18150
18151      emit_insn (gen_round<mode>2 (tmp0, operands[1]));
18152      emit_insn (gen_round<mode>2 (tmp1, operands[2]));
18153
18154      emit_insn
18155       (gen_vec_pack_sfix_trunc_<mode> (operands[0], tmp0, tmp1));
18156    }
18157  DONE;
18158})
18159
18160;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18161;;
18162;; Intel SSE4.2 string/text processing instructions
18163;;
18164;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18165
18166(define_insn_and_split "sse4_2_pcmpestr"
18167  [(set (match_operand:SI 0 "register_operand" "=c,c")
18168	(unspec:SI
18169	  [(match_operand:V16QI 2 "register_operand" "x,x")
18170	   (match_operand:SI 3 "register_operand" "a,a")
18171	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m")
18172	   (match_operand:SI 5 "register_operand" "d,d")
18173	   (match_operand:SI 6 "const_0_to_255_operand" "n,n")]
18174	  UNSPEC_PCMPESTR))
18175   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18176	(unspec:V16QI
18177	  [(match_dup 2)
18178	   (match_dup 3)
18179	   (match_dup 4)
18180	   (match_dup 5)
18181	   (match_dup 6)]
18182	  UNSPEC_PCMPESTR))
18183   (set (reg:CC FLAGS_REG)
18184	(unspec:CC
18185	  [(match_dup 2)
18186	   (match_dup 3)
18187	   (match_dup 4)
18188	   (match_dup 5)
18189	   (match_dup 6)]
18190	  UNSPEC_PCMPESTR))]
18191  "TARGET_SSE4_2
18192   && ix86_pre_reload_split ()"
18193  "#"
18194  "&& 1"
18195  [(const_int 0)]
18196{
18197  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18198  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18199  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18200
18201  if (ecx)
18202    emit_insn (gen_sse4_2_pcmpestri (operands[0], operands[2],
18203				     operands[3], operands[4],
18204				     operands[5], operands[6]));
18205  if (xmm0)
18206    emit_insn (gen_sse4_2_pcmpestrm (operands[1], operands[2],
18207				     operands[3], operands[4],
18208				     operands[5], operands[6]));
18209  if (flags && !(ecx || xmm0))
18210    emit_insn (gen_sse4_2_pcmpestr_cconly (NULL, NULL,
18211					   operands[2], operands[3],
18212					   operands[4], operands[5],
18213					   operands[6]));
18214  if (!(flags || ecx || xmm0))
18215    emit_note (NOTE_INSN_DELETED);
18216
18217  DONE;
18218}
18219  [(set_attr "type" "sselog")
18220   (set_attr "prefix_data16" "1")
18221   (set_attr "prefix_extra" "1")
18222   (set_attr "length_immediate" "1")
18223   (set_attr "memory" "none,load")
18224   (set_attr "mode" "TI")])
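
;; The explicit-length string-compare intrinsics are expanded through the
;; combined pattern above; the splitter then keeps only the outputs that
;; are actually used.  For example (user code assumed, not from this file):
;;
;;   #include <nmmintrin.h>
;;   int idx (__m128i a, int la, __m128i b, int lb)
;;   {
;;     return _mm_cmpestri (a, la, b, lb, _SIDD_CMP_EQUAL_ORDERED);
;;   }
;;   __m128i mask (__m128i a, int la, __m128i b, int lb)
;;   {
;;     return _mm_cmpestrm (a, la, b, lb, _SIDD_CMP_EQUAL_ORDERED);
;;   }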
18225
18226(define_insn "sse4_2_pcmpestri"
18227  [(set (match_operand:SI 0 "register_operand" "=c,c")
18228	(unspec:SI
18229	  [(match_operand:V16QI 1 "register_operand" "x,x")
18230	   (match_operand:SI 2 "register_operand" "a,a")
18231	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18232	   (match_operand:SI 4 "register_operand" "d,d")
18233	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18234	  UNSPEC_PCMPESTR))
18235   (set (reg:CC FLAGS_REG)
18236	(unspec:CC
18237	  [(match_dup 1)
18238	   (match_dup 2)
18239	   (match_dup 3)
18240	   (match_dup 4)
18241	   (match_dup 5)]
18242	  UNSPEC_PCMPESTR))]
18243  "TARGET_SSE4_2"
18244  "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
18245  [(set_attr "type" "sselog")
18246   (set_attr "prefix_data16" "1")
18247   (set_attr "prefix_extra" "1")
18248   (set_attr "prefix" "maybe_vex")
18249   (set_attr "length_immediate" "1")
18250   (set_attr "btver2_decode" "vector")
18251   (set_attr "memory" "none,load")
18252   (set_attr "mode" "TI")])
18253
18254(define_insn "sse4_2_pcmpestrm"
18255  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
18256	(unspec:V16QI
18257	  [(match_operand:V16QI 1 "register_operand" "x,x")
18258	   (match_operand:SI 2 "register_operand" "a,a")
18259	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18260	   (match_operand:SI 4 "register_operand" "d,d")
18261	   (match_operand:SI 5 "const_0_to_255_operand" "n,n")]
18262	  UNSPEC_PCMPESTR))
18263   (set (reg:CC FLAGS_REG)
18264	(unspec:CC
18265	  [(match_dup 1)
18266	   (match_dup 2)
18267	   (match_dup 3)
18268	   (match_dup 4)
18269	   (match_dup 5)]
18270	  UNSPEC_PCMPESTR))]
18271  "TARGET_SSE4_2"
18272  "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
18273  [(set_attr "type" "sselog")
18274   (set_attr "prefix_data16" "1")
18275   (set_attr "prefix_extra" "1")
18276   (set_attr "length_immediate" "1")
18277   (set_attr "prefix" "maybe_vex")
18278   (set_attr "btver2_decode" "vector")
18279   (set_attr "memory" "none,load")
18280   (set_attr "mode" "TI")])
18281
18282(define_insn "sse4_2_pcmpestr_cconly"
18283  [(set (reg:CC FLAGS_REG)
18284	(unspec:CC
18285	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
18286	   (match_operand:SI 3 "register_operand" "a,a,a,a")
18287	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
18288	   (match_operand:SI 5 "register_operand" "d,d,d,d")
18289	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
18290	  UNSPEC_PCMPESTR))
18291   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
18292   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
18293  "TARGET_SSE4_2"
18294  "@
18295   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18296   %vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
18297   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
18298   %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
18299  [(set_attr "type" "sselog")
18300   (set_attr "prefix_data16" "1")
18301   (set_attr "prefix_extra" "1")
18302   (set_attr "length_immediate" "1")
18303   (set_attr "memory" "none,load,none,load")
18304   (set_attr "btver2_decode" "vector,vector,vector,vector") 
18305   (set_attr "prefix" "maybe_vex")
18306   (set_attr "mode" "TI")])
18307
18308(define_insn_and_split "sse4_2_pcmpistr"
18309  [(set (match_operand:SI 0 "register_operand" "=c,c")
18310	(unspec:SI
18311	  [(match_operand:V16QI 2 "register_operand" "x,x")
18312	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m")
18313	   (match_operand:SI 4 "const_0_to_255_operand" "n,n")]
18314	  UNSPEC_PCMPISTR))
18315   (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz")
18316	(unspec:V16QI
18317	  [(match_dup 2)
18318	   (match_dup 3)
18319	   (match_dup 4)]
18320	  UNSPEC_PCMPISTR))
18321   (set (reg:CC FLAGS_REG)
18322	(unspec:CC
18323	  [(match_dup 2)
18324	   (match_dup 3)
18325	   (match_dup 4)]
18326	  UNSPEC_PCMPISTR))]
18327  "TARGET_SSE4_2
18328   && ix86_pre_reload_split ()"
18329  "#"
18330  "&& 1"
18331  [(const_int 0)]
18332{
18333  int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
18334  int xmm0 = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[1]));
18335  int flags = !find_regno_note (curr_insn, REG_UNUSED, FLAGS_REG);
18336
18337  if (ecx)
18338    emit_insn (gen_sse4_2_pcmpistri (operands[0], operands[2],
18339				     operands[3], operands[4]));
18340  if (xmm0)
18341    emit_insn (gen_sse4_2_pcmpistrm (operands[1], operands[2],
18342				     operands[3], operands[4]));
18343  if (flags && !(ecx || xmm0))
18344    emit_insn (gen_sse4_2_pcmpistr_cconly (NULL, NULL,
18345					   operands[2], operands[3],
18346					   operands[4]));
18347  if (!(flags || ecx || xmm0))
18348    emit_note (NOTE_INSN_DELETED);
18349
18350  DONE;
18351}
18352  [(set_attr "type" "sselog")
18353   (set_attr "prefix_data16" "1")
18354   (set_attr "prefix_extra" "1")
18355   (set_attr "length_immediate" "1")
18356   (set_attr "memory" "none,load")
18357   (set_attr "mode" "TI")])
18358
18359(define_insn "sse4_2_pcmpistri"
18360  [(set (match_operand:SI 0 "register_operand" "=c,c")
18361	(unspec:SI
18362	  [(match_operand:V16QI 1 "register_operand" "x,x")
18363	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18364	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18365	  UNSPEC_PCMPISTR))
18366   (set (reg:CC FLAGS_REG)
18367	(unspec:CC
18368	  [(match_dup 1)
18369	   (match_dup 2)
18370	   (match_dup 3)]
18371	  UNSPEC_PCMPISTR))]
18372  "TARGET_SSE4_2"
18373  "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
18374  [(set_attr "type" "sselog")
18375   (set_attr "prefix_data16" "1")
18376   (set_attr "prefix_extra" "1")
18377   (set_attr "length_immediate" "1")
18378   (set_attr "prefix" "maybe_vex")
18379   (set_attr "memory" "none,load")
18380   (set_attr "btver2_decode" "vector")
18381   (set_attr "mode" "TI")])
18382
18383(define_insn "sse4_2_pcmpistrm"
18384  [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
18385	(unspec:V16QI
18386	  [(match_operand:V16QI 1 "register_operand" "x,x")
18387	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
18388	   (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
18389	  UNSPEC_PCMPISTR))
18390   (set (reg:CC FLAGS_REG)
18391	(unspec:CC
18392	  [(match_dup 1)
18393	   (match_dup 2)
18394	   (match_dup 3)]
18395	  UNSPEC_PCMPISTR))]
18396  "TARGET_SSE4_2"
18397  "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
18398  [(set_attr "type" "sselog")
18399   (set_attr "prefix_data16" "1")
18400   (set_attr "prefix_extra" "1")
18401   (set_attr "length_immediate" "1")
18402   (set_attr "prefix" "maybe_vex")
18403   (set_attr "memory" "none,load")
18404   (set_attr "btver2_decode" "vector")
18405   (set_attr "mode" "TI")])
18406
18407(define_insn "sse4_2_pcmpistr_cconly"
18408  [(set (reg:CC FLAGS_REG)
18409	(unspec:CC
18410	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
18411	   (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m")
18412	   (match_operand:SI 4 "const_0_to_255_operand" "n,n,n,n")]
18413	  UNSPEC_PCMPISTR))
18414   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
18415   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
18416  "TARGET_SSE4_2"
18417  "@
18418   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
18419   %vpcmpistrm\t{%4, %3, %2|%2, %3, %4}
18420   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
18421   %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
18422  [(set_attr "type" "sselog")
18423   (set_attr "prefix_data16" "1")
18424   (set_attr "prefix_extra" "1")
18425   (set_attr "length_immediate" "1")
18426   (set_attr "memory" "none,load,none,load")
18427   (set_attr "prefix" "maybe_vex")
18428   (set_attr "btver2_decode" "vector,vector,vector,vector")
18429   (set_attr "mode" "TI")])
18430
18431;; Packed float variants
18432(define_mode_attr GATHER_SCATTER_SF_MEM_MODE
18433		      [(V8DI "V8SF") (V16SI "V16SF")])
18434
18435(define_expand "avx512pf_gatherpf<mode>sf"
18436  [(unspec
18437     [(match_operand:<avx512fmaskmode> 0 "register_operand")
18438      (mem:<GATHER_SCATTER_SF_MEM_MODE>
18439	(match_par_dup 5
18440	  [(match_operand 2 "vsib_address_operand")
18441	   (match_operand:VI48_512 1 "register_operand")
18442	   (match_operand:SI 3 "const1248_operand")]))
18443      (match_operand:SI 4 "const_2_to_3_operand")]
18444     UNSPEC_GATHER_PREFETCH)]
18445  "TARGET_AVX512PF"
18446{
18447  operands[5]
18448    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18449					operands[3]), UNSPEC_VSIBADDR);
18450})
18451
18452(define_insn "*avx512pf_gatherpf<VI48_512:mode>sf_mask"
18453  [(unspec
18454     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18455      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18456	[(unspec:P
18457	   [(match_operand:P 2 "vsib_address_operand" "Tv")
18458	    (match_operand:VI48_512 1 "register_operand" "v")
18459	    (match_operand:SI 3 "const1248_operand" "n")]
18460	   UNSPEC_VSIBADDR)])
18461      (match_operand:SI 4 "const_2_to_3_operand" "n")]
18462     UNSPEC_GATHER_PREFETCH)]
18463  "TARGET_AVX512PF"
18464{
18465  switch (INTVAL (operands[4]))
18466    {
18467    case 3:
18468      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18469	 gas changed what it requires incompatibly.  */
18470      return "%M2vgatherpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18471    case 2:
18472      return "%M2vgatherpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18473    default:
18474      gcc_unreachable ();
18475    }
18476}
18477  [(set_attr "type" "sse")
18478   (set_attr "prefix" "evex")
18479   (set_attr "mode" "XI")])
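
;; The const_2_to_3 operand is the prefetch hint as the intrinsics pass it
;; (_MM_HINT_T0 == 3, _MM_HINT_T1 == 2), so T0 selects vgatherpf0*ps
;; (towards L1) and T1 selects vgatherpf1*ps (towards L2).  For example
;; (illustration only, user code assumed):
;;
;;   #include <immintrin.h>
;;   void prefetch_gather (__m512i idx, void const *base)
;;   {
;;     _mm512_prefetch_i32gather_ps (idx, base, 4, _MM_HINT_T0);
;;   }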
18480
18481;; Packed double variants
18482(define_expand "avx512pf_gatherpf<mode>df"
18483  [(unspec
18484     [(match_operand:<avx512fmaskmode> 0 "register_operand")
18485      (mem:V8DF
18486	(match_par_dup 5
18487	  [(match_operand 2 "vsib_address_operand")
18488	   (match_operand:VI4_256_8_512 1 "register_operand")
18489	   (match_operand:SI 3 "const1248_operand")]))
18490      (match_operand:SI 4 "const_2_to_3_operand")]
18491     UNSPEC_GATHER_PREFETCH)]
18492  "TARGET_AVX512PF"
18493{
18494  operands[5]
18495    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18496					operands[3]), UNSPEC_VSIBADDR);
18497})
18498
18499(define_insn "*avx512pf_gatherpf<VI4_256_8_512:mode>df_mask"
18500  [(unspec
18501     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18502      (match_operator:V8DF 5 "vsib_mem_operator"
18503	[(unspec:P
18504	   [(match_operand:P 2 "vsib_address_operand" "Tv")
18505	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
18506	    (match_operand:SI 3 "const1248_operand" "n")]
18507	   UNSPEC_VSIBADDR)])
18508      (match_operand:SI 4 "const_2_to_3_operand" "n")]
18509     UNSPEC_GATHER_PREFETCH)]
18510  "TARGET_AVX512PF"
18511{
18512  switch (INTVAL (operands[4]))
18513    {
18514    case 3:
18515      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18516	 gas changed what it requires incompatibly.  */
18517      return "%M2vgatherpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18518    case 2:
18519      return "%M2vgatherpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18520    default:
18521      gcc_unreachable ();
18522    }
18523}
18524  [(set_attr "type" "sse")
18525   (set_attr "prefix" "evex")
18526   (set_attr "mode" "XI")])
18527
18528;; Packed float variants
18529(define_expand "avx512pf_scatterpf<mode>sf"
18530  [(unspec
18531     [(match_operand:<avx512fmaskmode> 0 "register_operand")
18532      (mem:<GATHER_SCATTER_SF_MEM_MODE>
18533	(match_par_dup 5
18534	  [(match_operand 2 "vsib_address_operand")
18535	   (match_operand:VI48_512 1 "register_operand")
18536	   (match_operand:SI 3 "const1248_operand")]))
18537      (match_operand:SI 4 "const2367_operand")]
18538     UNSPEC_SCATTER_PREFETCH)]
18539  "TARGET_AVX512PF"
18540{
18541  operands[5]
18542    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18543					operands[3]), UNSPEC_VSIBADDR);
18544})
18545
18546(define_insn "*avx512pf_scatterpf<VI48_512:mode>sf_mask"
18547  [(unspec
18548     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18549      (match_operator:<GATHER_SCATTER_SF_MEM_MODE> 5 "vsib_mem_operator"
18550	[(unspec:P
18551	   [(match_operand:P 2 "vsib_address_operand" "Tv")
18552	    (match_operand:VI48_512 1 "register_operand" "v")
18553	    (match_operand:SI 3 "const1248_operand" "n")]
18554	   UNSPEC_VSIBADDR)])
18555      (match_operand:SI 4 "const2367_operand" "n")]
18556     UNSPEC_SCATTER_PREFETCH)]
18557  "TARGET_AVX512PF"
18558{
18559  switch (INTVAL (operands[4]))
18560    {
18561    case 3:
18562    case 7:
18563      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18564	 gas changed what it requires incompatibly.  */
18565      return "%M2vscatterpf0<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18566    case 2:
18567    case 6:
18568      return "%M2vscatterpf1<ssemodesuffix>ps\t{%5%{%0%}|%X5%{%0%}}";
18569    default:
18570      gcc_unreachable ();
18571    }
18572}
18573  [(set_attr "type" "sse")
18574   (set_attr "prefix" "evex")
18575   (set_attr "mode" "XI")])
18576
18577;; Packed double variants
18578(define_expand "avx512pf_scatterpf<mode>df"
18579  [(unspec
18580     [(match_operand:<avx512fmaskmode> 0 "register_operand")
18581      (mem:V8DF
18582	(match_par_dup 5
18583	  [(match_operand 2 "vsib_address_operand")
18584	   (match_operand:VI4_256_8_512 1 "register_operand")
18585	   (match_operand:SI 3 "const1248_operand")]))
18586      (match_operand:SI 4 "const2367_operand")]
18587     UNSPEC_SCATTER_PREFETCH)]
18588  "TARGET_AVX512PF"
18589{
18590  operands[5]
18591    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[1],
18592					operands[3]), UNSPEC_VSIBADDR);
18593})
18594
18595(define_insn "*avx512pf_scatterpf<VI4_256_8_512:mode>df_mask"
18596  [(unspec
18597     [(match_operand:<avx512fmaskmode> 0 "register_operand" "Yk")
18598      (match_operator:V8DF 5 "vsib_mem_operator"
18599	[(unspec:P
18600	   [(match_operand:P 2 "vsib_address_operand" "Tv")
18601	    (match_operand:VI4_256_8_512 1 "register_operand" "v")
18602	    (match_operand:SI 3 "const1248_operand" "n")]
18603	   UNSPEC_VSIBADDR)])
18604      (match_operand:SI 4 "const2367_operand" "n")]
18605     UNSPEC_SCATTER_PREFETCH)]
18606  "TARGET_AVX512PF"
18607{
18608  switch (INTVAL (operands[4]))
18609    {
18610    case 3:
18611    case 7:
18612      /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
18613	 gas changed what it requires incompatibly.  */
18614      return "%M2vscatterpf0<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18615    case 2:
18616    case 6:
18617      return "%M2vscatterpf1<ssemodesuffix>pd\t{%5%{%0%}|%X5%{%0%}}";
18618    default:
18619      gcc_unreachable ();
18620    }
18621}
18622  [(set_attr "type" "sse")
18623   (set_attr "prefix" "evex")
18624   (set_attr "mode" "XI")])
18625
18626(define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>"
18627  [(set (match_operand:VF_512 0 "register_operand" "=v")
18628	(unspec:VF_512
18629	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18630	  UNSPEC_EXP2))]
18631  "TARGET_AVX512ER"
18632  "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18633  [(set_attr "prefix" "evex")
18634   (set_attr "type" "sse")
18635   (set_attr "mode" "<MODE>")])
18636
18637(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
18638  [(set (match_operand:VF_512 0 "register_operand" "=v")
18639	(unspec:VF_512
18640	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18641	  UNSPEC_RCP28))]
18642  "TARGET_AVX512ER"
18643  "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18644  [(set_attr "prefix" "evex")
18645   (set_attr "type" "sse")
18646   (set_attr "mode" "<MODE>")])
18647
18648(define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
18649  [(set (match_operand:VF_128 0 "register_operand" "=v")
18650	(vec_merge:VF_128
18651	  (unspec:VF_128
18652	    [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
18653	    UNSPEC_RCP28)
18654	  (match_operand:VF_128 2 "register_operand" "v")
18655	  (const_int 1)))]
18656  "TARGET_AVX512ER"
18657  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18658  [(set_attr "length_immediate" "1")
18659   (set_attr "prefix" "evex")
18660   (set_attr "type" "sse")
18661   (set_attr "mode" "<MODE>")])
18662
18663(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
18664  [(set (match_operand:VF_512 0 "register_operand" "=v")
18665	(unspec:VF_512
18666	  [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
18667	  UNSPEC_RSQRT28))]
18668  "TARGET_AVX512ER"
18669  "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
18670  [(set_attr "prefix" "evex")
18671   (set_attr "type" "sse")
18672   (set_attr "mode" "<MODE>")])
18673
18674(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
18675  [(set (match_operand:VF_128 0 "register_operand" "=v")
18676	(vec_merge:VF_128
18677	  (unspec:VF_128
18678	    [(match_operand:VF_128 1 "<round_saeonly_nimm_scalar_predicate>" "<round_saeonly_constraint>")]
18679	    UNSPEC_RSQRT28)
18680	  (match_operand:VF_128 2 "register_operand" "v")
18681	  (const_int 1)))]
18682  "TARGET_AVX512ER"
18683  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %<iptr>1<round_saeonly_op3>}"
18684  [(set_attr "length_immediate" "1")
18685   (set_attr "type" "sse")
18686   (set_attr "prefix" "evex")
18687   (set_attr "mode" "<MODE>")])
18688
18689;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18690;;
18691;; XOP instructions
18692;;
18693;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
18694
18695(define_code_iterator xop_plus [plus ss_plus])
18696
18697(define_code_attr macs [(plus "macs") (ss_plus "macss")])
18698(define_code_attr madcs [(plus "madcs") (ss_plus "madcss")])
18699
18700;; XOP parallel integer multiply/add instructions.
18701
18702(define_insn "xop_p<macs><ssemodesuffix><ssemodesuffix>"
18703  [(set (match_operand:VI24_128 0 "register_operand" "=x")
18704	(xop_plus:VI24_128
18705	 (mult:VI24_128
18706	  (match_operand:VI24_128 1 "nonimmediate_operand" "%x")
18707	  (match_operand:VI24_128 2 "nonimmediate_operand" "xm"))
18708	 (match_operand:VI24_128 3 "register_operand" "x")))]
18709  "TARGET_XOP"
18710  "vp<macs><ssemodesuffix><ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18711  [(set_attr "type" "ssemuladd")
18712   (set_attr "mode" "TI")])
18713
18714(define_insn "xop_p<macs>dql"
18715  [(set (match_operand:V2DI 0 "register_operand" "=x")
18716	(xop_plus:V2DI
18717	 (mult:V2DI
18718	  (sign_extend:V2DI
18719	   (vec_select:V2SI
18720	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18721	    (parallel [(const_int 0) (const_int 2)])))
18722	  (sign_extend:V2DI
18723	   (vec_select:V2SI
18724	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18725	    (parallel [(const_int 0) (const_int 2)]))))
18726	 (match_operand:V2DI 3 "register_operand" "x")))]
18727  "TARGET_XOP"
18728  "vp<macs>dql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18729  [(set_attr "type" "ssemuladd")
18730   (set_attr "mode" "TI")])
18731
18732(define_insn "xop_p<macs>dqh"
18733  [(set (match_operand:V2DI 0 "register_operand" "=x")
18734	(xop_plus:V2DI
18735	 (mult:V2DI
18736	  (sign_extend:V2DI
18737	   (vec_select:V2SI
18738	    (match_operand:V4SI 1 "nonimmediate_operand" "%x")
18739	    (parallel [(const_int 1) (const_int 3)])))
18740	  (sign_extend:V2DI
18741	   (vec_select:V2SI
18742	    (match_operand:V4SI 2 "nonimmediate_operand" "xm")
18743	    (parallel [(const_int 1) (const_int 3)]))))
18744	 (match_operand:V2DI 3 "register_operand" "x")))]
18745  "TARGET_XOP"
18746  "vp<macs>dqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18747  [(set_attr "type" "ssemuladd")
18748   (set_attr "mode" "TI")])
18749
18750;; XOP parallel integer multiply/add instructions for the intrinsics.
18751(define_insn "xop_p<macs>wd"
18752  [(set (match_operand:V4SI 0 "register_operand" "=x")
18753	(xop_plus:V4SI
18754	 (mult:V4SI
18755	  (sign_extend:V4SI
18756	   (vec_select:V4HI
18757	    (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18758	    (parallel [(const_int 1) (const_int 3)
18759		       (const_int 5) (const_int 7)])))
18760	  (sign_extend:V4SI
18761	   (vec_select:V4HI
18762	    (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18763	    (parallel [(const_int 1) (const_int 3)
18764		       (const_int 5) (const_int 7)]))))
18765	 (match_operand:V4SI 3 "register_operand" "x")))]
18766  "TARGET_XOP"
18767  "vp<macs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18768  [(set_attr "type" "ssemuladd")
18769   (set_attr "mode" "TI")])
18770
18771(define_insn "xop_p<madcs>wd"
18772  [(set (match_operand:V4SI 0 "register_operand" "=x")
18773	(xop_plus:V4SI
18774	 (plus:V4SI
18775	  (mult:V4SI
18776	   (sign_extend:V4SI
18777	    (vec_select:V4HI
18778	     (match_operand:V8HI 1 "nonimmediate_operand" "%x")
18779	     (parallel [(const_int 0) (const_int 2)
18780			(const_int 4) (const_int 6)])))
18781	   (sign_extend:V4SI
18782	    (vec_select:V4HI
18783	     (match_operand:V8HI 2 "nonimmediate_operand" "xm")
18784	     (parallel [(const_int 0) (const_int 2)
18785			(const_int 4) (const_int 6)]))))
18786	  (mult:V4SI
18787	   (sign_extend:V4SI
18788	    (vec_select:V4HI
18789	     (match_dup 1)
18790	     (parallel [(const_int 1) (const_int 3)
18791			(const_int 5) (const_int 7)])))
18792	   (sign_extend:V4SI
18793	    (vec_select:V4HI
18794	     (match_dup 2)
18795	     (parallel [(const_int 1) (const_int 3)
18796			(const_int 5) (const_int 7)])))))
18797	 (match_operand:V4SI 3 "register_operand" "x")))]
18798  "TARGET_XOP"
18799  "vp<madcs>wd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18800  [(set_attr "type" "ssemuladd")
18801   (set_attr "mode" "TI")])
18802
18803;; XOP parallel XMM conditional moves
18804(define_insn "xop_pcmov_<mode><avxsizesuffix>"
18805  [(set (match_operand:V_128_256 0 "register_operand" "=x,x")
18806	(if_then_else:V_128_256
18807	  (match_operand:V_128_256 3 "nonimmediate_operand" "x,m")
18808	  (match_operand:V_128_256 1 "register_operand" "x,x")
18809	  (match_operand:V_128_256 2 "nonimmediate_operand" "xm,x")))]
18810  "TARGET_XOP"
18811  "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
18812  [(set_attr "type" "sse4arg")])
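
;; Illustrative sketch (not part of the machine description): the VPCMOV
;; instruction behind the xop_pcmov pattern above is a bit-wise select.  When
;; operand 3 is an all-ones/all-zeros element mask, as in vector conditional
;; moves, the bit-wise form below agrees with the element-wise if_then_else
;; used in the pattern.
;;
;;   #include <stdint.h>
;;   /* dst = (then & sel) | (else & ~sel), applied bit by bit.  */
;;   static inline uint64_t
;;   pcmov_bits (uint64_t then_val, uint64_t else_val, uint64_t sel)
;;   {
;;     return (then_val & sel) | (else_val & ~sel);
;;   }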
18813
18814;; XOP horizontal add/subtract instructions
18815(define_insn "xop_phadd<u>bw"
18816  [(set (match_operand:V8HI 0 "register_operand" "=x")
18817	(plus:V8HI
18818	 (any_extend:V8HI
18819	  (vec_select:V8QI
18820	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18821	   (parallel [(const_int 0) (const_int 2)
18822		      (const_int 4) (const_int 6)
18823		      (const_int 8) (const_int 10)
18824		      (const_int 12) (const_int 14)])))
18825	 (any_extend:V8HI
18826	  (vec_select:V8QI
18827	   (match_dup 1)
18828	   (parallel [(const_int 1) (const_int 3)
18829		      (const_int 5) (const_int 7)
18830		      (const_int 9) (const_int 11)
18831		      (const_int 13) (const_int 15)])))))]
18832  "TARGET_XOP"
18833  "vphadd<u>bw\t{%1, %0|%0, %1}"
18834  [(set_attr "type" "sseiadd1")])
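
;; Illustrative per-element model (a sketch, not part of the machine
;; description) of what the xop_phadd<u>bw pattern above computes: each
;; result word is the sum of an adjacent even/odd byte pair, sign- or
;; zero-extended depending on <u>.  Shown for the signed vphaddbw case;
;; the unsigned variant would use unsigned char instead.
;;
;;   void
;;   phaddbw (short dst[8], const signed char src[16])
;;   {
;;     for (int i = 0; i < 8; i++)
;;       dst[i] = (short) src[2 * i] + (short) src[2 * i + 1];
;;   }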
18835
18836(define_insn "xop_phadd<u>bd"
18837  [(set (match_operand:V4SI 0 "register_operand" "=x")
18838	(plus:V4SI
18839	 (plus:V4SI
18840	  (any_extend:V4SI
18841	   (vec_select:V4QI
18842	    (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18843	    (parallel [(const_int 0) (const_int 4)
18844		       (const_int 8) (const_int 12)])))
18845	  (any_extend:V4SI
18846	   (vec_select:V4QI
18847	    (match_dup 1)
18848	    (parallel [(const_int 1) (const_int 5)
18849		       (const_int 9) (const_int 13)]))))
18850	 (plus:V4SI
18851	  (any_extend:V4SI
18852	   (vec_select:V4QI
18853	    (match_dup 1)
18854	    (parallel [(const_int 2) (const_int 6)
18855		       (const_int 10) (const_int 14)])))
18856	  (any_extend:V4SI
18857	   (vec_select:V4QI
18858	    (match_dup 1)
18859	    (parallel [(const_int 3) (const_int 7)
18860		       (const_int 11) (const_int 15)]))))))]
18861  "TARGET_XOP"
18862  "vphadd<u>bd\t{%1, %0|%0, %1}"
18863  [(set_attr "type" "sseiadd1")])
18864
18865(define_insn "xop_phadd<u>bq"
18866  [(set (match_operand:V2DI 0 "register_operand" "=x")
18867	(plus:V2DI
18868	 (plus:V2DI
18869	  (plus:V2DI
18870	   (any_extend:V2DI
18871	    (vec_select:V2QI
18872	     (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18873	     (parallel [(const_int 0) (const_int 8)])))
18874	   (any_extend:V2DI
18875	    (vec_select:V2QI
18876	     (match_dup 1)
18877	     (parallel [(const_int 1) (const_int 9)]))))
18878	  (plus:V2DI
18879	   (any_extend:V2DI
18880	    (vec_select:V2QI
18881	     (match_dup 1)
18882	     (parallel [(const_int 2) (const_int 10)])))
18883	   (any_extend:V2DI
18884	    (vec_select:V2QI
18885	     (match_dup 1)
18886	     (parallel [(const_int 3) (const_int 11)])))))
18887	 (plus:V2DI
18888	  (plus:V2DI
18889	   (any_extend:V2DI
18890	    (vec_select:V2QI
18891	     (match_dup 1)
18892	     (parallel [(const_int 4) (const_int 12)])))
18893	   (any_extend:V2DI
18894	    (vec_select:V2QI
18895	     (match_dup 1)
18896	     (parallel [(const_int 5) (const_int 13)]))))
18897	  (plus:V2DI
18898	   (any_extend:V2DI
18899	    (vec_select:V2QI
18900	     (match_dup 1)
18901	     (parallel [(const_int 6) (const_int 14)])))
18902	   (any_extend:V2DI
18903	    (vec_select:V2QI
18904	     (match_dup 1)
18905	     (parallel [(const_int 7) (const_int 15)])))))))]
18906  "TARGET_XOP"
18907  "vphadd<u>bq\t{%1, %0|%0, %1}"
18908  [(set_attr "type" "sseiadd1")])
18909
18910(define_insn "xop_phadd<u>wd"
18911  [(set (match_operand:V4SI 0 "register_operand" "=x")
18912	(plus:V4SI
18913	 (any_extend:V4SI
18914	  (vec_select:V4HI
18915	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18916	   (parallel [(const_int 0) (const_int 2)
18917		      (const_int 4) (const_int 6)])))
18918	 (any_extend:V4SI
18919	  (vec_select:V4HI
18920	   (match_dup 1)
18921	   (parallel [(const_int 1) (const_int 3)
18922		      (const_int 5) (const_int 7)])))))]
18923  "TARGET_XOP"
18924  "vphadd<u>wd\t{%1, %0|%0, %1}"
18925  [(set_attr "type" "sseiadd1")])
18926
18927(define_insn "xop_phadd<u>wq"
18928  [(set (match_operand:V2DI 0 "register_operand" "=x")
18929	(plus:V2DI
18930	 (plus:V2DI
18931	  (any_extend:V2DI
18932	   (vec_select:V2HI
18933	    (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18934	    (parallel [(const_int 0) (const_int 4)])))
18935	  (any_extend:V2DI
18936	   (vec_select:V2HI
18937	    (match_dup 1)
18938	    (parallel [(const_int 1) (const_int 5)]))))
18939	 (plus:V2DI
18940	  (any_extend:V2DI
18941	   (vec_select:V2HI
18942	    (match_dup 1)
18943	    (parallel [(const_int 2) (const_int 6)])))
18944	  (any_extend:V2DI
18945	   (vec_select:V2HI
18946	    (match_dup 1)
18947	    (parallel [(const_int 3) (const_int 7)]))))))]
18948  "TARGET_XOP"
18949  "vphadd<u>wq\t{%1, %0|%0, %1}"
18950  [(set_attr "type" "sseiadd1")])
18951
18952(define_insn "xop_phadd<u>dq"
18953  [(set (match_operand:V2DI 0 "register_operand" "=x")
18954	(plus:V2DI
18955	 (any_extend:V2DI
18956	  (vec_select:V2SI
18957	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
18958	   (parallel [(const_int 0) (const_int 2)])))
18959	 (any_extend:V2DI
18960	  (vec_select:V2SI
18961	   (match_dup 1)
18962	   (parallel [(const_int 1) (const_int 3)])))))]
18963  "TARGET_XOP"
18964  "vphadd<u>dq\t{%1, %0|%0, %1}"
18965  [(set_attr "type" "sseiadd1")])
18966
18967(define_insn "xop_phsubbw"
18968  [(set (match_operand:V8HI 0 "register_operand" "=x")
18969	(minus:V8HI
18970	 (sign_extend:V8HI
18971	  (vec_select:V8QI
18972	   (match_operand:V16QI 1 "nonimmediate_operand" "xm")
18973	   (parallel [(const_int 0) (const_int 2)
18974		      (const_int 4) (const_int 6)
18975		      (const_int 8) (const_int 10)
18976		      (const_int 12) (const_int 14)])))
18977	 (sign_extend:V8HI
18978	  (vec_select:V8QI
18979	   (match_dup 1)
18980	   (parallel [(const_int 1) (const_int 3)
18981		      (const_int 5) (const_int 7)
18982		      (const_int 9) (const_int 11)
18983		      (const_int 13) (const_int 15)])))))]
18984  "TARGET_XOP"
18985  "vphsubbw\t{%1, %0|%0, %1}"
18986  [(set_attr "type" "sseiadd1")])
18987
18988(define_insn "xop_phsubwd"
18989  [(set (match_operand:V4SI 0 "register_operand" "=x")
18990	(minus:V4SI
18991	 (sign_extend:V4SI
18992	  (vec_select:V4HI
18993	   (match_operand:V8HI 1 "nonimmediate_operand" "xm")
18994	   (parallel [(const_int 0) (const_int 2)
18995		      (const_int 4) (const_int 6)])))
18996	 (sign_extend:V4SI
18997	  (vec_select:V4HI
18998	   (match_dup 1)
18999	   (parallel [(const_int 1) (const_int 3)
19000		      (const_int 5) (const_int 7)])))))]
19001  "TARGET_XOP"
19002  "vphsubwd\t{%1, %0|%0, %1}"
19003  [(set_attr "type" "sseiadd1")])
19004
19005(define_insn "xop_phsubdq"
19006  [(set (match_operand:V2DI 0 "register_operand" "=x")
19007	(minus:V2DI
19008	 (sign_extend:V2DI
19009	  (vec_select:V2SI
19010	   (match_operand:V4SI 1 "nonimmediate_operand" "xm")
19011	   (parallel [(const_int 0) (const_int 2)])))
19012	 (sign_extend:V2DI
19013	  (vec_select:V2SI
19014	   (match_dup 1)
19015	   (parallel [(const_int 1) (const_int 3)])))))]
19016  "TARGET_XOP"
19017  "vphsubdq\t{%1, %0|%0, %1}"
19018  [(set_attr "type" "sseiadd1")])
19019
19020;; XOP permute instructions
19021(define_insn "xop_pperm"
19022  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19023	(unspec:V16QI
19024	  [(match_operand:V16QI 1 "register_operand" "x,x")
19025	   (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
19026	   (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
19027	  UNSPEC_XOP_PERMUTE))]
19028  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19029  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19030  [(set_attr "type" "sse4arg")
19031   (set_attr "mode" "TI")])
19032
19033;; XOP pack instructions that combine two vectors into a smaller vector
19034(define_insn "xop_pperm_pack_v2di_v4si"
19035  [(set (match_operand:V4SI 0 "register_operand" "=x,x")
19036	(vec_concat:V4SI
19037	 (truncate:V2SI
19038	  (match_operand:V2DI 1 "register_operand" "x,x"))
19039	 (truncate:V2SI
19040	  (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
19041   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19042  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19043  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19044  [(set_attr "type" "sse4arg")
19045   (set_attr "mode" "TI")])
19046
19047(define_insn "xop_pperm_pack_v4si_v8hi"
19048  [(set (match_operand:V8HI 0 "register_operand" "=x,x")
19049	(vec_concat:V8HI
19050	 (truncate:V4HI
19051	  (match_operand:V4SI 1 "register_operand" "x,x"))
19052	 (truncate:V4HI
19053	  (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
19054   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19055  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19056  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19057  [(set_attr "type" "sse4arg")
19058   (set_attr "mode" "TI")])
19059
19060(define_insn "xop_pperm_pack_v8hi_v16qi"
19061  [(set (match_operand:V16QI 0 "register_operand" "=x,x")
19062	(vec_concat:V16QI
19063	 (truncate:V8QI
19064	  (match_operand:V8HI 1 "register_operand" "x,x"))
19065	 (truncate:V8QI
19066	  (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
19067   (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
19068  "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
19069  "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19070  [(set_attr "type" "sse4arg")
19071   (set_attr "mode" "TI")])
19072
19073;; XOP packed rotate instructions
19074(define_expand "rotl<mode>3"
19075  [(set (match_operand:VI_128 0 "register_operand")
19076	(rotate:VI_128
19077	 (match_operand:VI_128 1 "nonimmediate_operand")
19078	 (match_operand:SI 2 "general_operand")))]
19079  "TARGET_XOP"
19080{
19081  /* If we were given a scalar, convert it to a parallel.  */
19082  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19083    {
19084      rtvec vs = rtvec_alloc (<ssescalarnum>);
19085      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19086      rtx reg = gen_reg_rtx (<MODE>mode);
19087      rtx op2 = operands[2];
19088      int i;
19089
19090      if (GET_MODE (op2) != <ssescalarmode>mode)
19091	{
19092	  op2 = gen_reg_rtx (<ssescalarmode>mode);
19093	  convert_move (op2, operands[2], false);
19094	}
19095
19096      for (i = 0; i < <ssescalarnum>; i++)
19097	RTVEC_ELT (vs, i) = op2;
19098
19099      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19100      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
19101      DONE;
19102    }
19103})
19104
19105(define_expand "rotr<mode>3"
19106  [(set (match_operand:VI_128 0 "register_operand")
19107	(rotatert:VI_128
19108	 (match_operand:VI_128 1 "nonimmediate_operand")
19109	 (match_operand:SI 2 "general_operand")))]
19110  "TARGET_XOP"
19111{
19112  /* If we were given a scalar, convert it to a parallel.  */
19113  if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
19114    {
19115      rtvec vs = rtvec_alloc (<ssescalarnum>);
19116      rtx par = gen_rtx_PARALLEL (<MODE>mode, vs);
19117      rtx neg = gen_reg_rtx (<MODE>mode);
19118      rtx reg = gen_reg_rtx (<MODE>mode);
19119      rtx op2 = operands[2];
19120      int i;
19121
19122      if (GET_MODE (op2) != <ssescalarmode>mode)
19123	{
19124	  op2 = gen_reg_rtx (<ssescalarmode>mode);
19125	  convert_move (op2, operands[2], false);
19126	}
19127
19128      for (i = 0; i < <ssescalarnum>; i++)
19129	RTVEC_ELT (vs, i) = op2;
19130
19131      emit_insn (gen_vec_init<mode><ssescalarmodelower> (reg, par));
19132      emit_insn (gen_neg<mode>2 (neg, reg));
19133      emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
19134      DONE;
19135    }
19136})
19137
19138(define_insn "xop_rotl<mode>3"
19139  [(set (match_operand:VI_128 0 "register_operand" "=x")
19140	(rotate:VI_128
19141	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19142	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
19143  "TARGET_XOP"
19144  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19145  [(set_attr "type" "sseishft")
19146   (set_attr "length_immediate" "1")
19147   (set_attr "mode" "TI")])
19148
19149(define_insn "xop_rotr<mode>3"
19150  [(set (match_operand:VI_128 0 "register_operand" "=x")
19151	(rotatert:VI_128
19152	 (match_operand:VI_128 1 "nonimmediate_operand" "xm")
19153	 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
19154  "TARGET_XOP"
19155{
19156  operands[3]
19157    = GEN_INT (GET_MODE_BITSIZE (<ssescalarmode>mode) - INTVAL (operands[2]));
19158  return \"vprot<ssemodesuffix>\t{%3, %1, %0|%0, %1, %3}\";
19159}
19160  [(set_attr "type" "sseishft")
19161   (set_attr "length_immediate" "1")
19162   (set_attr "mode" "TI")])
19163
19164(define_expand "vrotr<mode>3"
19165  [(match_operand:VI_128 0 "register_operand")
19166   (match_operand:VI_128 1 "register_operand")
19167   (match_operand:VI_128 2 "register_operand")]
19168  "TARGET_XOP"
19169{
19170  rtx reg = gen_reg_rtx (<MODE>mode);
19171  emit_insn (gen_neg<mode>2 (reg, operands[2]));
19172  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
19173  DONE;
19174})
19175
19176(define_expand "vrotl<mode>3"
19177  [(match_operand:VI_128 0 "register_operand")
19178   (match_operand:VI_128 1 "register_operand")
19179   (match_operand:VI_128 2 "register_operand")]
19180  "TARGET_XOP"
19181{
19182  emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
19183  DONE;
19184})
19185
19186(define_insn "xop_vrotl<mode>3"
19187  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19188	(if_then_else:VI_128
19189	 (ge:VI_128
19190	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19191	  (const_int 0))
19192	 (rotate:VI_128
19193	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19194	  (match_dup 2))
19195	 (rotatert:VI_128
19196	  (match_dup 1)
19197	  (neg:VI_128 (match_dup 2)))))]
19198  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19199  "vprot<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19200  [(set_attr "type" "sseishft")
19201   (set_attr "prefix_data16" "0")
19202   (set_attr "prefix_extra" "2")
19203   (set_attr "mode" "TI")])
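
;; Illustrative sketch (not part of the machine description) of the
;; per-element behavior the xop_vrotl<mode>3 pattern above describes: a
;; non-negative count rotates left, a negative count rotates right by the
;; negated count.  Shown for one 32-bit element, with the count assumed to be
;; in the range -31..31 as in the RTL.
;;
;;   static inline unsigned int
;;   vprotd_element (unsigned int x, int n)
;;   {
;;     unsigned int r = (unsigned int) (n >= 0 ? n : -n);
;;     if (r == 0)
;;       return x;
;;     if (n >= 0)
;;       return (x << r) | (x >> (32 - r));   /* rotate left */
;;     return (x >> r) | (x << (32 - r));     /* rotate right */
;;   }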
19204
19205;; XOP packed shift instructions.
19206(define_expand "vlshr<mode>3"
19207  [(set (match_operand:VI12_128 0 "register_operand")
19208	(lshiftrt:VI12_128
19209	  (match_operand:VI12_128 1 "register_operand")
19210	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
19211  "TARGET_XOP"
19212{
19213  rtx neg = gen_reg_rtx (<MODE>mode);
19214  emit_insn (gen_neg<mode>2 (neg, operands[2]));
19215  emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
19216  DONE;
19217})
19218
19219(define_expand "vlshr<mode>3"
19220  [(set (match_operand:VI48_128 0 "register_operand")
19221	(lshiftrt:VI48_128
19222	  (match_operand:VI48_128 1 "register_operand")
19223	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
19224  "TARGET_AVX2 || TARGET_XOP"
19225{
19226  if (!TARGET_AVX2)
19227    {
19228      rtx neg = gen_reg_rtx (<MODE>mode);
19229      emit_insn (gen_neg<mode>2 (neg, operands[2]));
19230      emit_insn (gen_xop_shl<mode>3 (operands[0], operands[1], neg));
19231      DONE;
19232    }
19233})
19234
19235(define_expand "vlshr<mode>3"
19236  [(set (match_operand:VI48_512 0 "register_operand")
19237	(lshiftrt:VI48_512
19238	  (match_operand:VI48_512 1 "register_operand")
19239	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
19240  "TARGET_AVX512F")
19241
19242(define_expand "vlshr<mode>3"
19243  [(set (match_operand:VI48_256 0 "register_operand")
19244	(lshiftrt:VI48_256
19245	  (match_operand:VI48_256 1 "register_operand")
19246	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
19247  "TARGET_AVX2")
19248
19249(define_expand "vashrv8hi3<mask_name>"
19250  [(set (match_operand:V8HI 0 "register_operand")
19251	(ashiftrt:V8HI
19252	  (match_operand:V8HI 1 "register_operand")
19253	  (match_operand:V8HI 2 "nonimmediate_operand")))]
19254  "TARGET_XOP || (TARGET_AVX512BW && TARGET_AVX512VL)"
19255{
19256  if (TARGET_XOP)
19257    {
19258      rtx neg = gen_reg_rtx (V8HImode);
19259      emit_insn (gen_negv8hi2 (neg, operands[2]));
19260      emit_insn (gen_xop_shav8hi3 (operands[0], operands[1], neg));
19261      DONE;
19262    }
19263})
19264
19265(define_expand "vashrv16qi3"
19266  [(set (match_operand:V16QI 0 "register_operand")
19267	(ashiftrt:V16QI
19268	  (match_operand:V16QI 1 "register_operand")
19269	  (match_operand:V16QI 2 "nonimmediate_operand")))]
19270  "TARGET_XOP"
19271{
19272   rtx neg = gen_reg_rtx (V16QImode);
19273   emit_insn (gen_negv16qi2 (neg, operands[2]));
19274   emit_insn (gen_xop_shav16qi3 (operands[0], operands[1], neg));
19275   DONE;
19276})
19277
19278(define_expand "vashrv2di3<mask_name>"
19279  [(set (match_operand:V2DI 0 "register_operand")
19280	(ashiftrt:V2DI
19281	  (match_operand:V2DI 1 "register_operand")
19282	  (match_operand:V2DI 2 "nonimmediate_operand")))]
19283  "TARGET_XOP || TARGET_AVX512VL"
19284{
19285  if (TARGET_XOP)
19286    {
19287      rtx neg = gen_reg_rtx (V2DImode);
19288      emit_insn (gen_negv2di2 (neg, operands[2]));
19289      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], neg));
19290      DONE;
19291    }
19292})
19293
19294(define_expand "vashrv4si3"
19295  [(set (match_operand:V4SI 0 "register_operand")
19296	(ashiftrt:V4SI (match_operand:V4SI 1 "register_operand")
19297		       (match_operand:V4SI 2 "nonimmediate_operand")))]
19298  "TARGET_AVX2 || TARGET_XOP"
19299{
19300  if (!TARGET_AVX2)
19301    {
19302      rtx neg = gen_reg_rtx (V4SImode);
19303      emit_insn (gen_negv4si2 (neg, operands[2]));
19304      emit_insn (gen_xop_shav4si3 (operands[0], operands[1], neg));
19305      DONE;
19306    }
19307})
19308
19309(define_expand "vashrv16si3"
19310  [(set (match_operand:V16SI 0 "register_operand")
19311	(ashiftrt:V16SI (match_operand:V16SI 1 "register_operand")
19312		        (match_operand:V16SI 2 "nonimmediate_operand")))]
19313  "TARGET_AVX512F")
19314
19315(define_expand "vashrv8si3"
19316  [(set (match_operand:V8SI 0 "register_operand")
19317	(ashiftrt:V8SI (match_operand:V8SI 1 "register_operand")
19318		       (match_operand:V8SI 2 "nonimmediate_operand")))]
19319  "TARGET_AVX2")
19320
19321(define_expand "vashl<mode>3"
19322  [(set (match_operand:VI12_128 0 "register_operand")
19323	(ashift:VI12_128
19324	  (match_operand:VI12_128 1 "register_operand")
19325	  (match_operand:VI12_128 2 "nonimmediate_operand")))]
19326  "TARGET_XOP"
19327{
19328  emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
19329  DONE;
19330})
19331
19332(define_expand "vashl<mode>3"
19333  [(set (match_operand:VI48_128 0 "register_operand")
19334	(ashift:VI48_128
19335	  (match_operand:VI48_128 1 "register_operand")
19336	  (match_operand:VI48_128 2 "nonimmediate_operand")))]
19337  "TARGET_AVX2 || TARGET_XOP"
19338{
19339  if (!TARGET_AVX2)
19340    {
19341      operands[2] = force_reg (<MODE>mode, operands[2]);
19342      emit_insn (gen_xop_sha<mode>3 (operands[0], operands[1], operands[2]));
19343      DONE;
19344    }
19345})
19346
19347(define_expand "vashl<mode>3"
19348  [(set (match_operand:VI48_512 0 "register_operand")
19349	(ashift:VI48_512
19350	  (match_operand:VI48_512 1 "register_operand")
19351	  (match_operand:VI48_512 2 "nonimmediate_operand")))]
19352  "TARGET_AVX512F")
19353
19354(define_expand "vashl<mode>3"
19355  [(set (match_operand:VI48_256 0 "register_operand")
19356	(ashift:VI48_256
19357	  (match_operand:VI48_256 1 "register_operand")
19358	  (match_operand:VI48_256 2 "nonimmediate_operand")))]
19359  "TARGET_AVX2")
19360
19361(define_insn "xop_sha<mode>3"
19362  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19363	(if_then_else:VI_128
19364	 (ge:VI_128
19365	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19366	  (const_int 0))
19367	 (ashift:VI_128
19368	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19369	  (match_dup 2))
19370	 (ashiftrt:VI_128
19371	  (match_dup 1)
19372	  (neg:VI_128 (match_dup 2)))))]
19373  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19374  "vpsha<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19375  [(set_attr "type" "sseishft")
19376   (set_attr "prefix_data16" "0")
19377   (set_attr "prefix_extra" "2")
19378   (set_attr "mode" "TI")])
19379
19380(define_insn "xop_shl<mode>3"
19381  [(set (match_operand:VI_128 0 "register_operand" "=x,x")
19382	(if_then_else:VI_128
19383	 (ge:VI_128
19384	  (match_operand:VI_128 2 "nonimmediate_operand" "x,m")
19385	  (const_int 0))
19386	 (ashift:VI_128
19387	  (match_operand:VI_128 1 "nonimmediate_operand" "xm,x")
19388	  (match_dup 2))
19389	 (lshiftrt:VI_128
19390	  (match_dup 1)
19391	  (neg:VI_128 (match_dup 2)))))]
19392  "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
19393  "vpshl<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19394  [(set_attr "type" "sseishft")
19395   (set_attr "prefix_data16" "0")
19396   (set_attr "prefix_extra" "2")
19397   (set_attr "mode" "TI")])
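
;; Illustrative sketch (not part of the machine description): xop_sha<mode>3
;; and xop_shl<mode>3 above share one shape; a non-negative per-element count
;; shifts left, and a negative count shifts right by the negated count
;; (arithmetically for vpsha*, logically for vpshl*).  This is also why the
;; vlshr*/vashr* expanders above negate the count to get right shifts.
;; Model for one 32-bit element, count assumed in range, with >> on a signed
;; operand assumed to be an arithmetic shift as on x86 targets.
;;
;;   static inline int
;;   vpshad_element (int x, int n)              /* arithmetic */
;;   {
;;     return n >= 0 ? (int) ((unsigned int) x << n) : x >> -n;
;;   }
;;   static inline unsigned int
;;   vpshld_element (unsigned int x, int n)     /* logical */
;;   {
;;     return n >= 0 ? x << n : x >> -n;
;;   }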
19398
19399(define_expand "<shift_insn><mode>3"
19400  [(set (match_operand:VI1_AVX512 0 "register_operand")
19401	(any_shift:VI1_AVX512
19402	  (match_operand:VI1_AVX512 1 "register_operand")
19403	  (match_operand:SI 2 "nonmemory_operand")))]
19404  "TARGET_SSE2"
19405{
19406  if (TARGET_XOP && <MODE>mode == V16QImode)
19407    {
19408      bool negate = false;
19409      rtx (*gen) (rtx, rtx, rtx);
19410      rtx tmp, par;
19411      int i;
19412
19413      if (<CODE> != ASHIFT)
19414	{
19415	  if (CONST_INT_P (operands[2]))
19416	    operands[2] = GEN_INT (-INTVAL (operands[2]));
19417	  else
19418	    negate = true;
19419	}
19420      par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
19421      tmp = lowpart_subreg (QImode, operands[2], SImode);
19422      for (i = 0; i < 16; i++)
19423	XVECEXP (par, 0, i) = tmp;
19424
19425      tmp = gen_reg_rtx (V16QImode);
19426      emit_insn (gen_vec_initv16qiqi (tmp, par));
19427
19428      if (negate)
19429	emit_insn (gen_negv16qi2 (tmp, tmp));
19430
19431      gen = (<CODE> == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3);
19432      emit_insn (gen (operands[0], operands[1], tmp));
19433    }
19434  else
19435    ix86_expand_vecop_qihi (<CODE>, operands[0], operands[1], operands[2]);
19436  DONE;
19437})
19438
19439(define_expand "ashrv2di3"
19440  [(set (match_operand:V2DI 0 "register_operand")
19441	(ashiftrt:V2DI
19442	  (match_operand:V2DI 1 "register_operand")
19443	  (match_operand:DI 2 "nonmemory_operand")))]
19444  "TARGET_XOP || TARGET_AVX512VL"
19445{
19446  if (!TARGET_AVX512VL)
19447    {
19448      rtx reg = gen_reg_rtx (V2DImode);
19449      rtx par;
19450      bool negate = false;
19451      int i;
19452
19453      if (CONST_INT_P (operands[2]))
19454	operands[2] = GEN_INT (-INTVAL (operands[2]));
19455      else
19456	negate = true;
19457
19458      par = gen_rtx_PARALLEL (V2DImode, rtvec_alloc (2));
19459      for (i = 0; i < 2; i++)
19460	XVECEXP (par, 0, i) = operands[2];
19461
19462      emit_insn (gen_vec_initv2didi (reg, par));
19463
19464      if (negate)
19465	emit_insn (gen_negv2di2 (reg, reg));
19466
19467      emit_insn (gen_xop_shav2di3 (operands[0], operands[1], reg));
19468      DONE;
19469    }
19470})
19471
19472;; XOP FRCZ support
19473(define_insn "xop_frcz<mode>2"
19474  [(set (match_operand:FMAMODE 0 "register_operand" "=x")
19475	(unspec:FMAMODE
19476	 [(match_operand:FMAMODE 1 "nonimmediate_operand" "xm")]
19477	 UNSPEC_FRCZ))]
19478  "TARGET_XOP"
19479  "vfrcz<ssemodesuffix>\t{%1, %0|%0, %1}"
19480  [(set_attr "type" "ssecvt1")
19481   (set_attr "mode" "<MODE>")])
19482
19483(define_expand "xop_vmfrcz<mode>2"
19484  [(set (match_operand:VF_128 0 "register_operand")
19485	(vec_merge:VF_128
19486	  (unspec:VF_128
19487	   [(match_operand:VF_128 1 "nonimmediate_operand")]
19488	   UNSPEC_FRCZ)
19489	  (match_dup 2)
19490	  (const_int 1)))]
19491  "TARGET_XOP"
19492  "operands[2] = CONST0_RTX (<MODE>mode);")
19493
19494(define_insn "*xop_vmfrcz<mode>2"
19495  [(set (match_operand:VF_128 0 "register_operand" "=x")
19496	(vec_merge:VF_128
19497	  (unspec:VF_128
19498	   [(match_operand:VF_128 1 "nonimmediate_operand" "xm")]
19499	   UNSPEC_FRCZ)
19500	  (match_operand:VF_128 2 "const0_operand")
19501	  (const_int 1)))]
19502  "TARGET_XOP"
19503  "vfrcz<ssescalarmodesuffix>\t{%1, %0|%0, %<iptr>1}"
19504  [(set_attr "type" "ssecvt1")
19505   (set_attr "mode" "<MODE>")])
19506
19507(define_insn "xop_maskcmp<mode>3"
19508  [(set (match_operand:VI_128 0 "register_operand" "=x")
19509	(match_operator:VI_128 1 "ix86_comparison_int_operator"
19510	 [(match_operand:VI_128 2 "register_operand" "x")
19511	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19512  "TARGET_XOP"
19513  "vpcom%Y1<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19514  [(set_attr "type" "sse4arg")
19515   (set_attr "prefix_data16" "0")
19516   (set_attr "prefix_rep" "0")
19517   (set_attr "prefix_extra" "2")
19518   (set_attr "length_immediate" "1")
19519   (set_attr "mode" "TI")])
19520
19521(define_insn "xop_maskcmp_uns<mode>3"
19522  [(set (match_operand:VI_128 0 "register_operand" "=x")
19523	(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19524	 [(match_operand:VI_128 2 "register_operand" "x")
19525	  (match_operand:VI_128 3 "nonimmediate_operand" "xm")]))]
19526  "TARGET_XOP"
19527  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19528  [(set_attr "type" "ssecmp")
19529   (set_attr "prefix_data16" "0")
19530   (set_attr "prefix_rep" "0")
19531   (set_attr "prefix_extra" "2")
19532   (set_attr "length_immediate" "1")
19533   (set_attr "mode" "TI")])
19534
19535;; Version of pcom*u* that is called from the intrinsics.  It keeps pcomequ*
19536;; and pcomneu* from being converted to the signed forms, in case somebody
19537;; needs the exact instruction generated for the intrinsic.
19538(define_insn "xop_maskcmp_uns2<mode>3"
19539  [(set (match_operand:VI_128 0 "register_operand" "=x")
19540	(unspec:VI_128
19541	 [(match_operator:VI_128 1 "ix86_comparison_uns_operator"
19542	  [(match_operand:VI_128 2 "register_operand" "x")
19543	   (match_operand:VI_128 3 "nonimmediate_operand" "xm")])]
19544	 UNSPEC_XOP_UNSIGNED_CMP))]
19545  "TARGET_XOP"
19546  "vpcom%Y1u<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
19547  [(set_attr "type" "ssecmp")
19548   (set_attr "prefix_data16" "0")
19549   (set_attr "prefix_extra" "2")
19550   (set_attr "length_immediate" "1")
19551   (set_attr "mode" "TI")])
19552
19553;; Pcomtrue and pcomfalse support.  These instructions always yield all-ones
19554;; or all-zeros and are of little use, but are included here for completeness.
19555(define_insn "xop_pcom_tf<mode>3"
19556  [(set (match_operand:VI_128 0 "register_operand" "=x")
19557	(unspec:VI_128
19558	  [(match_operand:VI_128 1 "register_operand" "x")
19559	   (match_operand:VI_128 2 "nonimmediate_operand" "xm")
19560	   (match_operand:SI 3 "const_int_operand" "n")]
19561	  UNSPEC_XOP_TRUEFALSE))]
19562  "TARGET_XOP"
19563{
19564  return ((INTVAL (operands[3]) != 0)
19565	  ? "vpcomtrue<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
19566	  : "vpcomfalse<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}");
19567}
19568  [(set_attr "type" "ssecmp")
19569   (set_attr "prefix_data16" "0")
19570   (set_attr "prefix_extra" "2")
19571   (set_attr "length_immediate" "1")
19572   (set_attr "mode" "TI")])
19573
19574(define_insn "xop_vpermil2<mode>3"
19575  [(set (match_operand:VF_128_256 0 "register_operand" "=x,x")
19576	(unspec:VF_128_256
19577	  [(match_operand:VF_128_256 1 "register_operand" "x,x")
19578	   (match_operand:VF_128_256 2 "nonimmediate_operand" "x,m")
19579	   (match_operand:<sseintvecmode> 3 "nonimmediate_operand" "xm,x")
19580	   (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
19581	  UNSPEC_VPERMIL2))]
19582  "TARGET_XOP"
19583  "vpermil2<ssemodesuffix>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
19584  [(set_attr "type" "sse4arg")
19585   (set_attr "length_immediate" "1")
19586   (set_attr "mode" "<MODE>")])
19587
19588;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19589
19590(define_insn "aesenc"
19591  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19592	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19593		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19594		      UNSPEC_AESENC))]
19595  "TARGET_AES"
19596  "@
19597   aesenc\t{%2, %0|%0, %2}
19598   vaesenc\t{%2, %1, %0|%0, %1, %2}"
19599  [(set_attr "isa" "noavx,avx")
19600   (set_attr "type" "sselog1")
19601   (set_attr "prefix_extra" "1")
19602   (set_attr "prefix" "orig,vex")
19603   (set_attr "btver2_decode" "double,double")
19604   (set_attr "mode" "TI")])
19605
19606(define_insn "aesenclast"
19607  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19608	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19609		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19610		      UNSPEC_AESENCLAST))]
19611  "TARGET_AES"
19612  "@
19613   aesenclast\t{%2, %0|%0, %2}
19614   vaesenclast\t{%2, %1, %0|%0, %1, %2}"
19615  [(set_attr "isa" "noavx,avx")
19616   (set_attr "type" "sselog1")
19617   (set_attr "prefix_extra" "1")
19618   (set_attr "prefix" "orig,vex")
19619   (set_attr "btver2_decode" "double,double") 
19620   (set_attr "mode" "TI")])
19621
19622(define_insn "aesdec"
19623  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19624	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19625		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19626		      UNSPEC_AESDEC))]
19627  "TARGET_AES"
19628  "@
19629   aesdec\t{%2, %0|%0, %2}
19630   vaesdec\t{%2, %1, %0|%0, %1, %2}"
19631  [(set_attr "isa" "noavx,avx")
19632   (set_attr "type" "sselog1")
19633   (set_attr "prefix_extra" "1")
19634   (set_attr "prefix" "orig,vex")
19635   (set_attr "btver2_decode" "double,double") 
19636   (set_attr "mode" "TI")])
19637
19638(define_insn "aesdeclast"
19639  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19640	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19641		       (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
19642		      UNSPEC_AESDECLAST))]
19643  "TARGET_AES"
19644  "@
19645   aesdeclast\t{%2, %0|%0, %2}
19646   vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
19647  [(set_attr "isa" "noavx,avx")
19648   (set_attr "type" "sselog1")
19649   (set_attr "prefix_extra" "1")
19650   (set_attr "prefix" "orig,vex")
19651   (set_attr "btver2_decode" "double,double")
19652   (set_attr "mode" "TI")])
19653
19654(define_insn "aesimc"
19655  [(set (match_operand:V2DI 0 "register_operand" "=x")
19656	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")]
19657		      UNSPEC_AESIMC))]
19658  "TARGET_AES"
19659  "%vaesimc\t{%1, %0|%0, %1}"
19660  [(set_attr "type" "sselog1")
19661   (set_attr "prefix_extra" "1")
19662   (set_attr "prefix" "maybe_vex")
19663   (set_attr "mode" "TI")])
19664
19665(define_insn "aeskeygenassist"
19666  [(set (match_operand:V2DI 0 "register_operand" "=x")
19667	(unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")
19668		      (match_operand:SI 2 "const_0_to_255_operand" "n")]
19669		     UNSPEC_AESKEYGENASSIST))]
19670  "TARGET_AES"
19671  "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
19672  [(set_attr "type" "sselog1")
19673   (set_attr "prefix_extra" "1")
19674   (set_attr "length_immediate" "1")
19675   (set_attr "prefix" "maybe_vex")
19676   (set_attr "mode" "TI")])
19677
19678(define_insn "pclmulqdq"
19679  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
19680	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
19681		      (match_operand:V2DI 2 "vector_operand" "xBm,xm")
19682		      (match_operand:SI 3 "const_0_to_255_operand" "n,n")]
19683		     UNSPEC_PCLMUL))]
19684  "TARGET_PCLMUL"
19685  "@
19686   pclmulqdq\t{%3, %2, %0|%0, %2, %3}
19687   vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
19688  [(set_attr "isa" "noavx,avx")
19689   (set_attr "type" "sselog1")
19690   (set_attr "prefix_extra" "1")
19691   (set_attr "length_immediate" "1")
19692   (set_attr "prefix" "orig,vex")
19693   (set_attr "mode" "TI")])
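
;; Usage sketch (an illustration, not part of the machine description): the
;; pclmulqdq immediate selects which 64-bit half of each source feeds the
;; carry-less multiply; 0x00 multiplies the two low quadwords and 0x11 the
;; two high ones.  Compile with -mpclmul.
;;
;;   #include <wmmintrin.h>
;;   __m128i
;;   clmul_low_halves (__m128i a, __m128i b)
;;   {
;;     return _mm_clmulepi64_si128 (a, b, 0x00);
;;   }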
19694
19695(define_expand "avx_vzeroall"
19696  [(match_par_dup 0 [(const_int 0)])]
19697  "TARGET_AVX"
19698{
19699  int nregs = TARGET_64BIT ? 16 : 8;
19700  int regno;
19701
19702  operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
19703
19704  XVECEXP (operands[0], 0, 0)
19705    = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
19706			       UNSPECV_VZEROALL);
19707
19708  for (regno = 0; regno < nregs; regno++)
19709    XVECEXP (operands[0], 0, regno + 1)
19710      = gen_rtx_SET (gen_rtx_REG (V8SImode, GET_SSE_REGNO (regno)),
19711		     CONST0_RTX (V8SImode));
19712})
19713
19714(define_insn "*avx_vzeroall"
19715  [(match_parallel 0 "vzeroall_operation"
19716    [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
19717  "TARGET_AVX"
19718  "vzeroall"
19719  [(set_attr "type" "sse")
19720   (set_attr "modrm" "0")
19721   (set_attr "memory" "none")
19722   (set_attr "prefix" "vex")
19723   (set_attr "btver2_decode" "vector")
19724   (set_attr "mode" "OI")])
19725
19726;; Clear the upper 128 bits of AVX registers, equivalent to a NOP
19727;; if the upper 128 bits are unused.  Initially we expand the instructions
19728;; as though they had no effect on the SSE registers, but later add SETs and
19729;; CLOBBERs to the PARALLEL to model the real effect.
19730(define_expand "avx_vzeroupper"
19731  [(parallel [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19732  "TARGET_AVX")
19733
19734(define_insn "*avx_vzeroupper"
19735  [(match_parallel 0 "vzeroupper_pattern"
19736     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19737  "TARGET_AVX && XVECLEN (operands[0], 0) == (TARGET_64BIT ? 16 : 8) + 1"
19738  "vzeroupper"
19739  [(set_attr "type" "sse")
19740   (set_attr "modrm" "0")
19741   (set_attr "memory" "none")
19742   (set_attr "prefix" "vex")
19743   (set_attr "btver2_decode" "vector")
19744   (set_attr "mode" "OI")])
19745
19746(define_insn_and_split "*avx_vzeroupper_1"
19747  [(match_parallel 0 "vzeroupper_pattern"
19748     [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
19749  "TARGET_AVX && XVECLEN (operands[0], 0) != (TARGET_64BIT ? 16 : 8) + 1"
19750  "#"
19751  "&& epilogue_completed"
19752  [(match_dup 0)]
19753{
19754  /* For IPA-RA purposes, make it clear the instruction clobbers
19755     even XMM registers not mentioned explicitly in the pattern.  */
19756  unsigned int nregs = TARGET_64BIT ? 16 : 8;
19757  unsigned int npats = XVECLEN (operands[0], 0);
19758  rtvec vec = rtvec_alloc (nregs + 1);
19759  RTVEC_ELT (vec, 0) = XVECEXP (operands[0], 0, 0);
19760  for (unsigned int i = 0, j = 1; i < nregs; ++i)
19761    {
19762      unsigned int regno = GET_SSE_REGNO (i);
19763      if (j < npats
19764	  && REGNO (SET_DEST (XVECEXP (operands[0], 0, j))) == regno)
19765	{
19766	  RTVEC_ELT (vec, i + 1) = XVECEXP (operands[0], 0, j);
19767	  j++;
19768	}
19769      else
19770	{
19771	  rtx reg = gen_rtx_REG (V2DImode, regno);
19772	  RTVEC_ELT (vec, i + 1) = gen_rtx_CLOBBER (VOIDmode, reg);
19773	}
19774    }
19775  operands[0] = gen_rtx_PARALLEL (VOIDmode, vec);
19776}
19777  [(set_attr "type" "sse")
19778   (set_attr "modrm" "0")
19779   (set_attr "memory" "none")
19780   (set_attr "prefix" "vex")
19781   (set_attr "btver2_decode" "vector")
19782   (set_attr "mode" "OI")])
19783
19784(define_mode_attr pbroadcast_evex_isa
19785  [(V64QI "avx512bw") (V32QI "avx512bw") (V16QI "avx512bw")
19786   (V32HI "avx512bw") (V16HI "avx512bw") (V8HI "avx512bw")
19787   (V16SI "avx512f") (V8SI "avx512f") (V4SI "avx512f")
19788   (V8DI "avx512f") (V4DI "avx512f") (V2DI "avx512f")])
19789
19790(define_insn "avx2_pbroadcast<mode>"
19791  [(set (match_operand:VI 0 "register_operand" "=x,v")
19792	(vec_duplicate:VI
19793	  (vec_select:<ssescalarmode>
19794	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "xm,vm")
19795	    (parallel [(const_int 0)]))))]
19796  "TARGET_AVX2"
19797  "vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}"
19798  [(set_attr "isa" "*,<pbroadcast_evex_isa>")
19799   (set_attr "type" "ssemov")
19800   (set_attr "prefix_extra" "1")
19801   (set_attr "prefix" "vex,evex")
19802   (set_attr "mode" "<sseinsnmode>")])
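
;; Usage sketch (an illustration, not part of the machine description):
;; avx2_pbroadcast<mode> duplicates element 0 of an XMM source into every
;; element of the result, e.g. the AVX2 dword case reachable through the
;; usual intrinsic.  Compile with -mavx2.
;;
;;   #include <immintrin.h>
;;   __m256i
;;   broadcast_dword (__m128i a)
;;   {
;;     return _mm256_broadcastd_epi32 (a);   /* vpbroadcastd */
;;   }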
19803
19804(define_insn "avx2_pbroadcast<mode>_1"
19805  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
19806	(vec_duplicate:VI_256
19807	  (vec_select:<ssescalarmode>
19808	    (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
19809	    (parallel [(const_int 0)]))))]
19810  "TARGET_AVX2"
19811  "@
19812   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19813   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
19814   vpbroadcast<ssemodesuffix>\t{%1, %0|%0, %<iptr>1}
19815   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}"
19816  [(set_attr "isa" "*,*,<pbroadcast_evex_isa>,<pbroadcast_evex_isa>")
19817   (set_attr "type" "ssemov")
19818   (set_attr "prefix_extra" "1")
19819   (set_attr "prefix" "vex")
19820   (set_attr "mode" "<sseinsnmode>")])
19821
19822(define_insn "<avx2_avx512>_permvar<mode><mask_name>"
19823  [(set (match_operand:VI48F_256_512 0 "register_operand" "=v")
19824	(unspec:VI48F_256_512
19825	  [(match_operand:VI48F_256_512 1 "nonimmediate_operand" "vm")
19826	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19827	  UNSPEC_VPERMVAR))]
19828  "TARGET_AVX2 && <mask_mode512bit_condition>"
19829  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19830  [(set_attr "type" "sselog")
19831   (set_attr "prefix" "<mask_prefix2>")
19832   (set_attr "mode" "<sseinsnmode>")])
19833
19834(define_insn "<avx512>_permvar<mode><mask_name>"
19835  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
19836	(unspec:VI1_AVX512VL
19837	  [(match_operand:VI1_AVX512VL 1 "nonimmediate_operand" "vm")
19838	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19839	  UNSPEC_VPERMVAR))]
19840  "TARGET_AVX512VBMI && <mask_mode512bit_condition>"
19841  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19842  [(set_attr "type" "sselog")
19843   (set_attr "prefix" "<mask_prefix2>")
19844   (set_attr "mode" "<sseinsnmode>")])
19845
19846(define_insn "<avx512>_permvar<mode><mask_name>"
19847  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
19848	(unspec:VI2_AVX512VL
19849	  [(match_operand:VI2_AVX512VL 1 "nonimmediate_operand" "vm")
19850	   (match_operand:<sseintvecmode> 2 "register_operand" "v")]
19851	  UNSPEC_VPERMVAR))]
19852  "TARGET_AVX512BW && <mask_mode512bit_condition>"
19853  "vperm<ssemodesuffix>\t{%1, %2, %0<mask_operand3>|%0<mask_operand3>, %2, %1}"
19854  [(set_attr "type" "sselog")
19855   (set_attr "prefix" "<mask_prefix2>")
19856   (set_attr "mode" "<sseinsnmode>")])
19857
19858;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
19859;; If the input happens to be in memory, use vbroadcast.
19860;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
19861(define_insn "*avx_vperm_broadcast_v4sf"
19862  [(set (match_operand:V4SF 0 "register_operand" "=v,v,v")
19863	(vec_select:V4SF
19864	  (match_operand:V4SF 1 "nonimmediate_operand" "m,o,v")
19865	  (match_parallel 2 "avx_vbroadcast_operand"
19866	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
19867  "TARGET_AVX"
19868{
19869  int elt = INTVAL (operands[3]);
19870  switch (which_alternative)
19871    {
19872    case 0:
19873    case 1:
19874      operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
19875      return "vbroadcastss\t{%1, %0|%0, %k1}";
19876    case 2:
19877      operands[2] = GEN_INT (elt * 0x55);
19878      return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
19879    default:
19880      gcc_unreachable ();
19881    }
19882}
19883  [(set_attr "type" "ssemov,ssemov,sselog1")
19884   (set_attr "prefix_extra" "1")
19885   (set_attr "length_immediate" "0,0,1")
19886   (set_attr "prefix" "maybe_evex")
19887   (set_attr "mode" "SF,SF,V4SF")])
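
;; Worked example of the immediate computed just above (illustration only):
;; the vpermilps immediate is four 2-bit selectors, and multiplying the
;; element index by 0x55 (0b01010101) replicates it into all four fields,
;; e.g. elt = 2 gives 2 * 0x55 = 0xaa = 0b10101010, so every destination
;; element reads source element 2.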
19888
19889(define_insn_and_split "*avx_vperm_broadcast_<mode>"
19890  [(set (match_operand:VF_256 0 "register_operand" "=v,v,v")
19891	(vec_select:VF_256
19892	  (match_operand:VF_256 1 "nonimmediate_operand" "m,o,?v")
19893	  (match_parallel 2 "avx_vbroadcast_operand"
19894	    [(match_operand 3 "const_int_operand" "C,n,n")])))]
19895  "TARGET_AVX
19896   && (<MODE>mode != V4DFmode || !TARGET_AVX2 || operands[3] == const0_rtx)"
19897  "#"
19898  "&& reload_completed"
19899  [(set (match_dup 0) (vec_duplicate:VF_256 (match_dup 1)))]
19900{
19901  rtx op0 = operands[0], op1 = operands[1];
19902  int elt = INTVAL (operands[3]);
19903
19904  if (REG_P (op1))
19905    {
19906      int mask;
19907
19908      if (TARGET_AVX2 && elt == 0)
19909	{
19910	  emit_insn (gen_vec_dup<mode> (op0, gen_lowpart (<ssescalarmode>mode,
19911							  op1)));
19912	  DONE;
19913	}
19914
19915      /* Shuffle element we care about into all elements of the 128-bit lane.
19916	 The other lane gets shuffled too, but we don't care.  */
19917      if (<MODE>mode == V4DFmode)
19918	mask = (elt & 1 ? 15 : 0);
19919      else
19920	mask = (elt & 3) * 0x55;
19921      emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
19922
19923      /* Shuffle the lane we care about into both lanes of the dest.  */
19924      mask = (elt / (<ssescalarnum> / 2)) * 0x11;
19925      if (EXT_REX_SSE_REG_P (op0))
19926	{
19927	  /* There is no EVEX VPERM2F128, but we can use either VBROADCASTSS
19928	     or VSHUFF128.  */
19929	  gcc_assert (<MODE>mode == V8SFmode);
19930	  if ((mask & 1) == 0)
19931	    emit_insn (gen_avx2_vec_dupv8sf (op0,
19932					     gen_lowpart (V4SFmode, op0)));
19933	  else
19934	    emit_insn (gen_avx512vl_shuf_f32x4_1 (op0, op0, op0,
19935						  GEN_INT (4), GEN_INT (5),
19936						  GEN_INT (6), GEN_INT (7),
19937						  GEN_INT (12), GEN_INT (13),
19938						  GEN_INT (14), GEN_INT (15)));
19939	  DONE;
19940	}
19941
19942      emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
19943      DONE;
19944    }
19945
19946  operands[1] = adjust_address (op1, <ssescalarmode>mode,
19947				elt * GET_MODE_SIZE (<ssescalarmode>mode));
19948})
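
;; Worked example of the lane mask computed in the split above (illustration
;; only): mask = (elt / (<ssescalarnum> / 2)) * 0x11 picks the 128-bit lane
;; containing the requested element for both halves of the destination.
;; For V8SF and elt = 5, 5 / 4 = 1, so mask = 0x11 and vperm2f128 copies the
;; upper lane of the source into both lanes; for elt = 1 the mask is 0x00 and
;; the lower lane is copied into both.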
19949
19950(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19951  [(set (match_operand:VF2 0 "register_operand")
19952	(vec_select:VF2
19953	  (match_operand:VF2 1 "nonimmediate_operand")
19954	  (match_operand:SI 2 "const_0_to_255_operand")))]
19955  "TARGET_AVX && <mask_mode512bit_condition>"
19956{
19957  int mask = INTVAL (operands[2]);
19958  rtx perm[<ssescalarnum>];
19959
19960  int i;
19961  for (i = 0; i < <ssescalarnum>; i = i + 2)
19962    {
19963      perm[i]     = GEN_INT (((mask >> i)       & 1) + i);
19964      perm[i + 1] = GEN_INT (((mask >> (i + 1)) & 1) + i);
19965    }
19966
19967  operands[2]
19968    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19969})
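
;; Worked example for the expander above (illustration only): each bit of the
;; vpermilpd immediate selects one double within its 128-bit pair.  For V4DF
;; and operands[2] = 5 (0b0101) the loop builds the selection parallel
;; {1, 0, 3, 2}, i.e. the two doubles are swapped within each lane.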
19970
19971(define_expand "<sse2_avx_avx512f>_vpermil<mode><mask_name>"
19972  [(set (match_operand:VF1 0 "register_operand")
19973	(vec_select:VF1
19974	  (match_operand:VF1 1 "nonimmediate_operand")
19975	  (match_operand:SI 2 "const_0_to_255_operand")))]
19976  "TARGET_AVX && <mask_mode512bit_condition>"
19977{
19978  int mask = INTVAL (operands[2]);
19979  rtx perm[<ssescalarnum>];
19980
19981  int i;
19982  for (i = 0; i < <ssescalarnum>; i = i + 4)
19983    {
19984      perm[i]     = GEN_INT (((mask >> 0) & 3) + i);
19985      perm[i + 1] = GEN_INT (((mask >> 2) & 3) + i);
19986      perm[i + 2] = GEN_INT (((mask >> 4) & 3) + i);
19987      perm[i + 3] = GEN_INT (((mask >> 6) & 3) + i);
19988    }
19989
19990  operands[2]
19991    = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
19992})
19993
19994;; This pattern needs to come before the avx2_perm*/avx512f_perm*
19995;; patterns, as they have the same RTL representation (vpermilp*
19996;; being a subset of what vpermp* can do), but vpermilp* has shorter
19997;; latency as it never crosses lanes.
19998(define_insn "*<sse2_avx_avx512f>_vpermilp<mode><mask_name>"
19999  [(set (match_operand:VF 0 "register_operand" "=v")
20000	(vec_select:VF
20001	  (match_operand:VF 1 "nonimmediate_operand" "vm")
20002	  (match_parallel 2 ""
20003	    [(match_operand 3 "const_int_operand")])))]
20004  "TARGET_AVX && <mask_mode512bit_condition>
20005   && avx_vpermilp_parallel (operands[2], <MODE>mode)"
20006{
20007  int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
20008  operands[2] = GEN_INT (mask);
20009  return "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}";
20010}
20011  [(set_attr "type" "sselog")
20012   (set_attr "prefix_extra" "1")
20013   (set_attr "length_immediate" "1")
20014   (set_attr "prefix" "<mask_prefix>")
20015   (set_attr "mode" "<sseinsnmode>")])
20016
20017(define_expand "avx2_perm<mode>"
20018  [(match_operand:VI8F_256 0 "register_operand")
20019   (match_operand:VI8F_256 1 "nonimmediate_operand")
20020   (match_operand:SI 2 "const_0_to_255_operand")]
20021  "TARGET_AVX2"
20022{
20023  int mask = INTVAL (operands[2]);
20024  emit_insn (gen_avx2_perm<mode>_1 (operands[0], operands[1],
20025				    GEN_INT ((mask >> 0) & 3),
20026				    GEN_INT ((mask >> 2) & 3),
20027				    GEN_INT ((mask >> 4) & 3),
20028				    GEN_INT ((mask >> 6) & 3)));
20029  DONE;
20030})
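
;; Worked example for the expander above (illustration only): the imm8 is
;; four 2-bit source indices, one per destination quadword.  A mask of 0x1b
;; (0b00011011) decodes to {3, 2, 1, 0}, reversing the four 64-bit elements,
;; which is what the usual intrinsic form below requests.  Requires -mavx2.
;;
;;   #include <immintrin.h>
;;   __m256i
;;   reverse_qwords (__m256i x)
;;   {
;;     return _mm256_permute4x64_epi64 (x, 0x1b);   /* vpermq */
;;   }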
20031
20032(define_expand "avx512vl_perm<mode>_mask"
20033  [(match_operand:VI8F_256 0 "register_operand")
20034   (match_operand:VI8F_256 1 "nonimmediate_operand")
20035   (match_operand:SI 2 "const_0_to_255_operand")
20036   (match_operand:VI8F_256 3 "nonimm_or_0_operand")
20037   (match_operand:<avx512fmaskmode> 4 "register_operand")]
20038  "TARGET_AVX512VL"
20039{
20040  int mask = INTVAL (operands[2]);
20041  emit_insn (gen_<avx2_avx512>_perm<mode>_1_mask (operands[0], operands[1],
20042						  GEN_INT ((mask >> 0) & 3),
20043						  GEN_INT ((mask >> 2) & 3),
20044						  GEN_INT ((mask >> 4) & 3),
20045						  GEN_INT ((mask >> 6) & 3),
20046						  operands[3], operands[4]));
20047  DONE;
20048})
20049
20050(define_insn "avx2_perm<mode>_1<mask_name>"
20051  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20052	(vec_select:VI8F_256
20053	  (match_operand:VI8F_256 1 "nonimmediate_operand" "vm")
20054	  (parallel [(match_operand 2 "const_0_to_3_operand")
20055		     (match_operand 3 "const_0_to_3_operand")
20056		     (match_operand 4 "const_0_to_3_operand")
20057		     (match_operand 5 "const_0_to_3_operand")])))]
20058  "TARGET_AVX2 && <mask_mode512bit_condition>"
20059{
20060  int mask = 0;
20061  mask |= INTVAL (operands[2]) << 0;
20062  mask |= INTVAL (operands[3]) << 2;
20063  mask |= INTVAL (operands[4]) << 4;
20064  mask |= INTVAL (operands[5]) << 6;
20065  operands[2] = GEN_INT (mask);
20066  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand6>|%0<mask_operand6>, %1, %2}";
20067}
20068  [(set_attr "type" "sselog")
20069   (set_attr "prefix" "<mask_prefix2>")
20070   (set_attr "mode" "<sseinsnmode>")])
20071
20072(define_expand "avx512f_perm<mode>"
20073  [(match_operand:V8FI 0 "register_operand")
20074   (match_operand:V8FI 1 "nonimmediate_operand")
20075   (match_operand:SI 2 "const_0_to_255_operand")]
20076  "TARGET_AVX512F"
20077{
20078  int mask = INTVAL (operands[2]);
20079  emit_insn (gen_avx512f_perm<mode>_1 (operands[0], operands[1],
20080				       GEN_INT ((mask >> 0) & 3),
20081				       GEN_INT ((mask >> 2) & 3),
20082				       GEN_INT ((mask >> 4) & 3),
20083				       GEN_INT ((mask >> 6) & 3),
20084				       GEN_INT (((mask >> 0) & 3) + 4),
20085				       GEN_INT (((mask >> 2) & 3) + 4),
20086				       GEN_INT (((mask >> 4) & 3) + 4),
20087				       GEN_INT (((mask >> 6) & 3) + 4)));
20088  DONE;
20089})
20090
20091(define_expand "avx512f_perm<mode>_mask"
20092  [(match_operand:V8FI 0 "register_operand")
20093   (match_operand:V8FI 1 "nonimmediate_operand")
20094   (match_operand:SI 2 "const_0_to_255_operand")
20095   (match_operand:V8FI 3 "nonimm_or_0_operand")
20096   (match_operand:<avx512fmaskmode> 4 "register_operand")]
20097  "TARGET_AVX512F"
20098{
20099  int mask = INTVAL (operands[2]);
20100  emit_insn (gen_avx512f_perm<mode>_1_mask (operands[0], operands[1],
20101					    GEN_INT ((mask >> 0) & 3),
20102					    GEN_INT ((mask >> 2) & 3),
20103					    GEN_INT ((mask >> 4) & 3),
20104					    GEN_INT ((mask >> 6) & 3),
20105					    GEN_INT (((mask >> 0) & 3) + 4),
20106					    GEN_INT (((mask >> 2) & 3) + 4),
20107					    GEN_INT (((mask >> 4) & 3) + 4),
20108					    GEN_INT (((mask >> 6) & 3) + 4),
20109					    operands[3], operands[4]));
20110  DONE;
20111})
20112
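;; For the 512-bit forms the same immediate controls both 256-bit halves,
;; so the selectors for elements 4..7 must equal the selectors for elements
;; 0..3 plus 4; the insn condition below checks this before the immediate
;; is reassembled.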
20113(define_insn "avx512f_perm<mode>_1<mask_name>"
20114  [(set (match_operand:V8FI 0 "register_operand" "=v")
20115	(vec_select:V8FI
20116	  (match_operand:V8FI 1 "nonimmediate_operand" "vm")
20117	  (parallel [(match_operand 2 "const_0_to_3_operand")
20118		     (match_operand 3 "const_0_to_3_operand")
20119		     (match_operand 4 "const_0_to_3_operand")
20120		     (match_operand 5 "const_0_to_3_operand")
20121		     (match_operand 6 "const_4_to_7_operand")
20122		     (match_operand 7 "const_4_to_7_operand")
20123		     (match_operand 8 "const_4_to_7_operand")
20124		     (match_operand 9 "const_4_to_7_operand")])))]
20125  "TARGET_AVX512F && <mask_mode512bit_condition>
20126   && (INTVAL (operands[2]) == (INTVAL (operands[6]) - 4)
20127       && INTVAL (operands[3]) == (INTVAL (operands[7]) - 4)
20128       && INTVAL (operands[4]) == (INTVAL (operands[8]) - 4)
20129       && INTVAL (operands[5]) == (INTVAL (operands[9]) - 4))"
20130{
20131  int mask = 0;
20132  mask |= INTVAL (operands[2]) << 0;
20133  mask |= INTVAL (operands[3]) << 2;
20134  mask |= INTVAL (operands[4]) << 4;
20135  mask |= INTVAL (operands[5]) << 6;
20136  operands[2] = GEN_INT (mask);
20137  return "vperm<ssemodesuffix>\t{%2, %1, %0<mask_operand10>|%0<mask_operand10>, %1, %2}";
20138}
20139  [(set_attr "type" "sselog")
20140   (set_attr "prefix" "<mask_prefix2>")
20141   (set_attr "mode" "<sseinsnmode>")])
20142
20143(define_insn "avx2_permv2ti"
20144  [(set (match_operand:V4DI 0 "register_operand" "=x")
20145	(unspec:V4DI
20146	  [(match_operand:V4DI 1 "register_operand" "x")
20147	   (match_operand:V4DI 2 "nonimmediate_operand" "xm")
20148	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
20149	  UNSPEC_VPERMTI))]
20150  "TARGET_AVX2"
20151  "vperm2i128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20152  [(set_attr "type" "sselog")
20153   (set_attr "prefix" "vex")
20154   (set_attr "mode" "OI")])
20155
20156(define_insn "avx2_vec_dupv4df"
20157  [(set (match_operand:V4DF 0 "register_operand" "=v")
20158	(vec_duplicate:V4DF
20159	  (vec_select:DF
20160	    (match_operand:V2DF 1 "register_operand" "v")
20161	    (parallel [(const_int 0)]))))]
20162  "TARGET_AVX2"
20163  "vbroadcastsd\t{%1, %0|%0, %1}"
20164  [(set_attr "type" "sselog1")
20165   (set_attr "prefix" "maybe_evex")
20166   (set_attr "mode" "V4DF")])
20167
20168(define_insn "<avx512>_vec_dup<mode>_1"
20169  [(set (match_operand:VI_AVX512BW 0 "register_operand" "=v,v")
20170	(vec_duplicate:VI_AVX512BW
20171	  (vec_select:<ssescalarmode>
20172	    (match_operand:VI_AVX512BW 1 "nonimmediate_operand" "v,m")
20173	    (parallel [(const_int 0)]))))]
20174  "TARGET_AVX512F"
20175  "@
20176   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %x1}
20177   vpbroadcast<ssemodesuffix>\t{%x1, %0|%0, %<iptr>1}"
20178  [(set_attr "type" "ssemov")
20179   (set_attr "prefix" "evex")
20180   (set_attr "mode" "<sseinsnmode>")])
20181
20182(define_insn "<avx512>_vec_dup<mode><mask_name>"
20183  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v")
20184	(vec_duplicate:V48_AVX512VL
20185	  (vec_select:<ssescalarmode>
20186	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20187	    (parallel [(const_int 0)]))))]
20188  "TARGET_AVX512F"
20189{
20190  /* There is no DF broadcast (in AVX-512*) to a 128-bit register.
20191     Mimic it with the integer variant.  */
20192  if (<MODE>mode == V2DFmode)
20193    return "vpbroadcastq\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}";
20194
20195  return "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}";
20196}
20197  [(set_attr "type" "ssemov")
20198   (set_attr "prefix" "evex")
20199   (set_attr "mode" "<sseinsnmode>")])
20200
20201(define_insn "<avx512>_vec_dup<mode><mask_name>"
20202  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
20203	(vec_duplicate:VI12_AVX512VL
20204	  (vec_select:<ssescalarmode>
20205	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20206	    (parallel [(const_int 0)]))))]
20207  "TARGET_AVX512BW"
20208  "vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %<iptr>1}"
20209  [(set_attr "type" "ssemov")
20210   (set_attr "prefix" "evex")
20211   (set_attr "mode" "<sseinsnmode>")])
20212
20213(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20214  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20215	(vec_duplicate:V16FI
20216	  (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20217  "TARGET_AVX512F"
20218  "@
20219   vshuf<shuffletype>32x4\t{$0x0, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x0}
20220   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20221  [(set_attr "type" "ssemov")
20222   (set_attr "prefix" "evex")
20223   (set_attr "mode" "<sseinsnmode>")])
20224
20225(define_insn "<mask_codefor>avx512f_broadcast<mode><mask_name>"
20226  [(set (match_operand:V8FI 0 "register_operand" "=v,v")
20227	(vec_duplicate:V8FI
20228	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20229  "TARGET_AVX512F"
20230  "@
20231   vshuf<shuffletype>64x2\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20232   vbroadcast<shuffletype>64x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20233  [(set_attr "type" "ssemov")
20234   (set_attr "prefix" "evex")
20235   (set_attr "mode" "<sseinsnmode>")])
20236
20237(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20238  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v")
20239	(vec_duplicate:VI12_AVX512VL
20240	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20241  "TARGET_AVX512BW"
20242  "@
20243   vpbroadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}
20244   vpbroadcast<bcstscalarsuff>\t{%k1, %0<mask_operand2>|%0<mask_operand2>, %k1}"
20245  [(set_attr "type" "ssemov")
20246   (set_attr "prefix" "evex")
20247   (set_attr "mode" "<sseinsnmode>")])
20248
20249(define_insn "<mask_codefor><avx512>_vec_dup_gpr<mode><mask_name>"
20250  [(set (match_operand:V48_AVX512VL 0 "register_operand" "=v,v")
20251	(vec_duplicate:V48_AVX512VL
20252	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "vm,r")))]
20253  "TARGET_AVX512F"
20254  "v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20255  [(set_attr "type" "ssemov")
20256   (set_attr "prefix" "evex")
20257   (set_attr "mode" "<sseinsnmode>")
20258   (set (attr "enabled")
20259     (if_then_else (eq_attr "alternative" "1")
20260	(symbol_ref "GET_MODE_CLASS (<ssescalarmode>mode) == MODE_INT
20261		     && (<ssescalarmode>mode != DImode || TARGET_64BIT)")
20262	(const_int 1)))])
20263
20264(define_insn "vec_dupv4sf"
20265  [(set (match_operand:V4SF 0 "register_operand" "=v,v,x")
20266	(vec_duplicate:V4SF
20267	  (match_operand:SF 1 "nonimmediate_operand" "Yv,m,0")))]
20268  "TARGET_SSE"
20269  "@
20270   vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
20271   vbroadcastss\t{%1, %0|%0, %1}
20272   shufps\t{$0, %0, %0|%0, %0, 0}"
20273  [(set_attr "isa" "avx,avx,noavx")
20274   (set_attr "type" "sseshuf1,ssemov,sseshuf1")
20275   (set_attr "length_immediate" "1,0,1")
20276   (set_attr "prefix_extra" "0,1,*")
20277   (set_attr "prefix" "maybe_evex,maybe_evex,orig")
20278   (set_attr "mode" "V4SF")])
20279
20280(define_insn "*vec_dupv4si"
20281  [(set (match_operand:V4SI 0 "register_operand"     "=v,v,x")
20282	(vec_duplicate:V4SI
20283	  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
20284  "TARGET_SSE"
20285  "@
20286   %vpshufd\t{$0, %1, %0|%0, %1, 0}
20287   vbroadcastss\t{%1, %0|%0, %1}
20288   shufps\t{$0, %0, %0|%0, %0, 0}"
20289  [(set_attr "isa" "sse2,avx,noavx")
20290   (set_attr "type" "sselog1,ssemov,sselog1")
20291   (set_attr "length_immediate" "1,0,1")
20292   (set_attr "prefix_extra" "0,1,*")
20293   (set_attr "prefix" "maybe_vex,maybe_evex,orig")
20294   (set_attr "mode" "TI,V4SF,V4SF")])
20295
20296(define_insn "*vec_dupv2di"
20297  [(set (match_operand:V2DI 0 "register_operand"     "=x,v,v,x")
20298	(vec_duplicate:V2DI
20299	  (match_operand:DI 1 "nonimmediate_operand" " 0,Yv,vm,0")))]
20300  "TARGET_SSE"
20301  "@
20302   punpcklqdq\t%0, %0
20303   vpunpcklqdq\t{%d1, %0|%0, %d1}
20304   %vmovddup\t{%1, %0|%0, %1}
20305   movlhps\t%0, %0"
20306  [(set_attr "isa" "sse2_noavx,avx,sse3,noavx")
20307   (set_attr "type" "sselog1,sselog1,sselog1,ssemov")
20308   (set_attr "prefix" "orig,maybe_evex,maybe_vex,orig")
20309   (set_attr "mode" "TI,TI,DF,V4SF")])
20310
20311(define_insn "avx2_vbroadcasti128_<mode>"
20312  [(set (match_operand:VI_256 0 "register_operand" "=x,v,v")
20313	(vec_concat:VI_256
20314	  (match_operand:<ssehalfvecmode> 1 "memory_operand" "m,m,m")
20315	  (match_dup 1)))]
20316  "TARGET_AVX2"
20317  "@
20318   vbroadcasti128\t{%1, %0|%0, %1}
20319   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20320   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}"
20321  [(set_attr "isa" "*,avx512dq,avx512vl")
20322   (set_attr "type" "ssemov")
20323   (set_attr "prefix_extra" "1")
20324   (set_attr "prefix" "vex,evex,evex")
20325   (set_attr "mode" "OI")])
20326
20327;; Modes handled by AVX vec_dup patterns.
20328(define_mode_iterator AVX_VEC_DUP_MODE
20329  [V8SI V8SF V4DI V4DF])
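;; The integer modes also map to the "ss"/"sd" suffixes: plain AVX has no
;; integer broadcast, so V8SI/V4DI elements are broadcast from memory with
;; vbroadcastss/vbroadcastsd (second alternative of vec_dup<mode> below).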
20330(define_mode_attr vecdupssescalarmodesuffix
20331  [(V8SF "ss") (V4DF "sd") (V8SI "ss") (V4DI "sd")])
20332;; Modes handled by AVX2 vec_dup patterns.
20333(define_mode_iterator AVX2_VEC_DUP_MODE
20334  [V32QI V16QI V16HI V8HI V8SI V4SI])
20335
20336(define_insn "*vec_dup<mode>"
20337  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand" "=x,x,v")
20338	(vec_duplicate:AVX2_VEC_DUP_MODE
20339	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,x,$r")))]
20340  "TARGET_AVX2"
20341  "@
20342   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20343   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20344   #"
20345  [(set_attr "isa" "*,*,noavx512vl")
20346   (set_attr "type" "ssemov")
20347   (set_attr "prefix_extra" "1")
20348   (set_attr "prefix" "maybe_evex")
20349   (set_attr "mode" "<sseinsnmode>")
20350   (set (attr "preferred_for_speed")
20351     (cond [(eq_attr "alternative" "2")
20352	      (symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
20353	   ]
20354	   (symbol_ref "true")))])
20355
20356(define_insn "vec_dup<mode>"
20357  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand" "=x,x,x,v,x")
20358	(vec_duplicate:AVX_VEC_DUP_MODE
20359	  (match_operand:<ssescalarmode> 1 "nonimmediate_operand" "m,m,x,v,?x")))]
20360  "TARGET_AVX"
20361  "@
20362   v<sseintprefix>broadcast<bcstscalarsuff>\t{%1, %0|%0, %1}
20363   vbroadcast<vecdupssescalarmodesuffix>\t{%1, %0|%0, %1}
20364   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %0|%0, %x1}
20365   v<sseintprefix>broadcast<bcstscalarsuff>\t{%x1, %g0|%g0, %x1}
20366   #"
20367  [(set_attr "type" "ssemov")
20368   (set_attr "prefix_extra" "1")
20369   (set_attr "prefix" "maybe_evex")
20370   (set_attr "isa" "avx2,noavx2,avx2,avx512f,noavx2")
20371   (set_attr "mode" "<sseinsnmode>,V8SF,<sseinsnmode>,<sseinsnmode>,V8SF")])
20372
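;; Broadcast from a general register when no direct GPR broadcast insn is
;; available: move the scalar (as SImode) into element 0 of the low 128 bits
;; of the destination, then broadcast it with vpbroadcast.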
20373(define_split
20374  [(set (match_operand:AVX2_VEC_DUP_MODE 0 "register_operand")
20375	(vec_duplicate:AVX2_VEC_DUP_MODE
20376	  (match_operand:<ssescalarmode> 1 "register_operand")))]
20377  "TARGET_AVX2
20378   /* Disable this splitter if avx512vl_vec_dup_gprv*[qhs]i insn is
20379      available, because then we can broadcast from GPRs directly.
20380      For V*[QH]I modes it requires both -mavx512vl and -mavx512bw,
20381      for V*SI mode it requires just -mavx512vl.  */
20382   && !(TARGET_AVX512VL
20383	&& (TARGET_AVX512BW || <ssescalarmode>mode == SImode))
20384   && reload_completed && GENERAL_REG_P (operands[1])"
20385  [(const_int 0)]
20386{
20387  emit_insn (gen_vec_setv4si_0 (gen_lowpart (V4SImode, operands[0]),
20388				CONST0_RTX (V4SImode),
20389				gen_lowpart (SImode, operands[1])));
20390  emit_insn (gen_avx2_pbroadcast<mode> (operands[0],
20391					gen_lowpart (<ssexmmmode>mode,
20392						     operands[0])));
20393  DONE;
20394})
20395
20396(define_split
20397  [(set (match_operand:AVX_VEC_DUP_MODE 0 "register_operand")
20398	(vec_duplicate:AVX_VEC_DUP_MODE
20399	  (match_operand:<ssescalarmode> 1 "register_operand")))]
20400  "TARGET_AVX && !TARGET_AVX2 && reload_completed"
20401  [(set (match_dup 2)
20402	(vec_duplicate:<ssehalfvecmode> (match_dup 1)))
20403   (set (match_dup 0)
20404	(vec_concat:AVX_VEC_DUP_MODE (match_dup 2) (match_dup 2)))]
20405  "operands[2] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);")
20406
20407(define_insn "avx_vbroadcastf128_<mode>"
20408  [(set (match_operand:V_256 0 "register_operand" "=x,x,x,v,v,v,v")
20409	(vec_concat:V_256
20410	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "m,0,?x,m,0,m,0")
20411	  (match_dup 1)))]
20412  "TARGET_AVX"
20413  "@
20414   vbroadcast<i128>\t{%1, %0|%0, %1}
20415   vinsert<i128>\t{$1, %1, %0, %0|%0, %0, %1, 1}
20416   vperm2<i128>\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}
20417   vbroadcast<i128vldq>\t{%1, %0|%0, %1}
20418   vinsert<i128vldq>\t{$1, %1, %0, %0|%0, %0, %1, 1}
20419   vbroadcast<shuffletype>32x4\t{%1, %0|%0, %1}
20420   vinsert<shuffletype>32x4\t{$1, %1, %0, %0|%0, %0, %1, 1}"
20421  [(set_attr "isa" "*,*,*,avx512dq,avx512dq,avx512vl,avx512vl")
20422   (set_attr "type" "ssemov,sselog1,sselog1,ssemov,sselog1,ssemov,sselog1")
20423   (set_attr "prefix_extra" "1")
20424   (set_attr "length_immediate" "0,1,1,0,1,0,1")
20425   (set_attr "prefix" "vex,vex,vex,evex,evex,evex,evex")
20426   (set_attr "mode" "<sseinsnmode>")])
20427
20428;; For broadcast[i|f]32x2.  Note that there is no V4SF variant, only V4SI.
20429(define_mode_iterator VI4F_BRCST32x2
20430  [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
20431   V16SF (V8SF "TARGET_AVX512VL")])
20432
20433(define_mode_attr 64x2mode
20434  [(V8DF "V2DF") (V8DI "V2DI") (V4DI "V2DI") (V4DF "V2DF")])
20435
20436(define_mode_attr 32x2mode
20437  [(V16SF "V2SF") (V16SI "V2SI") (V8SI "V2SI")
20438  (V8SF "V2SF") (V4SI "V2SI")])
20439
20440(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>"
20441  [(set (match_operand:VI4F_BRCST32x2 0 "register_operand" "=v")
20442	(vec_duplicate:VI4F_BRCST32x2
20443	  (vec_select:<32x2mode>
20444	    (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "vm")
20445	    (parallel [(const_int 0) (const_int 1)]))))]
20446  "TARGET_AVX512DQ"
20447  "vbroadcast<shuffletype>32x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %q1}"
20448  [(set_attr "type" "ssemov")
20449   (set_attr "prefix_extra" "1")
20450   (set_attr "prefix" "evex")
20451   (set_attr "mode" "<sseinsnmode>")])
20452
20453(define_insn "<mask_codefor>avx512vl_broadcast<mode><mask_name>_1"
20454  [(set (match_operand:VI4F_256 0 "register_operand" "=v,v")
20455        (vec_duplicate:VI4F_256
20456         (match_operand:<ssexmmmode> 1 "nonimmediate_operand" "v,m")))]
20457  "TARGET_AVX512VL"
20458  "@
20459   vshuf<shuffletype>32x4\t{$0x0, %t1, %t1, %0<mask_operand2>|%0<mask_operand2>, %t1, %t1, 0x0}
20460   vbroadcast<shuffletype>32x4\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20461  [(set_attr "type" "ssemov")
20462   (set_attr "prefix_extra" "1")
20463   (set_attr "prefix" "evex")
20464   (set_attr "mode" "<sseinsnmode>")])
20465
20466(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
20467  [(set (match_operand:V16FI 0 "register_operand" "=v,v")
20468       (vec_duplicate:V16FI
20469         (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "v,m")))]
20470  "TARGET_AVX512DQ"
20471  "@
20472   vshuf<shuffletype>32x4\t{$0x44, %g1, %g1, %0<mask_operand2>|%0<mask_operand2>, %g1, %g1, 0x44}
20473   vbroadcast<shuffletype>32x8\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20474  [(set_attr "type" "ssemov")
20475   (set_attr "prefix_extra" "1")
20476   (set_attr "prefix" "evex")
20477   (set_attr "mode" "<sseinsnmode>")])
20478
20479;; For broadcast[i|f]64x2
20480(define_mode_iterator VI8F_BRCST64x2
20481  [V8DI V8DF (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")])
20482
20483(define_insn "<mask_codefor>avx512dq_broadcast<mode><mask_name>_1"
20484  [(set (match_operand:VI8F_BRCST64x2 0 "register_operand" "=v,v")
20485       (vec_duplicate:VI8F_BRCST64x2
20486         (match_operand:<64x2mode> 1 "nonimmediate_operand" "v,m")))]
20487  "TARGET_AVX512DQ"
20488  "@
20489   vshuf<shuffletype>64x2\t{$0x0, %<xtg_mode>1, %<xtg_mode>1, %0<mask_operand2>|%0<mask_operand2>, %<xtg_mode>1, %<xtg_mode>1, 0x0}
20490   vbroadcast<shuffletype>64x2\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
20491  [(set_attr "type" "ssemov")
20492   (set_attr "prefix_extra" "1")
20493   (set_attr "prefix" "evex")
20494   (set_attr "mode" "<sseinsnmode>")])
20495
20496(define_insn "avx512cd_maskb_vec_dup<mode>"
20497  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
20498	(vec_duplicate:VI8_AVX512VL
20499	  (zero_extend:DI
20500	    (match_operand:QI 1 "register_operand" "k"))))]
20501  "TARGET_AVX512CD"
20502  "vpbroadcastmb2q\t{%1, %0|%0, %1}"
20503  [(set_attr "type" "mskmov")
20504   (set_attr "prefix" "evex")
20505   (set_attr "mode" "XI")])
20506
20507(define_insn "avx512cd_maskw_vec_dup<mode>"
20508  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
20509	(vec_duplicate:VI4_AVX512VL
20510	  (zero_extend:SI
20511	    (match_operand:HI 1 "register_operand" "k"))))]
20512  "TARGET_AVX512CD"
20513  "vpbroadcastmw2d\t{%1, %0|%0, %1}"
20514  [(set_attr "type" "mskmov")
20515   (set_attr "prefix" "evex")
20516   (set_attr "mode" "XI")])
20517
20518(define_insn "<sse2_avx_avx512f>_vpermilvar<mode>3<mask_name>"
20519  [(set (match_operand:VF 0 "register_operand" "=v")
20520	(unspec:VF
20521	  [(match_operand:VF 1 "register_operand" "v")
20522	   (match_operand:<sseintvecmode> 2 "nonimmediate_operand" "vm")]
20523	  UNSPEC_VPERMIL))]
20524  "TARGET_AVX && <mask_mode512bit_condition>"
20525  "vpermil<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
20526  [(set_attr "type" "sselog")
20527   (set_attr "prefix_extra" "1")
20528   (set_attr "btver2_decode" "vector")
20529   (set_attr "prefix" "<mask_prefix>")
20530   (set_attr "mode" "<sseinsnmode>")])
20531
20532(define_mode_iterator VPERMI2
20533  [V16SI V16SF V8DI V8DF
20534   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
20535   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
20536   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
20537   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
20538   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
20539   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
20540   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
20541   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
20542
20543(define_mode_iterator VPERMI2I
20544  [V16SI V8DI
20545   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
20546   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
20547   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX512BW && TARGET_AVX512VL")
20548   (V8HI "TARGET_AVX512BW && TARGET_AVX512VL")
20549   (V64QI "TARGET_AVX512VBMI") (V32QI "TARGET_AVX512VBMI && TARGET_AVX512VL")
20550   (V16QI "TARGET_AVX512VBMI && TARGET_AVX512VL")])
20551
20552(define_expand "<avx512>_vpermi2var<mode>3_mask"
20553  [(set (match_operand:VPERMI2 0 "register_operand")
20554	(vec_merge:VPERMI2
20555	  (unspec:VPERMI2
20556	    [(match_operand:<sseintvecmode> 2 "register_operand")
20557	     (match_operand:VPERMI2 1 "register_operand")
20558	     (match_operand:VPERMI2 3 "nonimmediate_operand")]
20559	    UNSPEC_VPERMT2)
20560	  (match_dup 5)
20561	  (match_operand:<avx512fmaskmode> 4 "register_operand")))]
20562  "TARGET_AVX512F"
20563{
20564  operands[2] = force_reg (<sseintvecmode>mode, operands[2]);
20565  operands[5] = gen_lowpart (<MODE>mode, operands[2]);
20566})
20567
20568(define_insn "*<avx512>_vpermi2var<mode>3_mask"
20569  [(set (match_operand:VPERMI2I 0 "register_operand" "=v")
20570	(vec_merge:VPERMI2I
20571	  (unspec:VPERMI2I
20572	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20573	     (match_operand:VPERMI2I 1 "register_operand" "v")
20574	     (match_operand:VPERMI2I 3 "nonimmediate_operand" "vm")]
20575	    UNSPEC_VPERMT2)
20576	  (match_dup 2)
20577	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20578  "TARGET_AVX512F"
20579  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20580  [(set_attr "type" "sselog")
20581   (set_attr "prefix" "evex")
20582   (set_attr "mode" "<sseinsnmode>")])
20583
20584(define_insn "*<avx512>_vpermi2var<mode>3_mask"
20585  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
20586	(vec_merge:VF_AVX512VL
20587	  (unspec:VF_AVX512VL
20588	    [(match_operand:<sseintvecmode> 2 "register_operand" "0")
20589	     (match_operand:VF_AVX512VL 1 "register_operand" "v")
20590	     (match_operand:VF_AVX512VL 3 "nonimmediate_operand" "vm")]
20591	    UNSPEC_VPERMT2)
20592	  (subreg:VF_AVX512VL (match_dup 2) 0)
20593	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20594  "TARGET_AVX512F"
20595  "vpermi2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20596  [(set_attr "type" "sselog")
20597   (set_attr "prefix" "evex")
20598   (set_attr "mode" "<sseinsnmode>")])
20599
20600(define_expand "<avx512>_vpermt2var<mode>3_maskz"
20601  [(match_operand:VPERMI2 0 "register_operand")
20602   (match_operand:<sseintvecmode> 1 "register_operand")
20603   (match_operand:VPERMI2 2 "register_operand")
20604   (match_operand:VPERMI2 3 "nonimmediate_operand")
20605   (match_operand:<avx512fmaskmode> 4 "register_operand")]
20606  "TARGET_AVX512F"
20607{
20608  emit_insn (gen_<avx512>_vpermt2var<mode>3_maskz_1 (
20609	operands[0], operands[1], operands[2], operands[3],
20610	CONST0_RTX (<MODE>mode), operands[4]));
20611  DONE;
20612})
20613
20614(define_insn "<avx512>_vpermt2var<mode>3<sd_maskz_name>"
20615  [(set (match_operand:VPERMI2 0 "register_operand" "=v,v")
20616	(unspec:VPERMI2
20617	  [(match_operand:<sseintvecmode> 1 "register_operand" "v,0")
20618	   (match_operand:VPERMI2 2 "register_operand" "0,v")
20619	   (match_operand:VPERMI2 3 "nonimmediate_operand" "vm,vm")]
20620	  UNSPEC_VPERMT2))]
20621  "TARGET_AVX512F"
20622  "@
20623   vpermt2<ssemodesuffix>\t{%3, %1, %0<sd_mask_op4>|%0<sd_mask_op4>, %1, %3}
20624   vpermi2<ssemodesuffix>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
20625  [(set_attr "type" "sselog")
20626   (set_attr "prefix" "evex")
20627   (set_attr "mode" "<sseinsnmode>")])
20628
20629(define_insn "<avx512>_vpermt2var<mode>3_mask"
20630  [(set (match_operand:VPERMI2 0 "register_operand" "=v")
20631	(vec_merge:VPERMI2
20632	  (unspec:VPERMI2
20633	    [(match_operand:<sseintvecmode> 1 "register_operand" "v")
20634	    (match_operand:VPERMI2 2 "register_operand" "0")
20635	    (match_operand:VPERMI2 3 "nonimmediate_operand" "vm")]
20636	    UNSPEC_VPERMT2)
20637	  (match_dup 2)
20638	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
20639  "TARGET_AVX512F"
20640  "vpermt2<ssemodesuffix>\t{%3, %1, %0%{%4%}|%0%{%4%}, %1, %3}"
20641  [(set_attr "type" "sselog")
20642   (set_attr "prefix" "evex")
20643   (set_attr "mode" "<sseinsnmode>")])
20644
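;; When the immediate does not zero any lane (bits 3 and 7 clear), lower
;; vperm2[if]128 to a vec_select of the double-width concatenation of the
;; two inputs; otherwise keep the UNSPEC form matched by
;; *avx_vperm2f128<mode>_full below.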
20645(define_expand "avx_vperm2f128<mode>3"
20646  [(set (match_operand:AVX256MODE2P 0 "register_operand")
20647	(unspec:AVX256MODE2P
20648	  [(match_operand:AVX256MODE2P 1 "register_operand")
20649	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand")
20650	   (match_operand:SI 3 "const_0_to_255_operand")]
20651	  UNSPEC_VPERMIL2F128))]
20652  "TARGET_AVX"
20653{
20654  int mask = INTVAL (operands[3]);
20655  if ((mask & 0x88) == 0)
20656    {
20657      rtx perm[<ssescalarnum>], t1, t2;
20658      int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
20659
20660      base = (mask & 3) * nelt2;
20661      for (i = 0; i < nelt2; ++i)
20662	perm[i] = GEN_INT (base + i);
20663
20664      base = ((mask >> 4) & 3) * nelt2;
20665      for (i = 0; i < nelt2; ++i)
20666	perm[i + nelt2] = GEN_INT (base + i);
20667
20668      t2 = gen_rtx_VEC_CONCAT (<ssedoublevecmode>mode,
20669			       operands[1], operands[2]);
20670      t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
20671      t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
20672      t2 = gen_rtx_SET (operands[0], t2);
20673      emit_insn (t2);
20674      DONE;
20675    }
20676})
20677
20678;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
20679;; means that in order to represent this properly in rtl we'd have to
20680;; nest *another* vec_concat with a zero operand and do the select from
20681;; a 4x wide vector.  That doesn't seem very nice.
20682(define_insn "*avx_vperm2f128<mode>_full"
20683  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20684	(unspec:AVX256MODE2P
20685	  [(match_operand:AVX256MODE2P 1 "register_operand" "x")
20686	   (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
20687	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
20688	  UNSPEC_VPERMIL2F128))]
20689  "TARGET_AVX"
20690  "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
20691  [(set_attr "type" "sselog")
20692   (set_attr "prefix_extra" "1")
20693   (set_attr "length_immediate" "1")
20694   (set_attr "prefix" "vex")
20695   (set_attr "mode" "<sseinsnmode>")])
20696
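;; The masks 0x12 and 0x20 only replace the low or high half of operand 1
;; with the low half of operand 2, so they are emitted as vinsert[if]128
;; with selector 0 or 1 instead of vperm2[if]128.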
20697(define_insn "*avx_vperm2f128<mode>_nozero"
20698  [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
20699	(vec_select:AVX256MODE2P
20700	  (vec_concat:<ssedoublevecmode>
20701	    (match_operand:AVX256MODE2P 1 "register_operand" "x")
20702	    (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
20703	  (match_parallel 3 ""
20704	    [(match_operand 4 "const_int_operand")])))]
20705  "TARGET_AVX
20706   && avx_vperm2f128_parallel (operands[3], <MODE>mode)"
20707{
20708  int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
20709  if (mask == 0x12)
20710    return "vinsert<i128>\t{$0, %x2, %1, %0|%0, %1, %x2, 0}";
20711  if (mask == 0x20)
20712    return "vinsert<i128>\t{$1, %x2, %1, %0|%0, %1, %x2, 1}";
20713  operands[3] = GEN_INT (mask);
20714  return "vperm2<i128>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
20715}
20716  [(set_attr "type" "sselog")
20717   (set_attr "prefix_extra" "1")
20718   (set_attr "length_immediate" "1")
20719   (set_attr "prefix" "vex")
20720   (set_attr "mode" "<sseinsnmode>")])
20721
20722(define_insn "*ssse3_palignr<mode>_perm"
20723  [(set (match_operand:V_128 0 "register_operand" "=x,x,v")
20724      (vec_select:V_128
20725	(match_operand:V_128 1 "register_operand" "0,x,v")
20726	(match_parallel 2 "palignr_operand"
20727	  [(match_operand 3 "const_int_operand" "n,n,n")])))]
20728  "TARGET_SSSE3"
20729{
20730  operands[2] = (GEN_INT (INTVAL (operands[3])
20731		 * GET_MODE_UNIT_SIZE (GET_MODE (operands[0]))));
20732
20733  switch (which_alternative)
20734    {
20735    case 0:
20736      return "palignr\t{%2, %1, %0|%0, %1, %2}";
20737    case 1:
20738    case 2:
20739      return "vpalignr\t{%2, %1, %1, %0|%0, %1, %1, %2}";
20740    default:
20741      gcc_unreachable ();
20742    }
20743}
20744  [(set_attr "isa" "noavx,avx,avx512bw")
20745   (set_attr "type" "sseishft")
20746   (set_attr "atom_unit" "sishuf")
20747   (set_attr "prefix_data16" "1,*,*")
20748   (set_attr "prefix_extra" "1")
20749   (set_attr "length_immediate" "1")
20750   (set_attr "prefix" "orig,vex,evex")])
20751
20752(define_expand "avx512vl_vinsert<mode>"
20753  [(match_operand:VI48F_256 0 "register_operand")
20754   (match_operand:VI48F_256 1 "register_operand")
20755   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20756   (match_operand:SI 3 "const_0_to_1_operand")
20757   (match_operand:VI48F_256 4 "register_operand")
20758   (match_operand:<avx512fmaskmode> 5 "register_operand")]
20759  "TARGET_AVX512VL"
20760{
20761  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
20762
20763  switch (INTVAL (operands[3]))
20764    {
20765    case 0:
20766      insn = gen_vec_set_lo_<mode>_mask;
20767      break;
20768    case 1:
20769      insn = gen_vec_set_hi_<mode>_mask;
20770      break;
20771    default:
20772      gcc_unreachable ();
20773    }
20774
20775  emit_insn (insn (operands[0], operands[1], operands[2], operands[4],
20776		   operands[5]));
20777  DONE;
20778})
20779
20780(define_expand "avx_vinsertf128<mode>"
20781  [(match_operand:V_256 0 "register_operand")
20782   (match_operand:V_256 1 "register_operand")
20783   (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand")
20784   (match_operand:SI 3 "const_0_to_1_operand")]
20785  "TARGET_AVX"
20786{
20787  rtx (*insn)(rtx, rtx, rtx);
20788
20789  switch (INTVAL (operands[3]))
20790    {
20791    case 0:
20792      insn = gen_vec_set_lo_<mode>;
20793      break;
20794    case 1:
20795      insn = gen_vec_set_hi_<mode>;
20796      break;
20797    default:
20798      gcc_unreachable ();
20799    }
20800
20801  emit_insn (insn (operands[0], operands[1], operands[2]));
20802  DONE;
20803})
20804
20805(define_insn "vec_set_lo_<mode><mask_name>"
20806  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20807	(vec_concat:VI8F_256
20808	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20809	  (vec_select:<ssehalfvecmode>
20810	    (match_operand:VI8F_256 1 "register_operand" "v")
20811	    (parallel [(const_int 2) (const_int 3)]))))]
20812  "TARGET_AVX && <mask_avx512dq_condition>"
20813{
20814  if (TARGET_AVX512DQ)
20815    return "vinsert<shuffletype>64x2\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20816  else if (TARGET_AVX512VL)
20817    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20818  else
20819    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20820}
20821  [(set_attr "type" "sselog")
20822   (set_attr "prefix_extra" "1")
20823   (set_attr "length_immediate" "1")
20824   (set_attr "prefix" "vex")
20825   (set_attr "mode" "<sseinsnmode>")])
20826
20827(define_insn "vec_set_hi_<mode><mask_name>"
20828  [(set (match_operand:VI8F_256 0 "register_operand" "=v")
20829	(vec_concat:VI8F_256
20830	  (vec_select:<ssehalfvecmode>
20831	    (match_operand:VI8F_256 1 "register_operand" "v")
20832	    (parallel [(const_int 0) (const_int 1)]))
20833	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20834  "TARGET_AVX && <mask_avx512dq_condition>"
20835{
20836  if (TARGET_AVX512DQ)
20837    return "vinsert<shuffletype>64x2\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20838  else if (TARGET_AVX512VL)
20839    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20840  else
20841    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20842}
20843  [(set_attr "type" "sselog")
20844   (set_attr "prefix_extra" "1")
20845   (set_attr "length_immediate" "1")
20846   (set_attr "prefix" "vex")
20847   (set_attr "mode" "<sseinsnmode>")])
20848
20849(define_insn "vec_set_lo_<mode><mask_name>"
20850  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20851	(vec_concat:VI4F_256
20852	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")
20853	  (vec_select:<ssehalfvecmode>
20854	    (match_operand:VI4F_256 1 "register_operand" "v")
20855	    (parallel [(const_int 4) (const_int 5)
20856		       (const_int 6) (const_int 7)]))))]
20857  "TARGET_AVX"
20858{
20859  if (TARGET_AVX512VL)
20860    return "vinsert<shuffletype>32x4\t{$0x0, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x0}";
20861  else
20862    return "vinsert<i128>\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}";
20863}
20864  [(set_attr "type" "sselog")
20865   (set_attr "prefix_extra" "1")
20866   (set_attr "length_immediate" "1")
20867   (set_attr "prefix" "vex")
20868   (set_attr "mode" "<sseinsnmode>")])
20869
20870(define_insn "vec_set_hi_<mode><mask_name>"
20871  [(set (match_operand:VI4F_256 0 "register_operand" "=v")
20872	(vec_concat:VI4F_256
20873	  (vec_select:<ssehalfvecmode>
20874	    (match_operand:VI4F_256 1 "register_operand" "v")
20875	    (parallel [(const_int 0) (const_int 1)
20876		       (const_int 2) (const_int 3)]))
20877	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" "vm")))]
20878  "TARGET_AVX"
20879{
20880  if (TARGET_AVX512VL)
20881    return "vinsert<shuffletype>32x4\t{$0x1, %2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2, 0x1}";
20882  else
20883    return "vinsert<i128>\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}";
20884}
20885  [(set_attr "type" "sselog")
20886   (set_attr "prefix_extra" "1")
20887   (set_attr "length_immediate" "1")
20888   (set_attr "prefix" "vex")
20889   (set_attr "mode" "<sseinsnmode>")])
20890
20891(define_insn "vec_set_lo_v16hi"
20892  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20893	(vec_concat:V16HI
20894	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")
20895	  (vec_select:V8HI
20896	    (match_operand:V16HI 1 "register_operand" "x,v")
20897	    (parallel [(const_int 8) (const_int 9)
20898		       (const_int 10) (const_int 11)
20899		       (const_int 12) (const_int 13)
20900		       (const_int 14) (const_int 15)]))))]
20901  "TARGET_AVX"
20902  "@
20903   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20904   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20905  [(set_attr "type" "sselog")
20906   (set_attr "prefix_extra" "1")
20907   (set_attr "length_immediate" "1")
20908   (set_attr "prefix" "vex,evex")
20909   (set_attr "mode" "OI")])
20910
20911(define_insn "vec_set_hi_v16hi"
20912  [(set (match_operand:V16HI 0 "register_operand" "=x,v")
20913	(vec_concat:V16HI
20914	  (vec_select:V8HI
20915	    (match_operand:V16HI 1 "register_operand" "x,v")
20916	    (parallel [(const_int 0) (const_int 1)
20917		       (const_int 2) (const_int 3)
20918		       (const_int 4) (const_int 5)
20919		       (const_int 6) (const_int 7)]))
20920	  (match_operand:V8HI 2 "nonimmediate_operand" "xm,vm")))]
20921  "TARGET_AVX"
20922  "@
20923   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20924   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20925  [(set_attr "type" "sselog")
20926   (set_attr "prefix_extra" "1")
20927   (set_attr "length_immediate" "1")
20928   (set_attr "prefix" "vex,evex")
20929   (set_attr "mode" "OI")])
20930
20931(define_insn "vec_set_lo_v32qi"
20932  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20933	(vec_concat:V32QI
20934	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")
20935	  (vec_select:V16QI
20936	    (match_operand:V32QI 1 "register_operand" "x,v")
20937	    (parallel [(const_int 16) (const_int 17)
20938		       (const_int 18) (const_int 19)
20939		       (const_int 20) (const_int 21)
20940		       (const_int 22) (const_int 23)
20941		       (const_int 24) (const_int 25)
20942		       (const_int 26) (const_int 27)
20943		       (const_int 28) (const_int 29)
20944		       (const_int 30) (const_int 31)]))))]
20945  "TARGET_AVX"
20946  "@
20947   vinsert%~128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}
20948   vinserti32x4\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
20949  [(set_attr "type" "sselog")
20950   (set_attr "prefix_extra" "1")
20951   (set_attr "length_immediate" "1")
20952   (set_attr "prefix" "vex,evex")
20953   (set_attr "mode" "OI")])
20954
20955(define_insn "vec_set_hi_v32qi"
20956  [(set (match_operand:V32QI 0 "register_operand" "=x,v")
20957	(vec_concat:V32QI
20958	  (vec_select:V16QI
20959	    (match_operand:V32QI 1 "register_operand" "x,v")
20960	    (parallel [(const_int 0) (const_int 1)
20961		       (const_int 2) (const_int 3)
20962		       (const_int 4) (const_int 5)
20963		       (const_int 6) (const_int 7)
20964		       (const_int 8) (const_int 9)
20965		       (const_int 10) (const_int 11)
20966		       (const_int 12) (const_int 13)
20967		       (const_int 14) (const_int 15)]))
20968	  (match_operand:V16QI 2 "nonimmediate_operand" "xm,vm")))]
20969  "TARGET_AVX"
20970  "@
20971   vinsert%~128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}
20972   vinserti32x4\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
20973  [(set_attr "type" "sselog")
20974   (set_attr "prefix_extra" "1")
20975   (set_attr "length_immediate" "1")
20976   (set_attr "prefix" "vex,evex")
20977   (set_attr "mode" "OI")])
20978
20979(define_insn "<avx_avx2>_maskload<ssemodesuffix><avxsizesuffix>"
20980  [(set (match_operand:V48_AVX2 0 "register_operand" "=x")
20981	(unspec:V48_AVX2
20982	  [(match_operand:<sseintvecmode> 2 "register_operand" "x")
20983	   (match_operand:V48_AVX2 1 "memory_operand" "m")]
20984	  UNSPEC_MASKMOV))]
20985  "TARGET_AVX"
20986  "v<sseintprefix>maskmov<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
20987  [(set_attr "type" "sselog1")
20988   (set_attr "prefix_extra" "1")
20989   (set_attr "prefix" "vex")
20990   (set_attr "btver2_decode" "vector")
20991   (set_attr "mode" "<sseinsnmode>")])
20992
20993(define_insn "<avx_avx2>_maskstore<ssemodesuffix><avxsizesuffix>"
20994  [(set (match_operand:V48_AVX2 0 "memory_operand" "+m")
20995	(unspec:V48_AVX2
20996	  [(match_operand:<sseintvecmode> 1 "register_operand" "x")
20997	   (match_operand:V48_AVX2 2 "register_operand" "x")
20998	   (match_dup 0)]
20999	  UNSPEC_MASKMOV))]
21000  "TARGET_AVX"
21001  "v<sseintprefix>maskmov<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
21002  [(set_attr "type" "sselog1")
21003   (set_attr "prefix_extra" "1")
21004   (set_attr "prefix" "vex")
21005   (set_attr "btver2_decode" "vector")
21006   (set_attr "mode" "<sseinsnmode>")])
21007
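;; Masked load/store expanders.  The AVX/AVX2 forms use UNSPEC_MASKMOV with
;; a vector mask; the AVX-512 forms are represented directly as a vec_merge
;; controlled by a mask register.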
21008(define_expand "maskload<mode><sseintvecmodelower>"
21009  [(set (match_operand:V48_AVX2 0 "register_operand")
21010	(unspec:V48_AVX2
21011	  [(match_operand:<sseintvecmode> 2 "register_operand")
21012	   (match_operand:V48_AVX2 1 "memory_operand")]
21013	  UNSPEC_MASKMOV))]
21014  "TARGET_AVX")
21015
21016(define_expand "maskload<mode><avx512fmaskmodelower>"
21017  [(set (match_operand:V48_AVX512VL 0 "register_operand")
21018	(vec_merge:V48_AVX512VL
21019	  (match_operand:V48_AVX512VL 1 "memory_operand")
21020	  (match_dup 0)
21021	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21022  "TARGET_AVX512F")
21023
21024(define_expand "maskload<mode><avx512fmaskmodelower>"
21025  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
21026	(vec_merge:VI12_AVX512VL
21027	  (match_operand:VI12_AVX512VL 1 "memory_operand")
21028	  (match_dup 0)
21029	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21030  "TARGET_AVX512BW")
21031
21032(define_expand "maskstore<mode><sseintvecmodelower>"
21033  [(set (match_operand:V48_AVX2 0 "memory_operand")
21034	(unspec:V48_AVX2
21035	  [(match_operand:<sseintvecmode> 2 "register_operand")
21036	   (match_operand:V48_AVX2 1 "register_operand")
21037	   (match_dup 0)]
21038	  UNSPEC_MASKMOV))]
21039  "TARGET_AVX")
21040
21041(define_expand "maskstore<mode><avx512fmaskmodelower>"
21042  [(set (match_operand:V48_AVX512VL 0 "memory_operand")
21043	(vec_merge:V48_AVX512VL
21044	  (match_operand:V48_AVX512VL 1 "register_operand")
21045	  (match_dup 0)
21046	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21047  "TARGET_AVX512F")
21048
21049(define_expand "maskstore<mode><avx512fmaskmodelower>"
21050  [(set (match_operand:VI12_AVX512VL 0 "memory_operand")
21051	(vec_merge:VI12_AVX512VL
21052	  (match_operand:VI12_AVX512VL 1 "register_operand")
21053	  (match_dup 0)
21054	  (match_operand:<avx512fmaskmode> 2 "register_operand")))]
21055  "TARGET_AVX512BW")
21056
21057(define_expand "cbranch<mode>4"
21058  [(set (reg:CC FLAGS_REG)
21059	(compare:CC (match_operand:VI48_AVX 1 "register_operand")
21060		    (match_operand:VI48_AVX 2 "nonimmediate_operand")))
21061   (set (pc) (if_then_else
21062	       (match_operator 0 "bt_comparison_operator"
21063		[(reg:CC FLAGS_REG) (const_int 0)])
21064	       (label_ref (match_operand 3))
21065	       (pc)))]
21066  "TARGET_SSE4_1"
21067{
21068  ix86_expand_branch (GET_CODE (operands[0]),
21069		      operands[1], operands[2], operands[3]);
21070  DONE;
21071})
21072
21073
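;; Casting a 128-bit vector to 256 bits leaves the upper half undefined
;; (UNSPEC_CAST), so after reload the insn reduces to a plain move.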
21074(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
21075  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
21076	(vec_concat:AVX256MODE2P
21077	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
21078	  (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
21079  "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
21080  "#"
21081  "&& reload_completed"
21082  [(set (match_dup 0) (match_dup 1))]
21083{
21084  if (REG_P (operands[0]))
21085    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
21086  else
21087    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
21088				  <ssehalfvecmode>mode);
21089})
21090
21091;; Modes handled by vec_init expanders.
21092(define_mode_iterator VEC_INIT_MODE
21093  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21094   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21095   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21096   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX") V2DI
21097   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21098   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
21099   (V4TI "TARGET_AVX512F") (V2TI "TARGET_AVX")])
21100
21101;; Likewise, but for initialization from half sized vectors.
21102;; Thus, these are all VEC_INIT_MODE modes except the V2?? modes, whose halves are scalars.
21103(define_mode_iterator VEC_INIT_HALF_MODE
21104  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
21105   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
21106   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
21107   (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX")
21108   (V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF
21109   (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX")
21110   (V4TI "TARGET_AVX512F")])
21111
21112(define_expand "vec_init<mode><ssescalarmodelower>"
21113  [(match_operand:VEC_INIT_MODE 0 "register_operand")
21114   (match_operand 1)]
21115  "TARGET_SSE"
21116{
21117  ix86_expand_vector_init (false, operands[0], operands[1]);
21118  DONE;
21119})
21120
21121(define_expand "vec_init<mode><ssehalfvecmodelower>"
21122  [(match_operand:VEC_INIT_HALF_MODE 0 "register_operand")
21123   (match_operand 1)]
21124  "TARGET_SSE"
21125{
21126  ix86_expand_vector_init (false, operands[0], operands[1]);
21127  DONE;
21128})
21129
21130(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21131  [(set (match_operand:VI48_AVX512F_AVX512VL 0 "register_operand" "=v")
21132	(ashiftrt:VI48_AVX512F_AVX512VL
21133	  (match_operand:VI48_AVX512F_AVX512VL 1 "register_operand" "v")
21134	  (match_operand:VI48_AVX512F_AVX512VL 2 "nonimmediate_operand" "vm")))]
21135  "TARGET_AVX2 && <mask_mode512bit_condition>"
21136  "vpsrav<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21137  [(set_attr "type" "sseishft")
21138   (set_attr "prefix" "maybe_evex")
21139   (set_attr "mode" "<sseinsnmode>")])
21140
21141(define_insn "<avx2_avx512>_ashrv<mode><mask_name>"
21142  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21143	(ashiftrt:VI2_AVX512VL
21144	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21145	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21146  "TARGET_AVX512BW"
21147  "vpsravw\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21148  [(set_attr "type" "sseishft")
21149   (set_attr "prefix" "maybe_evex")
21150   (set_attr "mode" "<sseinsnmode>")])
21151
21152(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
21153  [(set (match_operand:VI48_AVX512F 0 "register_operand" "=v")
21154	(any_lshift:VI48_AVX512F
21155	  (match_operand:VI48_AVX512F 1 "register_operand" "v")
21156	  (match_operand:VI48_AVX512F 2 "nonimmediate_operand" "vm")))]
21157  "TARGET_AVX2 && <mask_mode512bit_condition>"
21158  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21159  [(set_attr "type" "sseishft")
21160   (set_attr "prefix" "maybe_evex")
21161   (set_attr "mode" "<sseinsnmode>")])
21162
21163(define_insn "<avx2_avx512>_<shift_insn>v<mode><mask_name>"
21164  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21165	(any_lshift:VI2_AVX512VL
21166	  (match_operand:VI2_AVX512VL 1 "register_operand" "v")
21167	  (match_operand:VI2_AVX512VL 2 "nonimmediate_operand" "vm")))]
21168  "TARGET_AVX512BW"
21169  "vp<vshift>v<ssemodesuffix>\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21170  [(set_attr "type" "sseishft")
21171   (set_attr "prefix" "maybe_evex")
21172   (set_attr "mode" "<sseinsnmode>")])
21173
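;; Alternatives 2 and 3 concatenate with a zero vector, which is just a move
;; of the low half (VEX/EVEX moves clear the upper bits); the output below
;; picks an aligned or unaligned move of the right element width.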
21174(define_insn "avx_vec_concat<mode>"
21175  [(set (match_operand:V_256_512 0 "register_operand" "=x,v,x,Yv")
21176	(vec_concat:V_256_512
21177	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "x,v,xm,vm")
21178	  (match_operand:<ssehalfvecmode> 2 "nonimm_or_0_operand" "xm,vm,C,C")))]
21179  "TARGET_AVX
21180   && (operands[2] == CONST0_RTX (<ssehalfvecmode>mode)
21181       || !MEM_P (operands[1]))"
21182{
21183  switch (which_alternative)
21184    {
21185    case 0:
21186      return "vinsert<i128>\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21187    case 1:
21188      if (<MODE_SIZE> == 64)
21189	{
21190	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 4)
21191	    return "vinsert<shuffletype>32x8\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21192	  else
21193	    return "vinsert<shuffletype>64x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21194	}
21195      else
21196	{
21197	  if (TARGET_AVX512DQ && GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21198	    return "vinsert<shuffletype>64x2\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21199	  else
21200	    return "vinsert<shuffletype>32x4\t{$0x1, %2, %<xtg_mode>1, %0|%0, %<xtg_mode>1, %2, 0x1}";
21201	}
21202    case 2:
21203    case 3:
21204      switch (get_attr_mode (insn))
21205	{
21206	case MODE_V16SF:
21207	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21208	    return "vmovups\t{%1, %t0|%t0, %1}";
21209	  else
21210	    return "vmovaps\t{%1, %t0|%t0, %1}";
21211	case MODE_V8DF:
21212	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21213	    return "vmovupd\t{%1, %t0|%t0, %1}";
21214	  else
21215	    return "vmovapd\t{%1, %t0|%t0, %1}";
21216	case MODE_V8SF:
21217	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21218	    return "vmovups\t{%1, %x0|%x0, %1}";
21219	  else
21220	    return "vmovaps\t{%1, %x0|%x0, %1}";
21221	case MODE_V4DF:
21222	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21223	    return "vmovupd\t{%1, %x0|%x0, %1}";
21224	  else
21225	    return "vmovapd\t{%1, %x0|%x0, %1}";
21226	case MODE_XI:
21227	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21228	    {
21229	      if (which_alternative == 2)
21230		return "vmovdqu\t{%1, %t0|%t0, %1}";
21231	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21232		return "vmovdqu64\t{%1, %t0|%t0, %1}";
21233	      else
21234		return "vmovdqu32\t{%1, %t0|%t0, %1}";
21235	    }
21236	  else
21237	    {
21238	      if (which_alternative == 2)
21239		return "vmovdqa\t{%1, %t0|%t0, %1}";
21240	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21241		return "vmovdqa64\t{%1, %t0|%t0, %1}";
21242	      else
21243		return "vmovdqa32\t{%1, %t0|%t0, %1}";
21244	    }
21245	case MODE_OI:
21246	  if (misaligned_operand (operands[1], <ssehalfvecmode>mode))
21247	    {
21248	      if (which_alternative == 2)
21249		return "vmovdqu\t{%1, %x0|%x0, %1}";
21250	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21251		return "vmovdqu64\t{%1, %x0|%x0, %1}";
21252	      else
21253		return "vmovdqu32\t{%1, %x0|%x0, %1}";
21254	    }
21255	  else
21256	    {
21257	      if (which_alternative == 2)
21258		return "vmovdqa\t{%1, %x0|%x0, %1}";
21259	      else if (GET_MODE_SIZE (<ssescalarmode>mode) == 8)
21260		return "vmovdqa64\t{%1, %x0|%x0, %1}";
21261	      else
21262		return "vmovdqa32\t{%1, %x0|%x0, %1}";
21263	    }
21264	default:
21265	  gcc_unreachable ();
21266	}
21267    default:
21268      gcc_unreachable ();
21269    }
21270}
21271  [(set_attr "type" "sselog,sselog,ssemov,ssemov")
21272   (set_attr "prefix_extra" "1,1,*,*")
21273   (set_attr "length_immediate" "1,1,*,*")
21274   (set_attr "prefix" "maybe_evex")
21275   (set_attr "mode" "<sseinsnmode>")])
21276
21277(define_insn "vcvtph2ps<mask_name>"
21278  [(set (match_operand:V4SF 0 "register_operand" "=v")
21279	(vec_select:V4SF
21280	  (unspec:V8SF [(match_operand:V8HI 1 "register_operand" "v")]
21281		       UNSPEC_VCVTPH2PS)
21282	  (parallel [(const_int 0) (const_int 1)
21283		     (const_int 2) (const_int 3)])))]
21284  "TARGET_F16C || TARGET_AVX512VL"
21285  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21286  [(set_attr "type" "ssecvt")
21287   (set_attr "prefix" "maybe_evex")
21288   (set_attr "mode" "V4SF")])
21289
21290(define_insn "*vcvtph2ps_load<mask_name>"
21291  [(set (match_operand:V4SF 0 "register_operand" "=v")
21292	(unspec:V4SF [(match_operand:V4HI 1 "memory_operand" "m")]
21293		     UNSPEC_VCVTPH2PS))]
21294  "TARGET_F16C || TARGET_AVX512VL"
21295  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21296  [(set_attr "type" "ssecvt")
21297   (set_attr "prefix" "vex")
21298   (set_attr "mode" "V8SF")])
21299
21300(define_insn "vcvtph2ps256<mask_name>"
21301  [(set (match_operand:V8SF 0 "register_operand" "=v")
21302	(unspec:V8SF [(match_operand:V8HI 1 "nonimmediate_operand" "vm")]
21303		     UNSPEC_VCVTPH2PS))]
21304  "TARGET_F16C || TARGET_AVX512VL"
21305  "vcvtph2ps\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
21306  [(set_attr "type" "ssecvt")
21307   (set_attr "prefix" "vex")
21308   (set_attr "btver2_decode" "double")
21309   (set_attr "mode" "V8SF")])
21310
21311(define_insn "<mask_codefor>avx512f_vcvtph2ps512<mask_name><round_saeonly_name>"
21312  [(set (match_operand:V16SF 0 "register_operand" "=v")
21313	(unspec:V16SF
21314	  [(match_operand:V16HI 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
21315	  UNSPEC_VCVTPH2PS))]
21316  "TARGET_AVX512F"
21317  "vcvtph2ps\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
21318  [(set_attr "type" "ssecvt")
21319   (set_attr "prefix" "evex")
21320   (set_attr "mode" "V16SF")])
21321
21322(define_expand "vcvtps2ph_mask"
21323  [(set (match_operand:V8HI 0 "register_operand")
21324	(vec_merge:V8HI
21325	  (vec_concat:V8HI
21326	    (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21327			  (match_operand:SI 2 "const_0_to_255_operand")]
21328			  UNSPEC_VCVTPS2PH)
21329	    (match_dup 5))
21330	   (match_operand:V8HI 3 "nonimm_or_0_operand")
21331	   (match_operand:QI 4 "register_operand")))]
21332  "TARGET_AVX512VL"
21333  "operands[5] = CONST0_RTX (V4HImode);")
21334
21335(define_expand "vcvtps2ph"
21336  [(set (match_operand:V8HI 0 "register_operand")
21337	(vec_concat:V8HI
21338	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand")
21339			(match_operand:SI 2 "const_0_to_255_operand")]
21340		       UNSPEC_VCVTPS2PH)
21341	  (match_dup 3)))]
21342  "TARGET_F16C"
21343  "operands[3] = CONST0_RTX (V4HImode);")
21344
21345(define_insn "*vcvtps2ph<mask_name>"
21346  [(set (match_operand:V8HI 0 "register_operand" "=v")
21347	(vec_concat:V8HI
21348	  (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21349			(match_operand:SI 2 "const_0_to_255_operand" "N")]
21350		       UNSPEC_VCVTPS2PH)
21351	  (match_operand:V4HI 3 "const0_operand")))]
21352  "(TARGET_F16C || TARGET_AVX512VL) && <mask_avx512vl_condition>"
21353  "vcvtps2ph\t{%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2}"
21354  [(set_attr "type" "ssecvt")
21355   (set_attr "prefix" "maybe_evex")
21356   (set_attr "mode" "V4SF")])
21357
21358(define_insn "*vcvtps2ph_store<mask_name>"
21359  [(set (match_operand:V4HI 0 "memory_operand" "=m")
21360	(unspec:V4HI [(match_operand:V4SF 1 "register_operand" "v")
21361		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
21362		     UNSPEC_VCVTPS2PH))]
21363  "TARGET_F16C || TARGET_AVX512VL"
21364  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21365  [(set_attr "type" "ssecvt")
21366   (set_attr "prefix" "maybe_evex")
21367   (set_attr "mode" "V4SF")])
21368
21369(define_insn "vcvtps2ph256<mask_name>"
21370  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=vm")
21371	(unspec:V8HI [(match_operand:V8SF 1 "register_operand" "v")
21372		      (match_operand:SI 2 "const_0_to_255_operand" "N")]
21373		     UNSPEC_VCVTPS2PH))]
21374  "TARGET_F16C || TARGET_AVX512VL"
21375  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21376  [(set_attr "type" "ssecvt")
21377   (set_attr "prefix" "maybe_evex")
21378   (set_attr "btver2_decode" "vector")
21379   (set_attr "mode" "V8SF")])
21380
21381(define_insn "<mask_codefor>avx512f_vcvtps2ph512<mask_name>"
21382  [(set (match_operand:V16HI 0 "nonimmediate_operand" "=vm")
21383	(unspec:V16HI
21384	  [(match_operand:V16SF 1 "register_operand" "v")
21385	   (match_operand:SI 2 "const_0_to_255_operand" "N")]
21386	  UNSPEC_VCVTPS2PH))]
21387  "TARGET_AVX512F"
21388  "vcvtps2ph\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
21389  [(set_attr "type" "ssecvt")
21390   (set_attr "prefix" "evex")
21391   (set_attr "mode" "V16SF")])
21392
21393;; For gather* insn patterns
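;; VEC_GATHER_IDXSI and VEC_GATHER_IDXDI give the index-vector mode for
;; 32-bit and 64-bit indices respectively; VEC_GATHER_SRCDI gives the mode
;; used for the source and mask operands of the DI-indexed (gatherq) forms.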
21394(define_mode_iterator VEC_GATHER_MODE
21395		      [V2DI V2DF V4DI V4DF V4SI V4SF V8SI V8SF])
21396(define_mode_attr VEC_GATHER_IDXSI
21397		      [(V2DI "V4SI") (V4DI "V4SI") (V8DI "V8SI")
21398		       (V2DF "V4SI") (V4DF "V4SI") (V8DF "V8SI")
21399		       (V4SI "V4SI") (V8SI "V8SI") (V16SI "V16SI")
21400		       (V4SF "V4SI") (V8SF "V8SI") (V16SF "V16SI")])
21401
21402(define_mode_attr VEC_GATHER_IDXDI
21403		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
21404		       (V2DF "V2DI") (V4DF "V4DI") (V8DF "V8DI")
21405		       (V4SI "V2DI") (V8SI "V4DI") (V16SI "V8DI")
21406		       (V4SF "V2DI") (V8SF "V4DI") (V16SF "V8DI")])
21407
21408(define_mode_attr VEC_GATHER_SRCDI
21409		      [(V2DI "V2DI") (V4DI "V4DI") (V8DI "V8DI")
21410		       (V2DF "V2DF") (V4DF "V4DF") (V8DF "V8DF")
21411		       (V4SI "V4SI") (V8SI "V4SI") (V16SI "V8SI")
21412		       (V4SF "V4SF") (V8SF "V4SF") (V16SF "V8SF")])
21413
21414(define_expand "avx2_gathersi<mode>"
21415  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
21416		   (unspec:VEC_GATHER_MODE
21417		     [(match_operand:VEC_GATHER_MODE 1 "register_operand")
21418		      (mem:<ssescalarmode>
21419			(match_par_dup 6
21420			  [(match_operand 2 "vsib_address_operand")
21421			   (match_operand:<VEC_GATHER_IDXSI>
21422			      3 "register_operand")
21423			   (match_operand:SI 5 "const1248_operand")]))
21424		      (mem:BLK (scratch))
21425		      (match_operand:VEC_GATHER_MODE 4 "register_operand")]
21426		     UNSPEC_GATHER))
21427	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
21428  "TARGET_AVX2"
21429{
21430  operands[6]
21431    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21432					operands[5]), UNSPEC_VSIBADDR);
21433})
21434
21435(define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>"
21436  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21437	(unspec:VEC_GATHER_MODE
21438	  [(match_operand:VEC_GATHER_MODE 2 "register_operand" "0")
21439	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21440	     [(unspec:P
21441		[(match_operand:P 3 "vsib_address_operand" "Tv")
21442		 (match_operand:<VEC_GATHER_IDXSI> 4 "register_operand" "x")
21443		 (match_operand:SI 6 "const1248_operand" "n")]
21444		UNSPEC_VSIBADDR)])
21445	   (mem:BLK (scratch))
21446	   (match_operand:VEC_GATHER_MODE 5 "register_operand" "1")]
21447	  UNSPEC_GATHER))
21448   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21449  "TARGET_AVX2"
21450  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %7, %0|%0, %7, %1}"
21451  [(set_attr "type" "ssemov")
21452   (set_attr "prefix" "vex")
21453   (set_attr "mode" "<sseinsnmode>")])
21454
21455(define_insn "*avx2_gathersi<VEC_GATHER_MODE:mode>_2"
21456  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21457	(unspec:VEC_GATHER_MODE
21458	  [(pc)
21459	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21460	     [(unspec:P
21461		[(match_operand:P 2 "vsib_address_operand" "Tv")
21462		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
21463		 (match_operand:SI 5 "const1248_operand" "n")]
21464		UNSPEC_VSIBADDR)])
21465	   (mem:BLK (scratch))
21466	   (match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
21467	  UNSPEC_GATHER))
21468   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21469  "TARGET_AVX2"
21470  "%M2v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
21471  [(set_attr "type" "ssemov")
21472   (set_attr "prefix" "vex")
21473   (set_attr "mode" "<sseinsnmode>")])
21474
21475(define_expand "avx2_gatherdi<mode>"
21476  [(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand")
21477		   (unspec:VEC_GATHER_MODE
21478		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21479		      (mem:<ssescalarmode>
21480			(match_par_dup 6
21481			  [(match_operand 2 "vsib_address_operand")
21482			   (match_operand:<VEC_GATHER_IDXDI>
21483			      3 "register_operand")
21484			   (match_operand:SI 5 "const1248_operand")]))
21485		      (mem:BLK (scratch))
21486		      (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand")]
21487		     UNSPEC_GATHER))
21488	      (clobber (match_scratch:VEC_GATHER_MODE 7))])]
21489  "TARGET_AVX2"
21490{
21491  operands[6]
21492    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21493					operands[5]), UNSPEC_VSIBADDR);
21494})
21495
21496(define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>"
21497  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21498	(unspec:VEC_GATHER_MODE
21499	  [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
21500	   (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21501	     [(unspec:P
21502		[(match_operand:P 3 "vsib_address_operand" "Tv")
21503		 (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
21504		 (match_operand:SI 6 "const1248_operand" "n")]
21505		UNSPEC_VSIBADDR)])
21506	   (mem:BLK (scratch))
21507	   (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
21508	  UNSPEC_GATHER))
21509   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21510  "TARGET_AVX2"
21511  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %2|%2, %7, %5}"
21512  [(set_attr "type" "ssemov")
21513   (set_attr "prefix" "vex")
21514   (set_attr "mode" "<sseinsnmode>")])
21515
21516(define_insn "*avx2_gatherdi<VEC_GATHER_MODE:mode>_2"
21517  [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
21518	(unspec:VEC_GATHER_MODE
21519	  [(pc)
21520	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21521	     [(unspec:P
21522		[(match_operand:P 2 "vsib_address_operand" "Tv")
21523		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
21524		 (match_operand:SI 5 "const1248_operand" "n")]
21525		UNSPEC_VSIBADDR)])
21526	   (mem:BLK (scratch))
21527	   (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
21528	  UNSPEC_GATHER))
21529   (clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
21530  "TARGET_AVX2"
21531{
21532  if (<VEC_GATHER_MODE:MODE>mode != <VEC_GATHER_SRCDI>mode)
21533    return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
21534  return "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
21535}
21536  [(set_attr "type" "ssemov")
21537   (set_attr "prefix" "vex")
21538   (set_attr "mode" "<sseinsnmode>")])
21539
21540(define_insn "*avx2_gatherdi<VI4F_256:mode>_3"
21541  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
21542	(vec_select:<VEC_GATHER_SRCDI>
21543	  (unspec:VI4F_256
21544	    [(match_operand:<VEC_GATHER_SRCDI> 2 "register_operand" "0")
21545	     (match_operator:<ssescalarmode> 7 "vsib_mem_operator"
21546	       [(unspec:P
21547		  [(match_operand:P 3 "vsib_address_operand" "Tv")
21548		   (match_operand:<VEC_GATHER_IDXDI> 4 "register_operand" "x")
21549		   (match_operand:SI 6 "const1248_operand" "n")]
21550		  UNSPEC_VSIBADDR)])
21551	     (mem:BLK (scratch))
21552	     (match_operand:<VEC_GATHER_SRCDI> 5 "register_operand" "1")]
21553	     UNSPEC_GATHER)
21554	  (parallel [(const_int 0) (const_int 1)
21555		     (const_int 2) (const_int 3)])))
21556   (clobber (match_scratch:VI4F_256 1 "=&x"))]
21557  "TARGET_AVX2"
21558  "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %7, %0|%0, %7, %5}"
21559  [(set_attr "type" "ssemov")
21560   (set_attr "prefix" "vex")
21561   (set_attr "mode" "<sseinsnmode>")])
21562
21563(define_insn "*avx2_gatherdi<VI4F_256:mode>_4"
21564  [(set (match_operand:<VEC_GATHER_SRCDI> 0 "register_operand" "=&x")
21565	(vec_select:<VEC_GATHER_SRCDI>
21566	  (unspec:VI4F_256
21567	    [(pc)
21568	     (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21569	       [(unspec:P
21570		  [(match_operand:P 2 "vsib_address_operand" "Tv")
21571		   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
21572		   (match_operand:SI 5 "const1248_operand" "n")]
21573		  UNSPEC_VSIBADDR)])
21574	     (mem:BLK (scratch))
21575	     (match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
21576	    UNSPEC_GATHER)
21577	  (parallel [(const_int 0) (const_int 1)
21578		     (const_int 2) (const_int 3)])))
21579   (clobber (match_scratch:VI4F_256 1 "=&x"))]
21580  "TARGET_AVX2"
21581  "%M2v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}"
21582  [(set_attr "type" "ssemov")
21583   (set_attr "prefix" "vex")
21584   (set_attr "mode" "<sseinsnmode>")])
21585
21586(define_expand "<avx512>_gathersi<mode>"
21587  [(parallel [(set (match_operand:VI48F 0 "register_operand")
21588		   (unspec:VI48F
21589		     [(match_operand:VI48F 1 "register_operand")
21590		      (match_operand:<avx512fmaskmode> 4 "register_operand")
21591		      (mem:<ssescalarmode>
21592			(match_par_dup 6
21593			  [(match_operand 2 "vsib_address_operand")
21594			   (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand")
21595			   (match_operand:SI 5 "const1248_operand")]))]
21596		     UNSPEC_GATHER))
21597	      (clobber (match_scratch:<avx512fmaskmode> 7))])]
21598  "TARGET_AVX512F"
21599{
21600  operands[6]
21601    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21602					operands[5]), UNSPEC_VSIBADDR);
21603})
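
;; Illustration only: the AVX512 form takes the write mask in a mask
;; register (operand 4) rather than in a vector, and merges gathered
;; elements into operand 1.  A sketch using the usual <immintrin.h>
;; intrinsic names (assumed, not defined here):
;;
;;   #include <immintrin.h>
;;   __m512
;;   gather16_masked (__m512 src, __mmask16 k, __m512i idx, const float *base)
;;   {
;;     /* src -> operand 1, k -> 4, base -> 2, idx -> 3, scale -> 5.  */
;;     return _mm512_mask_i32gather_ps (src, k, idx, base, 4);
;;   }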
21604
21605(define_insn "*avx512f_gathersi<VI48F:mode>"
21606  [(set (match_operand:VI48F 0 "register_operand" "=&v")
21607	(unspec:VI48F
21608	  [(match_operand:VI48F 1 "register_operand" "0")
21609	   (match_operand:<avx512fmaskmode> 7 "register_operand" "2")
21610	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21611	     [(unspec:P
21612		[(match_operand:P 4 "vsib_address_operand" "Tv")
21613		 (match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "v")
21614		 (match_operand:SI 5 "const1248_operand" "n")]
21615		UNSPEC_VSIBADDR)])]
21616	  UNSPEC_GATHER))
21617   (clobber (match_scratch:<avx512fmaskmode> 2 "=&Yk"))]
21618  "TARGET_AVX512F"
21619;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21620;; gas changed what it requires incompatibly.
21621  "%M4v<sseintprefix>gatherd<ssemodesuffix>\t{%6, %0%{%2%}|%0%{%2%}, %X6}"
21622  [(set_attr "type" "ssemov")
21623   (set_attr "prefix" "evex")
21624   (set_attr "mode" "<sseinsnmode>")])
21625
21626(define_insn "*avx512f_gathersi<VI48F:mode>_2"
21627  [(set (match_operand:VI48F 0 "register_operand" "=&v")
21628	(unspec:VI48F
21629	  [(pc)
21630	   (match_operand:<avx512fmaskmode> 6 "register_operand" "1")
21631	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21632	     [(unspec:P
21633		[(match_operand:P 3 "vsib_address_operand" "Tv")
21634		 (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21635		 (match_operand:SI 4 "const1248_operand" "n")]
21636		UNSPEC_VSIBADDR)])]
21637	  UNSPEC_GATHER))
21638   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21639  "TARGET_AVX512F"
21640;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21641;; gas changed what it requires incompatibly.
21642  "%M3v<sseintprefix>gatherd<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}"
21643  [(set_attr "type" "ssemov")
21644   (set_attr "prefix" "evex")
21645   (set_attr "mode" "<sseinsnmode>")])
21646
21647
21648(define_expand "<avx512>_gatherdi<mode>"
21649  [(parallel [(set (match_operand:VI48F 0 "register_operand")
21650		   (unspec:VI48F
21651		     [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand")
21652		      (match_operand:QI 4 "register_operand")
21653		      (mem:<ssescalarmode>
21654			(match_par_dup 6
21655			  [(match_operand 2 "vsib_address_operand")
21656			   (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand")
21657			   (match_operand:SI 5 "const1248_operand")]))]
21658		     UNSPEC_GATHER))
21659	      (clobber (match_scratch:QI 7))])]
21660  "TARGET_AVX512F"
21661{
21662  operands[6]
21663    = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, operands[2], operands[3],
21664					operands[5]), UNSPEC_VSIBADDR);
21665})
21666
21667(define_insn "*avx512f_gatherdi<VI48F:mode>"
21668  [(set (match_operand:VI48F 0 "register_operand" "=&v")
21669	(unspec:VI48F
21670	  [(match_operand:<VEC_GATHER_SRCDI> 1 "register_operand" "0")
21671	   (match_operand:QI 7 "register_operand" "2")
21672	   (match_operator:<ssescalarmode> 6 "vsib_mem_operator"
21673	     [(unspec:P
21674		[(match_operand:P 4 "vsib_address_operand" "Tv")
21675		 (match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "v")
21676		 (match_operand:SI 5 "const1248_operand" "n")]
21677		UNSPEC_VSIBADDR)])]
21678	  UNSPEC_GATHER))
21679   (clobber (match_scratch:QI 2 "=&Yk"))]
21680  "TARGET_AVX512F"
21681;; %X6 so that we don't emit any *WORD PTR for -masm=intel, as
21682;; gas changed what it requires incompatibly.
21683  "%M4v<sseintprefix>gatherq<ssemodesuffix>\t{%6, %1%{%2%}|%1%{%2%}, %X6}"
21684  [(set_attr "type" "ssemov")
21685   (set_attr "prefix" "evex")
21686   (set_attr "mode" "<sseinsnmode>")])
21687
21688(define_insn "*avx512f_gatherdi<VI48F:mode>_2"
21689  [(set (match_operand:VI48F 0 "register_operand" "=&v")
21690	(unspec:VI48F
21691	  [(pc)
21692	   (match_operand:QI 6 "register_operand" "1")
21693	   (match_operator:<ssescalarmode> 5 "vsib_mem_operator"
21694	     [(unspec:P
21695		[(match_operand:P 3 "vsib_address_operand" "Tv")
21696		 (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21697		 (match_operand:SI 4 "const1248_operand" "n")]
21698		UNSPEC_VSIBADDR)])]
21699	  UNSPEC_GATHER))
21700   (clobber (match_scratch:QI 1 "=&Yk"))]
21701  "TARGET_AVX512F"
21702{
21703  /* %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21704     gas changed what it requires incompatibly.  */
21705  if (<VI48F:MODE>mode != <VEC_GATHER_SRCDI>mode)
21706    {
21707      if (<VI48F:MODE_SIZE> != 64)
21708	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %x0%{%1%}|%x0%{%1%}, %X5}";
21709      else
21710	return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %t0%{%1%}|%t0%{%1%}, %X5}";
21711    }
21712  return "%M3v<sseintprefix>gatherq<ssemodesuffix>\t{%5, %0%{%1%}|%0%{%1%}, %X5}";
21713}
21714  [(set_attr "type" "ssemov")
21715   (set_attr "prefix" "evex")
21716   (set_attr "mode" "<sseinsnmode>")])
21717
21718(define_expand "<avx512>_scattersi<mode>"
21719  [(parallel [(set (mem:VI48F
21720		     (match_par_dup 5
21721		       [(match_operand 0 "vsib_address_operand")
21722			(match_operand:<VEC_GATHER_IDXSI> 2 "register_operand")
21723			(match_operand:SI 4 "const1248_operand")]))
21724		   (unspec:VI48F
21725		     [(match_operand:<avx512fmaskmode> 1 "register_operand")
21726		      (match_operand:VI48F 3 "register_operand")]
21727		     UNSPEC_SCATTER))
21728	      (clobber (match_scratch:<avx512fmaskmode> 6))])]
21729  "TARGET_AVX512F"
21730{
21731  operands[5]
21732    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
21733					operands[4], operands[1]),
21734					UNSPEC_VSIBADDR);
21735})
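
;; Illustration only: scatter is the store-side counterpart; operand 0 is
;; the base address, operand 2 the index vector, operand 3 the data and
;; operand 1 the write mask.  Sketch with the usual <immintrin.h> intrinsic
;; names (assumed, not defined here):
;;
;;   #include <immintrin.h>
;;   void
;;   scatter16_masked (float *base, __mmask16 k, __m512i idx, __m512 val)
;;   {
;;     _mm512_mask_i32scatter_ps (base, k, idx, val, 4);
;;   }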
21736
21737(define_insn "*avx512f_scattersi<VI48F:mode>"
21738  [(set (match_operator:VI48F 5 "vsib_mem_operator"
21739	  [(unspec:P
21740	     [(match_operand:P 0 "vsib_address_operand" "Tv")
21741	      (match_operand:<VEC_GATHER_IDXSI> 2 "register_operand" "v")
21742	      (match_operand:SI 4 "const1248_operand" "n")
21743	      (match_operand:<avx512fmaskmode> 6 "register_operand" "1")]
21744	     UNSPEC_VSIBADDR)])
21745	(unspec:VI48F
21746	  [(match_dup 6)
21747	   (match_operand:VI48F 3 "register_operand" "v")]
21748	  UNSPEC_SCATTER))
21749   (clobber (match_scratch:<avx512fmaskmode> 1 "=&Yk"))]
21750  "TARGET_AVX512F"
21751;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21752;; gas changed what it requires incompatibly.
21753  "%M0v<sseintprefix>scatterd<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21754  [(set_attr "type" "ssemov")
21755   (set_attr "prefix" "evex")
21756   (set_attr "mode" "<sseinsnmode>")])
21757
21758(define_expand "<avx512>_scatterdi<mode>"
21759  [(parallel [(set (mem:VI48F
21760		     (match_par_dup 5
21761		       [(match_operand 0 "vsib_address_operand")
21762			(match_operand:<VEC_GATHER_IDXDI> 2 "register_operand")
21763			(match_operand:SI 4 "const1248_operand")]))
21764		   (unspec:VI48F
21765		     [(match_operand:QI 1 "register_operand")
21766		      (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand")]
21767		     UNSPEC_SCATTER))
21768	      (clobber (match_scratch:QI 6))])]
21769  "TARGET_AVX512F"
21770{
21771  operands[5]
21772    = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, operands[0], operands[2],
21773					operands[4], operands[1]),
21774					UNSPEC_VSIBADDR);
21775})
21776
21777(define_insn "*avx512f_scatterdi<VI48F:mode>"
21778  [(set (match_operator:VI48F 5 "vsib_mem_operator"
21779	  [(unspec:P
21780	     [(match_operand:P 0 "vsib_address_operand" "Tv")
21781	      (match_operand:<VEC_GATHER_IDXDI> 2 "register_operand" "v")
21782	      (match_operand:SI 4 "const1248_operand" "n")
21783	      (match_operand:QI 6 "register_operand" "1")]
21784	     UNSPEC_VSIBADDR)])
21785	(unspec:VI48F
21786	  [(match_dup 6)
21787	   (match_operand:<VEC_GATHER_SRCDI> 3 "register_operand" "v")]
21788	  UNSPEC_SCATTER))
21789   (clobber (match_scratch:QI 1 "=&Yk"))]
21790  "TARGET_AVX512F"
21791;; %X5 so that we don't emit any *WORD PTR for -masm=intel, as
21792;; gas changed what it requires incompatibly.
21793  "%M0v<sseintprefix>scatterq<ssemodesuffix>\t{%3, %5%{%1%}|%X5%{%1%}, %3}"
21794  [(set_attr "type" "ssemov")
21795   (set_attr "prefix" "evex")
21796   (set_attr "mode" "<sseinsnmode>")])
21797
21798(define_insn "<avx512>_compress<mode>_mask"
21799  [(set (match_operand:VI48F 0 "register_operand" "=v")
21800	(unspec:VI48F
21801	  [(match_operand:VI48F 1 "register_operand" "v")
21802	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C")
21803	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21804	  UNSPEC_COMPRESS))]
21805  "TARGET_AVX512F"
21806  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21807  [(set_attr "type" "ssemov")
21808   (set_attr "prefix" "evex")
21809   (set_attr "mode" "<sseinsnmode>")])
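
;; Illustration only: compress packs the elements of operand 1 selected by
;; the mask into the low positions of the destination; the remaining lanes
;; come from operand 2 (or zero for the maskz form).  Sketch assuming the
;; usual <immintrin.h> intrinsic names:
;;
;;   #include <immintrin.h>
;;   __m512i
;;   keep_selected (__mmask16 k, __m512i v)
;;   {
;;     /* E.g. with k == 0x0014 (bits 2 and 4 set), elements 2 and 4 of V
;;        end up in lanes 0 and 1; all other lanes are zeroed.  */
;;     return _mm512_maskz_compress_epi32 (k, v);
;;   }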
21810
21811(define_insn "compress<mode>_mask"
21812  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v")
21813	(unspec:VI12_AVX512VLBW
21814	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "v")
21815	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C")
21816	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")]
21817	  UNSPEC_COMPRESS))]
21818  "TARGET_AVX512VBMI2"
21819  "vpcompress<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21820  [(set_attr "type" "ssemov")
21821   (set_attr "prefix" "evex")
21822   (set_attr "mode" "<sseinsnmode>")])
21823
21824(define_insn "<avx512>_compressstore<mode>_mask"
21825  [(set (match_operand:VI48F 0 "memory_operand" "=m")
21826	(unspec:VI48F
21827	  [(match_operand:VI48F 1 "register_operand" "x")
21828	   (match_dup 0)
21829	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21830	  UNSPEC_COMPRESS_STORE))]
21831  "TARGET_AVX512F"
21832  "v<sseintprefix>compress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21833  [(set_attr "type" "ssemov")
21834   (set_attr "prefix" "evex")
21835   (set_attr "memory" "store")
21836   (set_attr "mode" "<sseinsnmode>")])
21837
21838(define_insn "compressstore<mode>_mask"
21839  [(set (match_operand:VI12_AVX512VLBW 0 "memory_operand" "=m")
21840	(unspec:VI12_AVX512VLBW
21841	  [(match_operand:VI12_AVX512VLBW 1 "register_operand" "x")
21842	   (match_dup 0)
21843	   (match_operand:<avx512fmaskmode> 2 "register_operand" "Yk")]
21844	  UNSPEC_COMPRESS_STORE))]
21845  "TARGET_AVX512VBMI2"
21846  "vpcompress<ssemodesuffix>\t{%1, %0%{%2%}|%0%{%2%}, %1}"
21847  [(set_attr "type" "ssemov")
21848   (set_attr "prefix" "evex")
21849   (set_attr "memory" "store")
21850   (set_attr "mode" "<sseinsnmode>")])
21851
21852(define_expand "<avx512>_expand<mode>_maskz"
21853  [(set (match_operand:VI48F 0 "register_operand")
21854	(unspec:VI48F
21855	  [(match_operand:VI48F 1 "nonimmediate_operand")
21856	   (match_operand:VI48F 2 "nonimm_or_0_operand")
21857	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
21858	  UNSPEC_EXPAND))]
21859  "TARGET_AVX512F"
21860  "operands[2] = CONST0_RTX (<MODE>mode);")
21861
21862(define_insn "<avx512>_expand<mode>_mask"
21863  [(set (match_operand:VI48F 0 "register_operand" "=v,v")
21864	(unspec:VI48F
21865	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
21866	   (match_operand:VI48F 2 "nonimm_or_0_operand" "0C,0C")
21867	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21868	  UNSPEC_EXPAND))]
21869  "TARGET_AVX512F"
21870  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21871  [(set_attr "type" "ssemov")
21872   (set_attr "prefix" "evex")
21873   (set_attr "memory" "none,load")
21874   (set_attr "mode" "<sseinsnmode>")])
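
;; Illustration only: expand is the inverse of compress; consecutive source
;; elements (operand 1, which may be memory) are distributed to the
;; destination lanes selected by the mask.  Sketch assuming the usual
;; <immintrin.h> intrinsic names:
;;
;;   #include <immintrin.h>
;;   __m512i
;;   expand_from_memory (__mmask16 k, const int *p)
;;   {
;;     /* The lane holding the i-th set bit of K receives p[i]; unselected
;;        lanes are zeroed.  */
;;     return _mm512_maskz_expandloadu_epi32 (k, p);
;;   }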
21875
21876(define_insn "expand<mode>_mask"
21877  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand" "=v,v")
21878	(unspec:VI12_AVX512VLBW
21879	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand" "v,m")
21880	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand" "0C,0C")
21881	   (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk,Yk")]
21882	  UNSPEC_EXPAND))]
21883  "TARGET_AVX512VBMI2"
21884  "v<sseintprefix>expand<ssemodesuffix>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
21885  [(set_attr "type" "ssemov")
21886   (set_attr "prefix" "evex")
21887   (set_attr "memory" "none,load")
21888   (set_attr "mode" "<sseinsnmode>")])
21889
21890(define_expand "expand<mode>_maskz"
21891  [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
21892	(unspec:VI12_AVX512VLBW
21893	  [(match_operand:VI12_AVX512VLBW 1 "nonimmediate_operand")
21894	   (match_operand:VI12_AVX512VLBW 2 "nonimm_or_0_operand")
21895	   (match_operand:<avx512fmaskmode> 3 "register_operand")]
21896	  UNSPEC_EXPAND))]
21897  "TARGET_AVX512VBMI2"
21898  "operands[2] = CONST0_RTX (<MODE>mode);")
21899
21900(define_insn "avx512dq_rangep<mode><mask_name><round_saeonly_name>"
21901  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21902	(unspec:VF_AVX512VL
21903	  [(match_operand:VF_AVX512VL 1 "register_operand" "v")
21904	   (match_operand:VF_AVX512VL 2 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
21905	   (match_operand:SI 3 "const_0_to_15_operand")]
21906	  UNSPEC_RANGE))]
21907  "TARGET_AVX512DQ && <round_saeonly_mode512bit_condition>"
21908  "vrange<ssemodesuffix>\t{%3, <round_saeonly_mask_op4>%2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2<round_saeonly_mask_op4>, %3}"
21909  [(set_attr "type" "sse")
21910   (set_attr "prefix" "evex")
21911   (set_attr "mode" "<MODE>")])
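
;; Illustration only: the const_0_to_15 immediate of vrange selects the
;; min/max/absolute-value variant and the sign control; see the ISA
;; reference for the exact encoding.  Sketch assuming the usual
;; <immintrin.h> intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m512
;;   range_example (__m512 a, __m512 b)
;;   {
;;     /* Immediate 0 requests the basic minimum operation.  */
;;     return _mm512_range_ps (a, b, 0);
;;   }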
21912
21913(define_insn "avx512dq_ranges<mode><mask_scalar_name><round_saeonly_scalar_name>"
21914  [(set (match_operand:VF_128 0 "register_operand" "=v")
21915	(vec_merge:VF_128
21916	  (unspec:VF_128
21917	    [(match_operand:VF_128 1 "register_operand" "v")
21918	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21919	     (match_operand:SI 3 "const_0_to_15_operand")]
21920	    UNSPEC_RANGE)
21921	  (match_dup 1)
21922	  (const_int 1)))]
21923  "TARGET_AVX512DQ"
21924  "vrange<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21925  [(set_attr "type" "sse")
21926   (set_attr "prefix" "evex")
21927   (set_attr "mode" "<MODE>")])
21928
21929(define_insn "avx512dq_fpclass<mode><mask_scalar_merge_name>"
21930  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21931          (unspec:<avx512fmaskmode>
21932            [(match_operand:VF_AVX512VL 1 "vector_operand" "vm")
21933             (match_operand 2 "const_0_to_255_operand" "n")]
21934             UNSPEC_FPCLASS))]
21935   "TARGET_AVX512DQ"
21936   "vfpclass<ssemodesuffix><vecmemsuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
21937  [(set_attr "type" "sse")
21938   (set_attr "length_immediate" "1")
21939   (set_attr "prefix" "evex")
21940   (set_attr "mode" "<MODE>")])
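
;; Illustration only: vfpclass tests each element against a set of value
;; categories (NaN, infinity, zero, denormal, negative, ...) selected by
;; the 8-bit immediate and returns the result as a mask.  Sketch assuming
;; the usual <immintrin.h> intrinsic name; the category bit assignments are
;; defined by the ISA reference, not by this file:
;;
;;   #include <immintrin.h>
;;   __mmask16
;;   classify (__m512 v)
;;   {
;;     /* The immediate is a bitmask of categories; see the ISA reference
;;        for the individual bit meanings.  */
;;     return _mm512_fpclass_ps_mask (v, 0x01);
;;   }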
21941
21942(define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>"
21943  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
21944	(and:<avx512fmaskmode>
21945	  (unspec:<avx512fmaskmode>
21946	    [(match_operand:VF_128 1 "nonimmediate_operand" "vm")
21947             (match_operand 2 "const_0_to_255_operand" "n")]
21948	    UNSPEC_FPCLASS)
21949	  (const_int 1)))]
21950   "TARGET_AVX512DQ"
21951   "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
21952  [(set_attr "type" "sse")
21953   (set_attr "length_immediate" "1")
21954   (set_attr "prefix" "evex")
21955   (set_attr "mode" "<MODE>")])
21956
21957(define_insn "<avx512>_getmant<mode><mask_name><round_saeonly_name>"
21958  [(set (match_operand:VF_AVX512VL 0 "register_operand" "=v")
21959	(unspec:VF_AVX512VL
21960	  [(match_operand:VF_AVX512VL 1 "nonimmediate_operand" "<round_saeonly_constraint>")
21961	   (match_operand:SI 2 "const_0_to_15_operand")]
21962	  UNSPEC_GETMANT))]
21963  "TARGET_AVX512F"
21964  "vgetmant<ssemodesuffix>\t{%2, <round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>, %2}"
21965  [(set_attr "prefix" "evex")
21966   (set_attr "mode" "<MODE>")])
21967
21968(define_insn "avx512f_vgetmant<mode><mask_scalar_name><round_saeonly_scalar_name>"
21969  [(set (match_operand:VF_128 0 "register_operand" "=v")
21970	(vec_merge:VF_128
21971	  (unspec:VF_128
21972	    [(match_operand:VF_128 1 "register_operand" "v")
21973	     (match_operand:VF_128 2 "<round_saeonly_scalar_nimm_predicate>" "<round_saeonly_scalar_constraint>")
21974	     (match_operand:SI 3 "const_0_to_15_operand")]
21975	    UNSPEC_GETMANT)
21976	  (match_dup 1)
21977	  (const_int 1)))]
21978   "TARGET_AVX512F"
21979   "vgetmant<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %<iptr>2<round_saeonly_scalar_mask_op4>, %3}"
21980   [(set_attr "prefix" "evex")
21981   (set_attr "mode" "<ssescalarmode>")])
21982
21983;; The correct RTL representation of vdbpsadbw would be absolutely
21984;; enormous and surely not generally useful, so an unspec is used instead.
21985(define_insn "<mask_codefor>avx512bw_dbpsadbw<mode><mask_name>"
21986  [(set (match_operand:VI2_AVX512VL 0 "register_operand" "=v")
21987	(unspec:VI2_AVX512VL
21988	  [(match_operand:<dbpsadbwmode> 1 "register_operand" "v")
21989	   (match_operand:<dbpsadbwmode> 2 "nonimmediate_operand" "vm")
21990	   (match_operand:SI 3 "const_0_to_255_operand")]
21991	  UNSPEC_DBPSADBW))]
21992   "TARGET_AVX512BW"
21993  "vdbpsadbw\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
21994  [(set_attr "type" "sselog1")
21995   (set_attr "length_immediate" "1")
21996   (set_attr "prefix" "evex")
21997   (set_attr "mode" "<sseinsnmode>")])
21998
21999(define_insn "clz<mode>2<mask_name>"
22000  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22001	(clz:VI48_AVX512VL
22002	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22003  "TARGET_AVX512CD"
22004  "vplzcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22005  [(set_attr "type" "sse")
22006   (set_attr "prefix" "evex")
22007   (set_attr "mode" "<sseinsnmode>")])
22008
22009(define_insn "<mask_codefor>conflict<mode><mask_name>"
22010  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22011	(unspec:VI48_AVX512VL
22012	  [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")]
22013	  UNSPEC_CONFLICT))]
22014  "TARGET_AVX512CD"
22015  "vpconflict<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
22016  [(set_attr "type" "sse")
22017   (set_attr "prefix" "evex")
22018   (set_attr "mode" "<sseinsnmode>")])
22019
22020(define_insn "sha1msg1"
22021  [(set (match_operand:V4SI 0 "register_operand" "=x")
22022	(unspec:V4SI
22023	  [(match_operand:V4SI 1 "register_operand" "0")
22024	   (match_operand:V4SI 2 "vector_operand" "xBm")]
22025	  UNSPEC_SHA1MSG1))]
22026  "TARGET_SHA"
22027  "sha1msg1\t{%2, %0|%0, %2}"
22028  [(set_attr "type" "sselog1")
22029   (set_attr "mode" "TI")])
22030
22031(define_insn "sha1msg2"
22032  [(set (match_operand:V4SI 0 "register_operand" "=x")
22033	(unspec:V4SI
22034	  [(match_operand:V4SI 1 "register_operand" "0")
22035	   (match_operand:V4SI 2 "vector_operand" "xBm")]
22036	  UNSPEC_SHA1MSG2))]
22037  "TARGET_SHA"
22038  "sha1msg2\t{%2, %0|%0, %2}"
22039  [(set_attr "type" "sselog1")
22040   (set_attr "mode" "TI")])
22041
22042(define_insn "sha1nexte"
22043  [(set (match_operand:V4SI 0 "register_operand" "=x")
22044	(unspec:V4SI
22045	  [(match_operand:V4SI 1 "register_operand" "0")
22046	   (match_operand:V4SI 2 "vector_operand" "xBm")]
22047	  UNSPEC_SHA1NEXTE))]
22048  "TARGET_SHA"
22049  "sha1nexte\t{%2, %0|%0, %2}"
22050  [(set_attr "type" "sselog1")
22051   (set_attr "mode" "TI")])
22052
22053(define_insn "sha1rnds4"
22054  [(set (match_operand:V4SI 0 "register_operand" "=x")
22055	(unspec:V4SI
22056	  [(match_operand:V4SI 1 "register_operand" "0")
22057	   (match_operand:V4SI 2 "vector_operand" "xBm")
22058	   (match_operand:SI 3 "const_0_to_3_operand" "n")]
22059	  UNSPEC_SHA1RNDS4))]
22060  "TARGET_SHA"
22061  "sha1rnds4\t{%3, %2, %0|%0, %2, %3}"
22062  [(set_attr "type" "sselog1")
22063   (set_attr "length_immediate" "1")
22064   (set_attr "mode" "TI")])
22065
22066(define_insn "sha256msg1"
22067  [(set (match_operand:V4SI 0 "register_operand" "=x")
22068	(unspec:V4SI
22069	  [(match_operand:V4SI 1 "register_operand" "0")
22070	   (match_operand:V4SI 2 "vector_operand" "xBm")]
22071	  UNSPEC_SHA256MSG1))]
22072  "TARGET_SHA"
22073  "sha256msg1\t{%2, %0|%0, %2}"
22074  [(set_attr "type" "sselog1")
22075   (set_attr "mode" "TI")])
22076
22077(define_insn "sha256msg2"
22078  [(set (match_operand:V4SI 0 "register_operand" "=x")
22079	(unspec:V4SI
22080	  [(match_operand:V4SI 1 "register_operand" "0")
22081	   (match_operand:V4SI 2 "vector_operand" "xBm")]
22082	  UNSPEC_SHA256MSG2))]
22083  "TARGET_SHA"
22084  "sha256msg2\t{%2, %0|%0, %2}"
22085  [(set_attr "type" "sselog1")
22086   (set_attr "mode" "TI")])
22087
22088(define_insn "sha256rnds2"
22089  [(set (match_operand:V4SI 0 "register_operand" "=x")
22090	(unspec:V4SI
22091	  [(match_operand:V4SI 1 "register_operand" "0")
22092	   (match_operand:V4SI 2 "vector_operand" "xBm")
22093	   (match_operand:V4SI 3 "register_operand" "Yz")]
22094	  UNSPEC_SHA256RNDS2))]
22095  "TARGET_SHA"
22096  "sha256rnds2\t{%3, %2, %0|%0, %2, %3}"
22097  [(set_attr "type" "sselog1")
22098   (set_attr "length_immediate" "1")
22099   (set_attr "mode" "TI")])
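
;; Illustration only: in sha256rnds2 the third source operand (the WK
;; message/constant words) is architecturally fixed to %xmm0, which is what
;; the "Yz" constraint on operand 3 above enforces.  Sketch assuming the
;; usual <immintrin.h> intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m128i
;;   two_sha256_rounds (__m128i state0, __m128i state1, __m128i wk)
;;   {
;;     /* WK is forced into %xmm0 by register allocation.  */
;;     return _mm_sha256rnds2_epu32 (state0, state1, wk);
;;   }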
22100
22101(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
22102  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22103	(vec_concat:AVX512MODE2P
22104	  (vec_concat:<ssehalfvecmode>
22105	    (match_operand:<ssequartermode> 1 "nonimmediate_operand" "xm,x")
22106	    (unspec:<ssequartermode> [(const_int 0)] UNSPEC_CAST))
22107	  (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22108  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22109  "#"
22110  "&& reload_completed"
22111  [(set (match_dup 0) (match_dup 1))]
22112{
22113  if (REG_P (operands[0]))
22114    operands[0] = gen_lowpart (<ssequartermode>mode, operands[0]);
22115  else
22116    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22117				  <ssequartermode>mode);
22118})
22119
22120(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_256<castmode>"
22121  [(set (match_operand:AVX512MODE2P 0 "nonimmediate_operand" "=x,m")
22122	(vec_concat:AVX512MODE2P
22123	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")
22124	  (unspec:<ssehalfvecmode> [(const_int 0)] UNSPEC_CAST)))]
22125  "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
22126  "#"
22127  "&& reload_completed"
22128  [(set (match_dup 0) (match_dup 1))]
22129{
22130  if (REG_P (operands[0]))
22131    operands[0] = gen_lowpart (<ssehalfvecmode>mode, operands[0]);
22132  else
22133    operands[1] = lowpart_subreg (<MODE>mode, operands[1],
22134				  <ssehalfvecmode>mode);
22135})
22136
22137(define_int_iterator VPMADD52
22138	[UNSPEC_VPMADD52LUQ
22139	 UNSPEC_VPMADD52HUQ])
22140
22141(define_int_attr vpmadd52type
22142  [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")])
22143
22144(define_expand "vpamdd52huq<mode>_maskz"
22145  [(match_operand:VI8_AVX512VL 0 "register_operand")
22146   (match_operand:VI8_AVX512VL 1 "register_operand")
22147   (match_operand:VI8_AVX512VL 2 "register_operand")
22148   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22149   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22150  "TARGET_AVX512IFMA"
22151{
22152  emit_insn (gen_vpamdd52huq<mode>_maskz_1 (
22153    operands[0], operands[1], operands[2], operands[3],
22154    CONST0_RTX (<MODE>mode), operands[4]));
22155  DONE;
22156})
22157
22158(define_expand "vpamdd52luq<mode>_maskz"
22159  [(match_operand:VI8_AVX512VL 0 "register_operand")
22160   (match_operand:VI8_AVX512VL 1 "register_operand")
22161   (match_operand:VI8_AVX512VL 2 "register_operand")
22162   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand")
22163   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22164  "TARGET_AVX512IFMA"
22165{
22166  emit_insn (gen_vpamdd52luq<mode>_maskz_1 (
22167    operands[0], operands[1], operands[2], operands[3],
22168    CONST0_RTX (<MODE>mode), operands[4]));
22169  DONE;
22170})
22171
22172(define_insn "vpamdd52<vpmadd52type><mode><sd_maskz_name>"
22173  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22174	(unspec:VI8_AVX512VL
22175	  [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22176	   (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22177	   (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22178	  VPMADD52))]
22179  "TARGET_AVX512IFMA"
22180  "vpmadd52<vpmadd52type>\t{%3, %2, %0<sd_mask_op4>|%0<sd_mask_op4>, %2, %3}"
22181  [(set_attr "type" "ssemuladd")
22182   (set_attr "prefix" "evex")
22183   (set_attr "mode" "<sseinsnmode>")])
22184
22185(define_insn "vpamdd52<vpmadd52type><mode>_mask"
22186  [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v")
22187	(vec_merge:VI8_AVX512VL
22188	  (unspec:VI8_AVX512VL
22189	    [(match_operand:VI8_AVX512VL 1 "register_operand" "0")
22190	     (match_operand:VI8_AVX512VL 2 "register_operand" "v")
22191	     (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")]
22192	    VPMADD52)
22193	  (match_dup 1)
22194	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22195  "TARGET_AVX512IFMA"
22196  "vpmadd52<vpmadd52type>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22197  [(set_attr "type" "ssemuladd")
22198   (set_attr "prefix" "evex")
22199   (set_attr "mode" "<sseinsnmode>")])
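
;; Illustration only: the IFMA patterns compute, per 64-bit lane, the
;; 104-bit product of the low 52 bits of the two sources and accumulate
;; either its low or its high 52 bits into the destination.  Sketch
;; assuming the usual <immintrin.h> intrinsic names:
;;
;;   #include <immintrin.h>
;;   __m512i
;;   madd52 (__m512i acc, __m512i b, __m512i c)
;;   {
;;     /* acc += (low 52 bits of b * low 52 bits of c) mod 2^52,
;;        per 64-bit lane.  */
;;     return _mm512_madd52lo_epu64 (acc, b, c);
;;   }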
22200
22201(define_insn "vpmultishiftqb<mode><mask_name>"
22202  [(set (match_operand:VI1_AVX512VL 0 "register_operand" "=v")
22203	(unspec:VI1_AVX512VL
22204	  [(match_operand:VI1_AVX512VL 1 "register_operand" "v")
22205	   (match_operand:VI1_AVX512VL 2 "nonimmediate_operand" "vm")]
22206	  UNSPEC_VPMULTISHIFT))]
22207  "TARGET_AVX512VBMI"
22208  "vpmultishiftqb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
22209  [(set_attr "type" "sselog")
22210   (set_attr "prefix" "evex")
22211   (set_attr "mode" "<sseinsnmode>")])
22212
22213(define_mode_iterator IMOD4
22214  [(V64SF "TARGET_AVX5124FMAPS") (V64SI "TARGET_AVX5124VNNIW")])
22215
22216(define_mode_attr imod4_narrow
22217  [(V64SF "V16SF") (V64SI "V16SI")])
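
;; The V64SF and V64SI modes do not correspond to a single hardware
;; register; they model the blocks of four consecutive 512-bit registers
;; consumed by the AVX5124FMAPS/AVX5124VNNIW instructions below (their
;; templates print the first register of the block with the %g modifier).
;; *mov<mode>_internal therefore splits any move of such a value into four
;; ordinary 512-bit moves after reload.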
22218
22219(define_expand "mov<mode>"
22220  [(set (match_operand:IMOD4 0 "nonimmediate_operand")
22221	(match_operand:IMOD4 1 "nonimm_or_0_operand"))]
22222  "TARGET_AVX512F"
22223{
22224  ix86_expand_vector_move (<MODE>mode, operands);
22225  DONE;
22226})
22227
22228(define_insn_and_split "*mov<mode>_internal"
22229  [(set (match_operand:IMOD4 0 "nonimmediate_operand" "=v,v ,m")
22230	(match_operand:IMOD4 1 "nonimm_or_0_operand"  " C,vm,v"))]
22231  "TARGET_AVX512F
22232   && (register_operand (operands[0], <MODE>mode)
22233       || register_operand (operands[1], <MODE>mode))"
22234  "#"
22235  "&& reload_completed"
22236  [(const_int 0)]
22237{
22238  rtx op0, op1;
22239  int i;
22240
22241  for (i = 0; i < 4; i++)
22242    {
22243      op0 = simplify_subreg
22244	     (<imod4_narrow>mode, operands[0], <MODE>mode, i * 64);
22245      op1 = simplify_subreg
22246	     (<imod4_narrow>mode, operands[1], <MODE>mode, i * 64);
22247      emit_move_insn (op0, op1);
22248    }
22249  DONE;
22250})
22251
22252(define_insn "avx5124fmaddps_4fmaddps"
22253  [(set (match_operand:V16SF 0 "register_operand" "=v")
22254	(unspec:V16SF
22255	  [(match_operand:V16SF 1 "register_operand" "0")
22256	   (match_operand:V64SF 2 "register_operand" "v")
22257	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22258  "TARGET_AVX5124FMAPS"
22259  "v4fmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22260   [(set_attr ("type") ("ssemuladd"))
22261    (set_attr ("prefix") ("evex"))
22262    (set_attr ("mode") ("V16SF"))])
22263
22264(define_insn "avx5124fmaddps_4fmaddps_mask"
22265  [(set (match_operand:V16SF 0 "register_operand" "=v")
22266	(vec_merge:V16SF
22267	  (unspec:V16SF
22268	     [(match_operand:V64SF 1 "register_operand" "v")
22269	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22270	  (match_operand:V16SF 3 "register_operand" "0")
22271	  (match_operand:HI 4 "register_operand" "Yk")))]
22272  "TARGET_AVX5124FMAPS"
22273  "v4fmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22274   [(set_attr ("type") ("ssemuladd"))
22275    (set_attr ("prefix") ("evex"))
22276    (set_attr ("mode") ("V16SF"))])
22277
22278(define_insn "avx5124fmaddps_4fmaddps_maskz"
22279  [(set (match_operand:V16SF 0 "register_operand" "=v")
22280	(vec_merge:V16SF
22281	  (unspec:V16SF
22282	    [(match_operand:V16SF 1 "register_operand" "0")
22283	     (match_operand:V64SF 2 "register_operand" "v")
22284	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22285	  (match_operand:V16SF 4 "const0_operand" "C")
22286	  (match_operand:HI 5 "register_operand" "Yk")))]
22287  "TARGET_AVX5124FMAPS"
22288  "v4fmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22289   [(set_attr ("type") ("ssemuladd"))
22290    (set_attr ("prefix") ("evex"))
22291    (set_attr ("mode") ("V16SF"))])
22292
22293(define_insn "avx5124fmaddps_4fmaddss"
22294  [(set (match_operand:V4SF 0 "register_operand" "=v")
22295	(unspec:V4SF
22296	  [(match_operand:V4SF 1 "register_operand" "0")
22297	   (match_operand:V64SF 2 "register_operand" "v")
22298	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD))]
22299  "TARGET_AVX5124FMAPS"
22300  "v4fmaddss\t{%3, %x2, %0|%0, %x2, %3}"
22301   [(set_attr ("type") ("ssemuladd"))
22302    (set_attr ("prefix") ("evex"))
22303    (set_attr ("mode") ("SF"))])
22304
22305(define_insn "avx5124fmaddps_4fmaddss_mask"
22306  [(set (match_operand:V4SF 0 "register_operand" "=v")
22307	(vec_merge:V4SF
22308	  (unspec:V4SF
22309	    [(match_operand:V64SF 1 "register_operand" "v")
22310	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FMADD)
22311	  (match_operand:V4SF 3 "register_operand" "0")
22312	  (match_operand:QI 4 "register_operand" "Yk")))]
22313  "TARGET_AVX5124FMAPS"
22314  "v4fmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
22315   [(set_attr ("type") ("ssemuladd"))
22316    (set_attr ("prefix") ("evex"))
22317    (set_attr ("mode") ("SF"))])
22318
22319(define_insn "avx5124fmaddps_4fmaddss_maskz"
22320  [(set (match_operand:V4SF 0 "register_operand" "=v")
22321	(vec_merge:V4SF
22322	  (unspec:V4SF
22323	    [(match_operand:V4SF 1 "register_operand" "0")
22324	     (match_operand:V64SF 2 "register_operand" "v")
22325	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FMADD)
22326	  (match_operand:V4SF 4 "const0_operand" "C")
22327	  (match_operand:QI 5 "register_operand" "Yk")))]
22328  "TARGET_AVX5124FMAPS"
22329  "v4fmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
22330   [(set_attr ("type") ("ssemuladd"))
22331    (set_attr ("prefix") ("evex"))
22332    (set_attr ("mode") ("SF"))])
22333
22334(define_insn "avx5124fmaddps_4fnmaddps"
22335  [(set (match_operand:V16SF 0 "register_operand" "=v")
22336	(unspec:V16SF
22337	  [(match_operand:V16SF 1 "register_operand" "0")
22338	   (match_operand:V64SF 2 "register_operand" "v")
22339	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
22340  "TARGET_AVX5124FMAPS"
22341  "v4fnmaddps\t{%3, %g2, %0|%0, %g2, %3}"
22342   [(set_attr ("type") ("ssemuladd"))
22343    (set_attr ("prefix") ("evex"))
22344    (set_attr ("mode") ("V16SF"))])
22345
22346(define_insn "avx5124fmaddps_4fnmaddps_mask"
22347  [(set (match_operand:V16SF 0 "register_operand" "=v")
22348	(vec_merge:V16SF
22349	  (unspec:V16SF
22350	     [(match_operand:V64SF 1 "register_operand" "v")
22351	      (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
22352	  (match_operand:V16SF 3 "register_operand" "0")
22353	  (match_operand:HI 4 "register_operand" "Yk")))]
22354  "TARGET_AVX5124FMAPS"
22355  "v4fnmaddps\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22356   [(set_attr ("type") ("ssemuladd"))
22357    (set_attr ("prefix") ("evex"))
22358    (set_attr ("mode") ("V16SF"))])
22359
22360(define_insn "avx5124fmaddps_4fnmaddps_maskz"
22361  [(set (match_operand:V16SF 0 "register_operand" "=v")
22362	(vec_merge:V16SF
22363	  (unspec:V16SF
22364	    [(match_operand:V16SF 1 "register_operand" "0")
22365	     (match_operand:V64SF 2 "register_operand" "v")
22366	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
22367	  (match_operand:V16SF 4 "const0_operand" "C")
22368	  (match_operand:HI 5 "register_operand" "Yk")))]
22369  "TARGET_AVX5124FMAPS"
22370  "v4fnmaddps\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22371   [(set_attr ("type") ("ssemuladd"))
22372    (set_attr ("prefix") ("evex"))
22373    (set_attr ("mode") ("V16SF"))])
22374
22375(define_insn "avx5124fmaddps_4fnmaddss"
22376  [(set (match_operand:V4SF 0 "register_operand" "=v")
22377	(unspec:V4SF
22378	  [(match_operand:V4SF 1 "register_operand" "0")
22379	   (match_operand:V64SF 2 "register_operand" "v")
22380	   (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD))]
22381  "TARGET_AVX5124FMAPS"
22382  "v4fnmaddss\t{%3, %x2, %0|%0, %x2, %3}"
22383   [(set_attr ("type") ("ssemuladd"))
22384    (set_attr ("prefix") ("evex"))
22385    (set_attr ("mode") ("SF"))])
22386
22387(define_insn "avx5124fmaddps_4fnmaddss_mask"
22388  [(set (match_operand:V4SF 0 "register_operand" "=v")
22389	(vec_merge:V4SF
22390	  (unspec:V4SF
22391	    [(match_operand:V64SF 1 "register_operand" "v")
22392	     (match_operand:V4SF 2 "memory_operand" "m")] UNSPEC_VP4FNMADD)
22393	  (match_operand:V4SF 3 "register_operand" "0")
22394	  (match_operand:QI 4 "register_operand" "Yk")))]
22395  "TARGET_AVX5124FMAPS"
22396  "v4fnmaddss\t{%2, %x1, %0%{%4%}|%0%{%4%}, %x1, %2}"
22397   [(set_attr ("type") ("ssemuladd"))
22398    (set_attr ("prefix") ("evex"))
22399    (set_attr ("mode") ("SF"))])
22400
22401(define_insn "avx5124fmaddps_4fnmaddss_maskz"
22402  [(set (match_operand:V4SF 0 "register_operand" "=v")
22403	(vec_merge:V4SF
22404	  (unspec:V4SF
22405	    [(match_operand:V4SF 1 "register_operand" "0")
22406	     (match_operand:V64SF 2 "register_operand" "v")
22407	     (match_operand:V4SF 3 "memory_operand" "m")] UNSPEC_VP4FNMADD)
22408	  (match_operand:V4SF 4 "const0_operand" "C")
22409	  (match_operand:QI 5 "register_operand" "Yk")))]
22410  "TARGET_AVX5124FMAPS"
22411  "v4fnmaddss\t{%3, %x2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %x2, %3}"
22412   [(set_attr ("type") ("ssemuladd"))
22413    (set_attr ("prefix") ("evex"))
22414    (set_attr ("mode") ("SF"))])
22415
22416(define_insn "avx5124vnniw_vp4dpwssd"
22417  [(set (match_operand:V16SI 0 "register_operand" "=v")
22418	(unspec:V16SI
22419	  [(match_operand:V16SI 1 "register_operand" "0")
22420	   (match_operand:V64SI 2 "register_operand" "v")
22421	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD))]
22422  "TARGET_AVX5124VNNIW"
22423  "vp4dpwssd\t{%3, %g2, %0|%0, %g2, %3}"
22424   [(set_attr ("type") ("ssemuladd"))
22425    (set_attr ("prefix") ("evex"))
22426    (set_attr ("mode") ("TI"))])
22427
22428(define_insn "avx5124vnniw_vp4dpwssd_mask"
22429  [(set (match_operand:V16SI 0 "register_operand" "=v")
22430	(vec_merge:V16SI
22431	  (unspec:V16SI
22432	     [(match_operand:V64SI 1 "register_operand" "v")
22433	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
22434	  (match_operand:V16SI 3 "register_operand" "0")
22435	  (match_operand:HI 4 "register_operand" "Yk")))]
22436  "TARGET_AVX5124VNNIW"
22437  "vp4dpwssd\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22438   [(set_attr ("type") ("ssemuladd"))
22439    (set_attr ("prefix") ("evex"))
22440    (set_attr ("mode") ("TI"))])
22441
22442(define_insn "avx5124vnniw_vp4dpwssd_maskz"
22443  [(set (match_operand:V16SI 0 "register_operand" "=v")
22444	(vec_merge:V16SI
22445	  (unspec:V16SI
22446	    [(match_operand:V16SI 1 "register_operand" "0")
22447	     (match_operand:V64SI 2 "register_operand" "v")
22448	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSD)
22449	  (match_operand:V16SI 4 "const0_operand" "C")
22450	  (match_operand:HI 5 "register_operand" "Yk")))]
22451  "TARGET_AVX5124VNNIW"
22452  "vp4dpwssd\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22453   [(set_attr ("type") ("ssemuladd"))
22454    (set_attr ("prefix") ("evex"))
22455    (set_attr ("mode") ("TI"))])
22456
22457(define_insn "avx5124vnniw_vp4dpwssds"
22458  [(set (match_operand:V16SI 0 "register_operand" "=v")
22459	(unspec:V16SI
22460	  [(match_operand:V16SI 1 "register_operand" "0")
22461	   (match_operand:V64SI 2 "register_operand" "v")
22462	   (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS))]
22463  "TARGET_AVX5124VNNIW"
22464  "vp4dpwssds\t{%3, %g2, %0|%0, %g2, %3}"
22465   [(set_attr ("type") ("ssemuladd"))
22466    (set_attr ("prefix") ("evex"))
22467    (set_attr ("mode") ("TI"))])
22468
22469(define_insn "avx5124vnniw_vp4dpwssds_mask"
22470  [(set (match_operand:V16SI 0 "register_operand" "=v")
22471	(vec_merge:V16SI
22472	  (unspec:V16SI
22473	     [(match_operand:V64SI 1 "register_operand" "v")
22474	      (match_operand:V4SI 2 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
22475	  (match_operand:V16SI 3 "register_operand" "0")
22476	  (match_operand:HI 4 "register_operand" "Yk")))]
22477  "TARGET_AVX5124VNNIW"
22478  "vp4dpwssds\t{%2, %g1, %0%{%4%}|%0%{%4%}, %g1, %2}"
22479   [(set_attr ("type") ("ssemuladd"))
22480    (set_attr ("prefix") ("evex"))
22481    (set_attr ("mode") ("TI"))])
22482
22483(define_insn "avx5124vnniw_vp4dpwssds_maskz"
22484  [(set (match_operand:V16SI 0 "register_operand" "=v")
22485	(vec_merge:V16SI
22486	  (unspec:V16SI
22487	    [(match_operand:V16SI 1 "register_operand" "0")
22488	     (match_operand:V64SI 2 "register_operand" "v")
22489	     (match_operand:V4SI 3 "memory_operand" "m")] UNSPEC_VP4DPWSSDS)
22490	  (match_operand:V16SI 4 "const0_operand" "C")
22491	  (match_operand:HI 5 "register_operand" "Yk")))]
22492  "TARGET_AVX5124VNNIW"
22493  "vp4dpwssds\t{%3, %g2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %g2, %3}"
22494   [(set_attr ("type") ("ssemuladd"))
22495    (set_attr ("prefix") ("evex"))
22496    (set_attr ("mode") ("TI"))])
22497
22498(define_insn "vpopcount<mode><mask_name>"
22499  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
22500	(popcount:VI48_AVX512VL
22501	  (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "vm")))]
22502  "TARGET_AVX512VPOPCNTDQ"
22503  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
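
;; Illustration only: per-element population count, optionally masked via
;; <mask_name>.  Sketch assuming the usual <immintrin.h> intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m512i
;;   lane_popcounts (__m512i v)
;;   {
;;     /* Each 32-bit lane receives the number of set bits in that lane.  */
;;     return _mm512_popcnt_epi32 (v);
;;   }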
22504
22505;; Save multiple registers out-of-line.
22506(define_insn "*save_multiple<mode>"
22507  [(match_parallel 0 "save_multiple"
22508    [(use (match_operand:P 1 "symbol_operand"))])]
22509  "TARGET_SSE && TARGET_64BIT"
22510  "call\t%P1")
22511
22512;; Restore multiple registers out-of-line.
22513(define_insn "*restore_multiple<mode>"
22514  [(match_parallel 0 "restore_multiple"
22515    [(use (match_operand:P 1 "symbol_operand"))])]
22516  "TARGET_SSE && TARGET_64BIT"
22517  "call\t%P1")
22518
22519;; Restore multiple registers out-of-line and return.
22520(define_insn "*restore_multiple_and_return<mode>"
22521  [(match_parallel 0 "restore_multiple"
22522    [(return)
22523     (use (match_operand:P 1 "symbol_operand"))
22524     (set (reg:DI SP_REG) (reg:DI R10_REG))
22525    ])]
22526  "TARGET_SSE && TARGET_64BIT"
22527  "jmp\t%P1")
22528
22529;; Restore multiple registers out-of-line when the hard frame pointer is used,
22530;; performing the leave operation prior to returning from the function.
22531(define_insn "*restore_multiple_leave_return<mode>"
22532  [(match_parallel 0 "restore_multiple"
22533    [(return)
22534     (use (match_operand:P 1 "symbol_operand"))
22535     (set (reg:DI SP_REG) (plus:DI (reg:DI BP_REG) (const_int 8)))
22536     (set (reg:DI BP_REG) (mem:DI (reg:DI BP_REG)))
22537     (clobber (mem:BLK (scratch)))
22538    ])]
22539  "TARGET_SSE && TARGET_64BIT"
22540  "jmp\t%P1")
22541
22542(define_insn "vpopcount<mode><mask_name>"
22543  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
22544	(popcount:VI12_AVX512VL
22545	  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "vm")))]
22546  "TARGET_AVX512BITALG"
22547  "vpopcnt<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
22548
22549(define_insn "vgf2p8affineinvqb_<mode><mask_name>"
22550  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
22551	(unspec:VI1_AVX512F
22552	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
22553	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
22554	   (match_operand 3 "const_0_to_255_operand" "n,n")]
22555	  UNSPEC_GF2P8AFFINEINV))]
22556  "TARGET_GFNI"
22557  "@
22558   gf2p8affineinvqb\t{%3, %2, %0|%0, %2, %3}
22559   vgf2p8affineinvqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
22560  [(set_attr "isa" "noavx,avx")
22561   (set_attr "prefix_data16" "1,*")
22562   (set_attr "prefix_extra" "1")
22563   (set_attr "prefix" "orig,maybe_evex")
22564   (set_attr "mode" "<sseinsnmode>")])
22565
22566(define_insn "vgf2p8affineqb_<mode><mask_name>"
22567  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
22568	(unspec:VI1_AVX512F
22569	  [(match_operand:VI1_AVX512F 1 "register_operand" "0,v")
22570	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")
22571	   (match_operand 3 "const_0_to_255_operand" "n,n")]
22572	  UNSPEC_GF2P8AFFINE))]
22573  "TARGET_GFNI"
22574  "@
22575   gf2p8affineqb\t{%3, %2, %0|%0, %2, %3}
22576   vgf2p8affineqb\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}
22577  [(set_attr "isa" "noavx,avx")
22578   (set_attr "prefix_data16" "1,*")
22579   (set_attr "prefix_extra" "1")
22580   (set_attr "prefix" "orig,maybe_evex")
22581   (set_attr "mode" "<sseinsnmode>")])
22582
22583(define_insn "vgf2p8mulb_<mode><mask_name>"
22584  [(set (match_operand:VI1_AVX512F 0 "register_operand" "=x,v")
22585	(unspec:VI1_AVX512F
22586	  [(match_operand:VI1_AVX512F 1 "register_operand" "%0,v")
22587	   (match_operand:VI1_AVX512F 2 "vector_operand" "xBm,vm")]
22588	  UNSPEC_GF2P8MUL))]
22589  "TARGET_GFNI"
22590  "@
22591   gf2p8mulb\t{%2, %0|%0, %2}
22592   vgf2p8mulb\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}
22593  [(set_attr "isa" "noavx,avx")
22594   (set_attr "prefix_data16" "1,*")
22595   (set_attr "prefix_extra" "1")
22596   (set_attr "prefix" "orig,maybe_evex")
22597   (set_attr "mode" "<sseinsnmode>")])
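
;; Illustration only: vgf2p8mulb multiplies corresponding bytes as elements
;; of GF(2^8), reducing modulo the AES polynomial x^8 + x^4 + x^3 + x + 1.
;; Sketch assuming the usual <immintrin.h> intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m128i
;;   gf_mul_bytes (__m128i a, __m128i b)
;;   {
;;     /* Byte-wise carry-less multiply reduced in GF(2^8).  */
;;     return _mm_gf2p8mulb_epi8 (a, b);
;;   }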
22598
22599(define_insn "vpshrd_<mode><mask_name>"
22600  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22601	(unspec:VI248_AVX512VL
22602	  [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
22603	   (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
22604	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
22605	  UNSPEC_VPSHRD))]
22606  "TARGET_AVX512VBMI2"
22607  "vpshrd<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22608  [(set_attr "prefix" "evex")])
22609
22610(define_insn "vpshld_<mode><mask_name>"
22611  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22612	(unspec:VI248_AVX512VL
22613	  [(match_operand:VI248_AVX512VL 1 "register_operand" "v")
22614	   (match_operand:VI248_AVX512VL 2 "nonimmediate_operand" "vm")
22615	   (match_operand:SI 3 "const_0_to_255_operand" "n")]
22616	  UNSPEC_VPSHLD))]
22617  "TARGET_AVX512VBMI2"
22618  "vpshld<ssemodesuffix>\t{%3, %2, %1, %0<mask_operand4>|%0<mask_operand4>, %1, %2, %3}"
22619  [(set_attr "prefix" "evex")])
22620
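;; Illustration only: the vpshld*/vpshrd* patterns concatenate each element
;; of the two sources into a double-width value and shift it, either by the
;; immediate above or by a per-element count (the *v forms below), keeping
;; one half (a funnel shift).  Sketch assuming the usual <immintrin.h>
;; intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m128i
;;   funnel_left_by_3 (__m128i hi, __m128i lo)
;;   {
;;     /* Roughly (hi << 3) | (lo >> 13) in each 16-bit lane.  */
;;     return _mm_shldi_epi16 (hi, lo, 3);
;;   }
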
22621(define_insn "vpshrdv_<mode>"
22622  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22623	(unspec:VI248_AVX512VL
22624	  [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22625	   (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22626	   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22627	  UNSPEC_VPSHRDV))]
22628  "TARGET_AVX512VBMI2"
22629  "vpshrdv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
22630  [(set_attr "prefix" "evex")
22631   (set_attr "mode" "<sseinsnmode>")])
22632
22633(define_insn "vpshrdv_<mode>_mask"
22634  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22635	(vec_merge:VI248_AVX512VL
22636	  (unspec:VI248_AVX512VL
22637	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22638	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22639	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22640	    UNSPEC_VPSHRDV)
22641	  (match_dup 1)
22642	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22643  "TARGET_AVX512VBMI2"
22644  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22645  [(set_attr "prefix" "evex")
22646   (set_attr "mode" "<sseinsnmode>")])
22647
22648(define_expand "vpshrdv_<mode>_maskz"
22649  [(match_operand:VI248_AVX512VL 0 "register_operand")
22650   (match_operand:VI248_AVX512VL 1 "register_operand")
22651   (match_operand:VI248_AVX512VL 2 "register_operand")
22652   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22653   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22654  "TARGET_AVX512VBMI2"
22655{
22656  emit_insn (gen_vpshrdv_<mode>_maskz_1 (operands[0], operands[1],
22657					 operands[2], operands[3],
22658					 CONST0_RTX (<MODE>mode),
22659						     operands[4]));
22660  DONE;
22661})
22662
22663(define_insn "vpshrdv_<mode>_maskz_1"
22664  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22665	(vec_merge:VI248_AVX512VL
22666	  (unspec:VI248_AVX512VL
22667	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22668	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22669	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22670	    UNSPEC_VPSHRDV)
22671	  (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22672	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22673  "TARGET_AVX512VBMI2"
22674  "vpshrdv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
22675  [(set_attr "prefix" "evex")
22676   (set_attr "mode" "<sseinsnmode>")])
22677
22678(define_insn "vpshldv_<mode>"
22679  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22680	(unspec:VI248_AVX512VL
22681	  [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22682	   (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22683	   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22684	  UNSPEC_VPSHLDV))]
22685  "TARGET_AVX512VBMI2"
22686  "vpshldv<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}"
22687  [(set_attr "prefix" "evex")
22688   (set_attr "mode" "<sseinsnmode>")])
22689
22690(define_insn "vpshldv_<mode>_mask"
22691  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22692	(vec_merge:VI248_AVX512VL
22693	  (unspec:VI248_AVX512VL
22694	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22695	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22696	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22697	    UNSPEC_VPSHLDV)
22698	  (match_dup 1)
22699	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22700  "TARGET_AVX512VBMI2"
22701  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22702  [(set_attr "prefix" "evex")
22703   (set_attr "mode" "<sseinsnmode>")])
22704
22705(define_expand "vpshldv_<mode>_maskz"
22706  [(match_operand:VI248_AVX512VL 0 "register_operand")
22707   (match_operand:VI248_AVX512VL 1 "register_operand")
22708   (match_operand:VI248_AVX512VL 2 "register_operand")
22709   (match_operand:VI248_AVX512VL 3 "nonimmediate_operand")
22710   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22711  "TARGET_AVX512VBMI2"
22712{
22713  emit_insn (gen_vpshldv_<mode>_maskz_1 (operands[0], operands[1],
22714					 operands[2], operands[3],
22715					 CONST0_RTX (<MODE>mode),
22716						     operands[4]));
22717  DONE;
22718})
22719
22720(define_insn "vpshldv_<mode>_maskz_1"
22721  [(set (match_operand:VI248_AVX512VL 0 "register_operand" "=v")
22722	(vec_merge:VI248_AVX512VL
22723	  (unspec:VI248_AVX512VL
22724	    [(match_operand:VI248_AVX512VL 1 "register_operand" "0")
22725	     (match_operand:VI248_AVX512VL 2 "register_operand" "v")
22726	     (match_operand:VI248_AVX512VL 3 "nonimmediate_operand" "vm")]
22727	    UNSPEC_VPSHLDV)
22728	  (match_operand:VI248_AVX512VL 4 "const0_operand" "C")
22729	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22730  "TARGET_AVX512VBMI2"
22731  "vpshldv<ssemodesuffix>\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
22732  [(set_attr "prefix" "evex")
22733   (set_attr "mode" "<sseinsnmode>")])
22734
22735(define_insn "vpdpbusd_<mode>"
22736  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22737	(unspec:VI4_AVX512VL
22738	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22739	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22740	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22741	  UNSPEC_VPMADDUBSWACCD))]
22742  "TARGET_AVX512VNNI"
22743  "vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
22744  [(set_attr "prefix" "evex")])
22745
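;; Illustration only: vpdpbusd multiplies each group of four unsigned bytes
;; of one source with the corresponding signed bytes of the other, sums the
;; four products and accumulates the sum into the matching 32-bit lane of
;; the destination.  Sketch assuming the usual <immintrin.h> intrinsic name:
;;
;;   #include <immintrin.h>
;;   __m512i
;;   dot_accumulate (__m512i acc, __m512i u8, __m512i s8)
;;   {
;;     /* acc[i] += sum over j = 0..3 of u8[4*i+j] * s8[4*i+j].  */
;;     return _mm512_dpbusd_epi32 (acc, u8, s8);
;;   }
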
22746(define_insn "vpdpbusd_<mode>_mask"
22747  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22748	(vec_merge:VI4_AVX512VL
22749	  (unspec:VI4_AVX512VL
22750	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22751	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22752	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22753	    UNSPEC_VPMADDUBSWACCD)
22754	  (match_dup 1)
22755	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
22756  "TARGET_AVX512VNNI"
22757  "vpdpbusd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
22758  [(set_attr "prefix" "evex")])
22759
22760(define_expand "vpdpbusd_<mode>_maskz"
22761  [(match_operand:VI4_AVX512VL 0 "register_operand")
22762   (match_operand:VI4_AVX512VL 1 "register_operand")
22763   (match_operand:VI4_AVX512VL 2 "register_operand")
22764   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22765   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22766  "TARGET_AVX512VNNI"
22767{
22768  emit_insn (gen_vpdpbusd_<mode>_maskz_1 (operands[0], operands[1],
22769					  operands[2], operands[3],
22770					  CONST0_RTX (<MODE>mode),
22771						      operands[4]));
22772  DONE;
22773})
22774
22775(define_insn "vpdpbusd_<mode>_maskz_1"
22776  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22777	(vec_merge:VI4_AVX512VL
22778	  (unspec:VI4_AVX512VL
22779	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22780	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22781	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
22782	    ] UNSPEC_VPMADDUBSWACCD)
22783	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
22784	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
22785  "TARGET_AVX512VNNI"
22786  "vpdpbusd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
22787  [(set_attr "prefix" "evex")])
22788
22789
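;; vpdpbusds performs the same operation with signed saturation of the
;; doubleword accumulation.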
22790(define_insn "vpdpbusds_<mode>"
22791  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22792	(unspec:VI4_AVX512VL
22793	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22794	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22795	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDUBSWACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
22800
22801(define_insn "vpdpbusds_<mode>_mask"
22802  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22803	(vec_merge:VI4_AVX512VL
22804	  (unspec:VI4_AVX512VL
22805	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22806	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22807	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22808	    UNSPEC_VPMADDUBSWACCSSD)
22809	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22814
22815(define_expand "vpdpbusds_<mode>_maskz"
22816  [(match_operand:VI4_AVX512VL 0 "register_operand")
22817   (match_operand:VI4_AVX512VL 1 "register_operand")
22818   (match_operand:VI4_AVX512VL 2 "register_operand")
22819   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22820   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22821  "TARGET_AVX512VNNI"
22822{
22823  emit_insn (gen_vpdpbusds_<mode>_maskz_1 (operands[0], operands[1],
22824					   operands[2], operands[3],
22825					   CONST0_RTX (<MODE>mode),
					   operands[4]));
22827  DONE;
22828})
22829
22830(define_insn "vpdpbusds_<mode>_maskz_1"
22831  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22832	(vec_merge:VI4_AVX512VL
22833	  (unspec:VI4_AVX512VL
22834	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22835	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22836	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22837	    UNSPEC_VPMADDUBSWACCSSD)
22838	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpbusds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22843
22844
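;; vpdpwssd multiplies pairs of adjacent signed words of operands 2 and 3
;; and accumulates the doubleword sums into operand 1.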
22845(define_insn "vpdpwssd_<mode>"
22846  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22847	(unspec:VI4_AVX512VL
22848	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22849	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22850	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCD))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
22855
22856(define_insn "vpdpwssd_<mode>_mask"
22857  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22858	(vec_merge:VI4_AVX512VL
22859	  (unspec:VI4_AVX512VL
22860	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22861	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22862	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22863	    UNSPEC_VPMADDWDACCD)
22864	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22869
22870(define_expand "vpdpwssd_<mode>_maskz"
22871  [(match_operand:VI4_AVX512VL 0 "register_operand")
22872   (match_operand:VI4_AVX512VL 1 "register_operand")
22873   (match_operand:VI4_AVX512VL 2 "register_operand")
22874   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22875   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22876  "TARGET_AVX512VNNI"
22877{
22878  emit_insn (gen_vpdpwssd_<mode>_maskz_1 (operands[0], operands[1],
22879					  operands[2], operands[3],
22880					  CONST0_RTX (<MODE>mode),
					  operands[4]));
22882  DONE;
22883})
22884
22885(define_insn "vpdpwssd_<mode>_maskz_1"
22886  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22887	(vec_merge:VI4_AVX512VL
22888	  (unspec:VI4_AVX512VL
22889	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22890	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22891	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22892	    UNSPEC_VPMADDWDACCD)
22893	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssd\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22898
22899
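;; vpdpwssds is the saturating form of vpdpwssd.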
22900(define_insn "vpdpwssds_<mode>"
22901  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22902	(unspec:VI4_AVX512VL
22903	  [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22904	   (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22905	   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
	  UNSPEC_VPMADDWDACCSSD))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
  [(set_attr "prefix" "evex")])
22910
22911(define_insn "vpdpwssds_<mode>_mask"
22912  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22913	(vec_merge:VI4_AVX512VL
22914	  (unspec:VI4_AVX512VL
22915	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22916	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22917	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22918	    UNSPEC_VPMADDWDACCSSD)
22919	  (match_dup 1)
	  (match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22924
22925(define_expand "vpdpwssds_<mode>_maskz"
22926  [(match_operand:VI4_AVX512VL 0 "register_operand")
22927   (match_operand:VI4_AVX512VL 1 "register_operand")
22928   (match_operand:VI4_AVX512VL 2 "register_operand")
22929   (match_operand:VI4_AVX512VL 3 "nonimmediate_operand")
22930   (match_operand:<avx512fmaskmode> 4 "register_operand")]
22931  "TARGET_AVX512VNNI"
22932{
22933  emit_insn (gen_vpdpwssds_<mode>_maskz_1 (operands[0], operands[1],
22934					   operands[2], operands[3],
22935					   CONST0_RTX (<MODE>mode),
					   operands[4]));
22937  DONE;
22938})
22939
22940(define_insn "vpdpwssds_<mode>_maskz_1"
22941  [(set (match_operand:VI4_AVX512VL 0 "register_operand" "=v")
22942	(vec_merge:VI4_AVX512VL
22943	  (unspec:VI4_AVX512VL
22944	    [(match_operand:VI4_AVX512VL 1 "register_operand" "0")
22945	     (match_operand:VI4_AVX512VL 2 "register_operand" "v")
22946	     (match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
22947	    UNSPEC_VPMADDWDACCSSD)
22948	  (match_operand:VI4_AVX512VL 4 "const0_operand" "C")
	  (match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
  "TARGET_AVX512VNNI"
  "vpdpwssds\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}"
  [(set_attr "prefix" "evex")])
22953
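;; VAES: AES encryption/decryption rounds applied independently to each
;; 128-bit lane of the vector operands.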
22954(define_insn "vaesdec_<mode>"
22955  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22956	(unspec:VI1_AVX512VL_F
22957	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22958	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22959	  UNSPEC_VAESDEC))]
22960  "TARGET_VAES"
22961  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
22962)
22963
22964(define_insn "vaesdeclast_<mode>"
22965  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22966	(unspec:VI1_AVX512VL_F
22967	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22968	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22969	  UNSPEC_VAESDECLAST))]
22970  "TARGET_VAES"
22971  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
22972)
22973
22974(define_insn "vaesenc_<mode>"
22975  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22976	(unspec:VI1_AVX512VL_F
22977	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22978	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22979	  UNSPEC_VAESENC))]
22980  "TARGET_VAES"
22981  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
22982)
22983
22984(define_insn "vaesenclast_<mode>"
22985  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
22986	(unspec:VI1_AVX512VL_F
22987	  [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
22988	   (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
22989	  UNSPEC_VAESENCLAST))]
22990  "TARGET_VAES"
22991  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
22992)
22993
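;; VPCLMULQDQ: carry-less multiplication of the quadwords selected by the
;; immediate, performed within each 128-bit lane.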
22994(define_insn "vpclmulqdq_<mode>"
22995  [(set (match_operand:VI8_FVL 0 "register_operand" "=v")
22996	(unspec:VI8_FVL [(match_operand:VI8_FVL 1 "register_operand" "v")
22997			 (match_operand:VI8_FVL 2 "vector_operand" "vm")
22998			 (match_operand:SI 3 "const_0_to_255_operand" "n")]
22999			UNSPEC_VPCLMULQDQ))]
23000  "TARGET_VPCLMULQDQ"
23001  "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
23002  [(set_attr "mode" "DI")])
23003
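;; AVX512BITALG bit shuffle: each byte of operand 2 selects, by its low six
;; bits, one bit from the corresponding 64-bit lane of operand 1; the
;; selected bits are gathered into the destination mask register.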
23004(define_insn "avx512vl_vpshufbitqmb<mode><mask_scalar_merge_name>"
23005  [(set (match_operand:<avx512fmaskmode> 0 "register_operand" "=k")
23006	(unspec:<avx512fmaskmode>
23007	  [(match_operand:VI1_AVX512VLBW 1 "register_operand" "v")
23008	   (match_operand:VI1_AVX512VLBW 2 "nonimmediate_operand" "vm")]
23009	  UNSPEC_VPSHUFBIT))]
23010  "TARGET_AVX512BITALG"
23011  "vpshufbitqmb\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"
23012  [(set_attr "prefix" "evex")
23013   (set_attr "mode" "<sseinsnmode>")])
23014
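;; AVX512VP2INTERSECT.  vp2intersect{d,q} produces a pair of mask registers:
;; bit i of the first mask is set when element i of operand 1 equals some
;; element of operand 2, and bit j of the second mask is set when element j
;; of operand 2 equals some element of operand 1.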
23015(define_mode_iterator VI48_AVX512VP2VL
23016  [V8DI
   (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")
   (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")])
23019
23020(define_mode_iterator MASK_DWI [P2QI P2HI])
23021
23022(define_expand "mov<mode>"
23023  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand")
23024	(match_operand:MASK_DWI 1 "nonimmediate_operand"))]
23025  "TARGET_AVX512VP2INTERSECT"
23026{
23027  if (MEM_P (operands[0]) && MEM_P (operands[1]))
23028    operands[1] = force_reg (<MODE>mode, operands[1]);
23029})
23030
23031(define_insn_and_split "*mov<mode>_internal"
23032  [(set (match_operand:MASK_DWI 0 "nonimmediate_operand" "=k,o")
23033	(match_operand:MASK_DWI 1 "nonimmediate_operand" "ko,k"))]
23034  "TARGET_AVX512VP2INTERSECT
23035   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
23036  "#"
23037  "&& reload_completed"
23038  [(set (match_dup 0) (match_dup 1))
23039   (set (match_dup 2) (match_dup 3))]
23040{
23041  split_double_mode (<MODE>mode, &operands[0], 2, &operands[0], &operands[2]);
23042})
23043
23044(define_insn "avx512vp2intersect_2intersect<mode>"
23045  [(set (match_operand:P2QI 0 "register_operand" "=k")
23046	(unspec:P2QI
23047	  [(match_operand:VI48_AVX512VP2VL 1 "register_operand" "v")
23048	   (match_operand:VI48_AVX512VP2VL 2 "vector_operand" "vm")]
23049	  UNSPEC_VP2INTERSECT))]
23050  "TARGET_AVX512VP2INTERSECT"
23051  "vp2intersect<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
23053
23054(define_insn "avx512vp2intersect_2intersectv16si"
23055  [(set (match_operand:P2HI 0 "register_operand" "=k")
23056	(unspec:P2HI [(match_operand:V16SI 1 "register_operand" "v")
23057		      (match_operand:V16SI 2 "vector_operand" "vm")]
23058		UNSPEC_VP2INTERSECT))]
23059  "TARGET_AVX512VP2INTERSECT"
23060  "vp2intersectd\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "prefix" "evex")])
23062
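;; AVX512BF16: bfloat16 conversions (vcvtne2ps2bf16, vcvtneps2bf16) and the
;; bfloat16 pair dot product accumulated into packed single (vdpbf16ps).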
(define_mode_iterator BF16
  [V32HI (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
23064;; Converting from BF to SF
23065(define_mode_attr bf16_cvt_2sf
23066  [(V32HI  "V16SF") (V16HI  "V8SF") (V8HI  "V4SF")])
23067;; Converting from SF to BF
23068(define_mode_attr sf_cvt_bf16
23069  [(V4SF  "V8HI") (V8SF  "V8HI") (V16SF  "V16HI")])
;; Mapping from an SF vector mode to the BF16 vector mode of the same size
;; (twice the number of elements), used for the vdpbf16ps source operands.
23071(define_mode_attr sf_bf16
23072  [(V4SF  "V8HI") (V8SF  "V16HI") (V16SF  "V32HI")])
23073
23074(define_expand "avx512f_cvtne2ps2bf16_<mode>_maskz"
23075  [(match_operand:BF16 0 "register_operand")
23076   (match_operand:<bf16_cvt_2sf> 1 "register_operand")
23077   (match_operand:<bf16_cvt_2sf> 2 "register_operand")
23078   (match_operand:<avx512fmaskmode> 3 "register_operand")]
23079  "TARGET_AVX512BF16"
23080{
  emit_insn (gen_avx512f_cvtne2ps2bf16_<mode>_mask (operands[0], operands[1],
	     operands[2], CONST0_RTX (<MODE>mode), operands[3]));
23083  DONE;
23084})
23085
23086(define_insn "avx512f_cvtne2ps2bf16_<mode><mask_name>"
23087  [(set (match_operand:BF16 0 "register_operand" "=v")
23088	(unspec:BF16
23089	  [(match_operand:<bf16_cvt_2sf> 1 "register_operand" "v")
23090	   (match_operand:<bf16_cvt_2sf> 2 "register_operand" "v")]
	  UNSPEC_VCVTNE2PS2BF16))]
23092  "TARGET_AVX512BF16"
23093  "vcvtne2ps2bf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}")
23094
23095(define_expand "avx512f_cvtneps2bf16_<mode>_maskz"
23096  [(match_operand:<sf_cvt_bf16> 0 "register_operand")
23097   (match_operand:VF1_AVX512VL 1 "register_operand")
23098   (match_operand:<avx512fmaskmode> 2 "register_operand")]
23099  "TARGET_AVX512BF16"
23100{
  emit_insn (gen_avx512f_cvtneps2bf16_<mode>_mask (operands[0], operands[1],
	     CONST0_RTX (<sf_cvt_bf16>mode), operands[2]));
23103  DONE;
23104})
23105
23106(define_insn "avx512f_cvtneps2bf16_<mode><mask_name>"
23107  [(set (match_operand:<sf_cvt_bf16> 0 "register_operand" "=v")
23108	(unspec:<sf_cvt_bf16>
23109	  [(match_operand:VF1_AVX512VL 1 "register_operand" "v")]
	  UNSPEC_VCVTNEPS2BF16))]
23111  "TARGET_AVX512BF16"
23112  "vcvtneps2bf16\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}")
23113
23114(define_expand "avx512f_dpbf16ps_<mode>_maskz"
23115  [(match_operand:VF1_AVX512VL 0 "register_operand")
23116   (match_operand:VF1_AVX512VL 1 "register_operand")
23117   (match_operand:<sf_bf16> 2 "register_operand")
23118   (match_operand:<sf_bf16> 3 "register_operand")
23119   (match_operand:<avx512fmaskhalfmode> 4 "register_operand")]
23120  "TARGET_AVX512BF16"
23121{
  emit_insn (gen_avx512f_dpbf16ps_<mode>_maskz_1 (operands[0], operands[1],
	     operands[2], operands[3], CONST0_RTX (<MODE>mode), operands[4]));
23124  DONE;
23125})
23126
23127(define_insn "avx512f_dpbf16ps_<mode><maskz_half_name>"
23128  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
23129	(unspec:VF1_AVX512VL
23130	  [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
23131	   (match_operand:<sf_bf16> 2 "register_operand" "v")
23132	   (match_operand:<sf_bf16> 3 "register_operand" "v")]
	  UNSPEC_VDPBF16PS))]
23134  "TARGET_AVX512BF16"
23135  "vdpbf16ps\t{%3, %2, %0<maskz_half_operand4>|%0<maskz_half_operand4>, %2, %3}")
23136
23137(define_insn "avx512f_dpbf16ps_<mode>_mask"
23138  [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
23139	(vec_merge:VF1_AVX512VL
23140	  (unspec:VF1_AVX512VL
23141	    [(match_operand:VF1_AVX512VL 1 "register_operand" "0")
23142	     (match_operand:<sf_bf16> 2 "register_operand" "v")
23143	     (match_operand:<sf_bf16> 3 "register_operand" "v")]
	    UNSPEC_VDPBF16PS)
	  (match_dup 1)
	  (match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
23147  "TARGET_AVX512BF16"
23148  "vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
23149