/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "hashtab.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "optabs.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "hash-map.h"
#include "is-a.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "cfgloop.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"
#include "debug.h"
#include "sched-int.h"
#include "sbitmap.h"
#include "fibheap.h"
#include "opts.h"
#include "diagnostic.h"
#include "dumpfile.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "tree-chkp.h"
#include "rtl-chkp.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
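
/* For example, MODE_INDEX (SImode) == 2 and MODE_INDEX (TImode) == 4
   ("other"), selecting the corresponding entries in the per-mode multiply
   and divide cost arrays of the processor_costs tables below.  */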

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
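/* Under that assumption, COSTS_N_BYTES (2) == COSTS_N_INSNS (1): a two-byte
   addition is charged the same as one generic instruction.  */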

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
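/* DUMMY_STRINGOP_ALGS fills the second (64-bit) slot of the memcpy/memset
   tables below for processors that never execute 64-bit code, so its plain
   libcall fallback should never actually be selected.  */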

static stringop_algs ix86_size_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
static stringop_algs ix86_size_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/*				 HI */
   COSTS_N_BYTES (3),			/*				 SI */
   COSTS_N_BYTES (3),			/*				 DI */
   COSTS_N_BYTES (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/*			    HI */
   COSTS_N_BYTES (3),			/*			    SI */
   COSTS_N_BYTES (3),			/*			    DI */
   COSTS_N_BYTES (5)},			/*			    other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,				     /* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  ix86_size_memcpy,
  ix86_size_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static stringop_algs i386_memcpy[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i386_memset[2] = {
  {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/*				 HI */
   COSTS_N_INSNS (6),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (23),			/*			    SI */
   COSTS_N_INSNS (23),			/*			    DI */
   COSTS_N_INSNS (23)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache  */
  0,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  i386_memcpy,
  i386_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs i486_memcpy[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs i486_memset[2] = {
  {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/*				 HI */
   COSTS_N_INSNS (12),			/*				 SI */
   COSTS_N_INSNS (12),			/*				 DI */
   COSTS_N_INSNS (12)},			/*			      other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/*			    HI */
   COSTS_N_INSNS (40),			/*			    SI */
   COSTS_N_INSNS (40),			/*			    DI */
   COSTS_N_INSNS (40)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  i486_memcpy,
  i486_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs pentium_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentium_memset[2] = {
  {libcall, {{-1, rep_prefix_4_byte, false}}},
  DUMMY_STRINGOP_ALGS};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/*				 HI */
   COSTS_N_INSNS (11),			/*				 SI */
   COSTS_N_INSNS (11),			/*				 DI */
   COSTS_N_INSNS (11)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/*			    HI */
   COSTS_N_INSNS (25),			/*			    SI */
   COSTS_N_INSNS (25),			/*			    DI */
   COSTS_N_INSNS (25)},			/*			    other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,				     /* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache  */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  pentium_memcpy,
  pentium_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
static stringop_algs pentiumpro_memcpy[2] = {
  {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, rep_prefix_1_byte, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs pentiumpro_memset[2] = {
  {rep_prefix_4_byte, {{1024, unrolled_loop, false},
                       {8192, rep_prefix_4_byte, false},
                       {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (4)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/*			    HI */
   COSTS_N_INSNS (17),			/*			    SI */
   COSTS_N_INSNS (17),			/*			    DI */
   COSTS_N_INSNS (17)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,				     /* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache  */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  pentiumpro_memcpy,
  pentiumpro_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs geode_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs geode_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (7),			/*				 SI */
   COSTS_N_INSNS (7),			/*				 DI */
   COSTS_N_INSNS (7)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/*			    HI */
   COSTS_N_INSNS (39),			/*			    SI */
   COSTS_N_INSNS (39),			/*			    DI */
   COSTS_N_INSNS (39)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  1,				     /* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  geode_memcpy,
  geode_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

static stringop_algs k6_memcpy[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs k6_memset[2] = {
  {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (3),			/*				 DI */
   COSTS_N_INSNS (3)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/*			    HI */
   COSTS_N_INSNS (18),			/*			    SI */
   COSTS_N_INSNS (18),			/*			    DI */
   COSTS_N_INSNS (18)},			/*			    other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,				     /* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  k6_memcpy,
  k6_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* For some reason, Athlon deals better with the REP prefix (relative to
   loops) than K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
static stringop_algs athlon_memcpy[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static stringop_algs athlon_memset[2] = {
  {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  DUMMY_STRINGOP_ALGS};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/*				 HI */
   COSTS_N_INSNS (5),			/*				 SI */
   COSTS_N_INSNS (5),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  athlon_memcpy,
  athlon_memset,
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* K8 has an optimized REP instruction for medium-sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, a libcall can
   do nontemporal accesses and beat inlined code considerably.  */
static stringop_algs k8_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs k8_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false},
             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/*			    HI */
   COSTS_N_INSNS (42),			/*			    SI */
   COSTS_N_INSNS (74),			/*			    DI */
   COSTS_N_INSNS (74)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  k8_memcpy,
  k8_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};

/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inlined code considerably.  */
static stringop_algs amdfam10_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs amdfam10_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (3),			/*				 SI */
   COSTS_N_INSNS (4),			/*				 DI */
   COSTS_N_INSNS (5)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  					/* On K8:
  					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */

  amdfam10_memcpy,
  amdfam10_memset,
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/*  BDVER1 has an optimized REP instruction for medium-sized blocks, but for
    very small blocks it is better to use a loop.  For large blocks, a
    libcall can do nontemporal accesses and beat inlined code considerably.  */
static stringop_algs bdver1_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver1_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver1_memcpy,
  bdver1_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/*  BDVER2 has an optimized REP instruction for medium-sized blocks, but for
    very small blocks it is better to use a loop.  For large blocks, a
    libcall can do nontemporal accesses and beat inlined code considerably.  */

static stringop_algs bdver2_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver2_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};

const struct processor_costs bdver2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  					/* On K8:
					    MOVD reg64, xmmreg Double FSTORE 4
					    MOVD reg32, xmmreg Double FSTORE 4
					   On AMDFAM10:
					    MOVD reg64, xmmreg Double FADD 3
							       1/1  1/1
					    MOVD reg32, xmmreg Double FADD 3
							       1/1  1/1 */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver2_memcpy,
  bdver2_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};


/*  BDVER3 has an optimized REP instruction for medium-sized blocks, but for
    very small blocks it is better to use a loop.  For large blocks, a
    libcall can do nontemporal accesses and beat inlined code considerably.  */
static stringop_algs bdver3_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver3_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver3_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver3_memcpy,
  bdver3_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/*  BDVER4 has an optimized REP instruction for medium-sized blocks, but for
    very small blocks it is better to use a loop.  For large blocks, a
    libcall can do nontemporal accesses and beat inlined code considerably.  */
static stringop_algs bdver4_memcpy[2] = {
  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
             {-1, rep_prefix_4_byte, false}}},
  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
static stringop_algs bdver4_memset[2] = {
  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
             {-1, libcall, false}}}};
struct processor_costs bdver4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/*				 HI */
   COSTS_N_INSNS (4),			/*				 SI */
   COSTS_N_INSNS (6),			/*				 DI */
   COSTS_N_INSNS (6)},			/*			      other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/*			    HI */
   COSTS_N_INSNS (51),			/*			    SI */
   COSTS_N_INSNS (83),			/*			    DI */
   COSTS_N_INSNS (83)},			/*			    other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,				     /* cost for loading QImode using movzbl */
  {5, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {5, 5, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 4},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  16,					/* size of l1 cache.  */
  2048,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  2,					/* Branch cost */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (6),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (42),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (52),			/* cost of FSQRT instruction.  */

  bdver4_memcpy,
  bdver4_memset,
  6,					/* scalar_stmt_cost.  */
  4,					/* scalar_load_cost.  */
  4,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  4,					/* vec_align_load_cost.  */
  4,					/* vec_unalign_load_cost.  */
  4,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
1301
/* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat the inline code considerably.  */
1305static stringop_algs btver1_memcpy[2] = {
1306  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1307             {-1, rep_prefix_4_byte, false}}},
1308  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1309             {-1, libcall, false}}}};
1310static stringop_algs btver1_memset[2] = {
1311  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1312             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1313  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1314             {-1, libcall, false}}}};
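
/* Illustrative reading of the stringop tables above (a sketch, not code
   copied from elsewhere in this file): each stringop_algs pair holds the
   32-bit strategy in element [0] and the 64-bit strategy in element [1];
   the leading libcall is the fallback when the block size is not known at
   compile time.  Within a strategy, each {max, alg, noalign} entry applies
   to block sizes up to MAX, and MAX == -1 covers all larger sizes.  A
   consumer would pick an algorithm roughly like this:

     for (i = 0; i < MAX_STRINGOP_ALGS; i++)
       if (algs->size[i].max == -1 || size <= algs->size[i].max)
         return algs->size[i].alg;

   The real selection logic lives in the string-operation expansion code
   later in this file.  */
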
1315const struct processor_costs btver1_cost = {
1316  COSTS_N_INSNS (1),			/* cost of an add instruction */
1317  COSTS_N_INSNS (2),			/* cost of a lea instruction */
1318  COSTS_N_INSNS (1),			/* variable shift costs */
1319  COSTS_N_INSNS (1),			/* constant shift costs */
1320  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1321   COSTS_N_INSNS (4),			/*				 HI */
1322   COSTS_N_INSNS (3),			/*				 SI */
1323   COSTS_N_INSNS (4),			/*				 DI */
1324   COSTS_N_INSNS (5)},			/*			      other */
1325  0,					/* cost of multiply per each bit set */
1326  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
1327   COSTS_N_INSNS (35),			/*			    HI */
1328   COSTS_N_INSNS (51),			/*			    SI */
1329   COSTS_N_INSNS (83),			/*			    DI */
1330   COSTS_N_INSNS (83)},			/*			    other */
1331  COSTS_N_INSNS (1),			/* cost of movsx */
1332  COSTS_N_INSNS (1),			/* cost of movzx */
1333  8,					/* "large" insn */
1334  9,					/* MOVE_RATIO */
1335  4,				     /* cost for loading QImode using movzbl */
1336  {3, 4, 3},				/* cost of loading integer registers
1337					   in QImode, HImode and SImode.
1338					   Relative to reg-reg move (2).  */
1339  {3, 4, 3},				/* cost of storing integer registers */
1340  4,					/* cost of reg,reg fld/fst */
1341  {4, 4, 12},				/* cost of loading fp registers
1342					   in SFmode, DFmode and XFmode */
1343  {6, 6, 8},				/* cost of storing fp registers
1344					   in SFmode, DFmode and XFmode */
1345  2,					/* cost of moving MMX register */
1346  {3, 3},				/* cost of loading MMX registers
1347					   in SImode and DImode */
1348  {4, 4},				/* cost of storing MMX registers
1349					   in SImode and DImode */
1350  2,					/* cost of moving SSE register */
1351  {4, 4, 3},				/* cost of loading SSE registers
1352					   in SImode, DImode and TImode */
1353  {4, 4, 5},				/* cost of storing SSE registers
1354					   in SImode, DImode and TImode */
1355  3,					/* MMX or SSE register to integer */
1356					/* On K8:
1357					   MOVD reg64, xmmreg Double FSTORE 4
1358					   MOVD reg32, xmmreg Double FSTORE 4
1359					   On AMDFAM10:
1360					   MOVD reg64, xmmreg Double FADD 3
1361							       1/1  1/1
1362					    MOVD reg32, xmmreg Double FADD 3
1363							       1/1  1/1 */
1364  32,					/* size of l1 cache.  */
1365  512,					/* size of l2 cache.  */
1366  64,					/* size of prefetch block */
1367  100,					/* number of parallel prefetches */
1368  2,					/* Branch cost */
1369  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
1370  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
1371  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
1372  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1373  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1374  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
1375
1376  btver1_memcpy,
1377  btver1_memset,
1378  4,					/* scalar_stmt_cost.  */
1379  2,					/* scalar load_cost.  */
1380  2,					/* scalar_store_cost.  */
1381  6,					/* vec_stmt_cost.  */
1382  0,					/* vec_to_scalar_cost.  */
1383  2,					/* scalar_to_vec_cost.  */
1384  2,					/* vec_align_load_cost.  */
1385  2,					/* vec_unalign_load_cost.  */
1386  2,					/* vec_store_cost.  */
1387  2,					/* cond_taken_branch_cost.  */
1388  1,					/* cond_not_taken_branch_cost.  */
1389};
1390
1391static stringop_algs btver2_memcpy[2] = {
1392  {libcall, {{6, loop, false}, {14, unrolled_loop, false},
1393             {-1, rep_prefix_4_byte, false}}},
1394  {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
1395             {-1, libcall, false}}}};
1396static stringop_algs btver2_memset[2] = {
1397  {libcall, {{8, loop, false}, {24, unrolled_loop, false},
1398             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1399  {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
1400             {-1, libcall, false}}}};
1401const struct processor_costs btver2_cost = {
1402  COSTS_N_INSNS (1),			/* cost of an add instruction */
1403  COSTS_N_INSNS (2),			/* cost of a lea instruction */
1404  COSTS_N_INSNS (1),			/* variable shift costs */
1405  COSTS_N_INSNS (1),			/* constant shift costs */
1406  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1407   COSTS_N_INSNS (4),			/*				 HI */
1408   COSTS_N_INSNS (3),			/*				 SI */
1409   COSTS_N_INSNS (4),			/*				 DI */
1410   COSTS_N_INSNS (5)},			/*			      other */
1411  0,					/* cost of multiply per each bit set */
1412  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
1413   COSTS_N_INSNS (35),			/*			    HI */
1414   COSTS_N_INSNS (51),			/*			    SI */
1415   COSTS_N_INSNS (83),			/*			    DI */
1416   COSTS_N_INSNS (83)},			/*			    other */
1417  COSTS_N_INSNS (1),			/* cost of movsx */
1418  COSTS_N_INSNS (1),			/* cost of movzx */
1419  8,					/* "large" insn */
1420  9,					/* MOVE_RATIO */
1421  4,				     /* cost for loading QImode using movzbl */
1422  {3, 4, 3},				/* cost of loading integer registers
1423					   in QImode, HImode and SImode.
1424					   Relative to reg-reg move (2).  */
1425  {3, 4, 3},				/* cost of storing integer registers */
1426  4,					/* cost of reg,reg fld/fst */
1427  {4, 4, 12},				/* cost of loading fp registers
1428					   in SFmode, DFmode and XFmode */
1429  {6, 6, 8},				/* cost of storing fp registers
1430					   in SFmode, DFmode and XFmode */
1431  2,					/* cost of moving MMX register */
1432  {3, 3},				/* cost of loading MMX registers
1433					   in SImode and DImode */
1434  {4, 4},				/* cost of storing MMX registers
1435					   in SImode and DImode */
1436  2,					/* cost of moving SSE register */
1437  {4, 4, 3},				/* cost of loading SSE registers
1438					   in SImode, DImode and TImode */
1439  {4, 4, 5},				/* cost of storing SSE registers
1440					   in SImode, DImode and TImode */
1441  3,					/* MMX or SSE register to integer */
1442					/* On K8:
1443					   MOVD reg64, xmmreg Double FSTORE 4
1444					   MOVD reg32, xmmreg Double FSTORE 4
1445					   On AMDFAM10:
1446					   MOVD reg64, xmmreg Double FADD 3
1447							       1/1  1/1
1448					    MOVD reg32, xmmreg Double FADD 3
1449							       1/1  1/1 */
1450  32,					/* size of l1 cache.  */
1451  2048,					/* size of l2 cache.  */
1452  64,					/* size of prefetch block */
1453  100,					/* number of parallel prefetches */
1454  2,					/* Branch cost */
1455  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
1456  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
1457  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
1458  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1459  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1460  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
1461  btver2_memcpy,
1462  btver2_memset,
1463  4,					/* scalar_stmt_cost.  */
1464  2,					/* scalar load_cost.  */
1465  2,					/* scalar_store_cost.  */
1466  6,					/* vec_stmt_cost.  */
1467  0,					/* vec_to_scalar_cost.  */
1468  2,					/* scalar_to_vec_cost.  */
1469  2,					/* vec_align_load_cost.  */
1470  2,					/* vec_unalign_load_cost.  */
1471  2,					/* vec_store_cost.  */
1472  2,					/* cond_taken_branch_cost.  */
1473  1,					/* cond_not_taken_branch_cost.  */
1474};
1475
1476static stringop_algs pentium4_memcpy[2] = {
1477  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1478  DUMMY_STRINGOP_ALGS};
1479static stringop_algs pentium4_memset[2] = {
1480  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1481             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1482  DUMMY_STRINGOP_ALGS};
1483
1484static const
1485struct processor_costs pentium4_cost = {
1486  COSTS_N_INSNS (1),			/* cost of an add instruction */
1487  COSTS_N_INSNS (3),			/* cost of a lea instruction */
1488  COSTS_N_INSNS (4),			/* variable shift costs */
1489  COSTS_N_INSNS (4),			/* constant shift costs */
1490  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
1491   COSTS_N_INSNS (15),			/*				 HI */
1492   COSTS_N_INSNS (15),			/*				 SI */
1493   COSTS_N_INSNS (15),			/*				 DI */
1494   COSTS_N_INSNS (15)},			/*			      other */
1495  0,					/* cost of multiply per each bit set */
1496  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
1497   COSTS_N_INSNS (56),			/*			    HI */
1498   COSTS_N_INSNS (56),			/*			    SI */
1499   COSTS_N_INSNS (56),			/*			    DI */
1500   COSTS_N_INSNS (56)},			/*			    other */
1501  COSTS_N_INSNS (1),			/* cost of movsx */
1502  COSTS_N_INSNS (1),			/* cost of movzx */
1503  16,					/* "large" insn */
1504  6,					/* MOVE_RATIO */
1505  2,				     /* cost for loading QImode using movzbl */
1506  {4, 5, 4},				/* cost of loading integer registers
1507					   in QImode, HImode and SImode.
1508					   Relative to reg-reg move (2).  */
1509  {2, 3, 2},				/* cost of storing integer registers */
1510  2,					/* cost of reg,reg fld/fst */
1511  {2, 2, 6},				/* cost of loading fp registers
1512					   in SFmode, DFmode and XFmode */
1513  {4, 4, 6},				/* cost of storing fp registers
1514					   in SFmode, DFmode and XFmode */
1515  2,					/* cost of moving MMX register */
1516  {2, 2},				/* cost of loading MMX registers
1517					   in SImode and DImode */
1518  {2, 2},				/* cost of storing MMX registers
1519					   in SImode and DImode */
1520  12,					/* cost of moving SSE register */
1521  {12, 12, 12},				/* cost of loading SSE registers
1522					   in SImode, DImode and TImode */
1523  {2, 2, 8},				/* cost of storing SSE registers
1524					   in SImode, DImode and TImode */
1525  10,					/* MMX or SSE register to integer */
1526  8,					/* size of l1 cache.  */
1527  256,					/* size of l2 cache.  */
1528  64,					/* size of prefetch block */
1529  6,					/* number of parallel prefetches */
1530  2,					/* Branch cost */
1531  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns.  */
1532  COSTS_N_INSNS (7),			/* cost of FMUL instruction.  */
1533  COSTS_N_INSNS (43),			/* cost of FDIV instruction.  */
1534  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
1535  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
1536  COSTS_N_INSNS (43),			/* cost of FSQRT instruction.  */
1537  pentium4_memcpy,
1538  pentium4_memset,
1539  1,					/* scalar_stmt_cost.  */
1540  1,					/* scalar load_cost.  */
1541  1,					/* scalar_store_cost.  */
1542  1,					/* vec_stmt_cost.  */
1543  1,					/* vec_to_scalar_cost.  */
1544  1,					/* scalar_to_vec_cost.  */
1545  1,					/* vec_align_load_cost.  */
1546  2,					/* vec_unalign_load_cost.  */
1547  1,					/* vec_store_cost.  */
1548  3,					/* cond_taken_branch_cost.  */
1549  1,					/* cond_not_taken_branch_cost.  */
1550};
1551
1552static stringop_algs nocona_memcpy[2] = {
1553  {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
1554  {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
1555             {100000, unrolled_loop, false}, {-1, libcall, false}}}};
1556
1557static stringop_algs nocona_memset[2] = {
1558  {libcall, {{6, loop_1_byte, false}, {48, loop, false},
1559             {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1560  {libcall, {{24, loop, false}, {64, unrolled_loop, false},
1561             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1562
1563static const
1564struct processor_costs nocona_cost = {
1565  COSTS_N_INSNS (1),			/* cost of an add instruction */
1566  COSTS_N_INSNS (1),			/* cost of a lea instruction */
1567  COSTS_N_INSNS (1),			/* variable shift costs */
1568  COSTS_N_INSNS (1),			/* constant shift costs */
1569  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
1570   COSTS_N_INSNS (10),			/*				 HI */
1571   COSTS_N_INSNS (10),			/*				 SI */
1572   COSTS_N_INSNS (10),			/*				 DI */
1573   COSTS_N_INSNS (10)},			/*			      other */
1574  0,					/* cost of multiply per each bit set */
1575  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
1576   COSTS_N_INSNS (66),			/*			    HI */
1577   COSTS_N_INSNS (66),			/*			    SI */
1578   COSTS_N_INSNS (66),			/*			    DI */
1579   COSTS_N_INSNS (66)},			/*			    other */
1580  COSTS_N_INSNS (1),			/* cost of movsx */
1581  COSTS_N_INSNS (1),			/* cost of movzx */
1582  16,					/* "large" insn */
1583  17,					/* MOVE_RATIO */
1584  4,				     /* cost for loading QImode using movzbl */
1585  {4, 4, 4},				/* cost of loading integer registers
1586					   in QImode, HImode and SImode.
1587					   Relative to reg-reg move (2).  */
1588  {4, 4, 4},				/* cost of storing integer registers */
1589  3,					/* cost of reg,reg fld/fst */
1590  {12, 12, 12},				/* cost of loading fp registers
1591					   in SFmode, DFmode and XFmode */
1592  {4, 4, 4},				/* cost of storing fp registers
1593					   in SFmode, DFmode and XFmode */
1594  6,					/* cost of moving MMX register */
1595  {12, 12},				/* cost of loading MMX registers
1596					   in SImode and DImode */
1597  {12, 12},				/* cost of storing MMX registers
1598					   in SImode and DImode */
1599  6,					/* cost of moving SSE register */
1600  {12, 12, 12},				/* cost of loading SSE registers
1601					   in SImode, DImode and TImode */
1602  {12, 12, 12},				/* cost of storing SSE registers
1603					   in SImode, DImode and TImode */
1604  8,					/* MMX or SSE register to integer */
1605  8,					/* size of l1 cache.  */
1606  1024,					/* size of l2 cache.  */
1607  64,					/* size of prefetch block */
1608  8,					/* number of parallel prefetches */
1609  1,					/* Branch cost */
1610  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
1611  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1612  COSTS_N_INSNS (40),			/* cost of FDIV instruction.  */
1613  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
1614  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
1615  COSTS_N_INSNS (44),			/* cost of FSQRT instruction.  */
1616  nocona_memcpy,
1617  nocona_memset,
1618  1,					/* scalar_stmt_cost.  */
1619  1,					/* scalar load_cost.  */
1620  1,					/* scalar_store_cost.  */
1621  1,					/* vec_stmt_cost.  */
1622  1,					/* vec_to_scalar_cost.  */
1623  1,					/* scalar_to_vec_cost.  */
1624  1,					/* vec_align_load_cost.  */
1625  2,					/* vec_unalign_load_cost.  */
1626  1,					/* vec_store_cost.  */
1627  3,					/* cond_taken_branch_cost.  */
1628  1,					/* cond_not_taken_branch_cost.  */
1629};
1630
1631static stringop_algs atom_memcpy[2] = {
1632  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1633  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1634             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1635static stringop_algs atom_memset[2] = {
1636  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1637             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1638  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1639             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1640static const
1641struct processor_costs atom_cost = {
1642  COSTS_N_INSNS (1),			/* cost of an add instruction */
1643  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1644  COSTS_N_INSNS (1),			/* variable shift costs */
1645  COSTS_N_INSNS (1),			/* constant shift costs */
1646  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1647   COSTS_N_INSNS (4),			/*				 HI */
1648   COSTS_N_INSNS (3),			/*				 SI */
1649   COSTS_N_INSNS (4),			/*				 DI */
1650   COSTS_N_INSNS (2)},			/*			      other */
1651  0,					/* cost of multiply per each bit set */
1652  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1653   COSTS_N_INSNS (26),			/*			    HI */
1654   COSTS_N_INSNS (42),			/*			    SI */
1655   COSTS_N_INSNS (74),			/*			    DI */
1656   COSTS_N_INSNS (74)},			/*			    other */
1657  COSTS_N_INSNS (1),			/* cost of movsx */
1658  COSTS_N_INSNS (1),			/* cost of movzx */
1659  8,					/* "large" insn */
1660  17,					/* MOVE_RATIO */
1661  4,					/* cost for loading QImode using movzbl */
1662  {4, 4, 4},				/* cost of loading integer registers
1663					   in QImode, HImode and SImode.
1664					   Relative to reg-reg move (2).  */
1665  {4, 4, 4},				/* cost of storing integer registers */
1666  4,					/* cost of reg,reg fld/fst */
1667  {12, 12, 12},				/* cost of loading fp registers
1668					   in SFmode, DFmode and XFmode */
1669  {6, 6, 8},				/* cost of storing fp registers
1670					   in SFmode, DFmode and XFmode */
1671  2,					/* cost of moving MMX register */
1672  {8, 8},				/* cost of loading MMX registers
1673					   in SImode and DImode */
1674  {8, 8},				/* cost of storing MMX registers
1675					   in SImode and DImode */
1676  2,					/* cost of moving SSE register */
1677  {8, 8, 8},				/* cost of loading SSE registers
1678					   in SImode, DImode and TImode */
1679  {8, 8, 8},				/* cost of storing SSE registers
1680					   in SImode, DImode and TImode */
1681  5,					/* MMX or SSE register to integer */
1682  32,					/* size of l1 cache.  */
1683  256,					/* size of l2 cache.  */
1684  64,					/* size of prefetch block */
1685  6,					/* number of parallel prefetches */
1686  3,					/* Branch cost */
1687  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1688  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1689  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1690  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1691  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1692  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1693  atom_memcpy,
1694  atom_memset,
1695  1,					/* scalar_stmt_cost.  */
1696  1,					/* scalar load_cost.  */
1697  1,					/* scalar_store_cost.  */
1698  1,					/* vec_stmt_cost.  */
1699  1,					/* vec_to_scalar_cost.  */
1700  1,					/* scalar_to_vec_cost.  */
1701  1,					/* vec_align_load_cost.  */
1702  2,					/* vec_unalign_load_cost.  */
1703  1,					/* vec_store_cost.  */
1704  3,					/* cond_taken_branch_cost.  */
1705  1,					/* cond_not_taken_branch_cost.  */
1706};
1707
1708static stringop_algs slm_memcpy[2] = {
1709  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1710  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1711             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1712static stringop_algs slm_memset[2] = {
1713  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1714             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1715  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1716             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1717static const
1718struct processor_costs slm_cost = {
1719  COSTS_N_INSNS (1),			/* cost of an add instruction */
1720  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1721  COSTS_N_INSNS (1),			/* variable shift costs */
1722  COSTS_N_INSNS (1),			/* constant shift costs */
1723  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1724   COSTS_N_INSNS (3),			/*				 HI */
1725   COSTS_N_INSNS (3),			/*				 SI */
1726   COSTS_N_INSNS (4),			/*				 DI */
1727   COSTS_N_INSNS (2)},			/*			      other */
1728  0,					/* cost of multiply per each bit set */
1729  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1730   COSTS_N_INSNS (26),			/*			    HI */
1731   COSTS_N_INSNS (42),			/*			    SI */
1732   COSTS_N_INSNS (74),			/*			    DI */
1733   COSTS_N_INSNS (74)},			/*			    other */
1734  COSTS_N_INSNS (1),			/* cost of movsx */
1735  COSTS_N_INSNS (1),			/* cost of movzx */
1736  8,					/* "large" insn */
1737  17,					/* MOVE_RATIO */
1738  4,					/* cost for loading QImode using movzbl */
1739  {4, 4, 4},				/* cost of loading integer registers
1740					   in QImode, HImode and SImode.
1741					   Relative to reg-reg move (2).  */
1742  {4, 4, 4},				/* cost of storing integer registers */
1743  4,					/* cost of reg,reg fld/fst */
1744  {12, 12, 12},				/* cost of loading fp registers
1745					   in SFmode, DFmode and XFmode */
1746  {6, 6, 8},				/* cost of storing fp registers
1747					   in SFmode, DFmode and XFmode */
1748  2,					/* cost of moving MMX register */
1749  {8, 8},				/* cost of loading MMX registers
1750					   in SImode and DImode */
1751  {8, 8},				/* cost of storing MMX registers
1752					   in SImode and DImode */
1753  2,					/* cost of moving SSE register */
1754  {8, 8, 8},				/* cost of loading SSE registers
1755					   in SImode, DImode and TImode */
1756  {8, 8, 8},				/* cost of storing SSE registers
1757					   in SImode, DImode and TImode */
1758  5,					/* MMX or SSE register to integer */
1759  32,					/* size of l1 cache.  */
1760  256,					/* size of l2 cache.  */
1761  64,					/* size of prefetch block */
1762  6,					/* number of parallel prefetches */
1763  3,					/* Branch cost */
1764  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1765  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1766  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1767  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1768  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1769  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1770  slm_memcpy,
1771  slm_memset,
1772  1,					/* scalar_stmt_cost.  */
1773  1,					/* scalar load_cost.  */
1774  1,					/* scalar_store_cost.  */
1775  1,					/* vec_stmt_cost.  */
1776  4,					/* vec_to_scalar_cost.  */
1777  1,					/* scalar_to_vec_cost.  */
1778  1,					/* vec_align_load_cost.  */
1779  2,					/* vec_unalign_load_cost.  */
1780  1,					/* vec_store_cost.  */
1781  3,					/* cond_taken_branch_cost.  */
1782  1,					/* cond_not_taken_branch_cost.  */
1783};
1784
1785static stringop_algs intel_memcpy[2] = {
1786  {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
1787  {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
1788             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1789static stringop_algs intel_memset[2] = {
1790  {libcall, {{8, loop, false}, {15, unrolled_loop, false},
1791             {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
1792  {libcall, {{24, loop, false}, {32, unrolled_loop, false},
1793             {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
1794static const
1795struct processor_costs intel_cost = {
1796  COSTS_N_INSNS (1),			/* cost of an add instruction */
1797  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1798  COSTS_N_INSNS (1),			/* variable shift costs */
1799  COSTS_N_INSNS (1),			/* constant shift costs */
1800  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1801   COSTS_N_INSNS (3),			/*				 HI */
1802   COSTS_N_INSNS (3),			/*				 SI */
1803   COSTS_N_INSNS (4),			/*				 DI */
1804   COSTS_N_INSNS (2)},			/*			      other */
1805  0,					/* cost of multiply per each bit set */
1806  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1807   COSTS_N_INSNS (26),			/*			    HI */
1808   COSTS_N_INSNS (42),			/*			    SI */
1809   COSTS_N_INSNS (74),			/*			    DI */
1810   COSTS_N_INSNS (74)},			/*			    other */
1811  COSTS_N_INSNS (1),			/* cost of movsx */
1812  COSTS_N_INSNS (1),			/* cost of movzx */
1813  8,					/* "large" insn */
1814  17,					/* MOVE_RATIO */
1815  4,					/* cost for loading QImode using movzbl */
1816  {4, 4, 4},				/* cost of loading integer registers
1817					   in QImode, HImode and SImode.
1818					   Relative to reg-reg move (2).  */
1819  {4, 4, 4},				/* cost of storing integer registers */
1820  4,					/* cost of reg,reg fld/fst */
1821  {12, 12, 12},				/* cost of loading fp registers
1822					   in SFmode, DFmode and XFmode */
1823  {6, 6, 8},				/* cost of storing fp registers
1824					   in SFmode, DFmode and XFmode */
1825  2,					/* cost of moving MMX register */
1826  {8, 8},				/* cost of loading MMX registers
1827					   in SImode and DImode */
1828  {8, 8},				/* cost of storing MMX registers
1829					   in SImode and DImode */
1830  2,					/* cost of moving SSE register */
1831  {8, 8, 8},				/* cost of loading SSE registers
1832					   in SImode, DImode and TImode */
1833  {8, 8, 8},				/* cost of storing SSE registers
1834					   in SImode, DImode and TImode */
1835  5,					/* MMX or SSE register to integer */
1836  32,					/* size of l1 cache.  */
1837  256,					/* size of l2 cache.  */
1838  64,					/* size of prefetch block */
1839  6,					/* number of parallel prefetches */
1840  3,					/* Branch cost */
1841  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1842  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1843  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1844  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1845  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1846  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1847  intel_memcpy,
1848  intel_memset,
1849  1,					/* scalar_stmt_cost.  */
1850  1,					/* scalar load_cost.  */
1851  1,					/* scalar_store_cost.  */
1852  1,					/* vec_stmt_cost.  */
1853  4,					/* vec_to_scalar_cost.  */
1854  1,					/* scalar_to_vec_cost.  */
1855  1,					/* vec_align_load_cost.  */
1856  2,					/* vec_unalign_load_cost.  */
1857  1,					/* vec_store_cost.  */
1858  3,					/* cond_taken_branch_cost.  */
1859  1,					/* cond_not_taken_branch_cost.  */
1860};
1861
/* Generic should produce code tuned for Core i7 and btver1
   (and newer chips).  */
1864
1865static stringop_algs generic_memcpy[2] = {
1866  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1867             {-1, libcall, false}}},
1868  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1869             {-1, libcall, false}}}};
1870static stringop_algs generic_memset[2] = {
1871  {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
1872             {-1, libcall, false}}},
1873  {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
1874             {-1, libcall, false}}}};
1875static const
1876struct processor_costs generic_cost = {
1877  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing a regression on
     several SPECfp benchmarks.  */
1882  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1883  COSTS_N_INSNS (1),			/* variable shift costs */
1884  COSTS_N_INSNS (1),			/* constant shift costs */
1885  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1886   COSTS_N_INSNS (4),			/*				 HI */
1887   COSTS_N_INSNS (3),			/*				 SI */
1888   COSTS_N_INSNS (4),			/*				 DI */
1889   COSTS_N_INSNS (2)},			/*			      other */
1890  0,					/* cost of multiply per each bit set */
1891  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1892   COSTS_N_INSNS (26),			/*			    HI */
1893   COSTS_N_INSNS (42),			/*			    SI */
1894   COSTS_N_INSNS (74),			/*			    DI */
1895   COSTS_N_INSNS (74)},			/*			    other */
1896  COSTS_N_INSNS (1),			/* cost of movsx */
1897  COSTS_N_INSNS (1),			/* cost of movzx */
1898  8,					/* "large" insn */
1899  17,					/* MOVE_RATIO */
1900  4,				     /* cost for loading QImode using movzbl */
1901  {4, 4, 4},				/* cost of loading integer registers
1902					   in QImode, HImode and SImode.
1903					   Relative to reg-reg move (2).  */
1904  {4, 4, 4},				/* cost of storing integer registers */
1905  4,					/* cost of reg,reg fld/fst */
1906  {12, 12, 12},				/* cost of loading fp registers
1907					   in SFmode, DFmode and XFmode */
1908  {6, 6, 8},				/* cost of storing fp registers
1909					   in SFmode, DFmode and XFmode */
1910  2,					/* cost of moving MMX register */
1911  {8, 8},				/* cost of loading MMX registers
1912					   in SImode and DImode */
1913  {8, 8},				/* cost of storing MMX registers
1914					   in SImode and DImode */
1915  2,					/* cost of moving SSE register */
1916  {8, 8, 8},				/* cost of loading SSE registers
1917					   in SImode, DImode and TImode */
1918  {8, 8, 8},				/* cost of storing SSE registers
1919					   in SImode, DImode and TImode */
1920  5,					/* MMX or SSE register to integer */
1921  32,					/* size of l1 cache.  */
1922  512,					/* size of l2 cache.  */
1923  64,					/* size of prefetch block */
1924  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
1927  3,					/* Branch cost */
1928  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
1929  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
1930  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
1931  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
1932  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
1933  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
1934  generic_memcpy,
1935  generic_memset,
1936  1,					/* scalar_stmt_cost.  */
1937  1,					/* scalar load_cost.  */
1938  1,					/* scalar_store_cost.  */
1939  1,					/* vec_stmt_cost.  */
1940  1,					/* vec_to_scalar_cost.  */
1941  1,					/* scalar_to_vec_cost.  */
1942  1,					/* vec_align_load_cost.  */
1943  2,					/* vec_unalign_load_cost.  */
1944  1,					/* vec_store_cost.  */
1945  3,					/* cond_taken_branch_cost.  */
1946  1,					/* cond_not_taken_branch_cost.  */
1947};
1948
/* core_cost should produce code tuned for the Core family of CPUs.  */
1950static stringop_algs core_memcpy[2] = {
1951  {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
1952  {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
1953             {-1, libcall, false}}}};
1954static stringop_algs core_memset[2] = {
1955  {libcall, {{6, loop_1_byte, true},
1956             {24, loop, true},
1957             {8192, rep_prefix_4_byte, true},
1958             {-1, libcall, false}}},
1959  {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
1960             {-1, libcall, false}}}};
1961
1962static const
1963struct processor_costs core_cost = {
1964  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration, lea takes 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing a regression on
     several SPECfp benchmarks.  */
1969  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
1970  COSTS_N_INSNS (1),			/* variable shift costs */
1971  COSTS_N_INSNS (1),			/* constant shift costs */
1972  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
1973   COSTS_N_INSNS (4),			/*				 HI */
1974   COSTS_N_INSNS (3),			/*				 SI */
1975   COSTS_N_INSNS (4),			/*				 DI */
1976   COSTS_N_INSNS (2)},			/*			      other */
1977  0,					/* cost of multiply per each bit set */
1978  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
1979   COSTS_N_INSNS (26),			/*			    HI */
1980   COSTS_N_INSNS (42),			/*			    SI */
1981   COSTS_N_INSNS (74),			/*			    DI */
1982   COSTS_N_INSNS (74)},			/*			    other */
1983  COSTS_N_INSNS (1),			/* cost of movsx */
1984  COSTS_N_INSNS (1),			/* cost of movzx */
1985  8,					/* "large" insn */
1986  17,					/* MOVE_RATIO */
1987  4,				     /* cost for loading QImode using movzbl */
1988  {4, 4, 4},				/* cost of loading integer registers
1989					   in QImode, HImode and SImode.
1990					   Relative to reg-reg move (2).  */
1991  {4, 4, 4},				/* cost of storing integer registers */
1992  4,					/* cost of reg,reg fld/fst */
1993  {12, 12, 12},				/* cost of loading fp registers
1994					   in SFmode, DFmode and XFmode */
1995  {6, 6, 8},				/* cost of storing fp registers
1996					   in SFmode, DFmode and XFmode */
1997  2,					/* cost of moving MMX register */
1998  {8, 8},				/* cost of loading MMX registers
1999					   in SImode and DImode */
2000  {8, 8},				/* cost of storing MMX registers
2001					   in SImode and DImode */
2002  2,					/* cost of moving SSE register */
2003  {8, 8, 8},				/* cost of loading SSE registers
2004					   in SImode, DImode and TImode */
2005  {8, 8, 8},				/* cost of storing SSE registers
2006					   in SImode, DImode and TImode */
2007  5,					/* MMX or SSE register to integer */
2008  64,					/* size of l1 cache.  */
2009  512,					/* size of l2 cache.  */
2010  64,					/* size of prefetch block */
2011  6,					/* number of parallel prefetches */
2012  /* FIXME perhaps more appropriate value is 5.  */
2013  3,					/* Branch cost */
2014  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
2015  COSTS_N_INSNS (8),			/* cost of FMUL instruction.  */
2016  COSTS_N_INSNS (20),			/* cost of FDIV instruction.  */
2017  COSTS_N_INSNS (8),			/* cost of FABS instruction.  */
2018  COSTS_N_INSNS (8),			/* cost of FCHS instruction.  */
2019  COSTS_N_INSNS (40),			/* cost of FSQRT instruction.  */
2020  core_memcpy,
2021  core_memset,
2022  1,					/* scalar_stmt_cost.  */
2023  1,					/* scalar load_cost.  */
2024  1,					/* scalar_store_cost.  */
2025  1,					/* vec_stmt_cost.  */
2026  1,					/* vec_to_scalar_cost.  */
2027  1,					/* scalar_to_vec_cost.  */
2028  1,					/* vec_align_load_cost.  */
2029  2,					/* vec_unalign_load_cost.  */
2030  1,					/* vec_store_cost.  */
2031  3,					/* cond_taken_branch_cost.  */
2032  1,					/* cond_not_taken_branch_cost.  */
2033};
2034
2035
2036/* Set by -mtune.  */
2037const struct processor_costs *ix86_tune_cost = &pentium_cost;
2038
2039/* Set by -mtune or -Os.  */
2040const struct processor_costs *ix86_cost = &pentium_cost;
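
/* Illustrative use of the cost tables (a sketch; the real consumers are the
   RTX cost and vectorizer cost hooks later in this file): queries simply
   dereference the active table, along the lines of

     *total = ix86_cost->add;
     *total = ix86_cost->mult_init[MODE_INDEX (mode)]
              + ix86_cost->mult_bit * nbits;

   The field names mult_init and mult_bit correspond to the "cost of starting
   multiply" and "cost of multiply per each bit set" entries above; NBITS is
   only a placeholder for the number of set bits in a constant multiplier.  */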
2041
2042/* Processor feature/optimization bitmasks.  */
2043#define m_386 (1<<PROCESSOR_I386)
2044#define m_486 (1<<PROCESSOR_I486)
2045#define m_PENT (1<<PROCESSOR_PENTIUM)
2046#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
2047#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
2048#define m_NOCONA (1<<PROCESSOR_NOCONA)
2049#define m_P4_NOCONA (m_PENT4 | m_NOCONA)
2050#define m_CORE2 (1<<PROCESSOR_CORE2)
2051#define m_NEHALEM (1<<PROCESSOR_NEHALEM)
2052#define m_SANDYBRIDGE (1<<PROCESSOR_SANDYBRIDGE)
2053#define m_HASWELL (1<<PROCESSOR_HASWELL)
2054#define m_CORE_ALL (m_CORE2 | m_NEHALEM  | m_SANDYBRIDGE | m_HASWELL)
2055#define m_BONNELL (1<<PROCESSOR_BONNELL)
2056#define m_SILVERMONT (1<<PROCESSOR_SILVERMONT)
2057#define m_KNL (1<<PROCESSOR_KNL)
2058#define m_INTEL (1<<PROCESSOR_INTEL)
2059
2060#define m_GEODE (1<<PROCESSOR_GEODE)
2061#define m_K6 (1<<PROCESSOR_K6)
2062#define m_K6_GEODE (m_K6 | m_GEODE)
2063#define m_K8 (1<<PROCESSOR_K8)
2064#define m_ATHLON (1<<PROCESSOR_ATHLON)
2065#define m_ATHLON_K8 (m_K8 | m_ATHLON)
2066#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
2067#define m_BDVER1 (1<<PROCESSOR_BDVER1)
2068#define m_BDVER2 (1<<PROCESSOR_BDVER2)
2069#define m_BDVER3 (1<<PROCESSOR_BDVER3)
2070#define m_BDVER4 (1<<PROCESSOR_BDVER4)
2071#define m_BTVER1 (1<<PROCESSOR_BTVER1)
2072#define m_BTVER2 (1<<PROCESSOR_BTVER2)
2073#define m_BDVER	(m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4)
2074#define m_BTVER (m_BTVER1 | m_BTVER2)
2075#define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER)
2076
2077#define m_GENERIC (1<<PROCESSOR_GENERIC)
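
/* Illustrative use of the m_* masks above: each DEF_TUNE entry in
   x86-tune.def selects the processors a tuning applies to by OR-ing these
   masks together, e.g.

     DEF_TUNE (X86_TUNE_EXAMPLE_FEATURE, "example_feature",
               m_CORE_ALL | m_BDVER | m_GENERIC)

   where X86_TUNE_EXAMPLE_FEATURE and "example_feature" are made-up names
   used only for illustration.  */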
2078
2079const char* ix86_tune_feature_names[X86_TUNE_LAST] = {
2080#undef DEF_TUNE
2081#define DEF_TUNE(tune, name, selector) name,
2082#include "x86-tune.def"
2083#undef DEF_TUNE
2084};
2085
2086/* Feature tests against the various tunings.  */
2087unsigned char ix86_tune_features[X86_TUNE_LAST];
2088
2089/* Feature tests against the various tunings used to create ix86_tune_features
2090   based on the processor mask.  */
2091static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
2092#undef DEF_TUNE
2093#define DEF_TUNE(tune, name, selector) selector,
2094#include "x86-tune.def"
2095#undef DEF_TUNE
2096};
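
/* How the table above is consumed (a sketch; the actual code is in the
   option-override logic later in this file): the selector masks are folded
   into ix86_tune_features using the bit of the processor chosen by -mtune,
   roughly

     ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   ix86_arch_features below is derived from initial_ix86_arch_features in the
   same way, using the -march processor bit instead.  */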
2097
2098/* Feature tests against the various architecture variations.  */
2099unsigned char ix86_arch_features[X86_ARCH_LAST];
2100
2101/* Feature tests against the various architecture variations, used to create
2102   ix86_arch_features based on the processor mask.  */
2103static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
2104  /* X86_ARCH_CMOV: Conditional move was added for pentiumpro.  */
2105  ~(m_386 | m_486 | m_PENT | m_K6),
2106
2107  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
2108  ~m_386,
2109
2110  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
2111  ~(m_386 | m_486),
2112
2113  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
2114  ~m_386,
2115
2116  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
2117  ~m_386,
2118};
2119
/* If the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
2123#define FAST_PROLOGUE_INSN_COUNT 20
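
/* Illustrative use (an assumption about the frame-layout code later in this
   file, not a quote of it): the estimated insn count per invocation is
   compared against this threshold, roughly

     use_fast_prologue_epilogue = count < FAST_PROLOGUE_INSN_COUNT;

   to decide whether the longer but faster prologue/epilogue sequence is
   worth its size.  */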
2124
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers,
   respectively.  */
2126static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
2127static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
2128static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
2129
2130/* Array of the smallest class containing reg number REGNO, indexed by
2131   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
2132
2133enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
2134{
2135  /* ax, dx, cx, bx */
2136  AREG, DREG, CREG, BREG,
2137  /* si, di, bp, sp */
2138  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
2139  /* FP registers */
2140  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
2141  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
2142  /* arg pointer */
2143  NON_Q_REGS,
2144  /* flags, fpsr, fpcr, frame */
2145  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
2146  /* SSE registers */
2147  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2148  SSE_REGS, SSE_REGS,
2149  /* MMX registers */
2150  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
2151  MMX_REGS, MMX_REGS,
2152  /* REX registers */
2153  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2154  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
2155  /* SSE REX registers */
2156  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
2157  SSE_REGS, SSE_REGS,
2158  /* AVX-512 SSE registers */
2159  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2160  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2161  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2162  EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS, EVEX_SSE_REGS,
2163  /* Mask registers.  */
2164  MASK_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2165  MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS, MASK_EVEX_REGS,
2166  /* MPX bound registers */
2167  BND_REGS, BND_REGS, BND_REGS, BND_REGS,
2168};
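
/* For reference (assumed from the comment above, not copied from i386.h):
   the table is consulted directly by the macro, along the lines of

     #define REGNO_REG_CLASS(REGNO) (regclass_map[(REGNO)])

   so the order of entries must match the hard register numbering exactly.  */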
2169
2170/* The "default" register map used in 32bit mode.  */
2171
2172int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
2173{
2174  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
2175  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
2176  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2177  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
2178  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
2179  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
2180  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31 */
2183  93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
2184  101, 102, 103, 104,			/* bound registers */
2185};
2186
2187/* The "default" register map used in 64bit mode.  */
2188
2189int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
2190{
2191  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
2192  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
2193  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2194  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
2195  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
2197  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
2198  67, 68, 69, 70, 71, 72, 73, 74,       /* AVX-512 registers 16-23 */
2199  75, 76, 77, 78, 79, 80, 81, 82,       /* AVX-512 registers 24-31 */
2200  118, 119, 120, 121, 122, 123, 124, 125, /* Mask registers */
2201  126, 127, 128, 129,			/* bound registers */
2202};
2203
2204/* Define the register numbers to be used in Dwarf debugging information.
2205   The SVR4 reference port C compiler uses the following register numbers
2206   in its Dwarf output code:
2207	0 for %eax (gcc regno = 0)
2208	1 for %ecx (gcc regno = 2)
2209	2 for %edx (gcc regno = 1)
2210	3 for %ebx (gcc regno = 3)
2211	4 for %esp (gcc regno = 7)
2212	5 for %ebp (gcc regno = 6)
2213	6 for %esi (gcc regno = 4)
2214	7 for %edi (gcc regno = 5)
2215   The following three DWARF register numbers are never generated by
2216   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
2217   believes these numbers have these meanings.
2218	8  for %eip    (no gcc equivalent)
2219	9  for %eflags (gcc regno = 17)
2220	10 for %trapno (no gcc equivalent)
2221   It is not at all clear how we should number the FP stack registers
2222   for the x86 architecture.  If the version of SDB on x86/svr4 were
2223   a bit less brain dead with respect to floating-point then we would
2224   have a precedent to follow with respect to DWARF register numbers
2225   for x86 FP registers, but the SDB on x86/svr4 is so completely
2226   broken with respect to FP registers that it is hardly worth thinking
2227   of it as something to strive for compatibility with.
2228   The version of x86/svr4 SDB I have at the moment does (partially)
2229   seem to believe that DWARF register number 11 is associated with
2230   the x86 register %st(0), but that's about all.  Higher DWARF
2231   register numbers don't seem to be associated with anything in
2232   particular, and even for DWARF regno 11, SDB only seems to under-
2233   stand that it should say that a variable lives in %st(0) (when
2234   asked via an `=' command) if we said it was in DWARF regno 11,
2235   but SDB still prints garbage when asked for the value of the
2236   variable in question (via a `/' command).
2237   (Also note that the labels SDB prints for various FP stack regs
2238   when doing an `x' command are all wrong.)
2239   Note that these problems generally don't affect the native SVR4
2240   C compiler because it doesn't allow the use of -O with -g and
2241   because when it is *not* optimizing, it allocates a memory
2242   location for each floating-point variable, and the memory
2243   location is what gets described in the DWARF AT_location
2244   attribute for the variable in question.
2245   Regardless of the severe mental illness of the x86/svr4 SDB, we
2246   do something sensible here and we use the following DWARF
2247   register numbers.  Note that these are all stack-top-relative
2248   numbers.
2249	11 for %st(0) (gcc regno = 8)
2250	12 for %st(1) (gcc regno = 9)
2251	13 for %st(2) (gcc regno = 10)
2252	14 for %st(3) (gcc regno = 11)
2253	15 for %st(4) (gcc regno = 12)
2254	16 for %st(5) (gcc regno = 13)
2255	17 for %st(6) (gcc regno = 14)
2256	18 for %st(7) (gcc regno = 15)
2257*/
2258int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
2259{
2260  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
2261  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
2262  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
2263  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
2264  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
2265  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
2266  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 16-23 */
  -1, -1, -1, -1, -1, -1, -1, -1,       /* AVX-512 registers 24-31 */
2269  93, 94, 95, 96, 97, 98, 99, 100,      /* Mask registers */
2270  101, 102, 103, 104,			/* bound registers */
2271};
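
/* Illustrative use of the register maps above (an assumption about the
   target headers, shown only as an example): debug output selects one of
   the maps depending on target and debug format, along the lines of

     #define DBX_REGISTER_NUMBER(n) \
       (TARGET_64BIT ? dbx64_register_map[n] : svr4_dbx_register_map[n])

   with dbx_register_map used by configurations that keep the older
   non-SVR4 numbering.  */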
2272
2273/* Define parameter passing and return registers.  */
2274
2275static int const x86_64_int_parameter_registers[6] =
2276{
2277  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
2278};
2279
2280static int const x86_64_ms_abi_int_parameter_registers[4] =
2281{
2282  CX_REG, DX_REG, R8_REG, R9_REG
2283};
2284
2285static int const x86_64_int_return_registers[4] =
2286{
2287  AX_REG, DX_REG, DI_REG, SI_REG
2288};
2289
2290/* Additional registers that are clobbered by SYSV calls.  */
2291
2292int const x86_64_ms_sysv_extra_clobbered_registers[12] =
2293{
2294  SI_REG, DI_REG,
2295  XMM6_REG, XMM7_REG,
2296  XMM8_REG, XMM9_REG, XMM10_REG, XMM11_REG,
2297  XMM12_REG, XMM13_REG, XMM14_REG, XMM15_REG
2298};
2299
2300/* Define the structure for the machine field in struct function.  */
2301
2302struct GTY(()) stack_local_entry {
2303  unsigned short mode;
2304  unsigned short n;
2305  rtx rtl;
2306  struct stack_local_entry *next;
2307};
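
/* Illustrative use (a sketch of the stack-local helper later in this file):
   these entries form a per-function linked list of stack slots keyed by
   (mode, n), which is searched before a new slot is created, roughly

     for (s = cfun->machine->stack_locals; s; s = s->next)
       if (s->mode == mode && s->n == n)
         return validize_mem (copy_rtx (s->rtl));

   with a new entry pushed onto the list when no match is found.  The field
   name stack_locals is an assumption about struct machine_function.  */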
2308
2309/* Structure describing stack frame layout.
2310   Stack grows downward:
2311
2312   [arguments]
2313					<- ARG_POINTER
2314   saved pc
2315
2316   saved static chain			if ix86_static_chain_on_stack
2317
2318   saved frame pointer			if frame_pointer_needed
2319					<- HARD_FRAME_POINTER
2320   [saved regs]
2321					<- regs_save_offset
2322   [padding0]
2323
2324   [saved SSE regs]
2325					<- sse_regs_save_offset
2326   [padding1]          |
2327		       |		<- FRAME_POINTER
2328   [va_arg registers]  |
2329		       |
2330   [frame]	       |
2331		       |
2332   [padding2]	       | = to_allocate
2333					<- STACK_POINTER
2334  */
2335struct ix86_frame
2336{
2337  int nsseregs;
2338  int nregs;
2339  int va_arg_size;
2340  int red_zone_size;
2341  int outgoing_arguments_size;
2342
2343  /* The offsets relative to ARG_POINTER.  */
2344  HOST_WIDE_INT frame_pointer_offset;
2345  HOST_WIDE_INT hard_frame_pointer_offset;
2346  HOST_WIDE_INT stack_pointer_offset;
2347  HOST_WIDE_INT hfp_save_offset;
2348  HOST_WIDE_INT reg_save_offset;
2349  HOST_WIDE_INT sse_reg_save_offset;
2350
2351  /* When save_regs_using_mov is set, emit prologue using
2352     move instead of push instructions.  */
2353  bool save_regs_using_mov;
2354};
2355
2356/* Which cpu are we scheduling for.  */
2357enum attr_cpu ix86_schedule;
2358
2359/* Which cpu are we optimizing for.  */
2360enum processor_type ix86_tune;
2361
2362/* Which instruction set architecture to use.  */
2363enum processor_type ix86_arch;
2364
2365/* True if processor has SSE prefetch instruction.  */
2366unsigned char x86_prefetch_sse;
2367
2368/* -mstackrealign option */
2369static const char ix86_force_align_arg_pointer_string[]
2370  = "force_align_arg_pointer";
2371
2372static rtx (*ix86_gen_leave) (void);
2373static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
2374static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
2375static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
2376static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
2377static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
2378static rtx (*ix86_gen_monitorx) (rtx, rtx, rtx);
2379static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
2380static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
2381static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
2382static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
2383static rtx (*ix86_gen_tls_global_dynamic_64) (rtx, rtx, rtx);
2384static rtx (*ix86_gen_tls_local_dynamic_base_64) (rtx, rtx);
2385
2386/* Preferred alignment for stack boundary in bits.  */
2387unsigned int ix86_preferred_stack_boundary;
2388
2389/* Alignment for incoming stack boundary in bits specified at
2390   command line.  */
2391static unsigned int ix86_user_incoming_stack_boundary;
2392
2393/* Default alignment for incoming stack boundary in bits.  */
2394static unsigned int ix86_default_incoming_stack_boundary;
2395
2396/* Alignment for incoming stack boundary in bits.  */
2397unsigned int ix86_incoming_stack_boundary;
2398
/* Calling-ABI-specific va_list type nodes.  */
2400static GTY(()) tree sysv_va_list_type_node;
2401static GTY(()) tree ms_va_list_type_node;
2402
2403/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
2404char internal_label_prefix[16];
2405int internal_label_prefix_len;
2406
2407/* Fence to use after loop using movnt.  */
2408tree x86_mfence;
2409
/* Register class used for passing a given 64bit part of the argument.
   These represent the classes documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses an SFmode or DFmode move instead of DImode to avoid
   reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half contains only padding).  */
2417enum x86_64_reg_class
2418  {
2419    X86_64_NO_CLASS,
2420    X86_64_INTEGER_CLASS,
2421    X86_64_INTEGERSI_CLASS,
2422    X86_64_SSE_CLASS,
2423    X86_64_SSESF_CLASS,
2424    X86_64_SSEDF_CLASS,
2425    X86_64_SSEUP_CLASS,
2426    X86_64_X87_CLASS,
2427    X86_64_X87UP_CLASS,
2428    X86_64_COMPLEX_X87_CLASS,
2429    X86_64_MEMORY_CLASS
2430  };
2431
2432#define MAX_CLASSES 8
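
/* Worked example (illustrative only): under this classification a structure
   such as

     struct s { double d; int i; };

   occupies two eightbytes; the first (holding D) classifies as
   X86_64_SSEDF_CLASS and the second (holding I plus padding) as
   X86_64_INTEGERSI_CLASS, so the aggregate is passed in one SSE register
   and one integer register.  */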
2433
/* Table of constants used by fldpi, fldln2, etc.  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = false;
2437
2438
2439static struct machine_function * ix86_init_machine_status (void);
2440static rtx ix86_function_value (const_tree, const_tree, bool);
2441static bool ix86_function_value_regno_p (const unsigned int);
2442static unsigned int ix86_function_arg_boundary (machine_mode,
2443						const_tree);
2444static rtx ix86_static_chain (const_tree, bool);
2445static int ix86_function_regparm (const_tree, const_tree);
2446static void ix86_compute_frame_layout (struct ix86_frame *);
2447static bool ix86_expand_vector_init_one_nonzero (bool, machine_mode,
2448						 rtx, rtx, int);
2449static void ix86_add_new_builtins (HOST_WIDE_INT);
2450static tree ix86_canonical_va_list_type (tree);
2451static void predict_jump (int);
2452static unsigned int split_stack_prologue_scratch_regno (void);
2453static bool i386_asm_output_addr_const_extra (FILE *, rtx);
2454
2455enum ix86_function_specific_strings
2456{
2457  IX86_FUNCTION_SPECIFIC_ARCH,
2458  IX86_FUNCTION_SPECIFIC_TUNE,
2459  IX86_FUNCTION_SPECIFIC_MAX
2460};
2461
2462static char *ix86_target_string (HOST_WIDE_INT, int, const char *,
2463				 const char *, enum fpmath_unit, bool);
2464static void ix86_function_specific_save (struct cl_target_option *,
2465					 struct gcc_options *opts);
2466static void ix86_function_specific_restore (struct gcc_options *opts,
2467					    struct cl_target_option *);
2468static void ix86_function_specific_post_stream_in (struct cl_target_option *);
2469static void ix86_function_specific_print (FILE *, int,
2470					  struct cl_target_option *);
2471static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2472static bool ix86_valid_target_attribute_inner_p (tree, char *[],
2473						 struct gcc_options *,
2474						 struct gcc_options *,
2475						 struct gcc_options *);
2476static bool ix86_can_inline_p (tree, tree);
2477static void ix86_set_current_function (tree);
2478static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2479
2480static enum calling_abi ix86_function_abi (const_tree);
2481
2482
2483#ifndef SUBTARGET32_DEFAULT_CPU
2484#define SUBTARGET32_DEFAULT_CPU "i386"
2485#endif
2486
/* Whether -mtune= or -march= were specified.  */
2488static int ix86_tune_defaulted;
2489static int ix86_arch_specified;
2490
2491/* Vectorization library interface and handlers.  */
2492static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2493
2494static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2495static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2496
/* Processor target table, indexed by processor number.  */
2498struct ptt
2499{
2500  const char *const name;			/* processor name  */
2501  const struct processor_costs *cost;		/* Processor costs */
2502  const int align_loop;				/* Default alignments.  */
2503  const int align_loop_max_skip;
2504  const int align_jump;
2505  const int align_jump_max_skip;
2506  const int align_func;
2507};
2508
2509/* This table must be in sync with enum processor_type in i386.h.  */
2510static const struct ptt processor_target_table[PROCESSOR_max] =
2511{
2512  {"generic", &generic_cost, 16, 10, 16, 10, 16},
2513  {"i386", &i386_cost, 4, 3, 4, 3, 4},
2514  {"i486", &i486_cost, 16, 15, 16, 15, 16},
2515  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
2516  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
2517  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
2518  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
2519  {"core2", &core_cost, 16, 10, 16, 10, 16},
2520  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
2521  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
2522  {"haswell", &core_cost, 16, 10, 16, 10, 16},
2523  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
2524  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
2525  {"knl", &slm_cost, 16, 15, 16, 7, 16},
2526  {"intel", &intel_cost, 16, 15, 16, 7, 16},
2527  {"geode", &geode_cost, 0, 0, 0, 0, 0},
2528  {"k6", &k6_cost, 32, 7, 32, 7, 32},
2529  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
2530  {"k8", &k8_cost, 16, 7, 16, 7, 16},
2531  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
2532  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
2533  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
2534  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
2535  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
2536  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
2537  {"btver2", &btver2_cost, 16, 10, 16, 7, 11}
2538};
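
/* Illustrative consumption of the table above (a sketch; the real code is
   in the option-override logic later in this file): the -mtune selection
   indexes the table to pick the cost model and the default alignments,
   roughly

     ix86_tune_cost = processor_target_table[ix86_tune].cost;
     if (align_loops == 0)
       align_loops = processor_target_table[ix86_tune].align_loop;

   and similarly for the jump and function alignment fields.  */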
2539
2540static unsigned int
2541rest_of_handle_insert_vzeroupper (void)
2542{
2543  int i;
2544
  /* vzeroupper instructions are inserted immediately after reload to
     account for possible spills from 256bit registers.  The pass
     reuses the mode switching infrastructure by re-running the mode
     insertion pass, so disable entities that have already been processed.  */
2549  for (i = 0; i < MAX_386_ENTITIES; i++)
2550    ix86_optimize_mode_switching[i] = 0;
2551
2552  ix86_optimize_mode_switching[AVX_U128] = 1;
2553
2554  /* Call optimize_mode_switching.  */
2555  g->get_passes ()->execute_pass_mode_switching ();
2556  return 0;
2557}
2558
2559namespace {
2560
2561const pass_data pass_data_insert_vzeroupper =
2562{
2563  RTL_PASS, /* type */
2564  "vzeroupper", /* name */
2565  OPTGROUP_NONE, /* optinfo_flags */
2566  TV_NONE, /* tv_id */
2567  0, /* properties_required */
2568  0, /* properties_provided */
2569  0, /* properties_destroyed */
2570  0, /* todo_flags_start */
2571  TODO_df_finish, /* todo_flags_finish */
2572};
2573
2574class pass_insert_vzeroupper : public rtl_opt_pass
2575{
2576public:
2577  pass_insert_vzeroupper(gcc::context *ctxt)
2578    : rtl_opt_pass(pass_data_insert_vzeroupper, ctxt)
2579  {}
2580
2581  /* opt_pass methods: */
2582  virtual bool gate (function *)
2583    {
2584      return TARGET_AVX && !TARGET_AVX512F
2585	     && TARGET_VZEROUPPER && flag_expensive_optimizations
2586	     && !optimize_size;
2587    }
2588
2589  virtual unsigned int execute (function *)
2590    {
2591      return rest_of_handle_insert_vzeroupper ();
2592    }
2593
2594}; // class pass_insert_vzeroupper
2595
2596} // anon namespace
2597
2598rtl_opt_pass *
2599make_pass_insert_vzeroupper (gcc::context *ctxt)
2600{
2601  return new pass_insert_vzeroupper (ctxt);
2602}
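
/* Illustrative registration of the pass above (a sketch; the real call is
   made from the option-override code later in this file): the pass is
   inserted right after reload through the pass manager, roughly

     struct register_pass_info insert_vzeroupper_info
       = { make_pass_insert_vzeroupper (g), "reload",
           1, PASS_POS_INSERT_AFTER };
     register_pass (&insert_vzeroupper_info);

   so that it runs once 256-bit values may have been spilled by reload.  */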
2603
2604/* Return true if a red-zone is in use.  */
2605
2606static inline bool
2607ix86_using_red_zone (void)
2608{
2609  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2610}
2611
2612/* Return a string that documents the current -m options.  The caller is
2613   responsible for freeing the string.  */
2614
2615static char *
2616ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
2617		    const char *tune, enum fpmath_unit fpmath,
2618		    bool add_nl_p)
2619{
2620  struct ix86_target_opts
2621  {
2622    const char *option;		/* option string */
2623    HOST_WIDE_INT mask;		/* isa mask options */
2624  };
2625
2626  /* This table is ordered so that options like -msse4.2 that imply
2627     other options are matched before the options they imply.  */
2628  static struct ix86_target_opts isa_opts[] =
2629  {
2630    { "-mfma4",		OPTION_MASK_ISA_FMA4 },
2631    { "-mfma",		OPTION_MASK_ISA_FMA },
2632    { "-mxop",		OPTION_MASK_ISA_XOP },
2633    { "-mlwp",		OPTION_MASK_ISA_LWP },
2634    { "-mavx512f",	OPTION_MASK_ISA_AVX512F },
2635    { "-mavx512er",	OPTION_MASK_ISA_AVX512ER },
2636    { "-mavx512cd",	OPTION_MASK_ISA_AVX512CD },
2637    { "-mavx512pf",	OPTION_MASK_ISA_AVX512PF },
2638    { "-mavx512dq",	OPTION_MASK_ISA_AVX512DQ },
2639    { "-mavx512bw",	OPTION_MASK_ISA_AVX512BW },
2640    { "-mavx512vl",	OPTION_MASK_ISA_AVX512VL },
2641    { "-mavx512ifma",	OPTION_MASK_ISA_AVX512IFMA },
2642    { "-mavx512vbmi",	OPTION_MASK_ISA_AVX512VBMI },
2643    { "-msse4a",	OPTION_MASK_ISA_SSE4A },
2644    { "-msse4.2",	OPTION_MASK_ISA_SSE4_2 },
2645    { "-msse4.1",	OPTION_MASK_ISA_SSE4_1 },
2646    { "-mssse3",	OPTION_MASK_ISA_SSSE3 },
2647    { "-msse3",		OPTION_MASK_ISA_SSE3 },
2648    { "-msse2",		OPTION_MASK_ISA_SSE2 },
2649    { "-msse",		OPTION_MASK_ISA_SSE },
2650    { "-m3dnow",	OPTION_MASK_ISA_3DNOW },
2651    { "-m3dnowa",	OPTION_MASK_ISA_3DNOW_A },
2652    { "-mmmx",		OPTION_MASK_ISA_MMX },
2653    { "-mabm",		OPTION_MASK_ISA_ABM },
2654    { "-mbmi",		OPTION_MASK_ISA_BMI },
2655    { "-mbmi2",		OPTION_MASK_ISA_BMI2 },
2656    { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
2657    { "-mhle",		OPTION_MASK_ISA_HLE },
2658    { "-mfxsr",		OPTION_MASK_ISA_FXSR },
2659    { "-mrdseed",	OPTION_MASK_ISA_RDSEED },
2660    { "-mprfchw",	OPTION_MASK_ISA_PRFCHW },
2661    { "-madx",		OPTION_MASK_ISA_ADX },
2662    { "-mtbm",		OPTION_MASK_ISA_TBM },
2663    { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
2664    { "-mmovbe",	OPTION_MASK_ISA_MOVBE },
2665    { "-mcrc32",	OPTION_MASK_ISA_CRC32 },
2666    { "-maes",		OPTION_MASK_ISA_AES },
2667    { "-msha",		OPTION_MASK_ISA_SHA },
2668    { "-mpclmul",	OPTION_MASK_ISA_PCLMUL },
2669    { "-mfsgsbase",	OPTION_MASK_ISA_FSGSBASE },
2670    { "-mrdrnd",	OPTION_MASK_ISA_RDRND },
2671    { "-mf16c",		OPTION_MASK_ISA_F16C },
2672    { "-mrtm",		OPTION_MASK_ISA_RTM },
2673    { "-mxsave",	OPTION_MASK_ISA_XSAVE },
2674    { "-mxsaveopt",	OPTION_MASK_ISA_XSAVEOPT },
2675    { "-mprefetchwt1",	OPTION_MASK_ISA_PREFETCHWT1 },
2676    { "-mclflushopt",   OPTION_MASK_ISA_CLFLUSHOPT },
2677    { "-mxsavec",	OPTION_MASK_ISA_XSAVEC },
2678    { "-mxsaves",	OPTION_MASK_ISA_XSAVES },
2679    { "-mmpx",          OPTION_MASK_ISA_MPX },
2680    { "-mclwb",		OPTION_MASK_ISA_CLWB },
2681    { "-mpcommit",	OPTION_MASK_ISA_PCOMMIT },
2682    { "-mmwaitx",	OPTION_MASK_ISA_MWAITX  },
2683  };
2684
2685  /* Flag options.  */
2686  static struct ix86_target_opts flag_opts[] =
2687  {
2688    { "-m128bit-long-double",		MASK_128BIT_LONG_DOUBLE },
2689    { "-mlong-double-128",		MASK_LONG_DOUBLE_128 },
2690    { "-mlong-double-64",		MASK_LONG_DOUBLE_64 },
2691    { "-m80387",			MASK_80387 },
2692    { "-maccumulate-outgoing-args",	MASK_ACCUMULATE_OUTGOING_ARGS },
2693    { "-malign-double",			MASK_ALIGN_DOUBLE },
2694    { "-mcld",				MASK_CLD },
2695    { "-mfp-ret-in-387",		MASK_FLOAT_RETURNS },
2696    { "-mieee-fp",			MASK_IEEE_FP },
2697    { "-minline-all-stringops",		MASK_INLINE_ALL_STRINGOPS },
2698    { "-minline-stringops-dynamically",	MASK_INLINE_STRINGOPS_DYNAMICALLY },
2699    { "-mms-bitfields",			MASK_MS_BITFIELD_LAYOUT },
2700    { "-mno-align-stringops",		MASK_NO_ALIGN_STRINGOPS },
2701    { "-mno-fancy-math-387",		MASK_NO_FANCY_MATH_387 },
2702    { "-mno-push-args",			MASK_NO_PUSH_ARGS },
2703    { "-mno-red-zone",			MASK_NO_RED_ZONE },
2704    { "-momit-leaf-frame-pointer",	MASK_OMIT_LEAF_FRAME_POINTER },
2705    { "-mrecip",			MASK_RECIP },
2706    { "-mrtd",				MASK_RTD },
2707    { "-msseregparm",			MASK_SSEREGPARM },
2708    { "-mstack-arg-probe",		MASK_STACK_PROBE },
2709    { "-mtls-direct-seg-refs",		MASK_TLS_DIRECT_SEG_REFS },
2710    { "-mvect8-ret-in-mem",		MASK_VECT8_RETURNS },
2711    { "-m8bit-idiv",			MASK_USE_8BIT_IDIV },
2712    { "-mvzeroupper",			MASK_VZEROUPPER },
2713    { "-mavx256-split-unaligned-load",	MASK_AVX256_SPLIT_UNALIGNED_LOAD},
2714    { "-mavx256-split-unaligned-store",	MASK_AVX256_SPLIT_UNALIGNED_STORE},
2715    { "-mprefer-avx128",		MASK_PREFER_AVX128},
2716  };
2717
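  /* The extra six slots cover -march=, -mtune=, the ABI switch
     (-m32/-m64/-mx32), the two "(other ...)" summary entries and
     -mfpmath=.  */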
2718  const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2719
2720  char isa_other[40];
2721  char target_other[40];
2722  unsigned num = 0;
2723  unsigned i, j;
2724  char *ret;
2725  char *ptr;
2726  size_t len;
2727  size_t line_len;
2728  size_t sep_len;
2729  const char *abi;
2730
2731  memset (opts, '\0', sizeof (opts));
2732
2733  /* Add -march= option.  */
2734  if (arch)
2735    {
2736      opts[num][0] = "-march=";
2737      opts[num++][1] = arch;
2738    }
2739
2740  /* Add -mtune= option.  */
2741  if (tune)
2742    {
2743      opts[num][0] = "-mtune=";
2744      opts[num++][1] = tune;
2745    }
2746
2747  /* Add -m32/-m64/-mx32.  */
2748  if ((isa & OPTION_MASK_ISA_64BIT) != 0)
2749    {
2750      if ((isa & OPTION_MASK_ABI_64) != 0)
2751	abi = "-m64";
2752      else
2753	abi = "-mx32";
2754      isa &= ~ (OPTION_MASK_ISA_64BIT
2755		| OPTION_MASK_ABI_64
2756		| OPTION_MASK_ABI_X32);
2757    }
2758  else
2759    abi = "-m32";
2760  opts[num++][0] = abi;
2761
2762  /* Pick out the options in isa options.  */
2763  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2764    {
2765      if ((isa & isa_opts[i].mask) != 0)
2766	{
2767	  opts[num++][0] = isa_opts[i].option;
2768	  isa &= ~ isa_opts[i].mask;
2769	}
2770    }
2771
2772  if (isa && add_nl_p)
2773    {
2774      opts[num++][0] = isa_other;
2775      sprintf (isa_other, "(other isa: %#" HOST_WIDE_INT_PRINT "x)",
2776	       isa);
2777    }
2778
2779  /* Add flag options.  */
2780  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2781    {
2782      if ((flags & flag_opts[i].mask) != 0)
2783	{
2784	  opts[num++][0] = flag_opts[i].option;
2785	  flags &= ~ flag_opts[i].mask;
2786	}
2787    }
2788
2789  if (flags && add_nl_p)
2790    {
2791      opts[num++][0] = target_other;
2792      sprintf (target_other, "(other flags: %#x)", flags);
2793    }
2794
2795  /* Add -fpmath= option.  */
2796  if (fpmath)
2797    {
2798      opts[num][0] = "-mfpmath=";
2799      switch ((int) fpmath)
2800	{
2801	case FPMATH_387:
2802	  opts[num++][1] = "387";
2803	  break;
2804
2805	case FPMATH_SSE:
2806	  opts[num++][1] = "sse";
2807	  break;
2808
2809	case FPMATH_387 | FPMATH_SSE:
2810	  opts[num++][1] = "sse+387";
2811	  break;
2812
2813	default:
2814	  gcc_unreachable ();
2815	}
2816    }
2817
2818  /* Any options?  */
2819  if (num == 0)
2820    return NULL;
2821
2822  gcc_assert (num < ARRAY_SIZE (opts));
2823
2824  /* Size the string.  */
2825  len = 0;
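  /* Each option after the first is separated either by a single space or,
     when ADD_NL_P is set and a line would exceed 70 columns, by a space
     followed by a backslash and a newline (three characters in total);
     see the emission loop below.  */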
2826  sep_len = (add_nl_p) ? 3 : 1;
2827  for (i = 0; i < num; i++)
2828    {
2829      len += sep_len;
2830      for (j = 0; j < 2; j++)
2831	if (opts[i][j])
2832	  len += strlen (opts[i][j]);
2833    }
2834
2835  /* Build the string.  */
2836  ret = ptr = (char *) xmalloc (len);
2837  line_len = 0;
2838
2839  for (i = 0; i < num; i++)
2840    {
2841      size_t len2[2];
2842
2843      for (j = 0; j < 2; j++)
2844	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2845
2846      if (i != 0)
2847	{
2848	  *ptr++ = ' ';
2849	  line_len++;
2850
2851	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2852	    {
2853	      *ptr++ = '\\';
2854	      *ptr++ = '\n';
2855	      line_len = 0;
2856	    }
2857	}
2858
2859      for (j = 0; j < 2; j++)
2860	if (opts[i][j])
2861	  {
2862	    memcpy (ptr, opts[i][j], len2[j]);
2863	    ptr += len2[j];
2864	    line_len += len2[j];
2865	  }
2866    }
2867
2868  *ptr = '\0';
2869  gcc_assert (ret + len >= ptr);
2870
2871  return ret;
2872}
2873
2874/* Return true if profiling code should be emitted before the
2875   prologue, and false otherwise.
2876   Note: for x86, a sorry is issued for the "hotfix" case.  */
2877static bool
2878ix86_profile_before_prologue (void)
2879{
2880  return flag_fentry != 0;
2881}
2882
2883/* Function that is callable from the debugger to print the current
2884   options.  */
2885void ATTRIBUTE_UNUSED
2886ix86_debug_options (void)
2887{
2888  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2889				   ix86_arch_string, ix86_tune_string,
2890				   ix86_fpmath, true);
2891
2892  if (opts)
2893    {
2894      fprintf (stderr, "%s\n\n", opts);
2895      free (opts);
2896    }
2897  else
2898    fputs ("<no options>\n\n", stderr);
2899
2900  return;
2901}
2902
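/* User-visible names of the stringop algorithms, indexed by stringop_alg.
   The array below is generated from stringop.def via the usual X-macro
   pattern; e.g. (assuming the .def file defines it this way) an entry such
   as DEF_ALG (rep_prefix_8_byte, rep_8byte) contributes the string
   "rep_8byte" accepted by -mmemcpy-strategy= and -mmemset-strategy=.  */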
2903static const char *stringop_alg_names[] = {
2904#define DEF_ENUM
2905#define DEF_ALG(alg, name) #name,
2906#include "stringop.def"
2907#undef DEF_ENUM
2908#undef DEF_ALG
2909};
2910
2911/* Parse parameter string passed to -mmemcpy-strategy= or -mmemset-strategy=.
2912   The string is of the following form (or comma separated list of it):
2913
2914     strategy_alg:max_size:[align|noalign]
2915
2916   where the full size range for the strategy is either [0, max_size] or
2917   [min_size, max_size], in which min_size is the max_size + 1 of the
2918   preceding range.  The last size range must have max_size == -1.
2919
2920   Examples:
2921
2922    1.
2923       -mmemcpy-strategy=libcall:-1:noalign
2924
2925      This is equivalent to (for a known-size memcpy) -mstringop-strategy=libcall.
2926
2927
2928   2.
2929      -mmemset-strategy=rep_8byte:16:noalign,vector_loop:2048:align,libcall:-1:noalign
2930
2931      This tells the compiler to use the following strategy for memset:
2932      1) when the expected size is between [1, 16], use rep_8byte strategy;
2933      2) when the size is between [17, 2048], use vector_loop;
2934      3) when the size is > 2048, use libcall.  */
2935
2936struct stringop_size_range
2937{
2938  int max;
2939  stringop_alg alg;
2940  bool noalign;
2941};
2942
2943static void
2944ix86_parse_stringop_strategy_string (char *strategy_str, bool is_memset)
2945{
2946  const struct stringop_algs *default_algs;
2947  stringop_size_range input_ranges[MAX_STRINGOP_ALGS];
2948  char *curr_range_str, *next_range_str;
2949  int i = 0, n = 0;
2950
2951  if (is_memset)
2952    default_algs = &ix86_cost->memset[TARGET_64BIT != 0];
2953  else
2954    default_algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
2955
2956  curr_range_str = strategy_str;
2957
2958  do
2959    {
2960      int maxs;
2961      char alg_name[128];
2962      char align[16];
2963      next_range_str = strchr (curr_range_str, ',');
2964      if (next_range_str)
2965        *next_range_str++ = '\0';
2966
2967      if (3 != sscanf (curr_range_str, "%20[^:]:%d:%10s",
2968                       alg_name, &maxs, align))
2969        {
2970          error ("wrong arg %s to option %s", curr_range_str,
2971                 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2972          return;
2973        }
2974
2975      if (n > 0 && (maxs < (input_ranges[n - 1].max + 1) && maxs != -1))
2976        {
2977          error ("size ranges of option %s should be increasing",
2978                 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2979          return;
2980        }
2981
2982      for (i = 0; i < last_alg; i++)
2983	if (!strcmp (alg_name, stringop_alg_names[i]))
2984	  break;
2985
2986      if (i == last_alg)
2987        {
2988          error ("wrong stringop strategy name %s specified for option %s",
2989                 alg_name,
2990                 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
2991          return;
2992        }
2993
2994      if ((stringop_alg) i == rep_prefix_8_byte
2995	  && !TARGET_64BIT)
2996	{
2997	  /* rep; movq isn't available in 32-bit code.  */
2998	  error ("stringop strategy name %s specified for option %s "
2999		 "not supported for 32-bit code",
3000                 alg_name,
3001                 is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3002	  return;
3003	}
3004
3005      input_ranges[n].max = maxs;
3006      input_ranges[n].alg = (stringop_alg) i;
3007      if (!strcmp (align, "align"))
3008        input_ranges[n].noalign = false;
3009      else if (!strcmp (align, "noalign"))
3010        input_ranges[n].noalign = true;
3011      else
3012        {
3013          error ("unknown alignment %s specified for option %s",
3014                 align, is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3015          return;
3016        }
3017      n++;
3018      curr_range_str = next_range_str;
3019    }
3020  while (curr_range_str);
3021
3022  if (input_ranges[n - 1].max != -1)
3023    {
3024      error ("the max value for the last size range should be -1"
3025             " for option %s",
3026             is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3027      return;
3028    }
3029
3030  if (n > MAX_STRINGOP_ALGS)
3031    {
3032      error ("too many size ranges specified in option %s",
3033             is_memset ? "-mmemset_strategy=" : "-mmemcpy_strategy=");
3034      return;
3035    }
3036
3037  /* Now override the default algs array.  */
3038  for (i = 0; i < n; i++)
3039    {
3040      *const_cast<int *>(&default_algs->size[i].max) = input_ranges[i].max;
3041      *const_cast<stringop_alg *>(&default_algs->size[i].alg)
3042          = input_ranges[i].alg;
3043      *const_cast<int *>(&default_algs->size[i].noalign)
3044          = input_ranges[i].noalign;
3045    }
3046}
3047
3048
3049/* Parse the -mtune-ctrl= option.  When DUMP is true,
3050   print the features that are explicitly set.  */
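/* The option value is a comma separated list of feature names, each
   optionally prefixed with '^' to clear the feature instead of setting it,
   e.g. (illustrative) -mtune-ctrl=use_incdec,^use_bt; the names come from
   ix86_tune_feature_names[].  */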
3051
3052static void
3053parse_mtune_ctrl_str (bool dump)
3054{
3055  if (!ix86_tune_ctrl_string)
3056    return;
3057
3058  char *next_feature_string = NULL;
3059  char *curr_feature_string = xstrdup (ix86_tune_ctrl_string);
3060  char *orig = curr_feature_string;
3061  int i;
3062  do
3063    {
3064      bool clear = false;
3065
3066      next_feature_string = strchr (curr_feature_string, ',');
3067      if (next_feature_string)
3068        *next_feature_string++ = '\0';
3069      if (*curr_feature_string == '^')
3070        {
3071          curr_feature_string++;
3072          clear = true;
3073        }
3074      for (i = 0; i < X86_TUNE_LAST; i++)
3075        {
3076          if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
3077            {
3078              ix86_tune_features[i] = !clear;
3079              if (dump)
3080                fprintf (stderr, "Explicitly %s feature %s\n",
3081                         clear ? "clear" : "set", ix86_tune_feature_names[i]);
3082              break;
3083            }
3084        }
3085      if (i == X86_TUNE_LAST)
3086        error ("Unknown parameter to option -mtune-ctrl: %s",
3087               clear ? curr_feature_string - 1 : curr_feature_string);
3088      curr_feature_string = next_feature_string;
3089    }
3090  while (curr_feature_string);
3091  free (orig);
3092}
3093
3094/* Helper function to set ix86_tune_features. IX86_TUNE is the
3095   processor type.  */
3096
3097static void
3098set_ix86_tune_features (enum processor_type ix86_tune, bool dump)
3099{
3100  unsigned int ix86_tune_mask = 1u << ix86_tune;
3101  int i;
3102
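  /* Each entry of initial_ix86_tune_features[] is a bitmask of the
     processors for which the corresponding tuning knob is enabled by
     default, so testing the IX86_TUNE bit picks the default for the
     processor we are tuning for.  */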
3103  for (i = 0; i < X86_TUNE_LAST; ++i)
3104    {
3105      if (ix86_tune_no_default)
3106        ix86_tune_features[i] = 0;
3107      else
3108        ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3109    }
3110
3111  if (dump)
3112    {
3113      fprintf (stderr, "List of x86 specific tuning parameter names:\n");
3114      for (i = 0; i < X86_TUNE_LAST; i++)
3115        fprintf (stderr, "%s : %s\n", ix86_tune_feature_names[i],
3116                 ix86_tune_features[i] ? "on" : "off");
3117    }
3118
3119  parse_mtune_ctrl_str (dump);
3120}
3121
3122
3123/* Override various settings based on options.  If MAIN_ARGS_P, the
3124   options are from the command line, otherwise they are from
3125   attributes.  */
3126
3127static void
3128ix86_option_override_internal (bool main_args_p,
3129			       struct gcc_options *opts,
3130			       struct gcc_options *opts_set)
3131{
3132  int i;
3133  unsigned int ix86_arch_mask;
3134  const bool ix86_tune_specified = (opts->x_ix86_tune_string != NULL);
3135  const char *prefix;
3136  const char *suffix;
3137  const char *sw;
3138
3139#define PTA_3DNOW	 	(HOST_WIDE_INT_1 << 0)
3140#define PTA_3DNOW_A	 	(HOST_WIDE_INT_1 << 1)
3141#define PTA_64BIT		(HOST_WIDE_INT_1 << 2)
3142#define PTA_ABM			(HOST_WIDE_INT_1 << 3)
3143#define PTA_AES		 	(HOST_WIDE_INT_1 << 4)
3144#define PTA_AVX			(HOST_WIDE_INT_1 << 5)
3145#define PTA_BMI		 	(HOST_WIDE_INT_1 << 6)
3146#define PTA_CX16		(HOST_WIDE_INT_1 << 7)
3147#define PTA_F16C		(HOST_WIDE_INT_1 << 8)
3148#define PTA_FMA			(HOST_WIDE_INT_1 << 9)
3149#define PTA_FMA4	 	(HOST_WIDE_INT_1 << 10)
3150#define PTA_FSGSBASE		(HOST_WIDE_INT_1 << 11)
3151#define PTA_LWP		 	(HOST_WIDE_INT_1 << 12)
3152#define PTA_LZCNT	 	(HOST_WIDE_INT_1 << 13)
3153#define PTA_MMX			(HOST_WIDE_INT_1 << 14)
3154#define PTA_MOVBE		(HOST_WIDE_INT_1 << 15)
3155#define PTA_NO_SAHF		(HOST_WIDE_INT_1 << 16)
3156#define PTA_PCLMUL		(HOST_WIDE_INT_1 << 17)
3157#define PTA_POPCNT		(HOST_WIDE_INT_1 << 18)
3158#define PTA_PREFETCH_SSE	(HOST_WIDE_INT_1 << 19)
3159#define PTA_RDRND	 	(HOST_WIDE_INT_1 << 20)
3160#define PTA_SSE			(HOST_WIDE_INT_1 << 21)
3161#define PTA_SSE2		(HOST_WIDE_INT_1 << 22)
3162#define PTA_SSE3		(HOST_WIDE_INT_1 << 23)
3163#define PTA_SSE4_1	 	(HOST_WIDE_INT_1 << 24)
3164#define PTA_SSE4_2	 	(HOST_WIDE_INT_1 << 25)
3165#define PTA_SSE4A		(HOST_WIDE_INT_1 << 26)
3166#define PTA_SSSE3		(HOST_WIDE_INT_1 << 27)
3167#define PTA_TBM		 	(HOST_WIDE_INT_1 << 28)
3168#define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
3169#define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
3170#define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
3171#define PTA_RTM		 	(HOST_WIDE_INT_1 << 32)
3172#define PTA_HLE			(HOST_WIDE_INT_1 << 33)
3173#define PTA_PRFCHW		(HOST_WIDE_INT_1 << 34)
3174#define PTA_RDSEED		(HOST_WIDE_INT_1 << 35)
3175#define PTA_ADX			(HOST_WIDE_INT_1 << 36)
3176#define PTA_FXSR		(HOST_WIDE_INT_1 << 37)
3177#define PTA_XSAVE		(HOST_WIDE_INT_1 << 38)
3178#define PTA_XSAVEOPT		(HOST_WIDE_INT_1 << 39)
3179#define PTA_AVX512F		(HOST_WIDE_INT_1 << 40)
3180#define PTA_AVX512ER		(HOST_WIDE_INT_1 << 41)
3181#define PTA_AVX512PF		(HOST_WIDE_INT_1 << 42)
3182#define PTA_AVX512CD		(HOST_WIDE_INT_1 << 43)
3183#define PTA_MPX			(HOST_WIDE_INT_1 << 44)
3184#define PTA_SHA			(HOST_WIDE_INT_1 << 45)
3185#define PTA_PREFETCHWT1		(HOST_WIDE_INT_1 << 46)
3186#define PTA_CLFLUSHOPT		(HOST_WIDE_INT_1 << 47)
3187#define PTA_XSAVEC		(HOST_WIDE_INT_1 << 48)
3188#define PTA_XSAVES		(HOST_WIDE_INT_1 << 49)
3189#define PTA_AVX512DQ		(HOST_WIDE_INT_1 << 50)
3190#define PTA_AVX512BW		(HOST_WIDE_INT_1 << 51)
3191#define PTA_AVX512VL		(HOST_WIDE_INT_1 << 52)
3192#define PTA_AVX512IFMA		(HOST_WIDE_INT_1 << 53)
3193#define PTA_AVX512VBMI		(HOST_WIDE_INT_1 << 54)
3194#define PTA_CLWB		(HOST_WIDE_INT_1 << 55)
3195#define PTA_PCOMMIT		(HOST_WIDE_INT_1 << 56)
3196#define PTA_MWAITX		(HOST_WIDE_INT_1 << 57)
3197
3198#define PTA_CORE2 \
3199  (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
3200   | PTA_CX16 | PTA_FXSR)
3201#define PTA_NEHALEM \
3202  (PTA_CORE2 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_POPCNT)
3203#define PTA_WESTMERE \
3204  (PTA_NEHALEM | PTA_AES | PTA_PCLMUL)
3205#define PTA_SANDYBRIDGE \
3206  (PTA_WESTMERE | PTA_AVX | PTA_XSAVE | PTA_XSAVEOPT)
3207#define PTA_IVYBRIDGE \
3208  (PTA_SANDYBRIDGE | PTA_FSGSBASE | PTA_RDRND | PTA_F16C)
3209#define PTA_HASWELL \
3210  (PTA_IVYBRIDGE | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_LZCNT \
3211   | PTA_FMA | PTA_MOVBE | PTA_HLE)
3212#define PTA_BROADWELL \
3213  (PTA_HASWELL | PTA_ADX | PTA_PRFCHW | PTA_RDSEED)
3214#define PTA_KNL \
3215  (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
3216#define PTA_BONNELL \
3217  (PTA_CORE2 | PTA_MOVBE)
3218#define PTA_SILVERMONT \
3219  (PTA_WESTMERE | PTA_MOVBE)
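/* Each PTA_<CPU> composite above is defined in terms of an earlier one, so
   for example PTA_HASWELL transitively pulls in PTA_CORE2's MMX, SSE
   through SSSE3, CX16 and FXSR bits in addition to everything added for
   Nehalem, Westmere, Sandy Bridge and Ivy Bridge.  */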
3220
3221/* If this reaches 64, we need to widen the flags field in struct pta below.  */
3222
3223  static struct pta
3224    {
3225      const char *const name;		/* processor name or nickname.  */
3226      const enum processor_type processor;
3227      const enum attr_cpu schedule;
3228      const unsigned HOST_WIDE_INT flags;
3229    }
3230  const processor_alias_table[] =
3231    {
3232      {"i386", PROCESSOR_I386, CPU_NONE, 0},
3233      {"i486", PROCESSOR_I486, CPU_NONE, 0},
3234      {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3235      {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
3236      {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
3237      {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
3238      {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3239      {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3240      {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3241	PTA_MMX | PTA_SSE | PTA_FXSR},
3242      {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3243      {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
3244      {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_FXSR},
3245      {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3246	PTA_MMX | PTA_SSE | PTA_FXSR},
3247      {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3248	PTA_MMX | PTA_SSE | PTA_FXSR},
3249      {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
3250	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3251      {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
3252	PTA_MMX |PTA_SSE | PTA_SSE2 | PTA_FXSR},
3253      {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
3254	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_FXSR},
3255      {"prescott", PROCESSOR_NOCONA, CPU_NONE,
3256	PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_FXSR},
3257      {"nocona", PROCESSOR_NOCONA, CPU_NONE,
3258	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3259	| PTA_CX16 | PTA_NO_SAHF | PTA_FXSR},
3260      {"core2", PROCESSOR_CORE2, CPU_CORE2, PTA_CORE2},
3261      {"nehalem", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3262      {"corei7", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_NEHALEM},
3263      {"westmere", PROCESSOR_NEHALEM, CPU_NEHALEM, PTA_WESTMERE},
3264      {"sandybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3265	PTA_SANDYBRIDGE},
3266      {"corei7-avx", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3267	PTA_SANDYBRIDGE},
3268      {"ivybridge", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3269	PTA_IVYBRIDGE},
3270      {"core-avx-i", PROCESSOR_SANDYBRIDGE, CPU_NEHALEM,
3271	PTA_IVYBRIDGE},
3272      {"haswell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3273      {"core-avx2", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_HASWELL},
3274      {"broadwell", PROCESSOR_HASWELL, CPU_NEHALEM, PTA_BROADWELL},
3275      {"bonnell", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3276      {"atom", PROCESSOR_BONNELL, CPU_ATOM, PTA_BONNELL},
3277      {"silvermont", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3278      {"slm", PROCESSOR_SILVERMONT, CPU_SLM, PTA_SILVERMONT},
3279      {"knl", PROCESSOR_KNL, CPU_KNL, PTA_KNL},
3280      {"intel", PROCESSOR_INTEL, CPU_SLM, PTA_NEHALEM},
3281      {"geode", PROCESSOR_GEODE, CPU_GEODE,
3282	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3283      {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
3284      {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3285      {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW | PTA_PRFCHW},
3286      {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
3287	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3288      {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
3289	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
3290      {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
3291	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3292      {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
3293	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3294      {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
3295	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_PRFCHW | PTA_FXSR},
3296      {"x86-64", PROCESSOR_K8, CPU_K8,
3297	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR},
3298      {"k8", PROCESSOR_K8, CPU_K8,
3299	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3300	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3301      {"k8-sse3", PROCESSOR_K8, CPU_K8,
3302	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3303	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3304      {"opteron", PROCESSOR_K8, CPU_K8,
3305	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3306	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3307      {"opteron-sse3", PROCESSOR_K8, CPU_K8,
3308	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3309	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3310      {"athlon64", PROCESSOR_K8, CPU_K8,
3311	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3312	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3313      {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
3314	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3315	| PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3316      {"athlon-fx", PROCESSOR_K8, CPU_K8,
3317	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
3318	| PTA_SSE2 | PTA_NO_SAHF | PTA_PRFCHW | PTA_FXSR},
3319      {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3320	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3321	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3322      {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
3323	PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
3324	| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_PRFCHW | PTA_FXSR},
3325      {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
3326	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3327	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3328	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3329	| PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3330      {"bdver2", PROCESSOR_BDVER2, CPU_BDVER2,
3331	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3332	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3333	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3334	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3335	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE},
3336      {"bdver3", PROCESSOR_BDVER3, CPU_BDVER3,
3337	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3338	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3339	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
3340	| PTA_XOP | PTA_LWP | PTA_BMI | PTA_TBM | PTA_F16C
3341	| PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE
3342	| PTA_XSAVEOPT | PTA_FSGSBASE},
3343     {"bdver4", PROCESSOR_BDVER4, CPU_BDVER4,
3344	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
3345	| PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
3346	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2
3347	| PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_BMI | PTA_BMI2
3348	| PTA_TBM | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR
3349	| PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND
3350	| PTA_MOVBE | PTA_MWAITX},
3351      {"btver1", PROCESSOR_BTVER1, CPU_GENERIC,
3352	PTA_64BIT | PTA_MMX |  PTA_SSE  | PTA_SSE2 | PTA_SSE3
3353	| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_PRFCHW
3354	| PTA_FXSR | PTA_XSAVE},
3355      {"btver2", PROCESSOR_BTVER2, CPU_BTVER2,
3356	PTA_64BIT | PTA_MMX |  PTA_SSE  | PTA_SSE2 | PTA_SSE3
3357	| PTA_SSSE3 | PTA_SSE4A |PTA_ABM | PTA_CX16 | PTA_SSE4_1
3358	| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX
3359	| PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_PRFCHW
3360	| PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT},
3361
3362      {"generic", PROCESSOR_GENERIC, CPU_GENERIC,
3363	PTA_64BIT
3364	| PTA_HLE /* flags are only used for -march switch.  */ },
3365    };
3366
3367  /* -mrecip options.  */
3368  static struct
3369    {
3370      const char *string;           /* option name */
3371      unsigned int mask;            /* mask bits to set */
3372    }
3373  const recip_options[] =
3374    {
3375      { "all",       RECIP_MASK_ALL },
3376      { "none",      RECIP_MASK_NONE },
3377      { "div",       RECIP_MASK_DIV },
3378      { "sqrt",      RECIP_MASK_SQRT },
3379      { "vec-div",   RECIP_MASK_VEC_DIV },
3380      { "vec-sqrt",  RECIP_MASK_VEC_SQRT },
3381    };
3382
3383  int const pta_size = ARRAY_SIZE (processor_alias_table);
3384
3385  /* Set up prefix/suffix so the error messages refer to either the command
3386     line argument, or the attribute(target).  */
3387  if (main_args_p)
3388    {
3389      prefix = "-m";
3390      suffix = "";
3391      sw = "switch";
3392    }
3393  else
3394    {
3395      prefix = "option(\"";
3396      suffix = "\")";
3397      sw = "attribute";
3398    }
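  /* With the strings above, a diagnostic format like "%stune=k8%s" expands
     to "-mtune=k8" for command-line options and to "option("tune=k8")" for
     attribute(target) processing, with SW supplying "switch" or
     "attribute" respectively.  */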
3399
3400  /* Turn off both OPTION_MASK_ABI_64 and OPTION_MASK_ABI_X32 if
3401     TARGET_64BIT_DEFAULT is true and TARGET_64BIT is false.  */
3402  if (TARGET_64BIT_DEFAULT && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3403    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ABI_64 | OPTION_MASK_ABI_X32);
3404#ifdef TARGET_BI_ARCH
3405  else
3406    {
3407#if TARGET_BI_ARCH == 1
3408      /* When TARGET_BI_ARCH == 1, by default, OPTION_MASK_ABI_64
3409	 is on and OPTION_MASK_ABI_X32 is off.  We turn off
3410	 OPTION_MASK_ABI_64 if OPTION_MASK_ABI_X32 is turned on by
3411	 -mx32.  */
3412      if (TARGET_X32_P (opts->x_ix86_isa_flags))
3413	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3414#else
3415      /* When TARGET_BI_ARCH == 2, by default, OPTION_MASK_ABI_X32 is
3416	 on and OPTION_MASK_ABI_64 is off.  We turn off
3417	 OPTION_MASK_ABI_X32 if OPTION_MASK_ABI_64 is turned on by
3418	 -m64 or OPTION_MASK_CODE16 is turned on by -m16.  */
3419      if (TARGET_LP64_P (opts->x_ix86_isa_flags)
3420	  || TARGET_16BIT_P (opts->x_ix86_isa_flags))
3421	opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3422#endif
3423    }
3424#endif
3425
3426  if (TARGET_X32_P (opts->x_ix86_isa_flags))
3427    {
3428      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3429	 OPTION_MASK_ABI_64 for TARGET_X32.  */
3430      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3431      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_64;
3432    }
3433  else if (TARGET_16BIT_P (opts->x_ix86_isa_flags))
3434    opts->x_ix86_isa_flags &= ~(OPTION_MASK_ISA_64BIT
3435				| OPTION_MASK_ABI_X32
3436				| OPTION_MASK_ABI_64);
3437  else if (TARGET_LP64_P (opts->x_ix86_isa_flags))
3438    {
3439      /* Always turn on OPTION_MASK_ISA_64BIT and turn off
3440	 OPTION_MASK_ABI_X32 for TARGET_LP64.  */
3441      opts->x_ix86_isa_flags |= OPTION_MASK_ISA_64BIT;
3442      opts->x_ix86_isa_flags &= ~OPTION_MASK_ABI_X32;
3443    }
3444
3445#ifdef SUBTARGET_OVERRIDE_OPTIONS
3446  SUBTARGET_OVERRIDE_OPTIONS;
3447#endif
3448
3449#ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3450  SUBSUBTARGET_OVERRIDE_OPTIONS;
3451#endif
3452
3453  /* -fPIC is the default for x86_64.  */
3454  if (TARGET_MACHO && TARGET_64BIT_P (opts->x_ix86_isa_flags))
3455    opts->x_flag_pic = 2;
3456
3457  /* Need to check -mtune=generic first.  */
3458  if (opts->x_ix86_tune_string)
3459    {
3460      /* As special support for cross compilers we read -mtune=native
3461	     as -mtune=generic.  With native compilers we won't see the
3462	     -mtune=native, as it was changed by the driver.  */
3463      if (!strcmp (opts->x_ix86_tune_string, "native"))
3464	{
3465	  opts->x_ix86_tune_string = "generic";
3466	}
3467      else if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3468        warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated; use "
3469                 "%stune=k8%s or %stune=generic%s instead as appropriate",
3470                 prefix, suffix, prefix, suffix, prefix, suffix);
3471    }
3472  else
3473    {
3474      if (opts->x_ix86_arch_string)
3475	opts->x_ix86_tune_string = opts->x_ix86_arch_string;
3476      if (!opts->x_ix86_tune_string)
3477	{
3478	  opts->x_ix86_tune_string
3479	    = processor_target_table[TARGET_CPU_DEFAULT].name;
3480	  ix86_tune_defaulted = 1;
3481	}
3482
3483      /* opts->x_ix86_tune_string is set to opts->x_ix86_arch_string
3484	 or defaulted.  We need to use a sensible tune option.  */
3485      if (!strcmp (opts->x_ix86_tune_string, "x86-64"))
3486	{
3487	  opts->x_ix86_tune_string = "generic";
3488	}
3489    }
3490
3491  if (opts->x_ix86_stringop_alg == rep_prefix_8_byte
3492      && !TARGET_64BIT_P (opts->x_ix86_isa_flags))
3493    {
3494      /* rep; movq isn't available in 32-bit code.  */
3495      error ("-mstringop-strategy=rep_8byte not supported for 32-bit code");
3496      opts->x_ix86_stringop_alg = no_stringop;
3497    }
3498
3499  if (!opts->x_ix86_arch_string)
3500    opts->x_ix86_arch_string
3501      = TARGET_64BIT_P (opts->x_ix86_isa_flags)
3502	? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3503  else
3504    ix86_arch_specified = 1;
3505
3506  if (opts_set->x_ix86_pmode)
3507    {
3508      if ((TARGET_LP64_P (opts->x_ix86_isa_flags)
3509	   && opts->x_ix86_pmode == PMODE_SI)
3510	  || (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
3511	       && opts->x_ix86_pmode == PMODE_DI))
3512	error ("address mode %qs not supported in the %s bit mode",
3513	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "short" : "long",
3514	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? "64" : "32");
3515    }
3516  else
3517    opts->x_ix86_pmode = TARGET_LP64_P (opts->x_ix86_isa_flags)
3518			 ? PMODE_DI : PMODE_SI;
3519
3520  if (!opts_set->x_ix86_abi)
3521    opts->x_ix86_abi = DEFAULT_ABI;
3522
3523  /* For targets using the MS ABI, enable MS extensions unless they were
3524     explicitly turned off.  For other ABIs we turn this option
3525     off.  */
3526  if (!opts_set->x_flag_ms_extensions)
3527    opts->x_flag_ms_extensions = (MS_ABI == DEFAULT_ABI);
3528
3529  if (opts_set->x_ix86_cmodel)
3530    {
3531      switch (opts->x_ix86_cmodel)
3532	{
3533	case CM_SMALL:
3534	case CM_SMALL_PIC:
3535	  if (opts->x_flag_pic)
3536	    opts->x_ix86_cmodel = CM_SMALL_PIC;
3537	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3538	    error ("code model %qs not supported in the %s bit mode",
3539		   "small", "32");
3540	  break;
3541
3542	case CM_MEDIUM:
3543	case CM_MEDIUM_PIC:
3544	  if (opts->x_flag_pic)
3545	    opts->x_ix86_cmodel = CM_MEDIUM_PIC;
3546	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3547	    error ("code model %qs not supported in the %s bit mode",
3548		   "medium", "32");
3549	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3550	    error ("code model %qs not supported in x32 mode",
3551		   "medium");
3552	  break;
3553
3554	case CM_LARGE:
3555	case CM_LARGE_PIC:
3556	  if (opts->x_flag_pic)
3557	    opts->x_ix86_cmodel = CM_LARGE_PIC;
3558	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3559	    error ("code model %qs not supported in the %s bit mode",
3560		   "large", "32");
3561	  else if (TARGET_X32_P (opts->x_ix86_isa_flags))
3562	    error ("code model %qs not supported in x32 mode",
3563		   "large");
3564	  break;
3565
3566	case CM_32:
3567	  if (opts->x_flag_pic)
3568	    error ("code model %s does not support PIC mode", "32");
3569	  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3570	    error ("code model %qs not supported in the %s bit mode",
3571		   "32", "64");
3572	  break;
3573
3574	case CM_KERNEL:
3575	  if (opts->x_flag_pic)
3576	    {
3577	      error ("code model %s does not support PIC mode", "kernel");
3578	      opts->x_ix86_cmodel = CM_32;
3579	    }
3580	  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
3581	    error ("code model %qs not supported in the %s bit mode",
3582		   "kernel", "32");
3583	  break;
3584
3585	default:
3586	  gcc_unreachable ();
3587	}
3588    }
3589  else
3590    {
3591      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3592	 use of rip-relative addressing.  This eliminates fixups that
3593	 would otherwise be needed if this object is to be placed in a
3594	 DLL, and is essentially just as efficient as direct addressing.  */
3595      if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3596	  && (TARGET_RDOS || TARGET_PECOFF))
3597	opts->x_ix86_cmodel = CM_MEDIUM_PIC, opts->x_flag_pic = 1;
3598      else if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3599	opts->x_ix86_cmodel = opts->x_flag_pic ? CM_SMALL_PIC : CM_SMALL;
3600      else
3601	opts->x_ix86_cmodel = CM_32;
3602    }
3603  if (TARGET_MACHO && opts->x_ix86_asm_dialect == ASM_INTEL)
3604    {
3605      error ("-masm=intel not supported in this configuration");
3606      opts->x_ix86_asm_dialect = ASM_ATT;
3607    }
3608  if ((TARGET_64BIT_P (opts->x_ix86_isa_flags) != 0)
3609      != ((opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3610    sorry ("%i-bit mode not compiled in",
3611	   (opts->x_ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3612
3613  for (i = 0; i < pta_size; i++)
3614    if (! strcmp (opts->x_ix86_arch_string, processor_alias_table[i].name))
3615      {
3616	ix86_schedule = processor_alias_table[i].schedule;
3617	ix86_arch = processor_alias_table[i].processor;
3618	/* Default cpu tuning to the architecture.  */
3619	ix86_tune = ix86_arch;
3620
3621	if (TARGET_64BIT_P (opts->x_ix86_isa_flags)
3622	    && !(processor_alias_table[i].flags & PTA_64BIT))
3623	  error ("CPU you selected does not support x86-64 "
3624		 "instruction set");
3625
3626	if (processor_alias_table[i].flags & PTA_MMX
3627	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3628	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3629	if (processor_alias_table[i].flags & PTA_3DNOW
3630	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3631	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3632	if (processor_alias_table[i].flags & PTA_3DNOW_A
3633	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3634	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3635	if (processor_alias_table[i].flags & PTA_SSE
3636	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3637	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3638	if (processor_alias_table[i].flags & PTA_SSE2
3639	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3640	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3641	if (processor_alias_table[i].flags & PTA_SSE3
3642	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3643	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3644	if (processor_alias_table[i].flags & PTA_SSSE3
3645	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3646	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3647	if (processor_alias_table[i].flags & PTA_SSE4_1
3648	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3649	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3650	if (processor_alias_table[i].flags & PTA_SSE4_2
3651	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3652	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3653	if (processor_alias_table[i].flags & PTA_AVX
3654	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3655	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3656	if (processor_alias_table[i].flags & PTA_AVX2
3657	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX2))
3658	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2;
3659	if (processor_alias_table[i].flags & PTA_FMA
3660	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3661	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3662	if (processor_alias_table[i].flags & PTA_SSE4A
3663	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3664	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3665	if (processor_alias_table[i].flags & PTA_FMA4
3666	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3667	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3668	if (processor_alias_table[i].flags & PTA_XOP
3669	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3670	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3671	if (processor_alias_table[i].flags & PTA_LWP
3672	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3673	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3674	if (processor_alias_table[i].flags & PTA_ABM
3675	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3676	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3677	if (processor_alias_table[i].flags & PTA_BMI
3678	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI))
3679	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI;
3680	if (processor_alias_table[i].flags & (PTA_LZCNT | PTA_ABM)
3681	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_LZCNT))
3682	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_LZCNT;
3683	if (processor_alias_table[i].flags & PTA_TBM
3684	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
3685	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_TBM;
3686	if (processor_alias_table[i].flags & PTA_BMI2
3687	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
3688	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
3689	if (processor_alias_table[i].flags & PTA_CX16
3690	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3691	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3692	if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3693	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3694	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3695	if (!(TARGET_64BIT_P (opts->x_ix86_isa_flags)
3696	    && (processor_alias_table[i].flags & PTA_NO_SAHF))
3697	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3698	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3699	if (processor_alias_table[i].flags & PTA_MOVBE
3700	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3701	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3702	if (processor_alias_table[i].flags & PTA_AES
3703	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3704	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AES;
3705	if (processor_alias_table[i].flags & PTA_SHA
3706	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_SHA))
3707	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SHA;
3708	if (processor_alias_table[i].flags & PTA_PCLMUL
3709	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3710	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3711	if (processor_alias_table[i].flags & PTA_FSGSBASE
3712	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3713	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3714	if (processor_alias_table[i].flags & PTA_RDRND
3715	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3716	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3717	if (processor_alias_table[i].flags & PTA_F16C
3718	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3719	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3720	if (processor_alias_table[i].flags & PTA_RTM
3721	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RTM))
3722	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RTM;
3723	if (processor_alias_table[i].flags & PTA_HLE
3724	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_HLE))
3725	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_HLE;
3726	if (processor_alias_table[i].flags & PTA_PRFCHW
3727	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PRFCHW))
3728	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PRFCHW;
3729	if (processor_alias_table[i].flags & PTA_RDSEED
3730	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_RDSEED))
3731	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_RDSEED;
3732	if (processor_alias_table[i].flags & PTA_ADX
3733	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_ADX))
3734	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_ADX;
3735	if (processor_alias_table[i].flags & PTA_FXSR
3736	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_FXSR))
3737	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_FXSR;
3738	if (processor_alias_table[i].flags & PTA_XSAVE
3739	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVE))
3740	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVE;
3741	if (processor_alias_table[i].flags & PTA_XSAVEOPT
3742	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEOPT))
3743	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEOPT;
3744	if (processor_alias_table[i].flags & PTA_AVX512F
3745	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F))
3746	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512F;
3747	if (processor_alias_table[i].flags & PTA_AVX512ER
3748	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512ER))
3749	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512ER;
3750	if (processor_alias_table[i].flags & PTA_AVX512PF
3751	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512PF))
3752	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512PF;
3753	if (processor_alias_table[i].flags & PTA_AVX512CD
3754	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512CD))
3755	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512CD;
3756	if (processor_alias_table[i].flags & PTA_PREFETCHWT1
3757	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PREFETCHWT1))
3758	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PREFETCHWT1;
3759	if (processor_alias_table[i].flags & PTA_PCOMMIT
3760	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PCOMMIT))
3761	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PCOMMIT;
3762	if (processor_alias_table[i].flags & PTA_CLWB
3763	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLWB))
3764	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLWB;
3765	if (processor_alias_table[i].flags & PTA_CLFLUSHOPT
3766	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_CLFLUSHOPT))
3767	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_CLFLUSHOPT;
3768	if (processor_alias_table[i].flags & PTA_XSAVEC
3769	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVEC))
3770	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVEC;
3771	if (processor_alias_table[i].flags & PTA_XSAVES
3772	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_XSAVES))
3773	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_XSAVES;
3774	if (processor_alias_table[i].flags & PTA_AVX512DQ
3775	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512DQ))
3776	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512DQ;
3777	if (processor_alias_table[i].flags & PTA_AVX512BW
3778	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512BW))
3779	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512BW;
3780	if (processor_alias_table[i].flags & PTA_AVX512VL
3781	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VL))
3782	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VL;
3783        if (processor_alias_table[i].flags & PTA_MPX
3784            && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MPX))
3785          opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MPX;
3786	if (processor_alias_table[i].flags & PTA_AVX512VBMI
3787	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512VBMI))
3788	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512VBMI;
3789	if (processor_alias_table[i].flags & PTA_AVX512IFMA
3790	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512IFMA))
3791	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX512IFMA;
3792	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3793	  x86_prefetch_sse = true;
3794	if (processor_alias_table[i].flags & PTA_MWAITX
3795	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
3796	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
3797
3798	break;
3799      }
3800
3801  if (TARGET_X32 && (opts->x_ix86_isa_flags & OPTION_MASK_ISA_MPX))
3802    error ("Intel MPX does not support x32");
3803
3804  if (TARGET_X32 && (ix86_isa_flags & OPTION_MASK_ISA_MPX))
3805    error ("Intel MPX does not support x32");
3806
3807  if (!strcmp (opts->x_ix86_arch_string, "generic"))
3808    error ("generic CPU can be used only for %stune=%s %s",
3809	   prefix, suffix, sw);
3810  else if (!strcmp (opts->x_ix86_arch_string, "intel"))
3811    error ("intel CPU can be used only for %stune=%s %s",
3812	   prefix, suffix, sw);
3813  else if (i == pta_size)
3814    error ("bad value (%s) for %sarch=%s %s",
3815	   opts->x_ix86_arch_string, prefix, suffix, sw);
3816
3817  ix86_arch_mask = 1u << ix86_arch;
3818  for (i = 0; i < X86_ARCH_LAST; ++i)
3819    ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3820
3821  for (i = 0; i < pta_size; i++)
3822    if (! strcmp (opts->x_ix86_tune_string, processor_alias_table[i].name))
3823      {
3824	ix86_schedule = processor_alias_table[i].schedule;
3825	ix86_tune = processor_alias_table[i].processor;
3826	if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3827	  {
3828	    if (!(processor_alias_table[i].flags & PTA_64BIT))
3829	      {
3830		if (ix86_tune_defaulted)
3831		  {
3832		    opts->x_ix86_tune_string = "x86-64";
3833		    for (i = 0; i < pta_size; i++)
3834		      if (! strcmp (opts->x_ix86_tune_string,
3835				    processor_alias_table[i].name))
3836			break;
3837		    ix86_schedule = processor_alias_table[i].schedule;
3838		    ix86_tune = processor_alias_table[i].processor;
3839		  }
3840		else
3841		  error ("CPU you selected does not support x86-64 "
3842			 "instruction set");
3843	      }
3844	  }
3845	/* Intel CPUs have always interpreted SSE prefetch instructions as
3846	   NOPs; so, we can enable SSE prefetch instructions even when
3847	   -mtune (rather than -march) points us to a processor that has them.
3848	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3849	   higher processors.  */
3850	if (TARGET_CMOV
3851	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3852	  x86_prefetch_sse = true;
3853	break;
3854      }
3855
3856  if (ix86_tune_specified && i == pta_size)
3857    error ("bad value (%s) for %stune=%s %s",
3858	   opts->x_ix86_tune_string, prefix, suffix, sw);
3859
3860  set_ix86_tune_features (ix86_tune, opts->x_ix86_dump_tunes);
3861
3862#ifndef USE_IX86_FRAME_POINTER
3863#define USE_IX86_FRAME_POINTER 0
3864#endif
3865
3866#ifndef USE_X86_64_FRAME_POINTER
3867#define USE_X86_64_FRAME_POINTER 0
3868#endif
3869
3870  /* Set the default values for switches whose default depends on TARGET_64BIT
3871     in case they weren't overwritten by command line options.  */
3872  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3873    {
3874      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3875	opts->x_flag_omit_frame_pointer = !USE_X86_64_FRAME_POINTER;
3876      if (opts->x_flag_asynchronous_unwind_tables
3877	  && !opts_set->x_flag_unwind_tables
3878	  && TARGET_64BIT_MS_ABI)
3879	opts->x_flag_unwind_tables = 1;
3880      if (opts->x_flag_asynchronous_unwind_tables == 2)
3881	opts->x_flag_unwind_tables
3882	  = opts->x_flag_asynchronous_unwind_tables = 1;
3883      if (opts->x_flag_pcc_struct_return == 2)
3884	opts->x_flag_pcc_struct_return = 0;
3885    }
3886  else
3887    {
3888      if (opts->x_optimize >= 1 && !opts_set->x_flag_omit_frame_pointer)
3889	opts->x_flag_omit_frame_pointer
3890	  = !(USE_IX86_FRAME_POINTER || opts->x_optimize_size);
3891      if (opts->x_flag_asynchronous_unwind_tables == 2)
3892	opts->x_flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3893      if (opts->x_flag_pcc_struct_return == 2)
3894	opts->x_flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3895    }
3896
3897  ix86_tune_cost = processor_target_table[ix86_tune].cost;
3898  /* TODO: ix86_cost should be chosen at instruction or function granularity
3899     so that for cold code we use size_cost even in !optimize_size compilations.  */
3900  if (opts->x_optimize_size)
3901    ix86_cost = &ix86_size_cost;
3902  else
3903    ix86_cost = ix86_tune_cost;
3904
3905  /* Arrange to set up i386_stack_locals for all functions.  */
3906  init_machine_status = ix86_init_machine_status;
3907
3908  /* Validate -mregparm= value.  */
3909  if (opts_set->x_ix86_regparm)
3910    {
3911      if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3912	warning (0, "-mregparm is ignored in 64-bit mode");
3913      if (opts->x_ix86_regparm > REGPARM_MAX)
3914	{
3915	  error ("-mregparm=%d is not between 0 and %d",
3916		 opts->x_ix86_regparm, REGPARM_MAX);
3917	  opts->x_ix86_regparm = 0;
3918	}
3919    }
3920  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3921    opts->x_ix86_regparm = REGPARM_MAX;
3922
3923  /* Default align_* from the processor table.  */
3924  if (opts->x_align_loops == 0)
3925    {
3926      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
3927      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3928    }
3929  if (opts->x_align_jumps == 0)
3930    {
3931      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
3932      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3933    }
3934  if (opts->x_align_functions == 0)
3935    {
3936      opts->x_align_functions = processor_target_table[ix86_tune].align_func;
3937    }
3938
3939  /* Provide default for -mbranch-cost= value.  */
3940  if (!opts_set->x_ix86_branch_cost)
3941    opts->x_ix86_branch_cost = ix86_tune_cost->branch_cost;
3942
3943  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
3944    {
3945      opts->x_target_flags
3946	|= TARGET_SUBTARGET64_DEFAULT & ~opts_set->x_target_flags;
3947
3948      /* Enable by default the SSE and MMX builtins.  Do allow the user to
3949	 explicitly disable any of these.  In particular, disabling SSE and
3950	 MMX for kernel code is extremely useful.  */
3951      if (!ix86_arch_specified)
3952      opts->x_ix86_isa_flags
3953	|= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3954	     | TARGET_SUBTARGET64_ISA_DEFAULT)
3955            & ~opts->x_ix86_isa_flags_explicit);
3956
3957      if (TARGET_RTD_P (opts->x_target_flags))
3958	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3959    }
3960  else
3961    {
3962      opts->x_target_flags
3963	|= TARGET_SUBTARGET32_DEFAULT & ~opts_set->x_target_flags;
3964
3965      if (!ix86_arch_specified)
3966        opts->x_ix86_isa_flags
3967	  |= TARGET_SUBTARGET32_ISA_DEFAULT & ~opts->x_ix86_isa_flags_explicit;
3968
3969      /* The i386 ABI does not specify a red zone.  It still makes sense to use
3970         one when the programmer takes care to keep the stack from being destroyed.  */
3971      if (!(opts_set->x_target_flags & MASK_NO_RED_ZONE))
3972        opts->x_target_flags |= MASK_NO_RED_ZONE;
3973    }
3974
3975  /* Keep nonleaf frame pointers.  */
3976  if (opts->x_flag_omit_frame_pointer)
3977    opts->x_target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3978  else if (TARGET_OMIT_LEAF_FRAME_POINTER_P (opts->x_target_flags))
3979    opts->x_flag_omit_frame_pointer = 1;
3980
3981  /* If we're doing fast math, we don't care about comparison order
3982     wrt NaNs.  This lets us use a shorter comparison sequence.  */
3983  if (opts->x_flag_finite_math_only)
3984    opts->x_target_flags &= ~MASK_IEEE_FP;
3985
3986  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3987     since the insns won't need emulation.  */
3988  if (ix86_tune_features [X86_TUNE_ALWAYS_FANCY_MATH_387])
3989    opts->x_target_flags &= ~MASK_NO_FANCY_MATH_387;
3990
3991  /* Likewise, if the target doesn't have a 387, or we've specified
3992     software floating point, don't use 387 inline intrinsics.  */
3993  if (!TARGET_80387_P (opts->x_target_flags))
3994    opts->x_target_flags |= MASK_NO_FANCY_MATH_387;
3995
3996  /* Turn on MMX builtins for -msse.  */
3997  if (TARGET_SSE_P (opts->x_ix86_isa_flags))
3998    opts->x_ix86_isa_flags
3999      |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;
4000
4001  /* Enable SSE prefetch.  */
4002  if (TARGET_SSE_P (opts->x_ix86_isa_flags)
4003      || (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
4004    x86_prefetch_sse = true;
4005
4006  /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1.  */
4007  if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
4008      || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
4009    opts->x_ix86_isa_flags
4010      |= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
4011
4012  /* Enable popcnt instruction for -msse4.2 or -mabm.  */
4013  if (TARGET_SSE4_2_P (opts->x_ix86_isa_flags)
4014      || TARGET_ABM_P (opts->x_ix86_isa_flags))
4015    opts->x_ix86_isa_flags
4016      |= OPTION_MASK_ISA_POPCNT & ~opts->x_ix86_isa_flags_explicit;
4017
4018  /* Enable lzcnt instruction for -mabm.  */
4019  if (TARGET_ABM_P(opts->x_ix86_isa_flags))
4020    opts->x_ix86_isa_flags
4021      |= OPTION_MASK_ISA_LZCNT & ~opts->x_ix86_isa_flags_explicit;
4022
4023  /* Validate -mpreferred-stack-boundary= value or default it to
4024     PREFERRED_STACK_BOUNDARY_DEFAULT.  */
4025  ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
4026  if (opts_set->x_ix86_preferred_stack_boundary_arg)
4027    {
4028      int min = (TARGET_64BIT_P (opts->x_ix86_isa_flags)
4029		 ? (TARGET_SSE_P (opts->x_ix86_isa_flags) ? 4 : 3) : 2);
4030      int max = (TARGET_SEH ? 4 : 12);
4031
4032      if (opts->x_ix86_preferred_stack_boundary_arg < min
4033	  || opts->x_ix86_preferred_stack_boundary_arg > max)
4034	{
4035	  if (min == max)
4036	    error ("-mpreferred-stack-boundary is not supported "
4037		   "for this target");
4038	  else
4039	    error ("-mpreferred-stack-boundary=%d is not between %d and %d",
4040		   opts->x_ix86_preferred_stack_boundary_arg, min, max);
4041	}
4042      else
4043	ix86_preferred_stack_boundary
4044	  = (1 << opts->x_ix86_preferred_stack_boundary_arg) * BITS_PER_UNIT;
4045    }
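
  /* A worked example (illustrative only): -mpreferred-stack-boundary=4 passes
     the range check above and gives a preferred boundary of
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. a 16-byte boundary.  */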
4046
4047  /* Set the default value for -mstackrealign.  */
4048  if (opts->x_ix86_force_align_arg_pointer == -1)
4049    opts->x_ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
4050
4051  ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
4052
4053  /* Validate -mincoming-stack-boundary= value or default it to
4054     MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY.  */
4055  ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
4056  if (opts_set->x_ix86_incoming_stack_boundary_arg)
4057    {
4058      if (opts->x_ix86_incoming_stack_boundary_arg
4059	  < (TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2)
4060	  || opts->x_ix86_incoming_stack_boundary_arg > 12)
4061	error ("-mincoming-stack-boundary=%d is not between %d and 12",
4062	       opts->x_ix86_incoming_stack_boundary_arg,
4063	       TARGET_64BIT_P (opts->x_ix86_isa_flags) ? 3 : 2);
4064      else
4065	{
4066	  ix86_user_incoming_stack_boundary
4067	    = (1 << opts->x_ix86_incoming_stack_boundary_arg) * BITS_PER_UNIT;
4068	  ix86_incoming_stack_boundary
4069	    = ix86_user_incoming_stack_boundary;
4070	}
4071    }
4072
4073#ifndef NO_PROFILE_COUNTERS
4074  if (flag_nop_mcount)
4075    error ("-mnop-mcount is not compatible with this target");
4076#endif
4077  if (flag_nop_mcount && flag_pic)
4078    error ("-mnop-mcount is not implemented for -fPIC");
4079
4080  /* Accept -msseregparm only if at least SSE support is enabled.  */
4081  if (TARGET_SSEREGPARM_P (opts->x_target_flags)
4082      && ! TARGET_SSE_P (opts->x_ix86_isa_flags))
4083    error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
4084
4085  if (opts_set->x_ix86_fpmath)
4086    {
4087      if (opts->x_ix86_fpmath & FPMATH_SSE)
4088	{
4089	  if (!TARGET_SSE_P (opts->x_ix86_isa_flags))
4090	    {
4091	      warning (0, "SSE instruction set disabled, using 387 arithmetics");
4092	      opts->x_ix86_fpmath = FPMATH_387;
4093	    }
4094	  else if ((opts->x_ix86_fpmath & FPMATH_387)
4095		   && !TARGET_80387_P (opts->x_target_flags))
4096	    {
4097	      warning (0, "387 instruction set disabled, using SSE arithmetics");
4098	      opts->x_ix86_fpmath = FPMATH_SSE;
4099	    }
4100	}
4101    }
4102  /* For all chips supporting SSE2, -mfpmath=sse performs better than
4103     -mfpmath=387.  The latter is nevertheless the default on many targets,
4104     since the extra 80-bit precision of temporaries is considered part of
4105     the ABI.  Overwrite the default at least for -ffast-math.
4106     TODO: -mfpmath=both seems to produce code of the same performance with
4107     slightly smaller binaries.  It is however not clear whether register
4108     allocation is ready for this setting.
4109     Also, -mfpmath=387 codegen is overall a lot more compact (about 4-5%)
4110     than SSE codegen.  We may switch to 387 with -ffast-math for
4111     size-optimized functions. */
4112  else if (fast_math_flags_set_p (&global_options)
4113	   && TARGET_SSE2_P (opts->x_ix86_isa_flags))
4114    opts->x_ix86_fpmath = FPMATH_SSE;
4115  else
4116    opts->x_ix86_fpmath = TARGET_FPMATH_DEFAULT_P (opts->x_ix86_isa_flags);
4117
4118  /* If the i387 is disabled, then do not return values in it. */
4119  if (!TARGET_80387_P (opts->x_target_flags))
4120    opts->x_target_flags &= ~MASK_FLOAT_RETURNS;
4121
4122  /* Use external vectorized library in vectorizing intrinsics.  */
4123  if (opts_set->x_ix86_veclibabi_type)
4124    switch (opts->x_ix86_veclibabi_type)
4125      {
4126      case ix86_veclibabi_type_svml:
4127	ix86_veclib_handler = ix86_veclibabi_svml;
4128	break;
4129
4130      case ix86_veclibabi_type_acml:
4131	ix86_veclib_handler = ix86_veclibabi_acml;
4132	break;
4133
4134      default:
4135	gcc_unreachable ();
4136      }
4137
4138  if (ix86_tune_features [X86_TUNE_ACCUMULATE_OUTGOING_ARGS]
4139      && !(opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4140    opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4141
4142  /* If stack probes are required, the space used for large function
4143     arguments on the stack must also be probed, so enable
4144     -maccumulate-outgoing-args so this happens in the prologue.  */
4145  if (TARGET_STACK_PROBE_P (opts->x_target_flags)
4146      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4147    {
4148      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4149	warning (0, "stack probing requires %saccumulate-outgoing-args%s "
4150		 "for correctness", prefix, suffix);
4151      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4152    }
4153
4154  /* Stack realignment without -maccumulate-outgoing-args requires %ebp,
4155     so enable -maccumulate-outgoing-args when %ebp is fixed.  */
4156  if (fixed_regs[BP_REG]
4157      && !(opts->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
4158    {
4159      if (opts_set->x_target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)
4160	warning (0, "fixed ebp register requires %saccumulate-outgoing-args%s",
4161		 prefix, suffix);
4162      opts->x_target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
4163    }
4164
4165  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
4166  {
4167    char *p;
4168    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
4169    p = strchr (internal_label_prefix, 'X');
4170    internal_label_prefix_len = p - internal_label_prefix;
4171    *p = '\0';
4172  }
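
  /* Illustrative note (target dependent, not guaranteed): on a typical ELF
     target ASM_GENERATE_INTERNAL_LABEL is assumed to expand to something like
     "*.LX0" above; the 'X' is then found at offset 3, so internal_label_prefix
     becomes "*.L" and internal_label_prefix_len is 3.  */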
4173
4174  /* When the scheduling description is not available, disable the scheduler
4175     pass so it won't slow down the compilation and make x87 code slower.  */
4176  if (!TARGET_SCHEDULE)
4177    opts->x_flag_schedule_insns_after_reload = opts->x_flag_schedule_insns = 0;
4178
4179  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4180			 ix86_tune_cost->simultaneous_prefetches,
4181			 opts->x_param_values,
4182			 opts_set->x_param_values);
4183  maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4184			 ix86_tune_cost->prefetch_block,
4185			 opts->x_param_values,
4186			 opts_set->x_param_values);
4187  maybe_set_param_value (PARAM_L1_CACHE_SIZE,
4188			 ix86_tune_cost->l1_cache_size,
4189			 opts->x_param_values,
4190			 opts_set->x_param_values);
4191  maybe_set_param_value (PARAM_L2_CACHE_SIZE,
4192			 ix86_tune_cost->l2_cache_size,
4193			 opts->x_param_values,
4194			 opts_set->x_param_values);
4195
4196  /* Enable software prefetching at -O3 for CPUs where prefetching is helpful.  */
4197  if (opts->x_flag_prefetch_loop_arrays < 0
4198      && HAVE_prefetch
4199      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
4200      && TARGET_SOFTWARE_PREFETCHING_BENEFICIAL)
4201    opts->x_flag_prefetch_loop_arrays = 1;
4202
4203  /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
4204     can be optimized to ap = __builtin_next_arg (0).  */
4205  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && !opts->x_flag_split_stack)
4206    targetm.expand_builtin_va_start = NULL;
4207
4208  if (TARGET_64BIT_P (opts->x_ix86_isa_flags))
4209    {
4210      ix86_gen_leave = gen_leave_rex64;
4211      if (Pmode == DImode)
4212	{
4213	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_di;
4214	  ix86_gen_tls_local_dynamic_base_64
4215	    = gen_tls_local_dynamic_base_64_di;
4216	}
4217      else
4218	{
4219	  ix86_gen_tls_global_dynamic_64 = gen_tls_global_dynamic_64_si;
4220	  ix86_gen_tls_local_dynamic_base_64
4221	    = gen_tls_local_dynamic_base_64_si;
4222	}
4223    }
4224  else
4225    ix86_gen_leave = gen_leave;
4226
4227  if (Pmode == DImode)
4228    {
4229      ix86_gen_add3 = gen_adddi3;
4230      ix86_gen_sub3 = gen_subdi3;
4231      ix86_gen_sub3_carry = gen_subdi3_carry;
4232      ix86_gen_one_cmpl2 = gen_one_cmpldi2;
4233      ix86_gen_andsp = gen_anddi3;
4234      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_di;
4235      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
4236      ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
4237      ix86_gen_monitor = gen_sse3_monitor_di;
4238      ix86_gen_monitorx = gen_monitorx_di;
4239    }
4240  else
4241    {
4242      ix86_gen_add3 = gen_addsi3;
4243      ix86_gen_sub3 = gen_subsi3;
4244      ix86_gen_sub3_carry = gen_subsi3_carry;
4245      ix86_gen_one_cmpl2 = gen_one_cmplsi2;
4246      ix86_gen_andsp = gen_andsi3;
4247      ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_probe_si;
4248      ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
4249      ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
4250      ix86_gen_monitor = gen_sse3_monitor_si;
4251      ix86_gen_monitorx = gen_monitorx_si;
4252    }
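
  /* Note (a reading of the Pmode checks above, not an exhaustive list of
     configurations): both legs are reachable in 64-bit mode; e.g. for the x32
     ABI Pmode is assumed to be SImode, so the *_si generators are selected
     even though TARGET_64BIT holds.  */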
4253
4254#ifdef USE_IX86_CLD
4255  /* Use -mcld by default for 32-bit code if configured with --enable-cld.  */
4256  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags))
4257    opts->x_target_flags |= MASK_CLD & ~opts_set->x_target_flags;
4258#endif
4259
4260  if (!TARGET_64BIT_P (opts->x_ix86_isa_flags) && opts->x_flag_pic)
4261    {
4262      if (opts->x_flag_fentry > 0)
4263        sorry ("-mfentry isn%'t supported for 32-bit in combination "
4264	       "with -fpic");
4265      opts->x_flag_fentry = 0;
4266    }
4267  else if (TARGET_SEH)
4268    {
4269      if (opts->x_flag_fentry == 0)
4270	sorry ("-mno-fentry isn%'t compatible with SEH");
4271      opts->x_flag_fentry = 1;
4272    }
4273  else if (opts->x_flag_fentry < 0)
4274   {
4275#if defined(PROFILE_BEFORE_PROLOGUE)
4276     opts->x_flag_fentry = 1;
4277#else
4278     opts->x_flag_fentry = 0;
4279#endif
4280   }
4281
4282  if (!(opts_set->x_target_flags & MASK_VZEROUPPER))
4283    opts->x_target_flags |= MASK_VZEROUPPER;
4284  if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_LOAD_OPTIMAL]
4285      && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_LOAD))
4286    opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_LOAD;
4287  if (!ix86_tune_features[X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL]
4288      && !(opts_set->x_target_flags & MASK_AVX256_SPLIT_UNALIGNED_STORE))
4289    opts->x_target_flags |= MASK_AVX256_SPLIT_UNALIGNED_STORE;
4290  /* Enable 128-bit AVX instruction generation
4291     for the auto-vectorizer.  */
4292  if (TARGET_AVX128_OPTIMAL
4293      && !(opts_set->x_target_flags & MASK_PREFER_AVX128))
4294    opts->x_target_flags |= MASK_PREFER_AVX128;
4295
4296  if (opts->x_ix86_recip_name)
4297    {
4298      char *p = ASTRDUP (opts->x_ix86_recip_name);
4299      char *q;
4300      unsigned int mask, i;
4301      bool invert;
4302
4303      while ((q = strtok (p, ",")) != NULL)
4304	{
4305	  p = NULL;
4306	  if (*q == '!')
4307	    {
4308	      invert = true;
4309	      q++;
4310	    }
4311	  else
4312	    invert = false;
4313
4314	  if (!strcmp (q, "default"))
4315	    mask = RECIP_MASK_ALL;
4316	  else
4317	    {
4318	      for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4319		if (!strcmp (q, recip_options[i].string))
4320		  {
4321		    mask = recip_options[i].mask;
4322		    break;
4323		  }
4324
4325	      if (i == ARRAY_SIZE (recip_options))
4326		{
4327		  error ("unknown option for -mrecip=%s", q);
4328		  invert = false;
4329		  mask = RECIP_MASK_NONE;
4330		}
4331	    }
4332
4333	  opts->x_recip_mask_explicit |= mask;
4334	  if (invert)
4335	    opts->x_recip_mask &= ~mask;
4336	  else
4337	    opts->x_recip_mask |= mask;
4338	}
4339    }
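
  /* Illustrative example (assuming "all" and "sqrt" are among the
     recip_options entries): "-mrecip=all,!sqrt" first sets every RECIP_MASK_*
     bit in opts->x_recip_mask and then clears the sqrt bit, recording both
     masks in opts->x_recip_mask_explicit along the way.  */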
4340
4341  if (TARGET_RECIP_P (opts->x_target_flags))
4342    opts->x_recip_mask |= RECIP_MASK_ALL & ~opts->x_recip_mask_explicit;
4343  else if (opts_set->x_target_flags & MASK_RECIP)
4344    opts->x_recip_mask &= ~(RECIP_MASK_ALL & ~opts->x_recip_mask_explicit);
4345
4346  /* Default long double to 64-bit for 32-bit Bionic and to __float128
4347     for 64-bit Bionic.  */
4348  if (TARGET_HAS_BIONIC
4349      && !(opts_set->x_target_flags
4350	   & (MASK_LONG_DOUBLE_64 | MASK_LONG_DOUBLE_128)))
4351    opts->x_target_flags |= (TARGET_64BIT
4352			     ? MASK_LONG_DOUBLE_128
4353			     : MASK_LONG_DOUBLE_64);
4354
4355  /* Only one of them can be active.  */
4356  gcc_assert ((opts->x_target_flags & MASK_LONG_DOUBLE_64) == 0
4357	      || (opts->x_target_flags & MASK_LONG_DOUBLE_128) == 0);
4358
4359  /* Save the initial options in case the user does function specific
4360     options.  */
4361  if (main_args_p)
4362    target_option_default_node = target_option_current_node
4363      = build_target_option_node (opts);
4364
4365  /* Handle stack protector */
4366  if (!opts_set->x_ix86_stack_protector_guard)
4367    opts->x_ix86_stack_protector_guard
4368      = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
4369
4370  /* Handle -mmemcpy-strategy= and -mmemset-strategy=  */
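  /* Format reminder (hedged; the exact grammar is defined by
     ix86_parse_stringop_strategy_string and the option documentation): the
     strategy string is a comma-separated list of alg:max_size:dest_align
     triplets, e.g. "unrolled_loop:256:noalign,libcall:-1:noalign".  */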
4371  if (opts->x_ix86_tune_memcpy_strategy)
4372    {
4373      char *str = xstrdup (opts->x_ix86_tune_memcpy_strategy);
4374      ix86_parse_stringop_strategy_string (str, false);
4375      free (str);
4376    }
4377
4378  if (opts->x_ix86_tune_memset_strategy)
4379    {
4380      char *str = xstrdup (opts->x_ix86_tune_memset_strategy);
4381      ix86_parse_stringop_strategy_string (str, true);
4382      free (str);
4383    }
4384}
4385
4386/* Implement the TARGET_OPTION_OVERRIDE hook.  */
4387
4388static void
4389ix86_option_override (void)
4390{
4391  opt_pass *pass_insert_vzeroupper = make_pass_insert_vzeroupper (g);
4392  struct register_pass_info insert_vzeroupper_info
4393    = { pass_insert_vzeroupper, "reload",
4394	1, PASS_POS_INSERT_AFTER
4395      };
4396
4397  ix86_option_override_internal (true, &global_options, &global_options_set);
4398
4399
4400  /* This needs to be done at start up.  It's convenient to do it here.  */
4401  register_pass (&insert_vzeroupper_info);
4402}
4403
4404/* Implement the TARGET_OFFLOAD_OPTIONS hook.  */
4405static char *
4406ix86_offload_options (void)
4407{
4408  if (TARGET_LP64)
4409    return xstrdup ("-foffload-abi=lp64");
4410  return xstrdup ("-foffload-abi=ilp32");
4411}
4412
4413/* Update register usage after having seen the compiler flags.  */
4414
4415static void
4416ix86_conditional_register_usage (void)
4417{
4418  int i, c_mask;
4419
4420  /* For 32-bit targets, squash the REX registers.  */
4421  if (! TARGET_64BIT)
4422    {
4423      for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
4424	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4425      for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
4426	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4427      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4428	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4429    }
4430
4431  /*  See the definition of CALL_USED_REGISTERS in i386.h.  */
4432  c_mask = (TARGET_64BIT_MS_ABI ? (1 << 3)
4433	    : TARGET_64BIT ? (1 << 2)
4434	    : (1 << 1));
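
  /* A reading of the encoding (it follows the c_mask values above): each
     CALL_USED_REGISTERS entry greater than 1 is a small bitmask of ABIs, and
     the loop below keeps a register call-used only when its bit for the
     selected ABI (1 << 1 for 32-bit, 1 << 2 for 64-bit SysV, 1 << 3 for
     64-bit MS) is set.  */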
4435
4436  CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
4437
4438  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4439    {
4440      /* Set/reset conditionally defined registers from
4441	 CALL_USED_REGISTERS initializer.  */
4442      if (call_used_regs[i] > 1)
4443	call_used_regs[i] = !!(call_used_regs[i] & c_mask);
4444
4445      /* Build the CLOBBERED_REGS register set as the call-used registers
4446	 from the GENERAL_REGS register set.  */
4447      if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
4448	  && call_used_regs[i])
4449	SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
4450    }
4451
4452  /* If MMX is disabled, squash the registers.  */
4453  if (! TARGET_MMX)
4454    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4455      if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
4456	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4457
4458  /* If SSE is disabled, squash the registers.  */
4459  if (! TARGET_SSE)
4460    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4461      if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
4462	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4463
4464  /* If the FPU is disabled, squash the registers.  */
4465  if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
4466    for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4467      if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
4468	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4469
4470  /* If AVX512F is disabled, squash the registers.  */
4471  if (! TARGET_AVX512F)
4472    {
4473      for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
4474	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4475
4476      for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
4477	fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4478    }
4479
4480  /* If MPX is disabled, squash the registers.  */
4481  if (! TARGET_MPX)
4482    for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
4483      fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
4484}
4485
4486
4487/* Save the current options */
4488
4489static void
4490ix86_function_specific_save (struct cl_target_option *ptr,
4491			     struct gcc_options *opts)
4492{
4493  ptr->arch = ix86_arch;
4494  ptr->schedule = ix86_schedule;
4495  ptr->prefetch_sse = x86_prefetch_sse;
4496  ptr->tune = ix86_tune;
4497  ptr->branch_cost = ix86_branch_cost;
4498  ptr->tune_defaulted = ix86_tune_defaulted;
4499  ptr->arch_specified = ix86_arch_specified;
4500  ptr->x_ix86_isa_flags_explicit = opts->x_ix86_isa_flags_explicit;
4501  ptr->x_ix86_target_flags_explicit = opts->x_ix86_target_flags_explicit;
4502  ptr->x_recip_mask_explicit = opts->x_recip_mask_explicit;
4503  ptr->x_ix86_arch_string = opts->x_ix86_arch_string;
4504  ptr->x_ix86_tune_string = opts->x_ix86_tune_string;
4505  ptr->x_ix86_cmodel = opts->x_ix86_cmodel;
4506  ptr->x_ix86_abi = opts->x_ix86_abi;
4507  ptr->x_ix86_asm_dialect = opts->x_ix86_asm_dialect;
4508  ptr->x_ix86_branch_cost = opts->x_ix86_branch_cost;
4509  ptr->x_ix86_dump_tunes = opts->x_ix86_dump_tunes;
4510  ptr->x_ix86_force_align_arg_pointer = opts->x_ix86_force_align_arg_pointer;
4511  ptr->x_ix86_force_drap = opts->x_ix86_force_drap;
4512  ptr->x_ix86_incoming_stack_boundary_arg = opts->x_ix86_incoming_stack_boundary_arg;
4513  ptr->x_ix86_pmode = opts->x_ix86_pmode;
4514  ptr->x_ix86_preferred_stack_boundary_arg = opts->x_ix86_preferred_stack_boundary_arg;
4515  ptr->x_ix86_recip_name = opts->x_ix86_recip_name;
4516  ptr->x_ix86_regparm = opts->x_ix86_regparm;
4517  ptr->x_ix86_section_threshold = opts->x_ix86_section_threshold;
4518  ptr->x_ix86_sse2avx = opts->x_ix86_sse2avx;
4519  ptr->x_ix86_stack_protector_guard = opts->x_ix86_stack_protector_guard;
4520  ptr->x_ix86_stringop_alg = opts->x_ix86_stringop_alg;
4521  ptr->x_ix86_tls_dialect = opts->x_ix86_tls_dialect;
4522  ptr->x_ix86_tune_ctrl_string = opts->x_ix86_tune_ctrl_string;
4523  ptr->x_ix86_tune_memcpy_strategy = opts->x_ix86_tune_memcpy_strategy;
4524  ptr->x_ix86_tune_memset_strategy = opts->x_ix86_tune_memset_strategy;
4525  ptr->x_ix86_tune_no_default = opts->x_ix86_tune_no_default;
4526  ptr->x_ix86_veclibabi_type = opts->x_ix86_veclibabi_type;
4527
4528  /* The fields are char but the variables are not; make sure the
4529     values fit in the fields.  */
4530  gcc_assert (ptr->arch == ix86_arch);
4531  gcc_assert (ptr->schedule == ix86_schedule);
4532  gcc_assert (ptr->tune == ix86_tune);
4533  gcc_assert (ptr->branch_cost == ix86_branch_cost);
4534}
4535
4536/* Restore the current options */
4537
4538static void
4539ix86_function_specific_restore (struct gcc_options *opts,
4540				struct cl_target_option *ptr)
4541{
4542  enum processor_type old_tune = ix86_tune;
4543  enum processor_type old_arch = ix86_arch;
4544  unsigned int ix86_arch_mask;
4545  int i;
4546
4547  /* We don't change -fPIC.  */
4548  opts->x_flag_pic = flag_pic;
4549
4550  ix86_arch = (enum processor_type) ptr->arch;
4551  ix86_schedule = (enum attr_cpu) ptr->schedule;
4552  ix86_tune = (enum processor_type) ptr->tune;
4553  x86_prefetch_sse = ptr->prefetch_sse;
4554  opts->x_ix86_branch_cost = ptr->branch_cost;
4555  ix86_tune_defaulted = ptr->tune_defaulted;
4556  ix86_arch_specified = ptr->arch_specified;
4557  opts->x_ix86_isa_flags_explicit = ptr->x_ix86_isa_flags_explicit;
4558  opts->x_ix86_target_flags_explicit = ptr->x_ix86_target_flags_explicit;
4559  opts->x_recip_mask_explicit = ptr->x_recip_mask_explicit;
4560  opts->x_ix86_arch_string = ptr->x_ix86_arch_string;
4561  opts->x_ix86_tune_string = ptr->x_ix86_tune_string;
4562  opts->x_ix86_cmodel = ptr->x_ix86_cmodel;
4563  opts->x_ix86_abi = ptr->x_ix86_abi;
4564  opts->x_ix86_asm_dialect = ptr->x_ix86_asm_dialect;
4565  opts->x_ix86_branch_cost = ptr->x_ix86_branch_cost;
4566  opts->x_ix86_dump_tunes = ptr->x_ix86_dump_tunes;
4567  opts->x_ix86_force_align_arg_pointer = ptr->x_ix86_force_align_arg_pointer;
4568  opts->x_ix86_force_drap = ptr->x_ix86_force_drap;
4569  opts->x_ix86_incoming_stack_boundary_arg = ptr->x_ix86_incoming_stack_boundary_arg;
4570  opts->x_ix86_pmode = ptr->x_ix86_pmode;
4571  opts->x_ix86_preferred_stack_boundary_arg = ptr->x_ix86_preferred_stack_boundary_arg;
4572  opts->x_ix86_recip_name = ptr->x_ix86_recip_name;
4573  opts->x_ix86_regparm = ptr->x_ix86_regparm;
4574  opts->x_ix86_section_threshold = ptr->x_ix86_section_threshold;
4575  opts->x_ix86_sse2avx = ptr->x_ix86_sse2avx;
4576  opts->x_ix86_stack_protector_guard = ptr->x_ix86_stack_protector_guard;
4577  opts->x_ix86_stringop_alg = ptr->x_ix86_stringop_alg;
4578  opts->x_ix86_tls_dialect = ptr->x_ix86_tls_dialect;
4579  opts->x_ix86_tune_ctrl_string = ptr->x_ix86_tune_ctrl_string;
4580  opts->x_ix86_tune_memcpy_strategy = ptr->x_ix86_tune_memcpy_strategy;
4581  opts->x_ix86_tune_memset_strategy = ptr->x_ix86_tune_memset_strategy;
4582  opts->x_ix86_tune_no_default = ptr->x_ix86_tune_no_default;
4583  opts->x_ix86_veclibabi_type = ptr->x_ix86_veclibabi_type;
4584  ix86_tune_cost = processor_target_table[ix86_tune].cost;
4585  /* TODO: ix86_cost should be chosen at instruction or function granularity,
4586     so that for cold code we use size_cost even in !optimize_size compilation.  */
4587  if (opts->x_optimize_size)
4588    ix86_cost = &ix86_size_cost;
4589  else
4590    ix86_cost = ix86_tune_cost;
4591
4592  /* Recreate the arch feature tests if the arch changed */
4593  if (old_arch != ix86_arch)
4594    {
4595      ix86_arch_mask = 1u << ix86_arch;
4596      for (i = 0; i < X86_ARCH_LAST; ++i)
4597	ix86_arch_features[i]
4598	  = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
4599    }
4600
4601  /* Recreate the tune optimization tests */
4602  if (old_tune != ix86_tune)
4603    set_ix86_tune_features (ix86_tune, false);
4604}
4605
4606/* Adjust target options after streaming them in.  This is mainly about
4607   reconciling them with global options.  */
4608
4609static void
4610ix86_function_specific_post_stream_in (struct cl_target_option *ptr)
4611{
4612  /* flag_pic is a global option, but ix86_cmodel is a target-saved option
4613     partly computed from flag_pic.  If flag_pic is on, adjust x_ix86_cmodel
4614     for PIC, or error out.  */
4615  if (flag_pic)
4616    switch (ptr->x_ix86_cmodel)
4617      {
4618      case CM_SMALL:
4619	ptr->x_ix86_cmodel = CM_SMALL_PIC;
4620	break;
4621
4622      case CM_MEDIUM:
4623	ptr->x_ix86_cmodel = CM_MEDIUM_PIC;
4624	break;
4625
4626      case CM_LARGE:
4627	ptr->x_ix86_cmodel = CM_LARGE_PIC;
4628	break;
4629
4630      case CM_KERNEL:
4631	error ("code model %s does not support PIC mode", "kernel");
4632	break;
4633
4634      default:
4635	break;
4636      }
4637  else
4638    switch (ptr->x_ix86_cmodel)
4639      {
4640      case CM_SMALL_PIC:
4641	ptr->x_ix86_cmodel = CM_SMALL;
4642	break;
4643
4644      case CM_MEDIUM_PIC:
4645	ptr->x_ix86_cmodel = CM_MEDIUM;
4646	break;
4647
4648      case CM_LARGE_PIC:
4649	ptr->x_ix86_cmodel = CM_LARGE;
4650	break;
4651
4652      default:
4653	break;
4654      }
4655}
4656
4657/* Print the current options */
4658
4659static void
4660ix86_function_specific_print (FILE *file, int indent,
4661			      struct cl_target_option *ptr)
4662{
4663  char *target_string
4664    = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_target_flags,
4665			  NULL, NULL, ptr->x_ix86_fpmath, false);
4666
4667  gcc_assert (ptr->arch < PROCESSOR_max);
4668  fprintf (file, "%*sarch = %d (%s)\n",
4669	   indent, "",
4670	   ptr->arch, processor_target_table[ptr->arch].name);
4671
4672  gcc_assert (ptr->tune < PROCESSOR_max);
4673  fprintf (file, "%*stune = %d (%s)\n",
4674	   indent, "",
4675	   ptr->tune, processor_target_table[ptr->tune].name);
4676
4677  fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
4678
4679  if (target_string)
4680    {
4681      fprintf (file, "%*s%s\n", indent, "", target_string);
4682      free (target_string);
4683    }
4684}
4685
4686
4687/* Inner function to process the attribute((target(...))), take an argument and
4688   set the current options from the argument. If we have a list, recursively go
4689   over the list.  */
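
/* A purely illustrative use of the attribute handled here:

     __attribute__((target("sse4.2,no-avx,arch=core2")))
     int foo (int x);

   "sse4.2" and "avx" are ISA options from the table below (the "no-" prefix
   negates), while "arch=" is a string option; the items are split on
   commas.  */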
4690
4691static bool
4692ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
4693				     struct gcc_options *opts,
4694				     struct gcc_options *opts_set,
4695				     struct gcc_options *enum_opts_set)
4696{
4697  char *next_optstr;
4698  bool ret = true;
4699
4700#define IX86_ATTR_ISA(S,O)   { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
4701#define IX86_ATTR_STR(S,O)   { S, sizeof (S)-1, ix86_opt_str, O, 0 }
4702#define IX86_ATTR_ENUM(S,O)  { S, sizeof (S)-1, ix86_opt_enum, O, 0 }
4703#define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
4704#define IX86_ATTR_NO(S,O,M)  { S, sizeof (S)-1, ix86_opt_no,  O, M }
4705
4706  enum ix86_opt_type
4707  {
4708    ix86_opt_unknown,
4709    ix86_opt_yes,
4710    ix86_opt_no,
4711    ix86_opt_str,
4712    ix86_opt_enum,
4713    ix86_opt_isa
4714  };
4715
4716  static const struct
4717  {
4718    const char *string;
4719    size_t len;
4720    enum ix86_opt_type type;
4721    int opt;
4722    int mask;
4723  } attrs[] = {
4724    /* isa options */
4725    IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
4726    IX86_ATTR_ISA ("abm",	OPT_mabm),
4727    IX86_ATTR_ISA ("bmi",	OPT_mbmi),
4728    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
4729    IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
4730    IX86_ATTR_ISA ("tbm",	OPT_mtbm),
4731    IX86_ATTR_ISA ("aes",	OPT_maes),
4732    IX86_ATTR_ISA ("sha",	OPT_msha),
4733    IX86_ATTR_ISA ("avx",	OPT_mavx),
4734    IX86_ATTR_ISA ("avx2",	OPT_mavx2),
4735    IX86_ATTR_ISA ("avx512f",	OPT_mavx512f),
4736    IX86_ATTR_ISA ("avx512pf",	OPT_mavx512pf),
4737    IX86_ATTR_ISA ("avx512er",	OPT_mavx512er),
4738    IX86_ATTR_ISA ("avx512cd",	OPT_mavx512cd),
4739    IX86_ATTR_ISA ("avx512dq",	OPT_mavx512dq),
4740    IX86_ATTR_ISA ("avx512bw",	OPT_mavx512bw),
4741    IX86_ATTR_ISA ("avx512vl",	OPT_mavx512vl),
4742    IX86_ATTR_ISA ("mmx",	OPT_mmmx),
4743    IX86_ATTR_ISA ("pclmul",	OPT_mpclmul),
4744    IX86_ATTR_ISA ("popcnt",	OPT_mpopcnt),
4745    IX86_ATTR_ISA ("sse",	OPT_msse),
4746    IX86_ATTR_ISA ("sse2",	OPT_msse2),
4747    IX86_ATTR_ISA ("sse3",	OPT_msse3),
4748    IX86_ATTR_ISA ("sse4",	OPT_msse4),
4749    IX86_ATTR_ISA ("sse4.1",	OPT_msse4_1),
4750    IX86_ATTR_ISA ("sse4.2",	OPT_msse4_2),
4751    IX86_ATTR_ISA ("sse4a",	OPT_msse4a),
4752    IX86_ATTR_ISA ("ssse3",	OPT_mssse3),
4753    IX86_ATTR_ISA ("fma4",	OPT_mfma4),
4754    IX86_ATTR_ISA ("fma",	OPT_mfma),
4755    IX86_ATTR_ISA ("xop",	OPT_mxop),
4756    IX86_ATTR_ISA ("lwp",	OPT_mlwp),
4757    IX86_ATTR_ISA ("fsgsbase",	OPT_mfsgsbase),
4758    IX86_ATTR_ISA ("rdrnd",	OPT_mrdrnd),
4759    IX86_ATTR_ISA ("f16c",	OPT_mf16c),
4760    IX86_ATTR_ISA ("rtm",	OPT_mrtm),
4761    IX86_ATTR_ISA ("hle",	OPT_mhle),
4762    IX86_ATTR_ISA ("prfchw",	OPT_mprfchw),
4763    IX86_ATTR_ISA ("rdseed",	OPT_mrdseed),
4764    IX86_ATTR_ISA ("adx",	OPT_madx),
4765    IX86_ATTR_ISA ("fxsr",	OPT_mfxsr),
4766    IX86_ATTR_ISA ("xsave",	OPT_mxsave),
4767    IX86_ATTR_ISA ("xsaveopt",	OPT_mxsaveopt),
4768    IX86_ATTR_ISA ("prefetchwt1", OPT_mprefetchwt1),
4769    IX86_ATTR_ISA ("clflushopt",	OPT_mclflushopt),
4770    IX86_ATTR_ISA ("xsavec",	OPT_mxsavec),
4771    IX86_ATTR_ISA ("xsaves",	OPT_mxsaves),
4772    IX86_ATTR_ISA ("avx512vbmi",	OPT_mavx512vbmi),
4773    IX86_ATTR_ISA ("avx512ifma",	OPT_mavx512ifma),
4774    IX86_ATTR_ISA ("clwb",	OPT_mclwb),
4775    IX86_ATTR_ISA ("pcommit",	OPT_mpcommit),
4776    IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),
4777
4778    /* enum options */
4779    IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
4780
4781    /* string options */
4782    IX86_ATTR_STR ("arch=",	IX86_FUNCTION_SPECIFIC_ARCH),
4783    IX86_ATTR_STR ("tune=",	IX86_FUNCTION_SPECIFIC_TUNE),
4784
4785    /* flag options */
4786    IX86_ATTR_YES ("cld",
4787		   OPT_mcld,
4788		   MASK_CLD),
4789
4790    IX86_ATTR_NO ("fancy-math-387",
4791		  OPT_mfancy_math_387,
4792		  MASK_NO_FANCY_MATH_387),
4793
4794    IX86_ATTR_YES ("ieee-fp",
4795		   OPT_mieee_fp,
4796		   MASK_IEEE_FP),
4797
4798    IX86_ATTR_YES ("inline-all-stringops",
4799		   OPT_minline_all_stringops,
4800		   MASK_INLINE_ALL_STRINGOPS),
4801
4802    IX86_ATTR_YES ("inline-stringops-dynamically",
4803		   OPT_minline_stringops_dynamically,
4804		   MASK_INLINE_STRINGOPS_DYNAMICALLY),
4805
4806    IX86_ATTR_NO ("align-stringops",
4807		  OPT_mno_align_stringops,
4808		  MASK_NO_ALIGN_STRINGOPS),
4809
4810    IX86_ATTR_YES ("recip",
4811		   OPT_mrecip,
4812		   MASK_RECIP),
4813
4814  };
4815
4816  /* If this is a list, recurse to get the options.  */
4817  if (TREE_CODE (args) == TREE_LIST)
4818    {
4819      bool ret = true;
4820
4821      for (; args; args = TREE_CHAIN (args))
4822	if (TREE_VALUE (args)
4823	    && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args),
4824						     p_strings, opts, opts_set,
4825						     enum_opts_set))
4826	  ret = false;
4827
4828      return ret;
4829    }
4830
4831  else if (TREE_CODE (args) != STRING_CST)
4832    {
4833      error ("attribute %<target%> argument not a string");
4834      return false;
4835    }
4836
4837  /* Handle multiple arguments separated by commas.  */
4838  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
4839
4840  while (next_optstr && *next_optstr != '\0')
4841    {
4842      char *p = next_optstr;
4843      char *orig_p = p;
4844      char *comma = strchr (next_optstr, ',');
4845      const char *opt_string;
4846      size_t len, opt_len;
4847      int opt;
4848      bool opt_set_p;
4849      char ch;
4850      unsigned i;
4851      enum ix86_opt_type type = ix86_opt_unknown;
4852      int mask = 0;
4853
4854      if (comma)
4855	{
4856	  *comma = '\0';
4857	  len = comma - next_optstr;
4858	  next_optstr = comma + 1;
4859	}
4860      else
4861	{
4862	  len = strlen (p);
4863	  next_optstr = NULL;
4864	}
4865
4866      /* Recognize no-xxx.  */
4867      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4868	{
4869	  opt_set_p = false;
4870	  p += 3;
4871	  len -= 3;
4872	}
4873      else
4874	opt_set_p = true;
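
      /* For instance (illustrative), at this point the fragment "no-avx2"
	 has opt_set_p == false with P pointing at "avx2", while "fpmath=sse"
	 keeps opt_set_p == true and is matched by prefix against the
	 "fpmath=" entry below.  */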
4875
4876      /* Find the option.  */
4877      ch = *p;
4878      opt = N_OPTS;
4879      for (i = 0; i < ARRAY_SIZE (attrs); i++)
4880	{
4881	  type = attrs[i].type;
4882	  opt_len = attrs[i].len;
4883	  if (ch == attrs[i].string[0]
4884	      && ((type != ix86_opt_str && type != ix86_opt_enum)
4885		  ? len == opt_len
4886		  : len > opt_len)
4887	      && memcmp (p, attrs[i].string, opt_len) == 0)
4888	    {
4889	      opt = attrs[i].opt;
4890	      mask = attrs[i].mask;
4891	      opt_string = attrs[i].string;
4892	      break;
4893	    }
4894	}
4895
4896      /* Process the option.  */
4897      if (opt == N_OPTS)
4898	{
4899	  error ("attribute(target(\"%s\")) is unknown", orig_p);
4900	  ret = false;
4901	}
4902
4903      else if (type == ix86_opt_isa)
4904	{
4905	  struct cl_decoded_option decoded;
4906
4907	  generate_option (opt, NULL, opt_set_p, CL_TARGET, &decoded);
4908	  ix86_handle_option (opts, opts_set,
4909			      &decoded, input_location);
4910	}
4911
4912      else if (type == ix86_opt_yes || type == ix86_opt_no)
4913	{
4914	  if (type == ix86_opt_no)
4915	    opt_set_p = !opt_set_p;
4916
4917	  if (opt_set_p)
4918	    opts->x_target_flags |= mask;
4919	  else
4920	    opts->x_target_flags &= ~mask;
4921	}
4922
4923      else if (type == ix86_opt_str)
4924	{
4925	  if (p_strings[opt])
4926	    {
4927	      error ("option(\"%s\") was already specified", opt_string);
4928	      ret = false;
4929	    }
4930	  else
4931	    p_strings[opt] = xstrdup (p + opt_len);
4932	}
4933
4934      else if (type == ix86_opt_enum)
4935	{
4936	  bool arg_ok;
4937	  int value;
4938
4939	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
4940	  if (arg_ok)
4941	    set_option (opts, enum_opts_set, opt, value,
4942			p + opt_len, DK_UNSPECIFIED, input_location,
4943			global_dc);
4944	  else
4945	    {
4946	      error ("attribute(target(\"%s\")) is unknown", orig_p);
4947	      ret = false;
4948	    }
4949	}
4950
4951      else
4952	gcc_unreachable ();
4953    }
4954
4955  return ret;
4956}
4957
4958/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */
4959
4960tree
4961ix86_valid_target_attribute_tree (tree args,
4962				  struct gcc_options *opts,
4963				  struct gcc_options *opts_set)
4964{
4965  const char *orig_arch_string = opts->x_ix86_arch_string;
4966  const char *orig_tune_string = opts->x_ix86_tune_string;
4967  enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
4968  int orig_tune_defaulted = ix86_tune_defaulted;
4969  int orig_arch_specified = ix86_arch_specified;
4970  char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
4971  tree t = NULL_TREE;
4972  int i;
4973  struct cl_target_option *def
4974    = TREE_TARGET_OPTION (target_option_default_node);
4975  struct gcc_options enum_opts_set;
4976
4977  memset (&enum_opts_set, 0, sizeof (enum_opts_set));
4978
4979  /* Process each of the options on the chain.  */
4980  if (! ix86_valid_target_attribute_inner_p (args, option_strings, opts,
4981					     opts_set, &enum_opts_set))
4982    return error_mark_node;
4983
4984  /* If the changed options are different from the default, rerun
4985     ix86_option_override_internal, and then save the options away.
4986     The string options are attribute options, and will be undone
4987     when we copy the save structure.  */
4988  if (opts->x_ix86_isa_flags != def->x_ix86_isa_flags
4989      || opts->x_target_flags != def->x_target_flags
4990      || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4991      || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4992      || enum_opts_set.x_ix86_fpmath)
4993    {
4994      /* If we are using the default tune= or arch=, undo the string assigned,
4995	 and use the default.  */
4996      if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4997	opts->x_ix86_arch_string
4998	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]);
4999      else if (!orig_arch_specified)
5000	opts->x_ix86_arch_string = NULL;
5001
5002      if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
5003	opts->x_ix86_tune_string
5004	  = ggc_strdup (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]);
5005      else if (orig_tune_defaulted)
5006	opts->x_ix86_tune_string = NULL;
5007
5008      /* If fpmath= is not set, and we now have sse2 on 32-bit, use it.  */
5009      if (enum_opts_set.x_ix86_fpmath)
5010	opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5011      else if (!TARGET_64BIT_P (opts->x_ix86_isa_flags)
5012	       && TARGET_SSE_P (opts->x_ix86_isa_flags))
5013	{
5014	  opts->x_ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
5015	  opts_set->x_ix86_fpmath = (enum fpmath_unit) 1;
5016	}
5017
5018      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
5019      ix86_option_override_internal (false, opts, opts_set);
5020
5021      /* Add any builtin functions with the new isa if any.  */
5022      ix86_add_new_builtins (opts->x_ix86_isa_flags);
5023
5024      /* Save the current options unless we are validating options for
5025	 #pragma.  */
5026      t = build_target_option_node (opts);
5027
5028      opts->x_ix86_arch_string = orig_arch_string;
5029      opts->x_ix86_tune_string = orig_tune_string;
5030      opts_set->x_ix86_fpmath = orig_fpmath_set;
5031
5032      /* Free up memory allocated to hold the strings */
5033      for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
5034	free (option_strings[i]);
5035    }
5036
5037  return t;
5038}
5039
5040/* Hook to validate attribute((target("string"))).  */
5041
5042static bool
5043ix86_valid_target_attribute_p (tree fndecl,
5044			       tree ARG_UNUSED (name),
5045			       tree args,
5046			       int ARG_UNUSED (flags))
5047{
5048  struct gcc_options func_options;
5049  tree new_target, new_optimize;
5050  bool ret = true;
5051
5052  /* attribute((target("default"))) does nothing, beyond
5053     affecting multi-versioning.  */
5054  if (TREE_VALUE (args)
5055      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
5056      && TREE_CHAIN (args) == NULL_TREE
5057      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
5058    return true;
5059
5060  tree old_optimize = build_optimization_node (&global_options);
5061
5062  /* Get the optimization options of the current function.  */
5063  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
5064
5065  if (!func_optimize)
5066    func_optimize = old_optimize;
5067
5068  /* Init func_options.  */
5069  memset (&func_options, 0, sizeof (func_options));
5070  init_options_struct (&func_options, NULL);
5071  lang_hooks.init_options_struct (&func_options);
5072
5073  cl_optimization_restore (&func_options,
5074			   TREE_OPTIMIZATION (func_optimize));
5075
5076  /* Initialize func_options to the default before its target options can
5077     be set.  */
5078  cl_target_option_restore (&func_options,
5079			    TREE_TARGET_OPTION (target_option_default_node));
5080
5081  new_target = ix86_valid_target_attribute_tree (args, &func_options,
5082						 &global_options_set);
5083
5084  new_optimize = build_optimization_node (&func_options);
5085
5086  if (new_target == error_mark_node)
5087    ret = false;
5088
5089  else if (fndecl && new_target)
5090    {
5091      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
5092
5093      if (old_optimize != new_optimize)
5094	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
5095    }
5096
5097  return ret;
5098}
5099
5100
5101/* Hook to determine if one function can safely inline another.  */
5102
5103static bool
5104ix86_can_inline_p (tree caller, tree callee)
5105{
5106  bool ret = false;
5107  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
5108  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
5109
5110  /* If callee has no option attributes, then it is ok to inline.  */
5111  if (!callee_tree)
5112    ret = true;
5113
5114  /* If caller has no option attributes, but callee does then it is not ok to
5115     inline.  */
5116  else if (!caller_tree)
5117    ret = false;
5118
5119  else
5120    {
5121      struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
5122      struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
5123
5124      /* The callee's isa options should be a subset of the caller's, i.e. an
5125	 SSE4 function can inline an SSE2 function but an SSE2 function can't
5126	 inline an SSE4 function.  */
5127      if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
5128	  != callee_opts->x_ix86_isa_flags)
5129	ret = false;
5130
5131      /* See if we have the same non-isa options.  */
5132      else if (caller_opts->x_target_flags != callee_opts->x_target_flags)
5133	ret = false;
5134
5135      /* See if arch, tune, etc. are the same.  */
5136      else if (caller_opts->arch != callee_opts->arch)
5137	ret = false;
5138
5139      else if (caller_opts->tune != callee_opts->tune)
5140	ret = false;
5141
5142      else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
5143	ret = false;
5144
5145      else if (caller_opts->branch_cost != callee_opts->branch_cost)
5146	ret = false;
5147
5148      else
5149	ret = true;
5150    }
5151
5152  return ret;
5153}
5154
5155
5156/* Remember the last target of ix86_set_current_function.  */
5157static GTY(()) tree ix86_previous_fndecl;
5158
5159/* Set targets globals to the default (or current #pragma GCC target
5160   if active).  Invalidate ix86_previous_fndecl cache.  */
5161
5162void
5163ix86_reset_previous_fndecl (void)
5164{
5165  tree new_tree = target_option_current_node;
5166  cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5167  if (TREE_TARGET_GLOBALS (new_tree))
5168    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5169  else if (new_tree == target_option_default_node)
5170    restore_target_globals (&default_target_globals);
5171  else
5172    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5173  ix86_previous_fndecl = NULL_TREE;
5174}
5175
5176/* Establish appropriate back-end context for processing the function
5177   FNDECL.  The argument might be NULL to indicate processing at top
5178   level, outside of any function scope.  */
5179static void
5180ix86_set_current_function (tree fndecl)
5181{
5182  /* Only change the context if the function changes.  This hook is called
5183     several times in the course of compiling a function, and we don't want to
5184     slow things down too much or call target_reinit when it isn't safe.  */
5185  if (fndecl == ix86_previous_fndecl)
5186    return;
5187
5188  tree old_tree;
5189  if (ix86_previous_fndecl == NULL_TREE)
5190    old_tree = target_option_current_node;
5191  else if (DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl))
5192    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl);
5193  else
5194    old_tree = target_option_default_node;
5195
5196  if (fndecl == NULL_TREE)
5197    {
5198      if (old_tree != target_option_current_node)
5199	ix86_reset_previous_fndecl ();
5200      return;
5201    }
5202
5203  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
5204  if (new_tree == NULL_TREE)
5205    new_tree = target_option_default_node;
5206
5207  if (old_tree != new_tree)
5208    {
5209      cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
5210      if (TREE_TARGET_GLOBALS (new_tree))
5211	restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
5212      else if (new_tree == target_option_default_node)
5213	restore_target_globals (&default_target_globals);
5214      else
5215	TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
5216    }
5217  ix86_previous_fndecl = fndecl;
5218
5219  /* 64-bit MS and SYSV ABIs have different sets of call-used registers.
5220     Avoid expensive re-initialization of init_regs each time we switch
5221     function context.  */
5222  if (TARGET_64BIT
5223      && (call_used_regs[SI_REG]
5224	  == (cfun->machine->call_abi == MS_ABI)))
5225    reinit_regs ();
5226}
5227
5228
5229/* Return true if this goes in large data/bss.  */
5230
5231static bool
5232ix86_in_large_data_p (tree exp)
5233{
5234  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
5235    return false;
5236
5237  /* Functions are never large data.  */
5238  if (TREE_CODE (exp) == FUNCTION_DECL)
5239    return false;
5240
5241  /* Automatic variables are never large data.  */
5242  if (TREE_CODE (exp) == VAR_DECL && !is_global_var (exp))
5243    return false;
5244
5245  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
5246    {
5247      const char *section = DECL_SECTION_NAME (exp);
5248      if (strcmp (section, ".ldata") == 0
5249	  || strcmp (section, ".lbss") == 0)
5250	return true;
5251      return false;
5252    }
5253  else
5254    {
5255      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
5256
5257      /* If this is an incomplete type with size 0, then we can't put it
5258	 in data because it might be too big when completed.  Also,
5259	 int_size_in_bytes returns -1 if the size can vary or is larger than
5260	 an integer, in which case it is also safer to assume that it goes in
5261	 large data.  */
5262      if (size <= 0 || size > ix86_section_threshold)
5263	return true;
5264    }
5265
5266  return false;
5267}
5268
5269/* Switch to the appropriate section for output of DECL.
5270   DECL is either a `VAR_DECL' node or a constant of some sort.
5271   RELOC indicates whether forming the initial value of DECL requires
5272   link-time relocations.  */
5273
5274ATTRIBUTE_UNUSED static section *
5275x86_64_elf_select_section (tree decl, int reloc,
5276			   unsigned HOST_WIDE_INT align)
5277{
5278  if (ix86_in_large_data_p (decl))
5279    {
5280      const char *sname = NULL;
5281      unsigned int flags = SECTION_WRITE;
5282      switch (categorize_decl_for_section (decl, reloc))
5283	{
5284	case SECCAT_DATA:
5285	  sname = ".ldata";
5286	  break;
5287	case SECCAT_DATA_REL:
5288	  sname = ".ldata.rel";
5289	  break;
5290	case SECCAT_DATA_REL_LOCAL:
5291	  sname = ".ldata.rel.local";
5292	  break;
5293	case SECCAT_DATA_REL_RO:
5294	  sname = ".ldata.rel.ro";
5295	  break;
5296	case SECCAT_DATA_REL_RO_LOCAL:
5297	  sname = ".ldata.rel.ro.local";
5298	  break;
5299	case SECCAT_BSS:
5300	  sname = ".lbss";
5301	  flags |= SECTION_BSS;
5302	  break;
5303	case SECCAT_RODATA:
5304	case SECCAT_RODATA_MERGE_STR:
5305	case SECCAT_RODATA_MERGE_STR_INIT:
5306	case SECCAT_RODATA_MERGE_CONST:
5307	  sname = ".lrodata";
5308	  flags = 0;
5309	  break;
5310	case SECCAT_SRODATA:
5311	case SECCAT_SDATA:
5312	case SECCAT_SBSS:
5313	  gcc_unreachable ();
5314	case SECCAT_TEXT:
5315	case SECCAT_TDATA:
5316	case SECCAT_TBSS:
5317	  /* We don't split these for the medium model.  Place them into
5318	     default sections and hope for the best.  */
5319	  break;
5320	}
5321      if (sname)
5322	{
5323	  /* We might get called with string constants, but get_named_section
5324	     doesn't like them as they are not DECLs.  Also, we need to set
5325	     flags in that case.  */
5326	  if (!DECL_P (decl))
5327	    return get_section (sname, flags, NULL);
5328	  return get_named_section (decl, sname, reloc);
5329	}
5330    }
5331  return default_elf_select_section (decl, reloc, align);
5332}
5333
5334/* Select a set of attributes for section NAME based on the properties
5335   of DECL and whether or not RELOC indicates that DECL's initializer
5336   might contain runtime relocations.  */
5337
5338static unsigned int ATTRIBUTE_UNUSED
5339x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
5340{
5341  unsigned int flags = default_section_type_flags (decl, name, reloc);
5342
5343  if (decl == NULL_TREE
5344      && (strcmp (name, ".ldata.rel.ro") == 0
5345	  || strcmp (name, ".ldata.rel.ro.local") == 0))
5346    flags |= SECTION_RELRO;
5347
5348  if (strcmp (name, ".lbss") == 0
5349      || strncmp (name, ".lbss.", 6) == 0
5350      || strncmp (name, ".gnu.linkonce.lb.", 17) == 0)
5351    flags |= SECTION_BSS;
5352
5353  return flags;
5354}
5355
5356/* Build up a unique section name, expressed as a
5357   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
5358   RELOC indicates whether the initial value of DECL requires
5359   link-time relocations.  */
5360
5361static void ATTRIBUTE_UNUSED
5362x86_64_elf_unique_section (tree decl, int reloc)
5363{
5364  if (ix86_in_large_data_p (decl))
5365    {
5366      const char *prefix = NULL;
5367      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
5368      bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;
5369
5370      switch (categorize_decl_for_section (decl, reloc))
5371	{
5372	case SECCAT_DATA:
5373	case SECCAT_DATA_REL:
5374	case SECCAT_DATA_REL_LOCAL:
5375	case SECCAT_DATA_REL_RO:
5376	case SECCAT_DATA_REL_RO_LOCAL:
5377          prefix = one_only ? ".ld" : ".ldata";
5378	  break;
5379	case SECCAT_BSS:
5380          prefix = one_only ? ".lb" : ".lbss";
5381	  break;
5382	case SECCAT_RODATA:
5383	case SECCAT_RODATA_MERGE_STR:
5384	case SECCAT_RODATA_MERGE_STR_INIT:
5385	case SECCAT_RODATA_MERGE_CONST:
5386          prefix = one_only ? ".lr" : ".lrodata";
5387	  break;
5388	case SECCAT_SRODATA:
5389	case SECCAT_SDATA:
5390	case SECCAT_SBSS:
5391	  gcc_unreachable ();
5392	case SECCAT_TEXT:
5393	case SECCAT_TDATA:
5394	case SECCAT_TBSS:
5395	  /* We don't split these for the medium model.  Place them into
5396	     default sections and hope for the best.  */
5397	  break;
5398	}
5399      if (prefix)
5400	{
5401	  const char *name, *linkonce;
5402	  char *string;
5403
5404	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
5405	  name = targetm.strip_name_encoding (name);
5406
5407	  /* If we're using one_only, then there needs to be a .gnu.linkonce
5408	     prefix to the section name.  */
5409	  linkonce = one_only ? ".gnu.linkonce" : "";
5410
5411	  string = ACONCAT ((linkonce, prefix, ".", name, NULL));
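
	  /* E.g. (hypothetical decl name): a one_only writable variable "bar"
	     ends up in ".gnu.linkonce.ld.bar", a regular one in ".ldata.bar",
	     following the prefixes chosen above.  */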
5412
5413	  set_decl_section_name (decl, string);
5414	  return;
5415	}
5416    }
5417  default_unique_section (decl, reloc);
5418}
5419
5420#ifdef COMMON_ASM_OP
5421/* This says how to output assembler code to declare an
5422   uninitialized external linkage data object.
5423
5424   For medium model x86-64 we need to use .largecomm opcode for
5425   large objects.  */
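
/* A hypothetical example: with -mcmodel=medium and the default
   ix86_section_threshold, a 100000-byte object "foo" requested with 256-bit
   alignment would be emitted as

	.largecomm	foo,100000,32

   while smaller objects fall back to COMMON_ASM_OP below.  */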
5426void
5427x86_elf_aligned_common (FILE *file,
5428			const char *name, unsigned HOST_WIDE_INT size,
5429			int align)
5430{
5431  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5432      && size > (unsigned int)ix86_section_threshold)
5433    fputs ("\t.largecomm\t", file);
5434  else
5435    fputs (COMMON_ASM_OP, file);
5436  assemble_name (file, name);
5437  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
5438	   size, align / BITS_PER_UNIT);
5439}
5440#endif
5441
5442/* Utility function for targets to use in implementing
5443   ASM_OUTPUT_ALIGNED_BSS.  */
5444
5445void
5446x86_output_aligned_bss (FILE *file, tree decl, const char *name,
5447		       	unsigned HOST_WIDE_INT size, int align)
5448{
5449  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
5450      && size > (unsigned int)ix86_section_threshold)
5451    switch_to_section (get_named_section (decl, ".lbss", 0));
5452  else
5453    switch_to_section (bss_section);
5454  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
5455#ifdef ASM_DECLARE_OBJECT_NAME
5456  last_assemble_variable_decl = decl;
5457  ASM_DECLARE_OBJECT_NAME (file, name, decl);
5458#else
5459  /* Standard thing is just output label for the object.  */
5460  ASM_OUTPUT_LABEL (file, name);
5461#endif /* ASM_DECLARE_OBJECT_NAME */
5462  ASM_OUTPUT_SKIP (file, size ? size : 1);
5463}
5464
5465/* Decide whether we must probe the stack before any space allocation
5466   on this target.  It's essentially TARGET_STACK_PROBE except when
5467   -fstack-check causes the stack to be already probed differently.  */
5468
5469bool
5470ix86_target_stack_probe (void)
5471{
5472  /* Do not probe the stack twice if static stack checking is enabled.  */
5473  if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5474    return false;
5475
5476  return TARGET_STACK_PROBE;
5477}
5478
5479/* Decide whether we can make a sibling call to a function.  DECL is the
5480   declaration of the function being targeted by the call and EXP is the
5481   CALL_EXPR representing the call.  */
5482
5483static bool
5484ix86_function_ok_for_sibcall (tree decl, tree exp)
5485{
5486  tree type, decl_or_type;
5487  rtx a, b;
5488
5489  /* If we are generating position-independent code, we cannot sibcall
5490     optimize any indirect call, or a direct call to a global function,
5491     as the PLT requires %ebx be live. (Darwin does not have a PLT.)  */
5492  if (!TARGET_MACHO
5493      && !TARGET_64BIT
5494      && flag_pic
5495      && (!decl || !targetm.binds_local_p (decl)))
5496    return false;
5497
5498  /* If we need to align the outgoing stack, then sibcalling would
5499     unalign the stack, which may break the called function.  */
5500  if (ix86_minimum_incoming_stack_boundary (true)
5501      < PREFERRED_STACK_BOUNDARY)
5502    return false;
5503
5504  if (decl)
5505    {
5506      decl_or_type = decl;
5507      type = TREE_TYPE (decl);
5508    }
5509  else
5510    {
5511      /* We're looking at the CALL_EXPR, we need the type of the function.  */
5512      type = CALL_EXPR_FN (exp);		/* pointer expression */
5513      type = TREE_TYPE (type);			/* pointer type */
5514      type = TREE_TYPE (type);			/* function type */
5515      decl_or_type = type;
5516    }
5517
5518  /* Check that the return value locations are the same.  For example, if
5519     we are returning floats on the 80387 register stack, we cannot
5520     make a sibcall from a function that doesn't return a float to a
5521     function that does or, conversely, from a function that does return
5522     a float to a function that doesn't; the necessary stack adjustment
5523     would not be executed.  This is also the place we notice
5524     differences in the return value ABI.  Note that it is ok for one
5525     of the functions to have void return type as long as the return
5526     value of the other is passed in a register.  */
5527  a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
5528  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
5529			   cfun->decl, false);
5530  if (STACK_REG_P (a) || STACK_REG_P (b))
5531    {
5532      if (!rtx_equal_p (a, b))
5533	return false;
5534    }
5535  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
5536    ;
5537  else if (!rtx_equal_p (a, b))
5538    return false;
5539
5540  if (TARGET_64BIT)
5541    {
5542      /* The SYSV ABI has more call-clobbered registers;
5543	 disallow sibcalls from MS to SYSV.  */
5544      if (cfun->machine->call_abi == MS_ABI
5545	  && ix86_function_type_abi (type) == SYSV_ABI)
5546	return false;
5547    }
5548  else
5549    {
5550      /* If this call is indirect, we'll need to be able to use a
5551	 call-clobbered register for the address of the target function.
5552	 Make sure that all such registers are not used for passing
5553	 parameters.  Note that DLLIMPORT functions are indirect.  */
5554      if (!decl
5555	  || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
5556	{
5557	  if (ix86_function_regparm (type, NULL) >= 3)
5558	    {
5559	      /* ??? Need to count the actual number of registers to be used,
5560		 not the possible number of registers.  Fix later.  */
5561	      return false;
5562	    }
5563	}
5564    }
5565
5566  /* Otherwise okay.  That also includes certain types of indirect calls.  */
5567  return true;
5568}
5569
5570/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
5571   and "sseregparm" calling convention attributes;
5572   arguments as in struct attribute_spec.handler.  */
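
/* A purely illustrative use of these attributes:

     void fn (int, int, int) __attribute__((regparm(3), stdcall));

   The integer argument of regparm is range-checked against REGPARM_MAX
   below, and incompatible combinations (e.g. fastcall with regparm) are
   rejected with an error.  */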
5573
5574static tree
5575ix86_handle_cconv_attribute (tree *node, tree name,
5576				   tree args,
5577				   int,
5578				   bool *no_add_attrs)
5579{
5580  if (TREE_CODE (*node) != FUNCTION_TYPE
5581      && TREE_CODE (*node) != METHOD_TYPE
5582      && TREE_CODE (*node) != FIELD_DECL
5583      && TREE_CODE (*node) != TYPE_DECL)
5584    {
5585      warning (OPT_Wattributes, "%qE attribute only applies to functions",
5586	       name);
5587      *no_add_attrs = true;
5588      return NULL_TREE;
5589    }
5590
5591  /* Can combine regparm with all attributes but fastcall and thiscall.  */
5592  if (is_attribute_p ("regparm", name))
5593    {
5594      tree cst;
5595
5596      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5597        {
5598	  error ("fastcall and regparm attributes are not compatible");
5599	}
5600
5601      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5602	{
5603	  error ("regparm and thiscall attributes are not compatible");
5604	}
5605
5606      cst = TREE_VALUE (args);
5607      if (TREE_CODE (cst) != INTEGER_CST)
5608	{
5609	  warning (OPT_Wattributes,
5610		   "%qE attribute requires an integer constant argument",
5611		   name);
5612	  *no_add_attrs = true;
5613	}
5614      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
5615	{
5616	  warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
5617		   name, REGPARM_MAX);
5618	  *no_add_attrs = true;
5619	}
5620
5621      return NULL_TREE;
5622    }
5623
5624  if (TARGET_64BIT)
5625    {
5626      /* Do not warn when emulating the MS ABI.  */
5627      if ((TREE_CODE (*node) != FUNCTION_TYPE
5628	   && TREE_CODE (*node) != METHOD_TYPE)
5629	  || ix86_function_type_abi (*node) != MS_ABI)
5630	warning (OPT_Wattributes, "%qE attribute ignored",
5631	         name);
5632      *no_add_attrs = true;
5633      return NULL_TREE;
5634    }
5635
5636  /* Can combine fastcall with stdcall (redundant) and sseregparm.  */
5637  if (is_attribute_p ("fastcall", name))
5638    {
5639      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5640        {
5641	  error ("fastcall and cdecl attributes are not compatible");
5642	}
5643      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5644        {
5645	  error ("fastcall and stdcall attributes are not compatible");
5646	}
5647      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
5648        {
5649	  error ("fastcall and regparm attributes are not compatible");
5650	}
5651      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5652	{
5653	  error ("fastcall and thiscall attributes are not compatible");
5654	}
5655    }
5656
5657  /* Can combine stdcall with fastcall (redundant), regparm and
5658     sseregparm.  */
5659  else if (is_attribute_p ("stdcall", name))
5660    {
5661      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5662        {
5663	  error ("stdcall and cdecl attributes are not compatible");
5664	}
5665      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5666        {
5667	  error ("stdcall and fastcall attributes are not compatible");
5668	}
5669      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5670	{
5671	  error ("stdcall and thiscall attributes are not compatible");
5672	}
5673    }
5674
5675  /* Can combine cdecl with regparm and sseregparm.  */
5676  else if (is_attribute_p ("cdecl", name))
5677    {
5678      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5679        {
5680	  error ("stdcall and cdecl attributes are not compatible");
5681	}
5682      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5683        {
5684	  error ("fastcall and cdecl attributes are not compatible");
5685	}
5686      if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
5687	{
5688	  error ("cdecl and thiscall attributes are not compatible");
5689	}
5690    }
5691  else if (is_attribute_p ("thiscall", name))
5692    {
5693      if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
5694	warning (OPT_Wattributes, "%qE attribute is used for non-class method",
5695	         name);
5696      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
5697	{
5698	  error ("stdcall and thiscall attributes are not compatible");
5699	}
5700      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
5701	{
5702	  error ("fastcall and thiscall attributes are not compatible");
5703	}
5704      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
5705	{
5706	  error ("cdecl and thiscall attributes are not compatible");
5707	}
5708    }
5709
5710  /* Can combine sseregparm with all attributes.  */
5711
5712  return NULL_TREE;
5713}
5714
5715/* The transactional memory builtins are implicitly regparm or fastcall
5716   depending on the ABI.  Override the generic do-nothing attribute that
5717   these builtins were declared with, and replace it with one of the two
5718   attributes that we expect elsewhere.  */
5719
5720static tree
5721ix86_handle_tm_regparm_attribute (tree *node, tree, tree,
5722				  int flags, bool *no_add_attrs)
5723{
5724  tree alt;
5725
5726  /* In no case do we want to add the placeholder attribute.  */
5727  *no_add_attrs = true;
5728
5729  /* The 64-bit ABI is unchanged for transactional memory.  */
5730  if (TARGET_64BIT)
5731    return NULL_TREE;
5732
5733  /* ??? Is there a better way to validate 32-bit windows?  We have
5734     cfun->machine->call_abi, but that seems to be set only for 64-bit.  */
5735  if (CHECK_STACK_LIMIT > 0)
5736    alt = tree_cons (get_identifier ("fastcall"), NULL, NULL);
5737  else
5738    {
5739      alt = tree_cons (NULL, build_int_cst (NULL, 2), NULL);
5740      alt = tree_cons (get_identifier ("regparm"), alt, NULL);
5741    }
5742  decl_attributes (node, alt, flags);
5743
5744  return NULL_TREE;
5745}
5746
5747/* This function determines from TYPE the calling-convention.  */
5748
5749unsigned int
5750ix86_get_callcvt (const_tree type)
5751{
5752  unsigned int ret = 0;
5753  bool is_stdarg;
5754  tree attrs;
5755
5756  if (TARGET_64BIT)
5757    return IX86_CALLCVT_CDECL;
5758
5759  attrs = TYPE_ATTRIBUTES (type);
5760  if (attrs != NULL_TREE)
5761    {
5762      if (lookup_attribute ("cdecl", attrs))
5763	ret |= IX86_CALLCVT_CDECL;
5764      else if (lookup_attribute ("stdcall", attrs))
5765	ret |= IX86_CALLCVT_STDCALL;
5766      else if (lookup_attribute ("fastcall", attrs))
5767	ret |= IX86_CALLCVT_FASTCALL;
5768      else if (lookup_attribute ("thiscall", attrs))
5769	ret |= IX86_CALLCVT_THISCALL;
5770
      /* Regparm isn't allowed for thiscall and fastcall.  */
5772      if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
5773	{
5774	  if (lookup_attribute ("regparm", attrs))
5775	    ret |= IX86_CALLCVT_REGPARM;
5776	  if (lookup_attribute ("sseregparm", attrs))
5777	    ret |= IX86_CALLCVT_SSEREGPARM;
5778	}
5779
5780      if (IX86_BASE_CALLCVT(ret) != 0)
5781	return ret;
5782    }
5783
5784  is_stdarg = stdarg_p (type);
5785  if (TARGET_RTD && !is_stdarg)
5786    return IX86_CALLCVT_STDCALL | ret;
5787
5788  if (ret != 0
5789      || is_stdarg
5790      || TREE_CODE (type) != METHOD_TYPE
5791      || ix86_function_type_abi (type) != MS_ABI)
5792    return IX86_CALLCVT_CDECL | ret;
5793
5794  return IX86_CALLCVT_THISCALL;
5795}
5796
5797/* Return 0 if the attributes for two types are incompatible, 1 if they
5798   are compatible, and 2 if they are nearly compatible (which causes a
5799   warning to be generated).  */
5800
5801static int
5802ix86_comp_type_attributes (const_tree type1, const_tree type2)
5803{
5804  unsigned int ccvt1, ccvt2;
5805
5806  if (TREE_CODE (type1) != FUNCTION_TYPE
5807      && TREE_CODE (type1) != METHOD_TYPE)
5808    return 1;
5809
5810  ccvt1 = ix86_get_callcvt (type1);
5811  ccvt2 = ix86_get_callcvt (type2);
5812  if (ccvt1 != ccvt2)
5813    return 0;
5814  if (ix86_function_regparm (type1, NULL)
5815      != ix86_function_regparm (type2, NULL))
5816    return 0;
5817
5818  return 1;
5819}
5820
5821/* Return the regparm value for a function with the indicated TYPE and DECL.
5822   DECL may be NULL when calling function indirectly
5823   or considering a libcall.  */
5824
5825static int
5826ix86_function_regparm (const_tree type, const_tree decl)
5827{
5828  tree attr;
5829  int regparm;
5830  unsigned int ccvt;
5831
5832  if (TARGET_64BIT)
5833    return (ix86_function_type_abi (type) == SYSV_ABI
5834	    ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
5835  ccvt = ix86_get_callcvt (type);
5836  regparm = ix86_regparm;
5837
5838  if ((ccvt & IX86_CALLCVT_REGPARM) != 0)
5839    {
5840      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
5841      if (attr)
5842	{
5843	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
5844	  return regparm;
5845	}
5846    }
5847  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
5848    return 2;
5849  else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
5850    return 1;
5851
5852  /* Use register calling convention for local functions when possible.  */
5853  if (decl
5854      && TREE_CODE (decl) == FUNCTION_DECL)
5855    {
5856      cgraph_node *target = cgraph_node::get (decl);
5857      if (target)
5858	target = target->function_symbol ();
5859
      /* Caller and callee must agree on the calling convention, so
	 checking just "optimize" here would mean that with
	 __attribute__((optimize (...))) the caller could use the regparm
	 convention and the callee not, or vice versa.  Instead look at
	 whether the callee itself is optimized or not.  */
5865      if (target && opt_for_fn (target->decl, optimize)
5866	  && !(profile_flag && !flag_fentry))
5867	{
5868	  cgraph_local_info *i = &target->local;
5869	  if (i && i->local && i->can_change_signature)
5870	    {
5871	      int local_regparm, globals = 0, regno;
5872
5873	      /* Make sure no regparm register is taken by a
5874		 fixed register variable.  */
5875	      for (local_regparm = 0; local_regparm < REGPARM_MAX;
5876		   local_regparm++)
5877		if (fixed_regs[local_regparm])
5878		  break;
5879
5880	      /* We don't want to use regparm(3) for nested functions as
5881		 these use a static chain pointer in the third argument.  */
5882	      if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl))
5883		local_regparm = 2;
5884
5885	      /* Save a register for the split stack.  */
5886	      if (local_regparm == 3 && flag_split_stack)
5887		local_regparm = 2;
5888
	      /* Each fixed register usage increases register pressure,
		 so fewer registers should be used for argument passing.
		 This functionality can be overridden by an explicit
		 regparm value.  */
5893	      for (regno = AX_REG; regno <= DI_REG; regno++)
5894		if (fixed_regs[regno])
5895		  globals++;
5896
5897	      local_regparm
5898		= globals < local_regparm ? local_regparm - globals : 0;
5899
5900	      if (local_regparm > regparm)
5901		regparm = local_regparm;
5902	    }
5903	}
5904    }
5905
5906  return regparm;
5907}
5908
/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling the function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by an error.  This is used in situations where we
   imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.  */
5916
5917static int
5918ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
5919{
5920  gcc_assert (!TARGET_64BIT);
5921
5922  /* Use SSE registers to pass SFmode and DFmode arguments if requested
5923     by the sseregparm attribute.  */
5924  if (TARGET_SSEREGPARM
5925      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
5926    {
5927      if (!TARGET_SSE)
5928	{
5929	  if (warn)
5930	    {
5931	      if (decl)
5932		error ("calling %qD with attribute sseregparm without "
5933		       "SSE/SSE2 enabled", decl);
5934	      else
5935		error ("calling %qT with attribute sseregparm without "
5936		       "SSE/SSE2 enabled", type);
5937	    }
5938	  return 0;
5939	}
5940
5941      return 2;
5942    }
5943
5944  if (!decl)
5945    return 0;
5946
5947  cgraph_node *target = cgraph_node::get (decl);
5948  if (target)
5949    target = target->function_symbol ();
5950
5951  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
5952     (and DFmode for SSE2) arguments in SSE registers.  */
5953  if (target
5954      /* TARGET_SSE_MATH */
5955      && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE)
5956      && opt_for_fn (target->decl, optimize)
5957      && !(profile_flag && !flag_fentry))
5958    {
5959      cgraph_local_info *i = &target->local;
5960      if (i && i->local && i->can_change_signature)
5961	{
5962	  /* Refuse to produce wrong code when local function with SSE enabled
5963	     is called from SSE disabled function.
5964	     FIXME: We need a way to detect these cases cross-ltrans partition
5965	     and avoid using SSE calling conventions on local functions called
5966	     from function with SSE disabled.  For now at least delay the
5967	     warning until we know we are going to produce wrong code.
5968	     See PR66047  */
5969	  if (!TARGET_SSE && warn)
5970	    return -1;
5971	  return TARGET_SSE2_P (target_opts_for_fn (target->decl)
5972				->x_ix86_isa_flags) ? 2 : 1;
5973	}
5974    }
5975
5976  return 0;
5977}
5978
5979/* Return true if EAX is live at the start of the function.  Used by
5980   ix86_expand_prologue to determine if we need special help before
5981   calling allocate_stack_worker.  */
5982
5983static bool
5984ix86_eax_live_at_start_p (void)
5985{
5986  /* Cheat.  Don't bother working forward from ix86_function_regparm
5987     to the function type to whether an actual argument is located in
5988     eax.  Instead just look at cfg info, which is still close enough
5989     to correct at this point.  This gives false positives for broken
5990     functions that might use uninitialized data that happens to be
5991     allocated in eax, but who cares?  */
5992  return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
5993}
5994
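/* Return true if a function of type FNTYPE should keep the hidden
   aggregate return pointer on the stack, i.e. leave it to the caller
   to pop, rather than popping it in the callee epilogue.  */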
5995static bool
5996ix86_keep_aggregate_return_pointer (tree fntype)
5997{
5998  tree attr;
5999
6000  if (!TARGET_64BIT)
6001    {
6002      attr = lookup_attribute ("callee_pop_aggregate_return",
6003			       TYPE_ATTRIBUTES (fntype));
6004      if (attr)
6005	return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0);
6006
6007      /* For 32-bit MS-ABI the default is to keep aggregate
6008         return pointer.  */
6009      if (ix86_function_type_abi (fntype) == MS_ABI)
6010	return true;
6011    }
6012  return KEEP_AGGREGATE_RETURN_POINTER != 0;
6013}
6014
6015/* Value is the number of bytes of arguments automatically
6016   popped when returning from a subroutine call.
6017   FUNDECL is the declaration node of the function (as a tree),
6018   FUNTYPE is the data type of the function (as a tree),
6019   or for a library call it is an identifier node for the subroutine name.
6020   SIZE is the number of bytes of arguments passed on the stack.
6021
6022   On the 80386, the RTD insn may be used to pop them if the number
6023     of args is fixed, but if the number is variable then the caller
6024     must pop them all.  RTD can't be used for library calls now
6025     because the library is compiled with the Unix compiler.
6026   Use of RTD is a selectable option, since it is incompatible with
6027   standard Unix calling sequences.  If the option is not selected,
6028   the caller must always pop the args.
6029
6030   The attribute stdcall is equivalent to RTD on a per module basis.  */
6031
6032static int
6033ix86_return_pops_args (tree fundecl, tree funtype, int size)
6034{
6035  unsigned int ccvt;
6036
6037  /* None of the 64-bit ABIs pop arguments.  */
6038  if (TARGET_64BIT)
6039    return 0;
6040
6041  ccvt = ix86_get_callcvt (funtype);
6042
6043  if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
6044	       | IX86_CALLCVT_THISCALL)) != 0
6045      && ! stdarg_p (funtype))
6046    return size;
6047
6048  /* Lose any fake structure return argument if it is passed on the stack.  */
6049  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
6050      && !ix86_keep_aggregate_return_pointer (funtype))
6051    {
6052      int nregs = ix86_function_regparm (funtype, fundecl);
6053      if (nregs == 0)
6054	return GET_MODE_SIZE (Pmode);
6055    }
6056
6057  return 0;
6058}
6059
6060/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.  */
6061
6062static bool
6063ix86_legitimate_combined_insn (rtx_insn *insn)
6064{
6065  /* Check operand constraints in case hard registers were propagated
6066     into insn pattern.  This check prevents combine pass from
6067     generating insn patterns with invalid hard register operands.
6068     These invalid insns can eventually confuse reload to error out
6069     with a spill failure.  See also PRs 46829 and 46843.  */
6070  if ((INSN_CODE (insn) = recog (PATTERN (insn), insn, 0)) >= 0)
6071    {
6072      int i;
6073
6074      extract_insn (insn);
6075      preprocess_constraints (insn);
6076
6077      int n_operands = recog_data.n_operands;
6078      int n_alternatives = recog_data.n_alternatives;
6079      for (i = 0; i < n_operands; i++)
6080	{
6081	  rtx op = recog_data.operand[i];
6082	  machine_mode mode = GET_MODE (op);
6083	  const operand_alternative *op_alt;
6084	  int offset = 0;
6085	  bool win;
6086	  int j;
6087
6088	  /* For pre-AVX disallow unaligned loads/stores where the
6089	     instructions don't support it.  */
6090	  if (!TARGET_AVX
6091	      && VECTOR_MODE_P (GET_MODE (op))
6092	      && misaligned_operand (op, GET_MODE (op)))
6093	    {
6094	      int min_align = get_attr_ssememalign (insn);
6095	      if (min_align == 0)
6096		return false;
6097	    }
6098
6099	  /* A unary operator may be accepted by the predicate, but it
6100	     is irrelevant for matching constraints.  */
6101	  if (UNARY_P (op))
6102	    op = XEXP (op, 0);
6103
6104	  if (GET_CODE (op) == SUBREG)
6105	    {
6106	      if (REG_P (SUBREG_REG (op))
6107		  && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
6108		offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
6109					      GET_MODE (SUBREG_REG (op)),
6110					      SUBREG_BYTE (op),
6111					      GET_MODE (op));
6112	      op = SUBREG_REG (op);
6113	    }
6114
6115	  if (!(REG_P (op) && HARD_REGISTER_P (op)))
6116	    continue;
6117
6118	  op_alt = recog_op_alt;
6119
6120	  /* Operand has no constraints, anything is OK.  */
6121 	  win = !n_alternatives;
6122
6123	  alternative_mask preferred = get_preferred_alternatives (insn);
6124	  for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
6125	    {
6126	      if (!TEST_BIT (preferred, j))
6127		continue;
6128	      if (op_alt[i].anything_ok
6129		  || (op_alt[i].matches != -1
6130		      && operands_match_p
6131			  (recog_data.operand[i],
6132			   recog_data.operand[op_alt[i].matches]))
6133		  || reg_fits_class_p (op, op_alt[i].cl, offset, mode))
6134		{
6135		  win = true;
6136		  break;
6137		}
6138	    }
6139
6140	  if (!win)
6141	    return false;
6142	}
6143    }
6144
6145  return true;
6146}
6147
6148/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
6149
6150static unsigned HOST_WIDE_INT
6151ix86_asan_shadow_offset (void)
6152{
6153  return TARGET_LP64 ? (TARGET_MACHO ? (HOST_WIDE_INT_1 << 44)
6154				     : HOST_WIDE_INT_C (0x7fff8000))
6155		     : (HOST_WIDE_INT_1 << 29);
6156}
6157
6158/* Argument support functions.  */
6159
6160/* Return true when register may be used to pass function parameters.  */
6161bool
6162ix86_function_arg_regno_p (int regno)
6163{
6164  int i;
6165  enum calling_abi call_abi;
6166  const int *parm_regs;
6167
6168  if (TARGET_MPX && BND_REGNO_P (regno))
6169    return true;
6170
6171  if (!TARGET_64BIT)
6172    {
6173      if (TARGET_MACHO)
6174        return (regno < REGPARM_MAX
6175                || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
6176      else
6177        return (regno < REGPARM_MAX
6178	        || (TARGET_MMX && MMX_REGNO_P (regno)
6179	  	    && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
6180	        || (TARGET_SSE && SSE_REGNO_P (regno)
6181		    && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
6182    }
6183
6184  if (TARGET_SSE && SSE_REGNO_P (regno)
6185      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
6186    return true;
6187
6188  /* TODO: The function should depend on current function ABI but
6189     builtins.c would need updating then. Therefore we use the
6190     default ABI.  */
6191  call_abi = ix86_cfun_abi ();
6192
6193  /* RAX is used as hidden argument to va_arg functions.  */
6194  if (call_abi == SYSV_ABI && regno == AX_REG)
6195    return true;
6196
6197  if (call_abi == MS_ABI)
6198    parm_regs = x86_64_ms_abi_int_parameter_registers;
6199  else
6200    parm_regs = x86_64_int_parameter_registers;
6201
6202  for (i = 0; i < (call_abi == MS_ABI
6203		   ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
6204    if (regno == parm_regs[i])
6205      return true;
6206  return false;
6207}
6208
/* Return true if we do not know how to pass TYPE solely in registers.  */
6210
6211static bool
6212ix86_must_pass_in_stack (machine_mode mode, const_tree type)
6213{
6214  if (must_pass_in_stack_var_size_or_pad (mode, type))
6215    return true;
6216
6217  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
6218     The layout_type routine is crafty and tries to trick us into passing
6219     currently unsupported vector types on the stack by using TImode.  */
6220  return (!TARGET_64BIT && mode == TImode
6221	  && type && TREE_CODE (type) != VECTOR_TYPE);
6222}
6223
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
6227int
6228ix86_reg_parm_stack_space (const_tree fndecl)
6229{
6230  enum calling_abi call_abi = SYSV_ABI;
6231  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
6232    call_abi = ix86_function_abi (fndecl);
6233  else
6234    call_abi = ix86_function_type_abi (fndecl);
6235  if (TARGET_64BIT && call_abi == MS_ABI)
6236    return 32;
6237  return 0;
6238}
6239
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the
   call ABI used.  */
6242enum calling_abi
6243ix86_function_type_abi (const_tree fntype)
6244{
6245  if (fntype != NULL_TREE && TYPE_ATTRIBUTES (fntype) != NULL_TREE)
6246    {
6247      enum calling_abi abi = ix86_abi;
6248      if (abi == SYSV_ABI)
6249	{
6250	  if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
6251	    {
6252	      if (TARGET_X32)
6253		{
6254		  static bool warned = false;
6255		  if (!warned)
6256		    {
6257		      error ("X32 does not support ms_abi attribute");
6258		      warned = true;
6259		    }
6260		}
6261	      abi = MS_ABI;
6262	    }
6263	}
6264      else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
6265	abi = SYSV_ABI;
6266      return abi;
6267    }
6268  return ix86_abi;
6269}
6270
6271/* We add this as a workaround in order to use libc_has_function
6272   hook in i386.md.  */
6273bool
6274ix86_libc_has_function (enum function_class fn_class)
6275{
6276  return targetm.libc_has_function (fn_class);
6277}
6278
6279static bool
6280ix86_function_ms_hook_prologue (const_tree fn)
6281{
6282  if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
6283    {
6284      if (decl_function_context (fn) != NULL_TREE)
6285	error_at (DECL_SOURCE_LOCATION (fn),
6286		  "ms_hook_prologue is not compatible with nested function");
6287      else
6288        return true;
6289    }
6290  return false;
6291}
6292
6293static enum calling_abi
6294ix86_function_abi (const_tree fndecl)
6295{
6296  if (! fndecl)
6297    return ix86_abi;
6298  return ix86_function_type_abi (TREE_TYPE (fndecl));
6299}
6300
/* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the
   call ABI used.  */
6303enum calling_abi
6304ix86_cfun_abi (void)
6305{
6306  if (! cfun)
6307    return ix86_abi;
6308  return cfun->machine->call_abi;
6309}
6310
6311/* Write the extra assembler code needed to declare a function properly.  */
6312
6313void
6314ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
6315				tree decl)
6316{
6317  bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
6318
6319  if (is_ms_hook)
6320    {
6321      int i, filler_count = (TARGET_64BIT ? 32 : 16);
6322      unsigned int filler_cc = 0xcccccccc;
6323
6324      for (i = 0; i < filler_count; i += 4)
6325        fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
6326    }
6327
6328#ifdef SUBTARGET_ASM_UNWIND_INIT
6329  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
6330#endif
6331
6332  ASM_OUTPUT_LABEL (asm_out_file, fname);
6333
6334  /* Output magic byte marker, if hot-patch attribute is set.  */
6335  if (is_ms_hook)
6336    {
6337      if (TARGET_64BIT)
6338	{
6339	  /* leaq [%rsp + 0], %rsp  */
6340	  asm_fprintf (asm_out_file, ASM_BYTE
6341		       "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
6342	}
6343      else
6344	{
6345          /* movl.s %edi, %edi
6346	     push   %ebp
6347	     movl.s %esp, %ebp */
6348	  asm_fprintf (asm_out_file, ASM_BYTE
6349		       "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
6350	}
6351    }
6352}
6353
6354/* regclass.c  */
6355extern void init_regs (void);
6356
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are set up.  See also
   ix86_conditional_register_usage for more details.  */
6360void
6361ix86_call_abi_override (const_tree fndecl)
6362{
6363  if (fndecl == NULL_TREE)
6364    cfun->machine->call_abi = ix86_abi;
6365  else
6366    cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
6367}
6368
/* Return true if a pseudo register should be created and used to hold
   the GOT address for PIC code.  */
6371bool
6372ix86_use_pseudo_pic_reg (void)
6373{
6374  if ((TARGET_64BIT
6375       && (ix86_cmodel == CM_SMALL_PIC
6376	   || TARGET_PECOFF))
6377      || !flag_pic)
6378    return false;
6379  return true;
6380}
6381
6382/* Initialize large model PIC register.  */
6383
6384static void
6385ix86_init_large_pic_reg (unsigned int tmp_regno)
6386{
6387  rtx_code_label *label;
6388  rtx tmp_reg;
6389
6390  gcc_assert (Pmode == DImode);
6391  label = gen_label_rtx ();
6392  emit_label (label);
6393  LABEL_PRESERVE_P (label) = 1;
6394  tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
6395  gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
6396  emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
6397				label));
6398  emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6399  emit_insn (ix86_gen_add3 (pic_offset_table_rtx,
6400			    pic_offset_table_rtx, tmp_reg));
6401}
6402
6403/* Create and initialize PIC register if required.  */
6404static void
6405ix86_init_pic_reg (void)
6406{
6407  edge entry_edge;
6408  rtx_insn *seq;
6409
6410  if (!ix86_use_pseudo_pic_reg ())
6411    return;
6412
6413  start_sequence ();
6414
6415  if (TARGET_64BIT)
6416    {
6417      if (ix86_cmodel == CM_LARGE_PIC)
6418	ix86_init_large_pic_reg (R11_REG);
6419      else
6420	emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6421    }
6422  else
6423    {
      /* If there is a future mcount call in the function, it is more
	 profitable to emit SET_GOT into the ABI-defined
	 REAL_PIC_OFFSET_TABLE_REGNUM.  */
6426      rtx reg = crtl->profile
6427		? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
6428		: pic_offset_table_rtx;
6429      rtx insn = emit_insn (gen_set_got (reg));
6430      RTX_FRAME_RELATED_P (insn) = 1;
6431      if (crtl->profile)
6432        emit_move_insn (pic_offset_table_rtx, reg);
6433      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
6434    }
6435
6436  seq = get_insns ();
6437  end_sequence ();
6438
6439  entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
6440  insert_insn_on_edge (seq, entry_edge);
6441  commit_one_edge_insertion (entry_edge);
6442}
6443
6444/* Initialize a variable CUM of type CUMULATIVE_ARGS
6445   for a call to a function whose data type is FNTYPE.
6446   For a library call, FNTYPE is 0.  */
6447
6448void
6449init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
6450		      tree fntype,	/* tree ptr for function decl */
6451		      rtx libname,	/* SYMBOL_REF of library name or 0 */
6452		      tree fndecl,
6453		      int caller)
6454{
6455  struct cgraph_local_info *i = NULL;
6456  struct cgraph_node *target = NULL;
6457
6458  memset (cum, 0, sizeof (*cum));
6459
6460  if (fndecl)
6461    {
6462      target = cgraph_node::get (fndecl);
6463      if (target)
6464	{
6465	  target = target->function_symbol ();
6466	  i = cgraph_node::local_info (target->decl);
6467	  cum->call_abi = ix86_function_abi (target->decl);
6468	}
6469      else
6470	cum->call_abi = ix86_function_abi (fndecl);
6471    }
6472  else
6473    cum->call_abi = ix86_function_type_abi (fntype);
6474
6475  cum->caller = caller;
6476
6477  /* Set up the number of registers to use for passing arguments.  */
6478  cum->nregs = ix86_regparm;
6479  if (TARGET_64BIT)
6480    {
6481      cum->nregs = (cum->call_abi == SYSV_ABI
6482                   ? X86_64_REGPARM_MAX
6483                   : X86_64_MS_REGPARM_MAX);
6484    }
6485  if (TARGET_SSE)
6486    {
6487      cum->sse_nregs = SSE_REGPARM_MAX;
6488      if (TARGET_64BIT)
6489        {
6490          cum->sse_nregs = (cum->call_abi == SYSV_ABI
6491                           ? X86_64_SSE_REGPARM_MAX
6492                           : X86_64_MS_SSE_REGPARM_MAX);
6493        }
6494    }
6495  if (TARGET_MMX)
6496    cum->mmx_nregs = MMX_REGPARM_MAX;
6497  cum->warn_avx512f = true;
6498  cum->warn_avx = true;
6499  cum->warn_sse = true;
6500  cum->warn_mmx = true;
6501
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record if function uses
     va_start so for local functions maybe_vaarg can be made aggressive
     helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
6508  if (i && i->local && i->can_change_signature)
6509    fntype = TREE_TYPE (target->decl);
6510  cum->stdarg = stdarg_p (fntype);
6511  cum->maybe_vaarg = (fntype
6512		      ? (!prototype_p (fntype) || stdarg_p (fntype))
6513		      : !libname);
6514
6515  cum->bnd_regno = FIRST_BND_REG;
6516  cum->bnds_in_bt = 0;
6517  cum->force_bnd_pass = 0;
6518  cum->decl = fndecl;
6519
6520  if (!TARGET_64BIT)
6521    {
6522      /* If there are variable arguments, then we won't pass anything
6523         in registers in 32-bit mode. */
6524      if (stdarg_p (fntype))
6525	{
6526	  cum->nregs = 0;
6527	  cum->sse_nregs = 0;
6528	  cum->mmx_nregs = 0;
6529	  cum->warn_avx512f = false;
6530	  cum->warn_avx = false;
6531	  cum->warn_sse = false;
6532	  cum->warn_mmx = false;
6533	  return;
6534	}
6535
6536      /* Use ecx and edx registers if function has fastcall attribute,
6537	 else look for regparm information.  */
6538      if (fntype)
6539	{
6540	  unsigned int ccvt = ix86_get_callcvt (fntype);
6541	  if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
6542	    {
6543	      cum->nregs = 1;
6544	      cum->fastcall = 1; /* Same first register as in fastcall.  */
6545	    }
6546	  else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
6547	    {
6548	      cum->nregs = 2;
6549	      cum->fastcall = 1;
6550	    }
6551	  else
6552	    cum->nregs = ix86_function_regparm (fntype, fndecl);
6553	}
6554
6555      /* Set up the number of SSE registers used for passing SFmode
6556	 and DFmode arguments.  Warn for mismatching ABI.  */
6557      cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
6558    }
6559}
6560
6561/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
6562   But in the case of vector types, it is some vector mode.
6563
6564   When we have only some of our vector isa extensions enabled, then there
6565   are some modes for which vector_mode_supported_p is false.  For these
6566   modes, the generic vector support in gcc will choose some non-vector mode
6567   in order to implement the type.  By computing the natural mode, we'll
6568   select the proper ABI location for the operand and not depend on whatever
6569   the middle-end decides to do with these vector types.
6570
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.

   If IN_RETURN is true, warn about the ABI change if the vector mode isn't
   available for the function return value.  */
6577
6578static machine_mode
6579type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
6580		   bool in_return)
6581{
6582  machine_mode mode = TYPE_MODE (type);
6583
6584  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
6585    {
6586      HOST_WIDE_INT size = int_size_in_bytes (type);
6587      if ((size == 8 || size == 16 || size == 32 || size == 64)
6588	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
6589	  && TYPE_VECTOR_SUBPARTS (type) > 1)
6590	{
6591	  machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
6592
6593	  /* There are no XFmode vector modes.  */
6594	  if (innermode == XFmode)
6595	    return mode;
6596
6597	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6598	    mode = MIN_MODE_VECTOR_FLOAT;
6599	  else
6600	    mode = MIN_MODE_VECTOR_INT;
6601
6602	  /* Get the mode which has this inner mode and number of units.  */
6603	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
6604	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
6605		&& GET_MODE_INNER (mode) == innermode)
6606	      {
6607		if (size == 64 && !TARGET_AVX512F)
6608		  {
6609		    static bool warnedavx512f;
6610		    static bool warnedavx512f_ret;
6611
6612		    if (cum && cum->warn_avx512f && !warnedavx512f)
6613		      {
6614			if (warning (OPT_Wpsabi, "AVX512F vector argument "
6615				     "without AVX512F enabled changes the ABI"))
6616			  warnedavx512f = true;
6617		      }
6618		    else if (in_return && !warnedavx512f_ret)
6619		      {
6620			if (warning (OPT_Wpsabi, "AVX512F vector return "
6621				     "without AVX512F enabled changes the ABI"))
6622			  warnedavx512f_ret = true;
6623		      }
6624
6625		    return TYPE_MODE (type);
6626		  }
6627		else if (size == 32 && !TARGET_AVX)
6628		  {
6629		    static bool warnedavx;
6630		    static bool warnedavx_ret;
6631
6632		    if (cum && cum->warn_avx && !warnedavx)
6633		      {
6634			if (warning (OPT_Wpsabi, "AVX vector argument "
6635				     "without AVX enabled changes the ABI"))
6636			  warnedavx = true;
6637		      }
6638		    else if (in_return && !warnedavx_ret)
6639		      {
6640			if (warning (OPT_Wpsabi, "AVX vector return "
6641				     "without AVX enabled changes the ABI"))
6642			  warnedavx_ret = true;
6643		      }
6644
6645		    return TYPE_MODE (type);
6646		  }
6647		else if (((size == 8 && TARGET_64BIT) || size == 16)
6648			 && !TARGET_SSE)
6649		  {
6650		    static bool warnedsse;
6651		    static bool warnedsse_ret;
6652
6653		    if (cum && cum->warn_sse && !warnedsse)
6654		      {
6655			if (warning (OPT_Wpsabi, "SSE vector argument "
6656				     "without SSE enabled changes the ABI"))
6657			  warnedsse = true;
6658		      }
6659		    else if (!TARGET_64BIT && in_return && !warnedsse_ret)
6660		      {
6661			if (warning (OPT_Wpsabi, "SSE vector return "
6662				     "without SSE enabled changes the ABI"))
6663			  warnedsse_ret = true;
6664		      }
6665		  }
6666		else if ((size == 8 && !TARGET_64BIT) && !TARGET_MMX)
6667		  {
6668		    static bool warnedmmx;
6669		    static bool warnedmmx_ret;
6670
6671		    if (cum && cum->warn_mmx && !warnedmmx)
6672		      {
6673			if (warning (OPT_Wpsabi, "MMX vector argument "
6674				     "without MMX enabled changes the ABI"))
6675			  warnedmmx = true;
6676		      }
6677		    else if (in_return && !warnedmmx_ret)
6678		      {
6679			if (warning (OPT_Wpsabi, "MMX vector return "
6680				     "without MMX enabled changes the ABI"))
6681			  warnedmmx_ret = true;
6682		      }
6683		  }
6684		return mode;
6685	      }
6686
6687	  gcc_unreachable ();
6688	}
6689    }
6690
6691  return mode;
6692}
6693
6694/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
6695   this may not agree with the mode that the type system has chosen for the
6696   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
6697   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
6698
6699static rtx
6700gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
6701		     unsigned int regno)
6702{
6703  rtx tmp;
6704
6705  if (orig_mode != BLKmode)
6706    tmp = gen_rtx_REG (orig_mode, regno);
6707  else
6708    {
6709      tmp = gen_rtx_REG (mode, regno);
6710      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
6711      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
6712    }
6713
6714  return tmp;
6715}
6716
/* x86-64 register passing implementation.  See the x86-64 psABI for
   details.  The goal of this code is to classify each eightbyte of an
   incoming argument by register class and assign registers accordingly.  */
6720
6721/* Return the union class of CLASS1 and CLASS2.
6722   See the x86-64 PS ABI for details.  */
6723
6724static enum x86_64_reg_class
6725merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
6726{
6727  /* Rule #1: If both classes are equal, this is the resulting class.  */
6728  if (class1 == class2)
6729    return class1;
6730
6731  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
6732     the other class.  */
6733  if (class1 == X86_64_NO_CLASS)
6734    return class2;
6735  if (class2 == X86_64_NO_CLASS)
6736    return class1;
6737
6738  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
6739  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
6740    return X86_64_MEMORY_CLASS;
6741
6742  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
6743  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
6744      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
6745    return X86_64_INTEGERSI_CLASS;
6746  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
6747      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
6748    return X86_64_INTEGER_CLASS;
6749
6750  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
6751     MEMORY is used.  */
6752  if (class1 == X86_64_X87_CLASS
6753      || class1 == X86_64_X87UP_CLASS
6754      || class1 == X86_64_COMPLEX_X87_CLASS
6755      || class2 == X86_64_X87_CLASS
6756      || class2 == X86_64_X87UP_CLASS
6757      || class2 == X86_64_COMPLEX_X87_CLASS)
6758    return X86_64_MEMORY_CLASS;
6759
6760  /* Rule #6: Otherwise class SSE is used.  */
6761  return X86_64_SSE_CLASS;
6762}
6763
6764/* Classify the argument of type TYPE and mode MODE.
6765   CLASSES will be filled by the register class used to pass each word
   of the operand.  The number of words is returned.  In case the parameter
   should be passed in memory, 0 is returned.  As a special case for zero
   sized containers, classes[0] will be NO_CLASS and 1 is returned.

   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 512 to avoid overflow cases.
6772
6773   See the x86-64 PS ABI for details.
6774*/
6775
6776static int
6777classify_argument (machine_mode mode, const_tree type,
6778		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
6779{
6780  HOST_WIDE_INT bytes =
6781    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
6782  int words
6783    = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6784
6785  /* Variable sized entities are always passed/returned in memory.  */
6786  if (bytes < 0)
6787    return 0;
6788
6789  if (mode != VOIDmode
6790      && targetm.calls.must_pass_in_stack (mode, type))
6791    return 0;
6792
6793  if (type && AGGREGATE_TYPE_P (type))
6794    {
6795      int i;
6796      tree field;
6797      enum x86_64_reg_class subclasses[MAX_CLASSES];
6798
6799      /* On x86-64 we pass structures larger than 64 bytes on the stack.  */
6800      if (bytes > 64)
6801	return 0;
6802
6803      for (i = 0; i < words; i++)
6804	classes[i] = X86_64_NO_CLASS;
6805
      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
	 signal the memory class, so handle them as a special case.  */
6808      if (!words)
6809	{
6810	  classes[0] = X86_64_NO_CLASS;
6811	  return 1;
6812	}
6813
6814      /* Classify each field of record and merge classes.  */
6815      switch (TREE_CODE (type))
6816	{
6817	case RECORD_TYPE:
6818	  /* And now merge the fields of structure.  */
6819	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6820	    {
6821	      if (TREE_CODE (field) == FIELD_DECL)
6822		{
6823		  int num;
6824
6825		  if (TREE_TYPE (field) == error_mark_node)
6826		    continue;
6827
6828		  /* Bitfields are always classified as integer.  Handle them
6829		     early, since later code would consider them to be
6830		     misaligned integers.  */
6831		  if (DECL_BIT_FIELD (field))
6832		    {
6833		      for (i = (int_bit_position (field)
6834				+ (bit_offset % 64)) / 8 / 8;
6835			   i < ((int_bit_position (field) + (bit_offset % 64))
6836			        + tree_to_shwi (DECL_SIZE (field))
6837				+ 63) / 8 / 8; i++)
6838			classes[i] =
6839			  merge_classes (X86_64_INTEGER_CLASS,
6840					 classes[i]);
6841		    }
6842		  else
6843		    {
6844		      int pos;
6845
6846		      type = TREE_TYPE (field);
6847
6848		      /* Flexible array member is ignored.  */
6849		      if (TYPE_MODE (type) == BLKmode
6850			  && TREE_CODE (type) == ARRAY_TYPE
6851			  && TYPE_SIZE (type) == NULL_TREE
6852			  && TYPE_DOMAIN (type) != NULL_TREE
6853			  && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
6854			      == NULL_TREE))
6855			{
6856			  static bool warned;
6857
6858			  if (!warned && warn_psabi)
6859			    {
6860			      warned = true;
6861			      inform (input_location,
6862				      "the ABI of passing struct with"
6863				      " a flexible array member has"
6864				      " changed in GCC 4.4");
6865			    }
6866			  continue;
6867			}
6868		      num = classify_argument (TYPE_MODE (type), type,
6869					       subclasses,
6870					       (int_bit_position (field)
6871						+ bit_offset) % 512);
6872		      if (!num)
6873			return 0;
6874		      pos = (int_bit_position (field)
6875			     + (bit_offset % 64)) / 8 / 8;
6876		      for (i = 0; i < num && (i + pos) < words; i++)
6877			classes[i + pos] =
6878			  merge_classes (subclasses[i], classes[i + pos]);
6879		    }
6880		}
6881	    }
6882	  break;
6883
6884	case ARRAY_TYPE:
6885	  /* Arrays are handled as small records.  */
6886	  {
6887	    int num;
6888	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
6889				     TREE_TYPE (type), subclasses, bit_offset);
6890	    if (!num)
6891	      return 0;
6892
6893	    /* The partial classes are now full classes.  */
6894	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
6895	      subclasses[0] = X86_64_SSE_CLASS;
6896	    if (subclasses[0] == X86_64_INTEGERSI_CLASS
6897		&& !((bit_offset % 64) == 0 && bytes == 4))
6898	      subclasses[0] = X86_64_INTEGER_CLASS;
6899
6900	    for (i = 0; i < words; i++)
6901	      classes[i] = subclasses[i % num];
6902
6903	    break;
6904	  }
6905	case UNION_TYPE:
6906	case QUAL_UNION_TYPE:
	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
6909	  for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6910	    {
6911	      if (TREE_CODE (field) == FIELD_DECL)
6912		{
6913		  int num;
6914
6915		  if (TREE_TYPE (field) == error_mark_node)
6916		    continue;
6917
6918		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
6919					   TREE_TYPE (field), subclasses,
6920					   bit_offset);
6921		  if (!num)
6922		    return 0;
6923		  for (i = 0; i < num && i < words; i++)
6924		    classes[i] = merge_classes (subclasses[i], classes[i]);
6925		}
6926	    }
6927	  break;
6928
6929	default:
6930	  gcc_unreachable ();
6931	}
6932
6933      if (words > 2)
6934	{
	  /* When size > 16 bytes, if the first eightbyte isn't
	     X86_64_SSE_CLASS or any of the others isn't
	     X86_64_SSEUP_CLASS, everything should be passed in
	     memory.  */
6939	  if (classes[0] != X86_64_SSE_CLASS)
6940	      return 0;
6941
6942	  for (i = 1; i < words; i++)
6943	    if (classes[i] != X86_64_SSEUP_CLASS)
6944	      return 0;
6945	}
6946
6947      /* Final merger cleanup.  */
6948      for (i = 0; i < words; i++)
6949	{
6950	  /* If one class is MEMORY, everything should be passed in
6951	     memory.  */
6952	  if (classes[i] == X86_64_MEMORY_CLASS)
6953	    return 0;
6954
6955	  /* The X86_64_SSEUP_CLASS should be always preceded by
6956	     X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
6957	  if (classes[i] == X86_64_SSEUP_CLASS
6958	      && classes[i - 1] != X86_64_SSE_CLASS
6959	      && classes[i - 1] != X86_64_SSEUP_CLASS)
6960	    {
6961	      /* The first one should never be X86_64_SSEUP_CLASS.  */
6962	      gcc_assert (i != 0);
6963	      classes[i] = X86_64_SSE_CLASS;
6964	    }
6965
6966	  /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
6967	       everything should be passed in memory.  */
6968	  if (classes[i] == X86_64_X87UP_CLASS
6969	      && (classes[i - 1] != X86_64_X87_CLASS))
6970	    {
6971	      static bool warned;
6972
6973	      /* The first one should never be X86_64_X87UP_CLASS.  */
6974	      gcc_assert (i != 0);
6975	      if (!warned && warn_psabi)
6976		{
6977		  warned = true;
6978		  inform (input_location,
6979			  "the ABI of passing union with long double"
6980			  " has changed in GCC 4.4");
6981		}
6982	      return 0;
6983	    }
6984	}
6985      return words;
6986    }
6987
  /* Compute the alignment needed.  We align all types to their natural
     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
6990  if (mode != VOIDmode && mode != BLKmode)
6991    {
6992      int mode_alignment = GET_MODE_BITSIZE (mode);
6993
6994      if (mode == XFmode)
6995	mode_alignment = 128;
6996      else if (mode == XCmode)
6997	mode_alignment = 256;
6998      if (COMPLEX_MODE_P (mode))
6999	mode_alignment /= 2;
7000      /* Misaligned fields are always returned in memory.  */
7001      if (bit_offset % mode_alignment)
7002	return 0;
7003    }
7004
  /* For V1xx modes, just use the base mode.  */
7006  if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
7007      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
7008    mode = GET_MODE_INNER (mode);
7009
7010  /* Classification of atomic types.  */
7011  switch (mode)
7012    {
7013    case SDmode:
7014    case DDmode:
7015      classes[0] = X86_64_SSE_CLASS;
7016      return 1;
7017    case TDmode:
7018      classes[0] = X86_64_SSE_CLASS;
7019      classes[1] = X86_64_SSEUP_CLASS;
7020      return 2;
7021    case DImode:
7022    case SImode:
7023    case HImode:
7024    case QImode:
7025    case CSImode:
7026    case CHImode:
7027    case CQImode:
7028      {
7029	int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
7030
7031	/* Analyze last 128 bits only.  */
7032	size = (size - 1) & 0x7f;
7033
7034	if (size < 32)
7035	  {
7036	    classes[0] = X86_64_INTEGERSI_CLASS;
7037	    return 1;
7038	  }
7039	else if (size < 64)
7040	  {
7041	    classes[0] = X86_64_INTEGER_CLASS;
7042	    return 1;
7043	  }
7044	else if (size < 64+32)
7045	  {
7046	    classes[0] = X86_64_INTEGER_CLASS;
7047	    classes[1] = X86_64_INTEGERSI_CLASS;
7048	    return 2;
7049	  }
7050	else if (size < 64+64)
7051	  {
7052	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7053	    return 2;
7054	  }
7055	else
7056	  gcc_unreachable ();
7057      }
7058    case CDImode:
7059    case TImode:
7060      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
7061      return 2;
7062    case COImode:
7063    case OImode:
7064      /* OImode shouldn't be used directly.  */
7065      gcc_unreachable ();
7066    case CTImode:
7067      return 0;
7068    case SFmode:
7069      if (!(bit_offset % 64))
7070	classes[0] = X86_64_SSESF_CLASS;
7071      else
7072	classes[0] = X86_64_SSE_CLASS;
7073      return 1;
7074    case DFmode:
7075      classes[0] = X86_64_SSEDF_CLASS;
7076      return 1;
7077    case XFmode:
7078      classes[0] = X86_64_X87_CLASS;
7079      classes[1] = X86_64_X87UP_CLASS;
7080      return 2;
7081    case TFmode:
7082      classes[0] = X86_64_SSE_CLASS;
7083      classes[1] = X86_64_SSEUP_CLASS;
7084      return 2;
7085    case SCmode:
7086      classes[0] = X86_64_SSE_CLASS;
7087      if (!(bit_offset % 64))
7088	return 1;
7089      else
7090	{
7091	  static bool warned;
7092
7093	  if (!warned && warn_psabi)
7094	    {
7095	      warned = true;
7096	      inform (input_location,
7097		      "the ABI of passing structure with complex float"
7098		      " member has changed in GCC 4.4");
7099	    }
7100	  classes[1] = X86_64_SSESF_CLASS;
7101	  return 2;
7102	}
7103    case DCmode:
7104      classes[0] = X86_64_SSEDF_CLASS;
7105      classes[1] = X86_64_SSEDF_CLASS;
7106      return 2;
7107    case XCmode:
7108      classes[0] = X86_64_COMPLEX_X87_CLASS;
7109      return 1;
7110    case TCmode:
      /* This mode is larger than 16 bytes.  */
7112      return 0;
7113    case V8SFmode:
7114    case V8SImode:
7115    case V32QImode:
7116    case V16HImode:
7117    case V4DFmode:
7118    case V4DImode:
7119      classes[0] = X86_64_SSE_CLASS;
7120      classes[1] = X86_64_SSEUP_CLASS;
7121      classes[2] = X86_64_SSEUP_CLASS;
7122      classes[3] = X86_64_SSEUP_CLASS;
7123      return 4;
7124    case V8DFmode:
7125    case V16SFmode:
7126    case V8DImode:
7127    case V16SImode:
7128    case V32HImode:
7129    case V64QImode:
7130      classes[0] = X86_64_SSE_CLASS;
7131      classes[1] = X86_64_SSEUP_CLASS;
7132      classes[2] = X86_64_SSEUP_CLASS;
7133      classes[3] = X86_64_SSEUP_CLASS;
7134      classes[4] = X86_64_SSEUP_CLASS;
7135      classes[5] = X86_64_SSEUP_CLASS;
7136      classes[6] = X86_64_SSEUP_CLASS;
7137      classes[7] = X86_64_SSEUP_CLASS;
7138      return 8;
7139    case V4SFmode:
7140    case V4SImode:
7141    case V16QImode:
7142    case V8HImode:
7143    case V2DFmode:
7144    case V2DImode:
7145      classes[0] = X86_64_SSE_CLASS;
7146      classes[1] = X86_64_SSEUP_CLASS;
7147      return 2;
7148    case V1TImode:
7149    case V1DImode:
7150    case V2SFmode:
7151    case V2SImode:
7152    case V4HImode:
7153    case V8QImode:
7154      classes[0] = X86_64_SSE_CLASS;
7155      return 1;
7156    case BLKmode:
7157    case VOIDmode:
7158      return 0;
7159    default:
7160      gcc_assert (VECTOR_MODE_P (mode));
7161
7162      if (bytes > 16)
7163	return 0;
7164
7165      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
7166
7167      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
7168	classes[0] = X86_64_INTEGERSI_CLASS;
7169      else
7170	classes[0] = X86_64_INTEGER_CLASS;
7171      classes[1] = X86_64_INTEGER_CLASS;
7172      return 1 + (bytes > 8);
7173    }
7174}
7175
/* Examine the argument and set the number of registers required in each
   class.  Return true iff the parameter should be passed in memory.  */
7178
7179static bool
7180examine_argument (machine_mode mode, const_tree type, int in_return,
7181		  int *int_nregs, int *sse_nregs)
7182{
7183  enum x86_64_reg_class regclass[MAX_CLASSES];
7184  int n = classify_argument (mode, type, regclass, 0);
7185
7186  *int_nregs = 0;
7187  *sse_nregs = 0;
7188
7189  if (!n)
7190    return true;
7191  for (n--; n >= 0; n--)
7192    switch (regclass[n])
7193      {
7194      case X86_64_INTEGER_CLASS:
7195      case X86_64_INTEGERSI_CLASS:
7196	(*int_nregs)++;
7197	break;
7198      case X86_64_SSE_CLASS:
7199      case X86_64_SSESF_CLASS:
7200      case X86_64_SSEDF_CLASS:
7201	(*sse_nregs)++;
7202	break;
7203      case X86_64_NO_CLASS:
7204      case X86_64_SSEUP_CLASS:
7205	break;
7206      case X86_64_X87_CLASS:
7207      case X86_64_X87UP_CLASS:
7208      case X86_64_COMPLEX_X87_CLASS:
7209	if (!in_return)
7210	  return true;
7211	break;
7212      case X86_64_MEMORY_CLASS:
7213	gcc_unreachable ();
7214      }
7215
7216  return false;
7217}
7218
7219/* Construct container for the argument used by GCC interface.  See
7220   FUNCTION_ARG for the detailed description.  */
7221
7222static rtx
7223construct_container (machine_mode mode, machine_mode orig_mode,
7224		     const_tree type, int in_return, int nintregs, int nsseregs,
7225		     const int *intreg, int sse_regno)
7226{
7227  /* The following variables hold the static issued_error state.  */
7228  static bool issued_sse_arg_error;
7229  static bool issued_sse_ret_error;
7230  static bool issued_x87_ret_error;
7231
7232  machine_mode tmpmode;
7233  int bytes =
7234    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
7235  enum x86_64_reg_class regclass[MAX_CLASSES];
7236  int n;
7237  int i;
7238  int nexps = 0;
7239  int needed_sseregs, needed_intregs;
7240  rtx exp[MAX_CLASSES];
7241  rtx ret;
7242
7243  n = classify_argument (mode, type, regclass, 0);
7244  if (!n)
7245    return NULL;
7246  if (examine_argument (mode, type, in_return, &needed_intregs,
7247			&needed_sseregs))
7248    return NULL;
7249  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
7250    return NULL;
7251
7252  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
7253     some less clueful developer tries to use floating-point anyway.  */
7254  if (needed_sseregs && !TARGET_SSE)
7255    {
7256      if (in_return)
7257	{
7258	  if (!issued_sse_ret_error)
7259	    {
7260	      error ("SSE register return with SSE disabled");
7261	      issued_sse_ret_error = true;
7262	    }
7263	}
7264      else if (!issued_sse_arg_error)
7265	{
7266	  error ("SSE register argument with SSE disabled");
7267	  issued_sse_arg_error = true;
7268	}
7269      return NULL;
7270    }
7271
7272  /* Likewise, error if the ABI requires us to return values in the
7273     x87 registers and the user specified -mno-80387.  */
7274  if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
7275    for (i = 0; i < n; i++)
7276      if (regclass[i] == X86_64_X87_CLASS
7277	  || regclass[i] == X86_64_X87UP_CLASS
7278	  || regclass[i] == X86_64_COMPLEX_X87_CLASS)
7279	{
7280	  if (!issued_x87_ret_error)
7281	    {
7282	      error ("x87 register return with x87 disabled");
7283	      issued_x87_ret_error = true;
7284	    }
7285	  return NULL;
7286	}
7287
7288  /* First construct simple cases.  Avoid SCmode, since we want to use
7289     single register to pass this type.  */
7290  if (n == 1 && mode != SCmode)
7291    switch (regclass[0])
7292      {
7293      case X86_64_INTEGER_CLASS:
7294      case X86_64_INTEGERSI_CLASS:
7295	return gen_rtx_REG (mode, intreg[0]);
7296      case X86_64_SSE_CLASS:
7297      case X86_64_SSESF_CLASS:
7298      case X86_64_SSEDF_CLASS:
7299	if (mode != BLKmode)
7300	  return gen_reg_or_parallel (mode, orig_mode,
7301				      SSE_REGNO (sse_regno));
7302	break;
7303      case X86_64_X87_CLASS:
7304      case X86_64_COMPLEX_X87_CLASS:
7305	return gen_rtx_REG (mode, FIRST_STACK_REG);
7306      case X86_64_NO_CLASS:
7307	/* Zero sized array, struct or class.  */
7308	return NULL;
7309      default:
7310	gcc_unreachable ();
7311      }
7312  if (n == 2
7313      && regclass[0] == X86_64_SSE_CLASS
7314      && regclass[1] == X86_64_SSEUP_CLASS
7315      && mode != BLKmode)
7316    return gen_reg_or_parallel (mode, orig_mode,
7317				SSE_REGNO (sse_regno));
7318  if (n == 4
7319      && regclass[0] == X86_64_SSE_CLASS
7320      && regclass[1] == X86_64_SSEUP_CLASS
7321      && regclass[2] == X86_64_SSEUP_CLASS
7322      && regclass[3] == X86_64_SSEUP_CLASS
7323      && mode != BLKmode)
7324    return gen_reg_or_parallel (mode, orig_mode,
7325				SSE_REGNO (sse_regno));
7326  if (n == 8
7327      && regclass[0] == X86_64_SSE_CLASS
7328      && regclass[1] == X86_64_SSEUP_CLASS
7329      && regclass[2] == X86_64_SSEUP_CLASS
7330      && regclass[3] == X86_64_SSEUP_CLASS
7331      && regclass[4] == X86_64_SSEUP_CLASS
7332      && regclass[5] == X86_64_SSEUP_CLASS
7333      && regclass[6] == X86_64_SSEUP_CLASS
7334      && regclass[7] == X86_64_SSEUP_CLASS
7335      && mode != BLKmode)
7336    return gen_reg_or_parallel (mode, orig_mode,
7337				SSE_REGNO (sse_regno));
7338  if (n == 2
7339      && regclass[0] == X86_64_X87_CLASS
7340      && regclass[1] == X86_64_X87UP_CLASS)
7341    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
7342
7343  if (n == 2
7344      && regclass[0] == X86_64_INTEGER_CLASS
7345      && regclass[1] == X86_64_INTEGER_CLASS
7346      && (mode == CDImode || mode == TImode)
7347      && intreg[0] + 1 == intreg[1])
7348    return gen_rtx_REG (mode, intreg[0]);
7349
7350  /* Otherwise figure out the entries of the PARALLEL.  */
7351  for (i = 0; i < n; i++)
7352    {
7353      int pos;
7354
7355      switch (regclass[i])
7356        {
7357	  case X86_64_NO_CLASS:
7358	    break;
7359	  case X86_64_INTEGER_CLASS:
7360	  case X86_64_INTEGERSI_CLASS:
7361	    /* Merge TImodes on aligned occasions here too.  */
7362	    if (i * 8 + 8 > bytes)
7363	      tmpmode
7364		= mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
7365	    else if (regclass[i] == X86_64_INTEGERSI_CLASS)
7366	      tmpmode = SImode;
7367	    else
7368	      tmpmode = DImode;
	    /* We've requested a size we don't have a mode for.
	       Use DImode.  */
7371	    if (tmpmode == BLKmode)
7372	      tmpmode = DImode;
7373	    exp [nexps++]
7374	      = gen_rtx_EXPR_LIST (VOIDmode,
7375				   gen_rtx_REG (tmpmode, *intreg),
7376				   GEN_INT (i*8));
7377	    intreg++;
7378	    break;
7379	  case X86_64_SSESF_CLASS:
7380	    exp [nexps++]
7381	      = gen_rtx_EXPR_LIST (VOIDmode,
7382				   gen_rtx_REG (SFmode,
7383						SSE_REGNO (sse_regno)),
7384				   GEN_INT (i*8));
7385	    sse_regno++;
7386	    break;
7387	  case X86_64_SSEDF_CLASS:
7388	    exp [nexps++]
7389	      = gen_rtx_EXPR_LIST (VOIDmode,
7390				   gen_rtx_REG (DFmode,
7391						SSE_REGNO (sse_regno)),
7392				   GEN_INT (i*8));
7393	    sse_regno++;
7394	    break;
7395	  case X86_64_SSE_CLASS:
7396	    pos = i;
7397	    switch (n)
7398	      {
7399	      case 1:
7400		tmpmode = DImode;
7401		break;
7402	      case 2:
7403		if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
7404		  {
7405		    tmpmode = TImode;
7406		    i++;
7407		  }
7408		else
7409		  tmpmode = DImode;
7410		break;
7411	      case 4:
7412		gcc_assert (i == 0
7413			    && regclass[1] == X86_64_SSEUP_CLASS
7414			    && regclass[2] == X86_64_SSEUP_CLASS
7415			    && regclass[3] == X86_64_SSEUP_CLASS);
7416		tmpmode = OImode;
7417		i += 3;
7418		break;
7419	      case 8:
7420		gcc_assert (i == 0
7421			    && regclass[1] == X86_64_SSEUP_CLASS
7422			    && regclass[2] == X86_64_SSEUP_CLASS
7423			    && regclass[3] == X86_64_SSEUP_CLASS
7424			    && regclass[4] == X86_64_SSEUP_CLASS
7425			    && regclass[5] == X86_64_SSEUP_CLASS
7426			    && regclass[6] == X86_64_SSEUP_CLASS
7427			    && regclass[7] == X86_64_SSEUP_CLASS);
7428		tmpmode = XImode;
7429		i += 7;
7430		break;
7431	      default:
7432		gcc_unreachable ();
7433	      }
7434	    exp [nexps++]
7435	      = gen_rtx_EXPR_LIST (VOIDmode,
7436				   gen_rtx_REG (tmpmode,
7437						SSE_REGNO (sse_regno)),
7438				   GEN_INT (pos*8));
7439	    sse_regno++;
7440	    break;
7441	  default:
7442	    gcc_unreachable ();
7443	}
7444    }
7445
7446  /* Empty aligned struct, union or class.  */
7447  if (nexps == 0)
7448    return NULL;
7449
7450  ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
7451  for (i = 0; i < nexps; i++)
7452    XVECEXP (ret, 0, i) = exp [i];
7453  return ret;
7454}
7455
7456/* Update the data in CUM to advance over an argument of mode MODE
7457   and data type TYPE.  (TYPE is null for libcalls where that information
7458   may not be available.)
7459
7460   Return the number of integer registers advanced over.  */
7461
7462static int
7463function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7464			 const_tree type, HOST_WIDE_INT bytes,
7465			 HOST_WIDE_INT words)
7466{
7467  int res = 0;
7468  bool error_p = false;
7469
7470  switch (mode)
7471    {
7472    default:
7473      break;
7474
7475    case BLKmode:
7476      if (bytes < 0)
7477	break;
7478      /* FALLTHRU */
7479
7480    case DImode:
7481    case SImode:
7482    case HImode:
7483    case QImode:
7484      cum->words += words;
7485      cum->nregs -= words;
7486      cum->regno += words;
7487      if (cum->nregs >= 0)
7488	res = words;
7489      if (cum->nregs <= 0)
7490	{
7491	  cum->nregs = 0;
7492	  cum->regno = 0;
7493	}
7494      break;
7495
7496    case OImode:
7497      /* OImode shouldn't be used directly.  */
7498      gcc_unreachable ();
7499
7500    case DFmode:
7501      if (cum->float_in_sse == -1)
7502	error_p = true;
7503      if (cum->float_in_sse < 2)
7504	break;
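      /* FALLTHRU */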
7505    case SFmode:
7506      if (cum->float_in_sse == -1)
7507	error_p = true;
7508      if (cum->float_in_sse < 1)
7509	break;
7510      /* FALLTHRU */
7511
7512    case V8SFmode:
7513    case V8SImode:
7514    case V64QImode:
7515    case V32HImode:
7516    case V16SImode:
7517    case V8DImode:
7518    case V16SFmode:
7519    case V8DFmode:
7520    case V32QImode:
7521    case V16HImode:
7522    case V4DFmode:
7523    case V4DImode:
7524    case TImode:
7525    case V16QImode:
7526    case V8HImode:
7527    case V4SImode:
7528    case V2DImode:
7529    case V4SFmode:
7530    case V2DFmode:
7531      if (!type || !AGGREGATE_TYPE_P (type))
7532	{
7533	  cum->sse_words += words;
7534	  cum->sse_nregs -= 1;
7535	  cum->sse_regno += 1;
7536	  if (cum->sse_nregs <= 0)
7537	    {
7538	      cum->sse_nregs = 0;
7539	      cum->sse_regno = 0;
7540	    }
7541	}
7542      break;
7543
7544    case V8QImode:
7545    case V4HImode:
7546    case V2SImode:
7547    case V2SFmode:
7548    case V1TImode:
7549    case V1DImode:
7550      if (!type || !AGGREGATE_TYPE_P (type))
7551	{
7552	  cum->mmx_words += words;
7553	  cum->mmx_nregs -= 1;
7554	  cum->mmx_regno += 1;
7555	  if (cum->mmx_nregs <= 0)
7556	    {
7557	      cum->mmx_nregs = 0;
7558	      cum->mmx_regno = 0;
7559	    }
7560	}
7561      break;
7562    }
7563  if (error_p)
7564    {
7565      cum->float_in_sse = 0;
7566      error ("calling %qD with SSE calling convention without "
7567	     "SSE/SSE2 enabled", cum->decl);
7568      sorry ("this is a GCC bug that can be worked around by adding "
7569	     "attribute used to the called function");
7570    }
7571
7572  return res;
7573}
7574
7575static int
7576function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode,
7577			 const_tree type, HOST_WIDE_INT words, bool named)
7578{
7579  int int_nregs, sse_nregs;
7580
7581  /* Unnamed 512 and 256bit vector mode parameters are passed on stack.  */
7582  if (!named && (VALID_AVX512F_REG_MODE (mode)
7583		 || VALID_AVX256_REG_MODE (mode)))
7584    return 0;
7585
7586  if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
7587      && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
7588    {
7589      cum->nregs -= int_nregs;
7590      cum->sse_nregs -= sse_nregs;
7591      cum->regno += int_nregs;
7592      cum->sse_regno += sse_nregs;
7593      return int_nregs;
7594    }
7595  else
7596    {
7597      int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
7598      cum->words = (cum->words + align - 1) & ~(align - 1);
7599      cum->words += words;
7600      return 0;
7601    }
7602}
7603
7604static int
7605function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
7606			    HOST_WIDE_INT words)
7607{
7608  /* Anything not 1, 2, 4 or 8 bytes in size should already have been
     passed indirectly.  */
7609  gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
7610
7611  cum->words += words;
7612  if (cum->nregs > 0)
7613    {
7614      cum->nregs -= 1;
7615      cum->regno += 1;
7616      return 1;
7617    }
7618  return 0;
7619}
7620
7621/* Update the data in CUM to advance over an argument of mode MODE and
7622   data type TYPE.  (TYPE is null for libcalls where that information
7623   may not be available.)  */
7624
7625static void
7626ix86_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7627			   const_tree type, bool named)
7628{
7629  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7630  HOST_WIDE_INT bytes, words;
7631  int nregs;
7632
7633  if (mode == BLKmode)
7634    bytes = int_size_in_bytes (type);
7635  else
7636    bytes = GET_MODE_SIZE (mode);
7637  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7638
7639  if (type)
7640    mode = type_natural_mode (type, NULL, false);
7641
7642  if ((type && POINTER_BOUNDS_TYPE_P (type))
7643      || POINTER_BOUNDS_MODE_P (mode))
7644    {
7645      /* If we pass bounds in BT then just update the remaining bounds count.  */
7646      if (cum->bnds_in_bt)
7647	{
7648	  cum->bnds_in_bt--;
7649	  return;
7650	}
7651
7652      /* Update the remaining number of bounds to force.  */
7653      if (cum->force_bnd_pass)
7654	cum->force_bnd_pass--;
7655
7656      cum->bnd_regno++;
7657
7658      return;
7659    }
7660
7661  /* The first arg not going to Bounds Tables resets this counter.  */
7662  cum->bnds_in_bt = 0;
7663  /* For unnamed args we always pass bounds to avoid mismatches when the
7664     passed and received types differ.  If bounds do not follow an unnamed
7665     arg, still pretend the required number of bounds were passed.  */
7666  if (cum->force_bnd_pass)
7667    {
7668      cum->bnd_regno += cum->force_bnd_pass;
7669      cum->force_bnd_pass = 0;
7670    }
7671
7672  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7673    nregs = function_arg_advance_ms_64 (cum, bytes, words);
7674  else if (TARGET_64BIT)
7675    nregs = function_arg_advance_64 (cum, mode, type, words, named);
7676  else
7677    nregs = function_arg_advance_32 (cum, mode, type, bytes, words);
7678
7679  /* For stdarg we expect bounds to be passed for each value passed
7680     in a register.  */
7681  if (cum->stdarg)
7682    cum->force_bnd_pass = nregs;
7683  /* For pointers passed in memory we expect bounds to be passed in the
7684     Bounds Table.  */
7685  if (!nregs)
7686    cum->bnds_in_bt = chkp_type_bounds_count (type);
7687}
7688
7689/* Define where to put the arguments to a function.
7690   Value is zero to push the argument on the stack,
7691   or a hard register in which to store the argument.
7692
7693   MODE is the argument's machine mode.
7694   TYPE is the data type of the argument (as a tree).
7695    This is null for libcalls where that information may
7696    not be available.
7697   CUM is a variable of type CUMULATIVE_ARGS which gives info about
7698    the preceding args and about the function being called.
7699   NAMED is nonzero if this argument is a named parameter
7700    (otherwise it is an extra parameter matching an ellipsis).  */
7701
7702static rtx
7703function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
7704		 machine_mode orig_mode, const_tree type,
7705		 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
7706{
7707  bool error_p = false;
7708  /* Avoid the AL settings for the Unix64 ABI.  */
7709  if (mode == VOIDmode)
7710    return constm1_rtx;
7711
7712  switch (mode)
7713    {
7714    default:
7715      break;
7716
7717    case BLKmode:
7718      if (bytes < 0)
7719	break;
7720      /* FALLTHRU */
7721    case DImode:
7722    case SImode:
7723    case HImode:
7724    case QImode:
7725      if (words <= cum->nregs)
7726	{
7727	  int regno = cum->regno;
7728
7729	  /* Fastcall allocates the first two DWORD (SImode) or
7730	     smaller arguments to ECX and EDX if they are not
7731	     aggregate types.  */
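	  /* For example, given
	     __attribute__((fastcall)) int f (int a, int b, int c),
	     A is passed in %ecx, B in %edx and C on the stack.  */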
7732	  if (cum->fastcall)
7733	    {
7734	      if (mode == BLKmode
7735		  || mode == DImode
7736		  || (type && AGGREGATE_TYPE_P (type)))
7737	        break;
7738
7739	      /* ECX not EAX is the first allocated register.  */
7740	      if (regno == AX_REG)
7741		regno = CX_REG;
7742	    }
7743	  return gen_rtx_REG (mode, regno);
7744	}
7745      break;
7746
7747    case DFmode:
7748      if (cum->float_in_sse == -1)
7749	error_p = true;
7750      if (cum->float_in_sse < 2)
7751	break;
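      /* FALLTHRU */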
7752    case SFmode:
7753      if (cum->float_in_sse == -1)
7754	error_p = true;
7755      if (cum->float_in_sse < 1)
7756	break;
7757      /* FALLTHRU */
7758    case TImode:
7759      /* In 32bit, we pass TImode in xmm registers.  */
7760    case V16QImode:
7761    case V8HImode:
7762    case V4SImode:
7763    case V2DImode:
7764    case V4SFmode:
7765    case V2DFmode:
7766      if (!type || !AGGREGATE_TYPE_P (type))
7767	{
7768	  if (cum->sse_nregs)
7769	    return gen_reg_or_parallel (mode, orig_mode,
7770				        cum->sse_regno + FIRST_SSE_REG);
7771	}
7772      break;
7773
7774    case OImode:
7775    case XImode:
7776      /* OImode and XImode shouldn't be used directly.  */
7777      gcc_unreachable ();
7778
7779    case V64QImode:
7780    case V32HImode:
7781    case V16SImode:
7782    case V8DImode:
7783    case V16SFmode:
7784    case V8DFmode:
7785    case V8SFmode:
7786    case V8SImode:
7787    case V32QImode:
7788    case V16HImode:
7789    case V4DFmode:
7790    case V4DImode:
7791      if (!type || !AGGREGATE_TYPE_P (type))
7792	{
7793	  if (cum->sse_nregs)
7794	    return gen_reg_or_parallel (mode, orig_mode,
7795				        cum->sse_regno + FIRST_SSE_REG);
7796	}
7797      break;
7798
7799    case V8QImode:
7800    case V4HImode:
7801    case V2SImode:
7802    case V2SFmode:
7803    case V1TImode:
7804    case V1DImode:
7805      if (!type || !AGGREGATE_TYPE_P (type))
7806	{
7807	  if (cum->mmx_nregs)
7808	    return gen_reg_or_parallel (mode, orig_mode,
7809				        cum->mmx_regno + FIRST_MMX_REG);
7810	}
7811      break;
7812    }
7813  if (error_p)
7814    {
7815      cum->float_in_sse = 0;
7816      error ("calling %qD with SSE calling convention without "
7817	     "SSE/SSE2 enabled", cum->decl);
7818      sorry ("this is a GCC bug that can be worked around by adding "
7819	     "attribute used to the called function");
7820    }
7821
7822  return NULL_RTX;
7823}
7824
7825static rtx
7826function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7827		 machine_mode orig_mode, const_tree type, bool named)
7828{
7829  /* Handle a hidden AL argument containing the number of SSE registers
7830     used for varargs x86-64 functions.  */
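  /* Under the SysV x86-64 calling convention the caller of a varargs
     function loads into %al (an upper bound on) the number of SSE registers
     actually used, e.g. %al is 1 for printf ("%f", 1.0), so the callee's
     prologue knows whether it needs to dump the SSE argument registers into
     the register save area.  */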
7831  if (mode == VOIDmode)
7832    return GEN_INT (cum->maybe_vaarg
7833		    ? (cum->sse_nregs < 0
7834		       ? X86_64_SSE_REGPARM_MAX
7835		       : cum->sse_regno)
7836		    : -1);
7837
7838  switch (mode)
7839    {
7840    default:
7841      break;
7842
7843    case V8SFmode:
7844    case V8SImode:
7845    case V32QImode:
7846    case V16HImode:
7847    case V4DFmode:
7848    case V4DImode:
7849    case V16SFmode:
7850    case V16SImode:
7851    case V64QImode:
7852    case V32HImode:
7853    case V8DFmode:
7854    case V8DImode:
7855      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
7856      if (!named)
7857	return NULL;
7858      break;
7859    }
7860
7861  return construct_container (mode, orig_mode, type, 0, cum->nregs,
7862			      cum->sse_nregs,
7863			      &x86_64_int_parameter_registers [cum->regno],
7864			      cum->sse_regno);
7865}
7866
7867static rtx
7868function_arg_ms_64 (const CUMULATIVE_ARGS *cum, machine_mode mode,
7869		    machine_mode orig_mode, bool named,
7870		    HOST_WIDE_INT bytes)
7871{
7872  unsigned int regno;
7873
7874  /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
7875     The value -2 marks the current function call as using the MS ABI.  */
7876  if (mode == VOIDmode)
7877    return GEN_INT (-2);
7878
7879  /* If we've run out of registers, it goes on the stack.  */
7880  if (cum->nregs == 0)
7881    return NULL_RTX;
7882
7883  regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
7884
7885  /* Only floating point modes are passed in anything but integer regs.  */
7886  if (TARGET_SSE && (mode == SFmode || mode == DFmode))
7887    {
7888      if (named)
7889	regno = cum->regno + FIRST_SSE_REG;
7890      else
7891	{
7892	  rtx t1, t2;
7893
7894	  /* Unnamed floating parameters are passed in both the
7895	     SSE and integer registers.  */
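	  /* E.g. a double in the third argument slot of a varargs MS-ABI
	     call is duplicated in both %xmm2 and %r8 so the callee can read
	     it from whichever register file it expects.  */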
7896	  t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
7897	  t2 = gen_rtx_REG (mode, regno);
7898	  t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
7899	  t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
7900	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
7901	}
7902    }
7903  /* Handle aggregate types passed in a register.  */
7904  if (orig_mode == BLKmode)
7905    {
7906      if (bytes > 0 && bytes <= 8)
7907        mode = (bytes > 4 ? DImode : SImode);
7908      if (mode == BLKmode)
7909        mode = DImode;
7910    }
7911
7912  return gen_reg_or_parallel (mode, orig_mode, regno);
7913}
7914
7915/* Return where to put the arguments to a function.
7916   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.
7917
7918   MODE is the argument's machine mode.  TYPE is the data type of the
7919   argument.  It is null for libcalls where that information may not be
7920   available.  CUM gives information about the preceding args and about
7921   the function being called.  NAMED is nonzero if this argument is a
7922   named parameter (otherwise it is an extra parameter matching an
7923   ellipsis).  */
7924
7925static rtx
7926ix86_function_arg (cumulative_args_t cum_v, machine_mode omode,
7927		   const_tree type, bool named)
7928{
7929  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7930  machine_mode mode = omode;
7931  HOST_WIDE_INT bytes, words;
7932  rtx arg;
7933
7934  /* All pointer bounds arguments are handled separately here.  */
7935  if ((type && POINTER_BOUNDS_TYPE_P (type))
7936      || POINTER_BOUNDS_MODE_P (mode))
7937    {
7938      /* Return NULL if bounds are forced to go in Bounds Table.  */
7939      if (cum->bnds_in_bt)
7940	arg = NULL;
7941      /* Return the next available bound reg if any.  */
7942      else if (cum->bnd_regno <= LAST_BND_REG)
7943	arg = gen_rtx_REG (BNDmode, cum->bnd_regno);
7944      /* Return the next special slot number otherwise.  */
7945      else
7946	arg = GEN_INT (cum->bnd_regno - LAST_BND_REG - 1);
7947
7948      return arg;
7949    }
7950
7951  if (mode == BLKmode)
7952    bytes = int_size_in_bytes (type);
7953  else
7954    bytes = GET_MODE_SIZE (mode);
7955  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7956
7957  /* To simplify the code below, represent vector types with a vector mode
7958     even if MMX/SSE are not active.  */
7959  if (type && TREE_CODE (type) == VECTOR_TYPE)
7960    mode = type_natural_mode (type, cum, false);
7961
7962  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7963    arg = function_arg_ms_64 (cum, mode, omode, named, bytes);
7964  else if (TARGET_64BIT)
7965    arg = function_arg_64 (cum, mode, omode, type, named);
7966  else
7967    arg = function_arg_32 (cum, mode, omode, type, bytes, words);
7968
7969  return arg;
7970}
7971
7972/* A C expression that indicates when an argument must be passed by
7973   reference.  If nonzero for an argument, a copy of that argument is
7974   made in memory and a pointer to the argument is passed instead of
7975   the argument itself.  The pointer is passed in whatever way is
7976   appropriate for passing a pointer to that type.  */
7977
7978static bool
7979ix86_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7980			const_tree type, bool)
7981{
7982  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7983
7984  /* Bounds are never passed by reference.  */
7985  if ((type && POINTER_BOUNDS_TYPE_P (type))
7986      || POINTER_BOUNDS_MODE_P (mode))
7987    return false;
7988
7989  /* See Windows x64 Software Convention.  */
7990  if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
7991    {
7992      int msize = (int) GET_MODE_SIZE (mode);
7993      if (type)
7994	{
7995	  /* Arrays are passed by reference.  */
7996	  if (TREE_CODE (type) == ARRAY_TYPE)
7997	    return true;
7998
7999	  if (AGGREGATE_TYPE_P (type))
8000	    {
8001	      /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
8002	         are passed by reference.  */
8003	      msize = int_size_in_bytes (type);
8004	    }
8005	}
8006
8007      /* __m128 is passed by reference.  */
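      /* More generally, anything whose size is not 1, 2, 4 or 8 bytes,
	 e.g. a 16-byte __m128 or a 12-byte struct, is passed by reference
	 under this ABI.  */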
8008      switch (msize) {
8009      case 1: case 2: case 4: case 8:
8010        break;
8011      default:
8012        return true;
8013      }
8014    }
8015  else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
8016    return true;
8017
8018  return false;
8019}
8020
8021/* Return true when TYPE should be 128bit aligned for 32bit argument
8022   passing ABI.  XXX: This function is obsolete and is only used for
8023   checking psABI compatibility with previous versions of GCC.  */
8024
8025static bool
8026ix86_compat_aligned_value_p (const_tree type)
8027{
8028  machine_mode mode = TYPE_MODE (type);
8029  if (((TARGET_SSE && SSE_REG_MODE_P (mode))
8030       || mode == TDmode
8031       || mode == TFmode
8032       || mode == TCmode)
8033      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
8034    return true;
8035  if (TYPE_ALIGN (type) < 128)
8036    return false;
8037
8038  if (AGGREGATE_TYPE_P (type))
8039    {
8040      /* Walk the aggregates recursively.  */
8041      switch (TREE_CODE (type))
8042	{
8043	case RECORD_TYPE:
8044	case UNION_TYPE:
8045	case QUAL_UNION_TYPE:
8046	  {
8047	    tree field;
8048
8049	    /* Walk all the structure fields.  */
8050	    for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8051	      {
8052		if (TREE_CODE (field) == FIELD_DECL
8053		    && ix86_compat_aligned_value_p (TREE_TYPE (field)))
8054		  return true;
8055	      }
8056	    break;
8057	  }
8058
8059	case ARRAY_TYPE:
8060	  /* Just for use if some languages pass arrays by value.  */
8061	  if (ix86_compat_aligned_value_p (TREE_TYPE (type)))
8062	    return true;
8063	  break;
8064
8065	default:
8066	  gcc_unreachable ();
8067	}
8068    }
8069  return false;
8070}
8071
8072/* Return the alignment boundary for MODE and TYPE with alignment ALIGN.
8073   XXX: This function is obsolete and is only used for checking psABI
8074   compatibility with previous versions of GCC.  */
8075
8076static unsigned int
8077ix86_compat_function_arg_boundary (machine_mode mode,
8078				   const_tree type, unsigned int align)
8079{
8080  /* In 32bit, only _Decimal128 and __float128 are aligned to their
8081     natural boundaries.  */
8082  if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
8083    {
8084      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
8085	 make an exception for SSE modes since these require 128bit
8086	 alignment.
8087
8088	 The handling here differs from field_alignment.  ICC aligns MMX
8089	 arguments to 4 byte boundaries, while structure fields are aligned
8090	 to 8 byte boundaries.  */
8091      if (!type)
8092	{
8093	  if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
8094	    align = PARM_BOUNDARY;
8095	}
8096      else
8097	{
8098	  if (!ix86_compat_aligned_value_p (type))
8099	    align = PARM_BOUNDARY;
8100	}
8101    }
8102  if (align > BIGGEST_ALIGNMENT)
8103    align = BIGGEST_ALIGNMENT;
8104  return align;
8105}
8106
8107/* Return true when TYPE should be 128bit aligned for 32bit argument
8108   passing ABI.  */
8109
8110static bool
8111ix86_contains_aligned_value_p (const_tree type)
8112{
8113  machine_mode mode = TYPE_MODE (type);
8114
8115  if (mode == XFmode || mode == XCmode)
8116    return false;
8117
8118  if (TYPE_ALIGN (type) < 128)
8119    return false;
8120
8121  if (AGGREGATE_TYPE_P (type))
8122    {
8123      /* Walk the aggregates recursively.  */
8124      switch (TREE_CODE (type))
8125	{
8126	case RECORD_TYPE:
8127	case UNION_TYPE:
8128	case QUAL_UNION_TYPE:
8129	  {
8130	    tree field;
8131
8132	    /* Walk all the structure fields.  */
8133	    for (field = TYPE_FIELDS (type);
8134		 field;
8135		 field = DECL_CHAIN (field))
8136	      {
8137		if (TREE_CODE (field) == FIELD_DECL
8138		    && ix86_contains_aligned_value_p (TREE_TYPE (field)))
8139		  return true;
8140	      }
8141	    break;
8142	  }
8143
8144	case ARRAY_TYPE:
8145	  /* Just for use if some languages pass arrays by value.  */
8146	  if (ix86_contains_aligned_value_p (TREE_TYPE (type)))
8147	    return true;
8148	  break;
8149
8150	default:
8151	  gcc_unreachable ();
8152	}
8153    }
8154  else
8155    return TYPE_ALIGN (type) >= 128;
8156
8157  return false;
8158}
8159
8160/* Gives the alignment boundary, in bits, of an argument with the
8161   specified mode and type.  */
8162
8163static unsigned int
8164ix86_function_arg_boundary (machine_mode mode, const_tree type)
8165{
8166  unsigned int align;
8167  if (type)
8168    {
8169      /* Since the main variant type is used for the call, convert TYPE
8170	 to its main variant.  */
8171      type = TYPE_MAIN_VARIANT (type);
8172      align = TYPE_ALIGN (type);
8173    }
8174  else
8175    align = GET_MODE_ALIGNMENT (mode);
8176  if (align < PARM_BOUNDARY)
8177    align = PARM_BOUNDARY;
8178  else
8179    {
8180      static bool warned;
8181      unsigned int saved_align = align;
8182
8183      if (!TARGET_64BIT)
8184	{
8185	  /* i386 ABI defines XFmode arguments to be 4 byte aligned.  */
8186	  if (!type)
8187	    {
8188	      if (mode == XFmode || mode == XCmode)
8189		align = PARM_BOUNDARY;
8190	    }
8191	  else if (!ix86_contains_aligned_value_p (type))
8192	    align = PARM_BOUNDARY;
8193
8194	  if (align < 128)
8195	    align = PARM_BOUNDARY;
8196	}
8197
8198      if (warn_psabi
8199	  && !warned
8200	  && align != ix86_compat_function_arg_boundary (mode, type,
8201							 saved_align))
8202	{
8203	  warned = true;
8204	  inform (input_location,
8205		  "The ABI for passing parameters with %d-byte"
8206		  " alignment has changed in GCC 4.6",
8207		  align / BITS_PER_UNIT);
8208	}
8209    }
8210
8211  return align;
8212}
8213
8214/* Return true if REGNO is a possible register number for a function
   value.  */
8215
8216static bool
8217ix86_function_value_regno_p (const unsigned int regno)
8218{
8219  switch (regno)
8220    {
8221    case AX_REG:
8222      return true;
8223    case DX_REG:
8224      return (!TARGET_64BIT || ix86_cfun_abi () != MS_ABI);
8225    case DI_REG:
8226    case SI_REG:
8227      return TARGET_64BIT && ix86_cfun_abi () != MS_ABI;
8228
8229    case BND0_REG:
8230    case BND1_REG:
8231      return chkp_function_instrumented_p (current_function_decl);
8232
8233      /* Complex values are returned in %st(0)/%st(1) pair.  */
8234    case ST0_REG:
8235    case ST1_REG:
8236      /* TODO: The function should depend on current function ABI but
8237       builtins.c would need updating then. Therefore we use the
8238       default ABI.  */
8239      if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
8240	return false;
8241      return TARGET_FLOAT_RETURNS_IN_80387;
8242
8243      /* Complex values are returned in %xmm0/%xmm1 pair.  */
8244    case XMM0_REG:
8245    case XMM1_REG:
8246      return TARGET_SSE;
8247
8248    case MM0_REG:
8249      if (TARGET_MACHO || TARGET_64BIT)
8250	return false;
8251      return TARGET_MMX;
8252    }
8253
8254  return false;
8255}
8256
8257/* Define how to find the value returned by a function.
8258   VALTYPE is the data type of the value (as a tree).
8259   If the precise function being called is known, FUNC is its FUNCTION_DECL;
8260   otherwise, FUNC is 0.  */
8261
8262static rtx
8263function_value_32 (machine_mode orig_mode, machine_mode mode,
8264		   const_tree fntype, const_tree fn)
8265{
8266  unsigned int regno;
8267
8268  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
8269     we normally prevent this case when mmx is not available.  However,
8270     some ABIs may require the result to be returned like DImode.  */
8271  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
8272    regno = FIRST_MMX_REG;
8273
8274  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
8275	   we prevent this case when sse is not available.  However, some ABIs
8276	   may require the result to be returned like integer TImode.  */
8277  else if (mode == TImode
8278	   || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
8279    regno = FIRST_SSE_REG;
8280
8281  /* 32-byte vector modes in %ymm0.   */
8282  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
8283    regno = FIRST_SSE_REG;
8284
8285  /* 64-byte vector modes in %zmm0.   */
8286  else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
8287    regno = FIRST_SSE_REG;
8288
8289  /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387).  */
8290  else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
8291    regno = FIRST_FLOAT_REG;
8292  else
8293    /* Most things go in %eax.  */
8294    regno = AX_REG;
8295
8296  /* Override FP return register with %xmm0 for local functions when
8297     SSE math is enabled or for functions with sseregparm attribute.  */
8298  if ((fn || fntype) && (mode == SFmode || mode == DFmode))
8299    {
8300      int sse_level = ix86_function_sseregparm (fntype, fn, false);
8301      if (sse_level == -1)
8302	{
8303	  error ("calling %qD with SSE calling convention without "
8304		 "SSE/SSE2 enabled", fn);
8305	  sorry ("this is a GCC bug that can be worked around by adding "
8306		 "attribute used to the called function");
8307	}
8308      else if ((sse_level >= 1 && mode == SFmode)
8309	       || (sse_level == 2 && mode == DFmode))
8310	regno = FIRST_SSE_REG;
8311    }
8312
8313  /* OImode shouldn't be used directly.  */
8314  gcc_assert (mode != OImode);
8315
8316  return gen_rtx_REG (orig_mode, regno);
8317}
8318
8319static rtx
8320function_value_64 (machine_mode orig_mode, machine_mode mode,
8321		   const_tree valtype)
8322{
8323  rtx ret;
8324
8325  /* Handle libcalls, which don't provide a type node.  */
8326  if (valtype == NULL)
8327    {
8328      unsigned int regno;
8329
8330      switch (mode)
8331	{
8332	case SFmode:
8333	case SCmode:
8334	case DFmode:
8335	case DCmode:
8336	case TFmode:
8337	case SDmode:
8338	case DDmode:
8339	case TDmode:
8340	  regno = FIRST_SSE_REG;
8341	  break;
8342	case XFmode:
8343	case XCmode:
8344	  regno = FIRST_FLOAT_REG;
8345	  break;
8346	case TCmode:
8347	  return NULL;
8348	default:
8349	  regno = AX_REG;
8350	}
8351
8352      return gen_rtx_REG (mode, regno);
8353    }
8354  else if (POINTER_TYPE_P (valtype))
8355    {
8356      /* Pointers are always returned in word_mode.  */
8357      mode = word_mode;
8358    }
8359
8360  ret = construct_container (mode, orig_mode, valtype, 1,
8361			     X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
8362			     x86_64_int_return_registers, 0);
8363
8364  /* For zero-sized structures, construct_container returns NULL, but we
8365     need to keep the rest of the compiler happy by returning a meaningful
     value.  */
8366  if (!ret)
8367    ret = gen_rtx_REG (orig_mode, AX_REG);
8368
8369  return ret;
8370}
8371
8372static rtx
8373function_value_ms_64 (machine_mode orig_mode, machine_mode mode,
8374		      const_tree valtype)
8375{
8376  unsigned int regno = AX_REG;
8377
8378  if (TARGET_SSE)
8379    {
8380      switch (GET_MODE_SIZE (mode))
8381	{
8382	case 16:
8383	  if (valtype != NULL_TREE
8384	      && !VECTOR_INTEGER_TYPE_P (valtype)
8386	      && !INTEGRAL_TYPE_P (valtype)
8387	      && !VECTOR_FLOAT_TYPE_P (valtype))
8388	    break;
8389	  if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8390	      && !COMPLEX_MODE_P (mode))
8391	    regno = FIRST_SSE_REG;
8392	  break;
8393	case 8:
8394	case 4:
8395	  if (mode == SFmode || mode == DFmode)
8396	    regno = FIRST_SSE_REG;
8397	  break;
8398	default:
8399	  break;
8400        }
8401    }
8402  return gen_rtx_REG (orig_mode, regno);
8403}
8404
8405static rtx
8406ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
8407		       machine_mode orig_mode, machine_mode mode)
8408{
8409  const_tree fn, fntype;
8410
8411  fn = NULL_TREE;
8412  if (fntype_or_decl && DECL_P (fntype_or_decl))
8413    fn = fntype_or_decl;
8414  fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
8415
8416  if ((valtype && POINTER_BOUNDS_TYPE_P (valtype))
8417      || POINTER_BOUNDS_MODE_P (mode))
8418    return gen_rtx_REG (BNDmode, FIRST_BND_REG);
8419  else if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
8420    return function_value_ms_64 (orig_mode, mode, valtype);
8421  else if (TARGET_64BIT)
8422    return function_value_64 (orig_mode, mode, valtype);
8423  else
8424    return function_value_32 (orig_mode, mode, fntype, fn);
8425}
8426
8427static rtx
8428ix86_function_value (const_tree valtype, const_tree fntype_or_decl, bool)
8429{
8430  machine_mode mode, orig_mode;
8431
8432  orig_mode = TYPE_MODE (valtype);
8433  mode = type_natural_mode (valtype, NULL, true);
8434  return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
8435}
8436
8437/*  Return an RTX representing a place where a function returns
8438    or receives pointer bounds, or NULL if no bounds are returned.
8439
8440    VALTYPE is a data type of a value returned by the function.
8441
8442    FN_DECL_OR_TYPE is a tree node representing FUNCTION_DECL
8443    or FUNCTION_TYPE of the function.
8444
8445    If OUTGOING is false, return a place in which the caller will
8446    see the return value.  Otherwise, return a place where a
8447    function returns a value.  */
8448
8449static rtx
8450ix86_function_value_bounds (const_tree valtype,
8451			    const_tree fntype_or_decl ATTRIBUTE_UNUSED,
8452			    bool outgoing ATTRIBUTE_UNUSED)
8453{
8454  rtx res = NULL_RTX;
8455
8456  if (BOUNDED_TYPE_P (valtype))
8457    res = gen_rtx_REG (BNDmode, FIRST_BND_REG);
8458  else if (chkp_type_has_pointer (valtype))
8459    {
8460      bitmap slots;
8461      rtx bounds[2];
8462      bitmap_iterator bi;
8463      unsigned i, bnd_no = 0;
8464
8465      bitmap_obstack_initialize (NULL);
8466      slots = BITMAP_ALLOC (NULL);
8467      chkp_find_bound_slots (valtype, slots);
8468
8469      EXECUTE_IF_SET_IN_BITMAP (slots, 0, i, bi)
8470	{
8471	  rtx reg = gen_rtx_REG (BNDmode, FIRST_BND_REG + bnd_no);
8472	  rtx offs = GEN_INT (i * POINTER_SIZE / BITS_PER_UNIT);
8473	  gcc_assert (bnd_no < 2);
8474	  bounds[bnd_no++] = gen_rtx_EXPR_LIST (VOIDmode, reg, offs);
8475	}
8476
8477      res = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (bnd_no, bounds));
8478
8479      BITMAP_FREE (slots);
8480      bitmap_obstack_release (NULL);
8481    }
8482  else
8483    res = NULL_RTX;
8484
8485  return res;
8486}
8487
8488/* Pointer function arguments and return values are promoted to
8489   word_mode.  */
8490
8491static machine_mode
8492ix86_promote_function_mode (const_tree type, machine_mode mode,
8493			    int *punsignedp, const_tree fntype,
8494			    int for_return)
8495{
8496  if (type != NULL_TREE && POINTER_TYPE_P (type))
8497    {
8498      *punsignedp = POINTERS_EXTEND_UNSIGNED;
8499      return word_mode;
8500    }
8501  return default_promote_function_mode (type, mode, punsignedp, fntype,
8502					for_return);
8503}
8504
8505/* Return true if a structure, union or array with MODE containing FIELD
8506   should be accessed using BLKmode.  */
8507
8508static bool
8509ix86_member_type_forces_blk (const_tree field, machine_mode mode)
8510{
8511  /* Union with XFmode must be in BLKmode.  */
8512  return (mode == XFmode
8513	  && (TREE_CODE (DECL_FIELD_CONTEXT (field)) == UNION_TYPE
8514	      || TREE_CODE (DECL_FIELD_CONTEXT (field)) == QUAL_UNION_TYPE));
8515}
8516
8517rtx
8518ix86_libcall_value (machine_mode mode)
8519{
8520  return ix86_function_value_1 (NULL, NULL, mode, mode);
8521}
8522
8523/* Return true iff type is returned in memory.  */
8524
8525static bool
8526ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8527{
8528#ifdef SUBTARGET_RETURN_IN_MEMORY
8529  return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
8530#else
8531  const machine_mode mode = type_natural_mode (type, NULL, true);
8532  HOST_WIDE_INT size;
8533
8534  if (POINTER_BOUNDS_TYPE_P (type))
8535    return false;
8536
8537  if (TARGET_64BIT)
8538    {
8539      if (ix86_function_type_abi (fntype) == MS_ABI)
8540	{
8541	  size = int_size_in_bytes (type);
8542
8543	  /* __m128 is returned in xmm0.  */
8544	  if ((!type || VECTOR_INTEGER_TYPE_P (type)
8545	       || INTEGRAL_TYPE_P (type)
8546	       || VECTOR_FLOAT_TYPE_P (type))
8547	      && (SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
8548	      && !COMPLEX_MODE_P (mode)
8549	      && (GET_MODE_SIZE (mode) == 16 || size == 16))
8550	    return false;
8551
8552	  /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
8553	  return size != 1 && size != 2 && size != 4 && size != 8;
8554	}
8555      else
8556	{
8557	  int needed_intregs, needed_sseregs;
8558
8559	  return examine_argument (mode, type, 1,
8560				   &needed_intregs, &needed_sseregs);
8561	}
8562    }
8563  else
8564    {
8565      if (mode == BLKmode)
8566	return true;
8567
8568      size = int_size_in_bytes (type);
8569
8570      if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
8571	return false;
8572
8573      if (VECTOR_MODE_P (mode) || mode == TImode)
8574	{
8575	  /* User-created vectors small enough to fit in EAX.  */
8576	  if (size < 8)
8577	    return false;
8578
8579	  /* Unless the ABI prescribes otherwise,
8580	     MMX/3dNow values are returned in MM0 if available.  */
8581
8582	  if (size == 8)
8583	    return TARGET_VECT8_RETURNS || !TARGET_MMX;
8584
8585	  /* SSE values are returned in XMM0 if available.  */
8586	  if (size == 16)
8587	    return !TARGET_SSE;
8588
8589	  /* AVX values are returned in YMM0 if available.  */
8590	  if (size == 32)
8591	    return !TARGET_AVX;
8592
8593	  /* AVX512F values are returned in ZMM0 if available.  */
8594	  if (size == 64)
8595	    return !TARGET_AVX512F;
8596	}
8597
8598      if (mode == XFmode)
8599	return false;
8600
8601      if (size > 12)
8602	return true;
8603
8604      /* OImode shouldn't be used directly.  */
8605      gcc_assert (mode != OImode);
8606
8607      return false;
8608    }
8609#endif
8610}
8611
8612
8613/* Create the va_list data type.  */
8614
8615/* Returns the calling convention specific va_list data type.
8616   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
8617
8618static tree
8619ix86_build_builtin_va_list_abi (enum calling_abi abi)
8620{
8621  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
8622
8623  /* For i386 we use a plain pointer to the argument area.  */
8624  if (!TARGET_64BIT || abi == MS_ABI)
8625    return build_pointer_type (char_type_node);
8626
8627  record = lang_hooks.types.make_type (RECORD_TYPE);
8628  type_decl = build_decl (BUILTINS_LOCATION,
8629			  TYPE_DECL, get_identifier ("__va_list_tag"), record);
8630
8631  f_gpr = build_decl (BUILTINS_LOCATION,
8632		      FIELD_DECL, get_identifier ("gp_offset"),
8633		      unsigned_type_node);
8634  f_fpr = build_decl (BUILTINS_LOCATION,
8635		      FIELD_DECL, get_identifier ("fp_offset"),
8636		      unsigned_type_node);
8637  f_ovf = build_decl (BUILTINS_LOCATION,
8638		      FIELD_DECL, get_identifier ("overflow_arg_area"),
8639		      ptr_type_node);
8640  f_sav = build_decl (BUILTINS_LOCATION,
8641		      FIELD_DECL, get_identifier ("reg_save_area"),
8642		      ptr_type_node);
8643
8644  va_list_gpr_counter_field = f_gpr;
8645  va_list_fpr_counter_field = f_fpr;
8646
8647  DECL_FIELD_CONTEXT (f_gpr) = record;
8648  DECL_FIELD_CONTEXT (f_fpr) = record;
8649  DECL_FIELD_CONTEXT (f_ovf) = record;
8650  DECL_FIELD_CONTEXT (f_sav) = record;
8651
8652  TYPE_STUB_DECL (record) = type_decl;
8653  TYPE_NAME (record) = type_decl;
8654  TYPE_FIELDS (record) = f_gpr;
8655  DECL_CHAIN (f_gpr) = f_fpr;
8656  DECL_CHAIN (f_fpr) = f_ovf;
8657  DECL_CHAIN (f_ovf) = f_sav;
8658
8659  layout_type (record);
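  /* Together with the one-element array type built below, the record laid
     out above corresponds to the va_list type documented by the SysV x86-64
     psABI:

	 typedef struct __va_list_tag {
	   unsigned int gp_offset;
	   unsigned int fp_offset;
	   void *overflow_arg_area;
	   void *reg_save_area;
	 } va_list[1];  */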
8660
8661  /* The correct type is an array type of one element.  */
8662  return build_array_type (record, build_index_type (size_zero_node));
8663}
8664
8665/* Set up the builtin va_list data type and for 64-bit the additional
8666   calling convention specific va_list data types.  */
8667
8668static tree
8669ix86_build_builtin_va_list (void)
8670{
8671  tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
8672
8673  /* Initialize abi specific va_list builtin types.  */
8674  if (TARGET_64BIT)
8675    {
8676      tree t;
8677      if (ix86_abi == MS_ABI)
8678        {
8679          t = ix86_build_builtin_va_list_abi (SYSV_ABI);
8680          if (TREE_CODE (t) != RECORD_TYPE)
8681            t = build_variant_type_copy (t);
8682          sysv_va_list_type_node = t;
8683        }
8684      else
8685        {
8686          t = ret;
8687          if (TREE_CODE (t) != RECORD_TYPE)
8688            t = build_variant_type_copy (t);
8689          sysv_va_list_type_node = t;
8690        }
8691      if (ix86_abi != MS_ABI)
8692        {
8693          t = ix86_build_builtin_va_list_abi (MS_ABI);
8694          if (TREE_CODE (t) != RECORD_TYPE)
8695            t = build_variant_type_copy (t);
8696          ms_va_list_type_node = t;
8697        }
8698      else
8699        {
8700          t = ret;
8701          if (TREE_CODE (t) != RECORD_TYPE)
8702            t = build_variant_type_copy (t);
8703          ms_va_list_type_node = t;
8704        }
8705    }
8706
8707  return ret;
8708}
8709
8710/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
8711
8712static void
8713setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
8714{
8715  rtx save_area, mem;
8716  alias_set_type set;
8717  int i, max;
8718
8719  /* GPR size of varargs save area.  */
8720  if (cfun->va_list_gpr_size)
8721    ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
8722  else
8723    ix86_varargs_gpr_size = 0;
8724
8725  /* FPR size of varargs save area.  We don't need it if we don't pass
8726     anything in SSE registers.  */
8727  if (TARGET_SSE && cfun->va_list_fpr_size)
8728    ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
8729  else
8730    ix86_varargs_fpr_size = 0;
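  /* With both parts in use, the register save area laid out below holds
     X86_64_REGPARM_MAX * 8 = 48 bytes of integer registers followed by
     X86_64_SSE_REGPARM_MAX * 16 = 128 bytes of SSE registers; the va_list
     gp_offset and fp_offset fields index into this block.  */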
8731
8732  if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
8733    return;
8734
8735  save_area = frame_pointer_rtx;
8736  set = get_varargs_alias_set ();
8737
8738  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8739  if (max > X86_64_REGPARM_MAX)
8740    max = X86_64_REGPARM_MAX;
8741
8742  for (i = cum->regno; i < max; i++)
8743    {
8744      mem = gen_rtx_MEM (word_mode,
8745			 plus_constant (Pmode, save_area, i * UNITS_PER_WORD));
8746      MEM_NOTRAP_P (mem) = 1;
8747      set_mem_alias_set (mem, set);
8748      emit_move_insn (mem,
8749		      gen_rtx_REG (word_mode,
8750				   x86_64_int_parameter_registers[i]));
8751    }
8752
8753  if (ix86_varargs_fpr_size)
8754    {
8755      machine_mode smode;
8756      rtx_code_label *label;
8757      rtx test;
8758
8759      /* Now emit code to save SSE registers.  The AX parameter contains the
8760	 number of SSE parameter registers used to call this function, though
8761	 all we actually check here is the zero/non-zero status.  */
8762
8763      label = gen_label_rtx ();
8764      test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
8765      emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
8766				      label));
8767
8768      /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
8769	 we used movdqa (i.e. TImode) instead?  Perhaps even better would
8770	 be if we could determine the real mode of the data, via a hook
8771	 into pass_stdarg.  Ignore all that for now.  */
8772      smode = V4SFmode;
8773      if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
8774	crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
8775
8776      max = cum->sse_regno + cfun->va_list_fpr_size / 16;
8777      if (max > X86_64_SSE_REGPARM_MAX)
8778	max = X86_64_SSE_REGPARM_MAX;
8779
8780      for (i = cum->sse_regno; i < max; ++i)
8781	{
8782	  mem = plus_constant (Pmode, save_area,
8783			       i * 16 + ix86_varargs_gpr_size);
8784	  mem = gen_rtx_MEM (smode, mem);
8785	  MEM_NOTRAP_P (mem) = 1;
8786	  set_mem_alias_set (mem, set);
8787	  set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
8788
8789	  emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
8790	}
8791
8792      emit_label (label);
8793    }
8794}
8795
8796static void
8797setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
8798{
8799  alias_set_type set = get_varargs_alias_set ();
8800  int i;
8801
8802  /* Reset to zero, as a sysv va_arg might have been used
8803     before.  */
8804  ix86_varargs_gpr_size = 0;
8805  ix86_varargs_fpr_size = 0;
8806
8807  for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
8808    {
8809      rtx reg, mem;
8810
8811      mem = gen_rtx_MEM (Pmode,
8812			 plus_constant (Pmode, virtual_incoming_args_rtx,
8813					i * UNITS_PER_WORD));
8814      MEM_NOTRAP_P (mem) = 1;
8815      set_mem_alias_set (mem, set);
8816
8817      reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
8818      emit_move_insn (mem, reg);
8819    }
8820}
8821
8822static void
8823ix86_setup_incoming_varargs (cumulative_args_t cum_v, machine_mode mode,
8824			     tree type, int *, int no_rtl)
8825{
8826  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8827  CUMULATIVE_ARGS next_cum;
8828  tree fntype;
8829
8830  /* This argument doesn't appear to be used anymore, which is good,
8831     because the old code here didn't suppress rtl generation.  */
8832  gcc_assert (!no_rtl);
8833
8834  if (!TARGET_64BIT)
8835    return;
8836
8837  fntype = TREE_TYPE (current_function_decl);
8838
8839  /* For varargs, we do not want to skip the dummy va_dcl argument.
8840     For stdargs, we do want to skip the last named argument.  */
8841  next_cum = *cum;
8842  if (stdarg_p (fntype))
8843    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8844			       true);
8845
8846  if (cum->call_abi == MS_ABI)
8847    setup_incoming_varargs_ms_64 (&next_cum);
8848  else
8849    setup_incoming_varargs_64 (&next_cum);
8850}
8851
8852static void
8853ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v,
8854				   enum machine_mode mode,
8855				   tree type,
8856				   int *pretend_size ATTRIBUTE_UNUSED,
8857				   int no_rtl)
8858{
8859  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8860  CUMULATIVE_ARGS next_cum;
8861  tree fntype;
8862  rtx save_area;
8863  int bnd_reg, i, max;
8864
8865  gcc_assert (!no_rtl);
8866
8867  /* Do nothing if we use a plain pointer to the argument area.  */
8868  if (!TARGET_64BIT || cum->call_abi == MS_ABI)
8869    return;
8870
8871  fntype = TREE_TYPE (current_function_decl);
8872
8873  /* For varargs, we do not want to skip the dummy va_dcl argument.
8874     For stdargs, we do want to skip the last named argument.  */
8875  next_cum = *cum;
8876  if (stdarg_p (fntype))
8877    ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type,
8878			       true);
8879  save_area = frame_pointer_rtx;
8880
8881  max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
8882  if (max > X86_64_REGPARM_MAX)
8883    max = X86_64_REGPARM_MAX;
8884
8885  bnd_reg = cum->bnd_regno + cum->force_bnd_pass;
8886  if (chkp_function_instrumented_p (current_function_decl))
8887    for (i = cum->regno; i < max; i++)
8888      {
8889	rtx addr = plus_constant (Pmode, save_area, i * UNITS_PER_WORD);
8890	rtx reg = gen_rtx_REG (DImode,
8891			       x86_64_int_parameter_registers[i]);
8892	rtx ptr = reg;
8893	rtx bounds;
8894
8895	if (bnd_reg <= LAST_BND_REG)
8896	  bounds = gen_rtx_REG (BNDmode, bnd_reg);
8897	else
8898	  {
8899	    rtx ldx_addr =
8900	      plus_constant (Pmode, arg_pointer_rtx,
8901			     (LAST_BND_REG - bnd_reg) * GET_MODE_SIZE (Pmode));
8902	    bounds = gen_reg_rtx (BNDmode);
8903	    emit_insn (BNDmode == BND64mode
8904		       ? gen_bnd64_ldx (bounds, ldx_addr, ptr)
8905		       : gen_bnd32_ldx (bounds, ldx_addr, ptr));
8906	  }
8907
8908	emit_insn (BNDmode == BND64mode
8909		   ? gen_bnd64_stx (addr, ptr, bounds)
8910		   : gen_bnd32_stx (addr, ptr, bounds));
8911
8912	bnd_reg++;
8913      }
8914}
8915
8916
8917/* Return true if TYPE is a va_list that is a plain char pointer.  */
8918
8919static bool
8920is_va_list_char_pointer (tree type)
8921{
8922  tree canonic;
8923
8924  /* For 32-bit it is always true.  */
8925  if (!TARGET_64BIT)
8926    return true;
8927  canonic = ix86_canonical_va_list_type (type);
8928  return (canonic == ms_va_list_type_node
8929          || (ix86_abi == MS_ABI && canonic == va_list_type_node));
8930}
8931
8932/* Implement va_start.  */
8933
8934static void
8935ix86_va_start (tree valist, rtx nextarg)
8936{
8937  HOST_WIDE_INT words, n_gpr, n_fpr;
8938  tree f_gpr, f_fpr, f_ovf, f_sav;
8939  tree gpr, fpr, ovf, sav, t;
8940  tree type;
8941  rtx ovf_rtx;
8942
8943  if (flag_split_stack
8944      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8945    {
8946      unsigned int scratch_regno;
8947
8948      /* When we are splitting the stack, we can't refer to the stack
8949	 arguments using internal_arg_pointer, because they may be on
8950	 the old stack.  The split stack prologue will arrange to
8951	 leave a pointer to the old stack arguments in a scratch
8952	 register, which we here copy to a pseudo-register.  The split
8953	 stack prologue can't set the pseudo-register directly because
8954	 it (the prologue) runs before any registers have been saved.  */
8955
8956      scratch_regno = split_stack_prologue_scratch_regno ();
8957      if (scratch_regno != INVALID_REGNUM)
8958	{
8959	  rtx reg;
8960	  rtx_insn *seq;
8961
8962	  reg = gen_reg_rtx (Pmode);
8963	  cfun->machine->split_stack_varargs_pointer = reg;
8964
8965	  start_sequence ();
8966	  emit_move_insn (reg, gen_rtx_REG (Pmode, scratch_regno));
8967	  seq = get_insns ();
8968	  end_sequence ();
8969
8970	  push_topmost_sequence ();
8971	  emit_insn_after (seq, entry_of_function ());
8972	  pop_topmost_sequence ();
8973	}
8974    }
8975
8976  /* Only 64bit target needs something special.  */
8977  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
8978    {
8979      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
8980	std_expand_builtin_va_start (valist, nextarg);
8981      else
8982	{
8983	  rtx va_r, next;
8984
8985	  va_r = expand_expr (valist, NULL_RTX, VOIDmode, EXPAND_WRITE);
8986	  next = expand_binop (ptr_mode, add_optab,
8987			       cfun->machine->split_stack_varargs_pointer,
8988			       crtl->args.arg_offset_rtx,
8989			       NULL_RTX, 0, OPTAB_LIB_WIDEN);
8990	  convert_move (va_r, next, 0);
8991
8992	  /* Store zero bounds for va_list.  */
8993	  if (chkp_function_instrumented_p (current_function_decl))
8994	    chkp_expand_bounds_reset_for_mem (valist,
8995					      make_tree (TREE_TYPE (valist),
8996							 next));
8997
8998	}
8999      return;
9000    }
9001
9002  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9003  f_fpr = DECL_CHAIN (f_gpr);
9004  f_ovf = DECL_CHAIN (f_fpr);
9005  f_sav = DECL_CHAIN (f_ovf);
9006
9007  valist = build_simple_mem_ref (valist);
9008  TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
9009  /* The following should be folded into the MEM_REF offset.  */
9010  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
9011		f_gpr, NULL_TREE);
9012  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
9013		f_fpr, NULL_TREE);
9014  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
9015		f_ovf, NULL_TREE);
9016  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
9017		f_sav, NULL_TREE);
9018
9019  /* Count number of gp and fp argument registers used.  */
9020  words = crtl->args.info.words;
9021  n_gpr = crtl->args.info.regno;
9022  n_fpr = crtl->args.info.sse_regno;
9023
9024  if (cfun->va_list_gpr_size)
9025    {
9026      type = TREE_TYPE (gpr);
9027      t = build2 (MODIFY_EXPR, type,
9028		  gpr, build_int_cst (type, n_gpr * 8));
9029      TREE_SIDE_EFFECTS (t) = 1;
9030      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9031    }
9032
9033  if (TARGET_SSE && cfun->va_list_fpr_size)
9034    {
9035      type = TREE_TYPE (fpr);
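      /* fp_offset starts past the 8 * X86_64_REGPARM_MAX = 48 bytes of
	 saved integer registers; each SSE slot in the save area is 16 bytes
	 wide.  */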
9036      t = build2 (MODIFY_EXPR, type, fpr,
9037		  build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
9038      TREE_SIDE_EFFECTS (t) = 1;
9039      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9040    }
9041
9042  /* Find the overflow area.  */
9043  type = TREE_TYPE (ovf);
9044  if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
9045    ovf_rtx = crtl->args.internal_arg_pointer;
9046  else
9047    ovf_rtx = cfun->machine->split_stack_varargs_pointer;
9048  t = make_tree (type, ovf_rtx);
9049  if (words != 0)
9050    t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
9051
9052  /* Store zero bounds for overflow area pointer.  */
9053  if (chkp_function_instrumented_p (current_function_decl))
9054    chkp_expand_bounds_reset_for_mem (ovf, t);
9055
9056  t = build2 (MODIFY_EXPR, type, ovf, t);
9057  TREE_SIDE_EFFECTS (t) = 1;
9058  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9059
9060  if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
9061    {
9062      /* Find the register save area.  The function prologue saves it
9063	 right above the stack frame.  */
9064      type = TREE_TYPE (sav);
9065      t = make_tree (type, frame_pointer_rtx);
9066      if (!ix86_varargs_gpr_size)
9067	t = fold_build_pointer_plus_hwi (t, -8 * X86_64_REGPARM_MAX);
9068
9069      /* Store zero bounds for save area pointer.  */
9070      if (chkp_function_instrumented_p (current_function_decl))
9071	chkp_expand_bounds_reset_for_mem (sav, t);
9072
9073      t = build2 (MODIFY_EXPR, type, sav, t);
9074      TREE_SIDE_EFFECTS (t) = 1;
9075      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9076    }
9077}
9078
9079/* Implement va_arg.  */
9080
9081static tree
9082ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9083		      gimple_seq *post_p)
9084{
9085  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
9086  tree f_gpr, f_fpr, f_ovf, f_sav;
9087  tree gpr, fpr, ovf, sav, t;
9088  int size, rsize;
9089  tree lab_false, lab_over = NULL_TREE;
9090  tree addr, t2;
9091  rtx container;
9092  int indirect_p = 0;
9093  tree ptrtype;
9094  machine_mode nat_mode;
9095  unsigned int arg_boundary;
9096
9097  /* Only 64bit target needs something special.  */
9098  if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
9099    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
9100
9101  f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
9102  f_fpr = DECL_CHAIN (f_gpr);
9103  f_ovf = DECL_CHAIN (f_fpr);
9104  f_sav = DECL_CHAIN (f_ovf);
9105
9106  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
9107		build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
9108  valist = build_va_arg_indirect_ref (valist);
9109  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9110  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9111  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9112
9113  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
9114  if (indirect_p)
9115    type = build_pointer_type (type);
9116  size = int_size_in_bytes (type);
9117  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
9118
9119  nat_mode = type_natural_mode (type, NULL, false);
9120  switch (nat_mode)
9121    {
9122    case V8SFmode:
9123    case V8SImode:
9124    case V32QImode:
9125    case V16HImode:
9126    case V4DFmode:
9127    case V4DImode:
9128    case V16SFmode:
9129    case V16SImode:
9130    case V64QImode:
9131    case V32HImode:
9132    case V8DFmode:
9133    case V8DImode:
9134      /* Unnamed 256 and 512bit vector mode parameters are passed on stack.  */
9135      if (!TARGET_64BIT_MS_ABI)
9136	{
9137	  container = NULL;
9138	  break;
9139	}
9140
9141    default:
9142      container = construct_container (nat_mode, TYPE_MODE (type),
9143				       type, 0, X86_64_REGPARM_MAX,
9144				       X86_64_SSE_REGPARM_MAX, intreg,
9145				       0);
9146      break;
9147    }
9148
9149  /* Pull the value out of the saved registers.  */
9150
9151  addr = create_tmp_var (ptr_type_node, "addr");
9152
9153  if (container)
9154    {
9155      int needed_intregs, needed_sseregs;
9156      bool need_temp;
9157      tree int_addr, sse_addr;
9158
9159      lab_false = create_artificial_label (UNKNOWN_LOCATION);
9160      lab_over = create_artificial_label (UNKNOWN_LOCATION);
9161
9162      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
9163
9164      need_temp = (!REG_P (container)
9165		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
9166		       || TYPE_ALIGN (type) > 128));
9167
9168      /* In case we are passing a structure, verify that it is a consecutive
9169	 block in the register save area.  If not, we need to do moves.  */
9170      if (!need_temp && !REG_P (container))
9171	{
9172	  /* Verify that all registers are strictly consecutive.  */
9173	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
9174	    {
9175	      int i;
9176
9177	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9178		{
9179		  rtx slot = XVECEXP (container, 0, i);
9180		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
9181		      || INTVAL (XEXP (slot, 1)) != i * 16)
9182		    need_temp = true;
9183		}
9184	    }
9185	  else
9186	    {
9187	      int i;
9188
9189	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
9190		{
9191		  rtx slot = XVECEXP (container, 0, i);
9192		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
9193		      || INTVAL (XEXP (slot, 1)) != i * 8)
9194		    need_temp = true;
9195		}
9196	    }
9197	}
9198      if (!need_temp)
9199	{
9200	  int_addr = addr;
9201	  sse_addr = addr;
9202	}
9203      else
9204	{
9205	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
9206	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
9207	}
9208
9209      /* First ensure that we fit completely in registers.  */
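      /* E.g. an argument needing a single integer register is fetched from
	 the save area only while gp_offset is below (6 - 1 + 1) * 8 = 48,
	 i.e. while at least one of the six saved integer registers is still
	 unconsumed; otherwise we jump to the overflow path below.  */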
9210      if (needed_intregs)
9211	{
9212	  t = build_int_cst (TREE_TYPE (gpr),
9213			     (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
9214	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
9215	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9216	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9217	  gimplify_and_add (t, pre_p);
9218	}
9219      if (needed_sseregs)
9220	{
9221	  t = build_int_cst (TREE_TYPE (fpr),
9222			     (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
9223			     + X86_64_REGPARM_MAX * 8);
9224	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
9225	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
9226	  t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
9227	  gimplify_and_add (t, pre_p);
9228	}
9229
9230      /* Compute index to start of area used for integer regs.  */
9231      if (needed_intregs)
9232	{
9233	  /* int_addr = gpr + sav; */
9234	  t = fold_build_pointer_plus (sav, gpr);
9235	  gimplify_assign (int_addr, t, pre_p);
9236	}
9237      if (needed_sseregs)
9238	{
9239	  /* sse_addr = fpr + sav; */
9240	  t = fold_build_pointer_plus (sav, fpr);
9241	  gimplify_assign (sse_addr, t, pre_p);
9242	}
9243      if (need_temp)
9244	{
9245	  int i, prev_size = 0;
9246	  tree temp = create_tmp_var (type, "va_arg_tmp");
9247
9248	  /* addr = &temp; */
9249	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
9250	  gimplify_assign (addr, t, pre_p);
9251
9252	  for (i = 0; i < XVECLEN (container, 0); i++)
9253	    {
9254	      rtx slot = XVECEXP (container, 0, i);
9255	      rtx reg = XEXP (slot, 0);
9256	      machine_mode mode = GET_MODE (reg);
9257	      tree piece_type;
9258	      tree addr_type;
9259	      tree daddr_type;
9260	      tree src_addr, src;
9261	      int src_offset;
9262	      tree dest_addr, dest;
9263	      int cur_size = GET_MODE_SIZE (mode);
9264
9265	      gcc_assert (prev_size <= INTVAL (XEXP (slot, 1)));
9266	      prev_size = INTVAL (XEXP (slot, 1));
9267	      if (prev_size + cur_size > size)
9268		{
9269		  cur_size = size - prev_size;
9270		  mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
9271		  if (mode == BLKmode)
9272		    mode = QImode;
9273		}
9274	      piece_type = lang_hooks.types.type_for_mode (mode, 1);
9275	      if (mode == GET_MODE (reg))
9276		addr_type = build_pointer_type (piece_type);
9277	      else
9278		addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9279							 true);
9280	      daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
9281							true);
9282
9283	      if (SSE_REGNO_P (REGNO (reg)))
9284		{
9285		  src_addr = sse_addr;
9286		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
9287		}
9288	      else
9289		{
9290		  src_addr = int_addr;
9291		  src_offset = REGNO (reg) * 8;
9292		}
9293	      src_addr = fold_convert (addr_type, src_addr);
9294	      src_addr = fold_build_pointer_plus_hwi (src_addr, src_offset);
9295
9296	      dest_addr = fold_convert (daddr_type, addr);
9297	      dest_addr = fold_build_pointer_plus_hwi (dest_addr, prev_size);
9298	      if (cur_size == GET_MODE_SIZE (mode))
9299		{
9300		  src = build_va_arg_indirect_ref (src_addr);
9301		  dest = build_va_arg_indirect_ref (dest_addr);
9302
9303		  gimplify_assign (dest, src, pre_p);
9304		}
9305	      else
9306		{
9307		  tree copy
9308		    = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
9309				       3, dest_addr, src_addr,
9310				       size_int (cur_size));
9311		  gimplify_and_add (copy, pre_p);
9312		}
9313	      prev_size += cur_size;
9314	    }
9315	}
9316
9317      if (needed_intregs)
9318	{
9319	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
9320		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
9321	  gimplify_assign (gpr, t, pre_p);
9322	}
9323
9324      if (needed_sseregs)
9325	{
9326	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
9327		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
9328	  gimplify_assign (fpr, t, pre_p);
9329	}
9330
9331      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9332
9333      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9334    }
9335
9336  /* ... otherwise out of the overflow area.  */
9337
  /* When we align a parameter on the stack for the caller, if the parameter
     alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be aligned
     at MAX_SUPPORTED_STACK_ALIGNMENT.  We will match the callee here with
     the caller.  */
9342  arg_boundary = ix86_function_arg_boundary (VOIDmode, type);
9343  if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
9344    arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
9345
9346  /* Care for on-stack alignment if needed.  */
9347  if (arg_boundary <= 64 || size == 0)
9348    t = ovf;
  else
9350    {
9351      HOST_WIDE_INT align = arg_boundary / 8;
9352      t = fold_build_pointer_plus_hwi (ovf, align - 1);
9353      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
9354		  build_int_cst (TREE_TYPE (t), -align));
9355    }
9356
9357  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9358  gimplify_assign (addr, t, pre_p);
9359
9360  t = fold_build_pointer_plus_hwi (t, rsize * UNITS_PER_WORD);
9361  gimplify_assign (unshare_expr (ovf), t, pre_p);
9362
9363  if (container)
9364    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9365
9366  ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
9367  addr = fold_convert (ptrtype, addr);
9368
9369  if (indirect_p)
9370    addr = build_va_arg_indirect_ref (addr);
9371  return build_va_arg_indirect_ref (addr);
9372}
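
/* As an informal illustration (not a verbatim dump), the lowering above
   produces GIMPLE of roughly the following shape for a scalar argument
   that needs a single integer register:

	if (gpr >= (6 - 1 + 1) * 8) goto lab_false;
	addr = sav + gpr;
	gpr = gpr + 8;
	goto lab_over;
      lab_false:
	addr = ovf, aligned as required;
	ovf = addr + rsize * UNITS_PER_WORD;
      lab_over:
	result = *(type *) addr;

   The exact statements depend on the type and on which save areas
   (integer and/or SSE) are needed.  */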
9373
9374/* Return true if OPNUM's MEM should be matched
9375   in movabs* patterns.  */
9376
9377bool
9378ix86_check_movabs (rtx insn, int opnum)
9379{
9380  rtx set, mem;
9381
9382  set = PATTERN (insn);
9383  if (GET_CODE (set) == PARALLEL)
9384    set = XVECEXP (set, 0, 0);
9385  gcc_assert (GET_CODE (set) == SET);
9386  mem = XEXP (set, opnum);
9387  while (GET_CODE (mem) == SUBREG)
9388    mem = SUBREG_REG (mem);
9389  gcc_assert (MEM_P (mem));
9390  return volatile_ok || !MEM_VOLATILE_P (mem);
9391}
9392
9393/* Initialize the table of extra 80387 mathematical constants.  */
9394
9395static void
9396init_ext_80387_constants (void)
9397{
9398  static const char * cst[5] =
9399  {
9400    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
9401    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
9402    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
9403    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
9404    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
9405  };
9406  int i;
9407
9408  for (i = 0; i < 5; i++)
9409    {
9410      real_from_string (&ext_80387_constants_table[i], cst[i]);
9411      /* Ensure each constant is rounded to XFmode precision.  */
9412      real_convert (&ext_80387_constants_table[i],
9413		    XFmode, &ext_80387_constants_table[i]);
9414    }
9415
9416  ext_80387_constants_init = 1;
9417}
9418
/* Return non-zero if the constant X is something that can be loaded with
   a special 80387 instruction; the value returned selects the opcode in
   standard_80387_constant_opcode.  Return -1 if X is not an 80387
   floating-point constant at all, and 0 if no special instruction
   applies.  */
9421
9422int
9423standard_80387_constant_p (rtx x)
9424{
9425  machine_mode mode = GET_MODE (x);
9426
9427  REAL_VALUE_TYPE r;
9428
9429  if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
9430    return -1;
9431
9432  if (x == CONST0_RTX (mode))
9433    return 1;
9434  if (x == CONST1_RTX (mode))
9435    return 2;
9436
9437  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9438
9439  /* For XFmode constants, try to find a special 80387 instruction when
9440     optimizing for size or on those CPUs that benefit from them.  */
9441  if (mode == XFmode
9442      && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
9443    {
9444      int i;
9445
9446      if (! ext_80387_constants_init)
9447	init_ext_80387_constants ();
9448
9449      for (i = 0; i < 5; i++)
9450        if (real_identical (&r, &ext_80387_constants_table[i]))
9451	  return i + 3;
9452    }
9453
  /* A load of the constant -0.0 or -1.0 will be split into an
     fldz;fchs or fld1;fchs sequence.  */
9456  if (real_isnegzero (&r))
9457    return 8;
9458  if (real_identical (&r, &dconstm1))
9459    return 9;
9460
9461  return 0;
9462}
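
/* A quick worked example of the mapping above (derived from the code, not
   exhaustive): 0.0 yields 1 (fldz), 1.0 yields 2 (fld1), the XFmode value
   log10(2) yields 3 (fldlg2, table entry 0 plus 3) when the extended
   constants are enabled, and -0.0 / -1.0 yield 8 / 9, which are later
   split into fldz;fchs and fld1;fchs.  */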
9463
9464/* Return the opcode of the special instruction to be used to load
9465   the constant X.  */
9466
9467const char *
9468standard_80387_constant_opcode (rtx x)
9469{
9470  switch (standard_80387_constant_p (x))
9471    {
9472    case 1:
9473      return "fldz";
9474    case 2:
9475      return "fld1";
9476    case 3:
9477      return "fldlg2";
9478    case 4:
9479      return "fldln2";
9480    case 5:
9481      return "fldl2e";
9482    case 6:
9483      return "fldl2t";
9484    case 7:
9485      return "fldpi";
9486    case 8:
9487    case 9:
9488      return "#";
9489    default:
9490      gcc_unreachable ();
9491    }
9492}
9493
9494/* Return the CONST_DOUBLE representing the 80387 constant that is
9495   loaded by the specified special instruction.  The argument IDX
9496   matches the return value from standard_80387_constant_p.  */
9497
9498rtx
9499standard_80387_constant_rtx (int idx)
9500{
9501  int i;
9502
9503  if (! ext_80387_constants_init)
9504    init_ext_80387_constants ();
9505
9506  switch (idx)
9507    {
9508    case 3:
9509    case 4:
9510    case 5:
9511    case 6:
9512    case 7:
9513      i = idx - 3;
9514      break;
9515
9516    default:
9517      gcc_unreachable ();
9518    }
9519
9520  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
9521				       XFmode);
9522}
9523
/* Return 1 if X is all 0s and 2 if X is all 1s in a supported SSE/AVX
   vector mode; return 0 otherwise.  */
9526
9527int
9528standard_sse_constant_p (rtx x)
9529{
9530  machine_mode mode = GET_MODE (x);
9531
9532  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
9533    return 1;
9534  if (vector_all_ones_operand (x, mode))
9535    switch (mode)
9536      {
9537      case V16QImode:
9538      case V8HImode:
9539      case V4SImode:
9540      case V2DImode:
9541	if (TARGET_SSE2)
9542	  return 2;
9543      case V32QImode:
9544      case V16HImode:
9545      case V8SImode:
9546      case V4DImode:
9547	if (TARGET_AVX2)
9548	  return 2;
9549      case V64QImode:
9550      case V32HImode:
9551      case V16SImode:
9552      case V8DImode:
9553	if (TARGET_AVX512F)
9554	  return 2;
9555      default:
9556	break;
9557      }
9558
9559  return 0;
9560}
9561
9562/* Return the opcode of the special instruction to be used to load
9563   the constant X.  */
9564
9565const char *
9566standard_sse_constant_opcode (rtx_insn *insn, rtx x)
9567{
9568  switch (standard_sse_constant_p (x))
9569    {
9570    case 1:
9571      switch (get_attr_mode (insn))
9572	{
9573	case MODE_XI:
9574	  return "vpxord\t%g0, %g0, %g0";
9575	case MODE_V16SF:
9576	  return TARGET_AVX512DQ ? "vxorps\t%g0, %g0, %g0"
9577				 : "vpxord\t%g0, %g0, %g0";
9578	case MODE_V8DF:
9579	  return TARGET_AVX512DQ ? "vxorpd\t%g0, %g0, %g0"
9580				 : "vpxorq\t%g0, %g0, %g0";
9581	case MODE_TI:
9582	  return TARGET_AVX512VL ? "vpxord\t%t0, %t0, %t0"
9583				 : "%vpxor\t%0, %d0";
9584	case MODE_V2DF:
9585	  return "%vxorpd\t%0, %d0";
9586	case MODE_V4SF:
9587	  return "%vxorps\t%0, %d0";
9588
9589	case MODE_OI:
9590	  return TARGET_AVX512VL ? "vpxord\t%x0, %x0, %x0"
9591				 : "vpxor\t%x0, %x0, %x0";
9592	case MODE_V4DF:
9593	  return "vxorpd\t%x0, %x0, %x0";
9594	case MODE_V8SF:
9595	  return "vxorps\t%x0, %x0, %x0";
9596
9597	default:
9598	  break;
9599	}
9600
9601    case 2:
9602      if (TARGET_AVX512VL
9603	  || get_attr_mode (insn) == MODE_XI
9604	  || get_attr_mode (insn) == MODE_V8DF
9605	  || get_attr_mode (insn) == MODE_V16SF)
9606	return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
9607      if (TARGET_AVX)
9608	return "vpcmpeqd\t%0, %0, %0";
9609      else
9610	return "pcmpeqd\t%0, %0";
9611
9612    default:
9613      break;
9614    }
9615  gcc_unreachable ();
9616}
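
/* Two illustrative cases of the templates returned above (assuming the
   insn's mode attribute matches the constant): an all-zeros V4SFmode
   constant is loaded with xorps (or vxorps under AVX) of the destination
   with itself, while an all-ones V4SImode constant is loaded with
   pcmpeqd (or vpcmpeqd under AVX) of the destination with itself.  */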
9617
/* Returns true if OP contains a symbol reference.  */
9619
9620bool
9621symbolic_reference_mentioned_p (rtx op)
9622{
9623  const char *fmt;
9624  int i;
9625
9626  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
9627    return true;
9628
9629  fmt = GET_RTX_FORMAT (GET_CODE (op));
9630  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
9631    {
9632      if (fmt[i] == 'E')
9633	{
9634	  int j;
9635
9636	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
9637	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
9638	      return true;
9639	}
9640
9641      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
9642	return true;
9643    }
9644
9645  return false;
9646}
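
/* For example, (symbol_ref "foo"), (label_ref 23) and
   (const (plus (symbol_ref "foo") (const_int 4))) all mention a symbolic
   reference, while (plus (reg 0) (const_int 4)) does not.  */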
9647
9648/* Return true if it is appropriate to emit `ret' instructions in the
9649   body of a function.  Do this only if the epilogue is simple, needing a
9650   couple of insns.  Prior to reloading, we can't tell how many registers
9651   must be saved, so return false then.  Return false if there is no frame
9652   marker to de-allocate.  */
9653
9654bool
9655ix86_can_use_return_insn_p (void)
9656{
9657  struct ix86_frame frame;
9658
9659  if (! reload_completed || frame_pointer_needed)
9660    return 0;
9661
9662  /* Don't allow more than 32k pop, since that's all we can do
9663     with one instruction.  */
9664  if (crtl->args.pops_args && crtl->args.size >= 32768)
9665    return 0;
9666
9667  ix86_compute_frame_layout (&frame);
9668  return (frame.stack_pointer_offset == UNITS_PER_WORD
9669	  && (frame.nregs + frame.nsseregs) == 0);
9670}
9671
9672/* Value should be nonzero if functions must have frame pointers.
9673   Zero means the frame pointer need not be set up (and parms may
9674   be accessed via the stack pointer) in functions that seem suitable.  */
9675
9676static bool
9677ix86_frame_pointer_required (void)
9678{
9679  /* If we accessed previous frames, then the generated code expects
9680     to be able to access the saved ebp value in our frame.  */
9681  if (cfun->machine->accesses_prev_frame)
9682    return true;
9683
  /* Several x86 OSes need a frame pointer for other reasons,
     usually pertaining to setjmp.  */
9686  if (SUBTARGET_FRAME_POINTER_REQUIRED)
9687    return true;
9688
  /* For older 32-bit runtimes setjmp requires a valid frame pointer.  */
9690  if (TARGET_32BIT_MS_ABI && cfun->calls_setjmp)
9691    return true;
9692
  /* For Win64 SEH, very large frames need a frame pointer, as the maximum
     stack allocation is 4GB.  */
9695  if (TARGET_64BIT_MS_ABI && get_frame_size () > SEH_MAX_FRAME_SIZE)
9696    return true;
9697
  /* SSE saves require a frame pointer when the stack is misaligned.  */
9699  if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
9700    return true;
9701
9702  /* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
9703     turns off the frame pointer by default.  Turn it back on now if
9704     we've not got a leaf function.  */
9705  if (TARGET_OMIT_LEAF_FRAME_POINTER
9706      && (!crtl->is_leaf
9707	  || ix86_current_function_calls_tls_descriptor))
9708    return true;
9709
9710  if (crtl->profile && !flag_fentry)
9711    return true;
9712
9713  return false;
9714}
9715
9716/* Record that the current function accesses previous call frames.  */
9717
9718void
9719ix86_setup_frame_addresses (void)
9720{
9721  cfun->machine->accesses_prev_frame = 1;
9722}
9723
9724#ifndef USE_HIDDEN_LINKONCE
9725# if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
9726#  define USE_HIDDEN_LINKONCE 1
9727# else
9728#  define USE_HIDDEN_LINKONCE 0
9729# endif
9730#endif
9731
9732static int pic_labels_used;
9733
9734/* Fills in the label name that should be used for a pc thunk for
9735   the given register.  */
9736
9737static void
9738get_pc_thunk_name (char name[32], unsigned int regno)
9739{
9740  gcc_assert (!TARGET_64BIT);
9741
9742  if (USE_HIDDEN_LINKONCE)
9743    sprintf (name, "__x86.get_pc_thunk.%s", reg_names[regno]);
9744  else
9745    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
9746}
9747
9748
/* This function is called at the end of code generation.  It emits the
   pc thunks used by -fpic code: each thunk loads its register with the
   return address of the caller and then returns.  With -fsplit-stack it
   also emits the split-stack marker at the end of the file.  */
9751
9752static void
9753ix86_code_end (void)
9754{
9755  rtx xops[2];
9756  int regno;
9757
9758  for (regno = AX_REG; regno <= SP_REG; regno++)
9759    {
9760      char name[32];
9761      tree decl;
9762
9763      if (!(pic_labels_used & (1 << regno)))
9764	continue;
9765
9766      get_pc_thunk_name (name, regno);
9767
9768      decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
9769			 get_identifier (name),
9770			 build_function_type_list (void_type_node, NULL_TREE));
9771      DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
9772				       NULL_TREE, void_type_node);
9773      TREE_PUBLIC (decl) = 1;
9774      TREE_STATIC (decl) = 1;
9775      DECL_IGNORED_P (decl) = 1;
9776
9777#if TARGET_MACHO
9778      if (TARGET_MACHO)
9779	{
9780	  switch_to_section (darwin_sections[text_coal_section]);
9781	  fputs ("\t.weak_definition\t", asm_out_file);
9782	  assemble_name (asm_out_file, name);
9783	  fputs ("\n\t.private_extern\t", asm_out_file);
9784	  assemble_name (asm_out_file, name);
9785	  putc ('\n', asm_out_file);
9786	  ASM_OUTPUT_LABEL (asm_out_file, name);
9787	  DECL_WEAK (decl) = 1;
9788	}
9789      else
9790#endif
9791      if (USE_HIDDEN_LINKONCE)
9792	{
9793	  cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
9794
9795	  targetm.asm_out.unique_section (decl, 0);
9796	  switch_to_section (get_named_section (decl, NULL, 0));
9797
9798	  targetm.asm_out.globalize_label (asm_out_file, name);
9799	  fputs ("\t.hidden\t", asm_out_file);
9800	  assemble_name (asm_out_file, name);
9801	  putc ('\n', asm_out_file);
9802	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
9803	}
9804      else
9805	{
9806	  switch_to_section (text_section);
9807	  ASM_OUTPUT_LABEL (asm_out_file, name);
9808	}
9809
9810      DECL_INITIAL (decl) = make_node (BLOCK);
9811      current_function_decl = decl;
9812      init_function_start (decl);
9813      first_function_block_is_cold = false;
9814      /* Make sure unwind info is emitted for the thunk if needed.  */
9815      final_start_function (emit_barrier (), asm_out_file, 1);
9816
9817      /* Pad stack IP move with 4 instructions (two NOPs count
9818	 as one instruction).  */
9819      if (TARGET_PAD_SHORT_FUNCTION)
9820	{
9821	  int i = 8;
9822
9823	  while (i--)
9824	    fputs ("\tnop\n", asm_out_file);
9825	}
9826
9827      xops[0] = gen_rtx_REG (Pmode, regno);
9828      xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
9829      output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
9830      output_asm_insn ("%!ret", NULL);
9831      final_end_function ();
9832      init_insn_lengths ();
9833      free_after_compilation (cfun);
9834      set_cfun (NULL);
9835      current_function_decl = NULL;
9836    }
9837
9838  if (flag_split_stack)
9839    file_end_indicate_split_stack ();
9840}
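
/* For reference, each thunk emitted above has roughly this shape
   (here for %ebx, i.e. __x86.get_pc_thunk.bx):

	__x86.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   that is, it copies its caller's return address (the address of the
   instruction following the call) into the requested register.  */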
9841
9842/* Emit code for the SET_GOT patterns.  */
9843
9844const char *
9845output_set_got (rtx dest, rtx label)
9846{
9847  rtx xops[3];
9848
9849  xops[0] = dest;
9850
9851  if (TARGET_VXWORKS_RTP && flag_pic)
9852    {
9853      /* Load (*VXWORKS_GOTT_BASE) into the PIC register.  */
9854      xops[2] = gen_rtx_MEM (Pmode,
9855			     gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
9856      output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
9857
9858      /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
9859	 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
9860	 an unadorned address.  */
9861      xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
9862      SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
9863      output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
9864      return "";
9865    }
9866
9867  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
9868
9869  if (!flag_pic)
9870    {
9871      if (TARGET_MACHO)
9872	/* We don't need a pic base, we're not producing pic.  */
9873	gcc_unreachable ();
9874
9875      xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
9876      output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
9877      targetm.asm_out.internal_label (asm_out_file, "L",
9878				      CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
9879    }
9880  else
9881    {
9882      char name[32];
9883      get_pc_thunk_name (name, REGNO (dest));
9884      pic_labels_used |= 1 << REGNO (dest);
9885
9886      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
9887      xops[2] = gen_rtx_MEM (QImode, xops[2]);
9888      output_asm_insn ("%!call\t%X2", xops);
9889
9890#if TARGET_MACHO
9891      /* Output the Mach-O "canonical" pic base label name ("Lxx$pb") here.
9892         This is what will be referenced by the Mach-O PIC subsystem.  */
9893      if (machopic_should_output_picbase_label () || !label)
9894	ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
9895
9896      /* When we are restoring the pic base at the site of a nonlocal label,
9897         and we decided to emit the pic base above, we will still output a
9898         local label used for calculating the correction offset (even though
9899         the offset will be 0 in that case).  */
9900      if (label)
9901        targetm.asm_out.internal_label (asm_out_file, "L",
9902					   CODE_LABEL_NUMBER (label));
9903#endif
9904    }
9905
9906  if (!TARGET_MACHO)
9907    output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
9908
9909  return "";
9910}
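
/* In the usual -fpic case the expansion above prints something like
   (for %ebx):

	call	__x86.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk loads %ebx with its return address and the special
   _GLOBAL_OFFSET_TABLE_ operand is resolved so that %ebx ends up
   pointing at the GOT.  */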
9911
/* Generate a "push" pattern for input ARG.  */
9913
9914static rtx
9915gen_push (rtx arg)
9916{
9917  struct machine_function *m = cfun->machine;
9918
9919  if (m->fs.cfa_reg == stack_pointer_rtx)
9920    m->fs.cfa_offset += UNITS_PER_WORD;
9921  m->fs.sp_offset += UNITS_PER_WORD;
9922
9923  if (REG_P (arg) && GET_MODE (arg) != word_mode)
9924    arg = gen_rtx_REG (word_mode, REGNO (arg));
9925
9926  return gen_rtx_SET (VOIDmode,
9927		      gen_rtx_MEM (word_mode,
9928				   gen_rtx_PRE_DEC (Pmode,
9929						    stack_pointer_rtx)),
9930		      arg);
9931}
9932
/* Generate a "pop" pattern for input ARG.  */
9934
9935static rtx
9936gen_pop (rtx arg)
9937{
9938  if (REG_P (arg) && GET_MODE (arg) != word_mode)
9939    arg = gen_rtx_REG (word_mode, REGNO (arg));
9940
9941  return gen_rtx_SET (VOIDmode,
9942		      arg,
9943		      gen_rtx_MEM (word_mode,
9944				   gen_rtx_POST_INC (Pmode,
9945						     stack_pointer_rtx)));
9946}
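
/* For example, on x86-64 gen_push applied to (reg:DI di) yields roughly
     (set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI di))
   and gen_pop applied to the same register yields
     (set (reg:DI di) (mem:DI (post_inc:DI (reg:DI sp))))
   i.e. the canonical push and pop forms.  */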
9947
/* Return the number of an unused call-clobbered register available for
   the entire function, or INVALID_REGNUM if there is none.  */
9950
9951static unsigned int
9952ix86_select_alt_pic_regnum (void)
9953{
9954  if (ix86_use_pseudo_pic_reg ())
9955    return INVALID_REGNUM;
9956
9957  if (crtl->is_leaf
9958      && !crtl->profile
9959      && !ix86_current_function_calls_tls_descriptor)
9960    {
9961      int i, drap;
9962      /* Can't use the same register for both PIC and DRAP.  */
9963      if (crtl->drap_reg)
9964	drap = REGNO (crtl->drap_reg);
9965      else
9966	drap = -1;
9967      for (i = 2; i >= 0; --i)
9968        if (i != drap && !df_regs_ever_live_p (i))
9969	  return i;
9970    }
9971
9972  return INVALID_REGNUM;
9973}
9974
9975/* Return TRUE if we need to save REGNO.  */
9976
9977static bool
9978ix86_save_reg (unsigned int regno, bool maybe_eh_return)
9979{
9980  if (regno == REAL_PIC_OFFSET_TABLE_REGNUM
9981      && pic_offset_table_rtx)
9982    {
9983      if (ix86_use_pseudo_pic_reg ())
9984	{
	  /* REAL_PIC_OFFSET_TABLE_REGNUM is used by the call to
	     _mcount in the prologue.  */
9987	  if (!TARGET_64BIT && flag_pic && crtl->profile)
9988	    return true;
9989	}
9990      else if (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9991	       || crtl->profile
9992	       || crtl->calls_eh_return
9993	       || crtl->uses_const_pool
9994	       || cfun->has_nonlocal_label)
9995        return ix86_select_alt_pic_regnum () == INVALID_REGNUM;
9996    }
9997
9998  if (crtl->calls_eh_return && maybe_eh_return)
9999    {
10000      unsigned i;
10001      for (i = 0; ; i++)
10002	{
10003	  unsigned test = EH_RETURN_DATA_REGNO (i);
10004	  if (test == INVALID_REGNUM)
10005	    break;
10006	  if (test == regno)
10007	    return true;
10008	}
10009    }
10010
10011  if (crtl->drap_reg
10012      && regno == REGNO (crtl->drap_reg)
10013      && !cfun->machine->no_drap_save_restore)
10014    return true;
10015
10016  return (df_regs_ever_live_p (regno)
10017	  && !call_used_regs[regno]
10018	  && !fixed_regs[regno]
10019	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
10020}
10021
/* Return the number of saved general purpose registers.  */
10023
10024static int
10025ix86_nsaved_regs (void)
10026{
10027  int nregs = 0;
10028  int regno;
10029
10030  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10031    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10032      nregs ++;
10033  return nregs;
10034}
10035
/* Return the number of saved SSE registers.  */
10037
10038static int
10039ix86_nsaved_sseregs (void)
10040{
10041  int nregs = 0;
10042  int regno;
10043
10044  if (!TARGET_64BIT_MS_ABI)
10045    return 0;
10046  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10047    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10048      nregs ++;
10049  return nregs;
10050}
10051
10052/* Given FROM and TO register numbers, say whether this elimination is
10053   allowed.  If stack alignment is needed, we can only replace argument
10054   pointer with hard frame pointer, or replace frame pointer with stack
10055   pointer.  Otherwise, frame pointer elimination is automatically
10056   handled and all other eliminations are valid.  */
10057
10058static bool
10059ix86_can_eliminate (const int from, const int to)
10060{
10061  if (stack_realign_fp)
10062    return ((from == ARG_POINTER_REGNUM
10063	     && to == HARD_FRAME_POINTER_REGNUM)
10064	    || (from == FRAME_POINTER_REGNUM
10065		&& to == STACK_POINTER_REGNUM));
10066  else
10067    return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
10068}
10069
10070/* Return the offset between two registers, one to be eliminated, and the other
10071   its replacement, at the start of a routine.  */
10072
10073HOST_WIDE_INT
10074ix86_initial_elimination_offset (int from, int to)
10075{
10076  struct ix86_frame frame;
10077  ix86_compute_frame_layout (&frame);
10078
10079  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
10080    return frame.hard_frame_pointer_offset;
10081  else if (from == FRAME_POINTER_REGNUM
10082	   && to == HARD_FRAME_POINTER_REGNUM)
10083    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
10084  else
10085    {
10086      gcc_assert (to == STACK_POINTER_REGNUM);
10087
10088      if (from == ARG_POINTER_REGNUM)
10089	return frame.stack_pointer_offset;
10090
10091      gcc_assert (from == FRAME_POINTER_REGNUM);
10092      return frame.stack_pointer_offset - frame.frame_pointer_offset;
10093    }
10094}
10095
10096/* In a dynamically-aligned function, we can't know the offset from
10097   stack pointer to frame pointer, so we must ensure that setjmp
10098   eliminates fp against the hard fp (%ebp) rather than trying to
10099   index from %esp up to the top of the frame across a gap that is
10100   of unknown (at compile-time) size.  */
10101static rtx
10102ix86_builtin_setjmp_frame_value (void)
10103{
10104  return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
10105}
10106
10107/* When using -fsplit-stack, the allocation routines set a field in
10108   the TCB to the bottom of the stack plus this much space, measured
10109   in bytes.  */
10110
10111#define SPLIT_STACK_AVAILABLE 256
10112
/* Fill the structure ix86_frame describing the frame layout of the
   current function.  */
10114
10115static void
10116ix86_compute_frame_layout (struct ix86_frame *frame)
10117{
10118  unsigned HOST_WIDE_INT stack_alignment_needed;
10119  HOST_WIDE_INT offset;
10120  unsigned HOST_WIDE_INT preferred_alignment;
10121  HOST_WIDE_INT size = get_frame_size ();
10122  HOST_WIDE_INT to_allocate;
10123
10124  frame->nregs = ix86_nsaved_regs ();
10125  frame->nsseregs = ix86_nsaved_sseregs ();
10126
  /* The 64-bit MS ABI seems to require stack alignment to always be 16,
     except for function prologues, leaf functions and when the default
     incoming stack boundary is overridden at the command line or via the
     force_align_arg_pointer attribute.  */
10131  if ((TARGET_64BIT_MS_ABI && crtl->preferred_stack_boundary < 128)
10132      && (!crtl->is_leaf || cfun->calls_alloca != 0
10133	  || ix86_current_function_calls_tls_descriptor
10134	  || ix86_incoming_stack_boundary < 128))
10135    {
10136      crtl->preferred_stack_boundary = 128;
10137      crtl->stack_alignment_needed = 128;
10138    }
10139
10140  stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
10141  preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
10142
10143  gcc_assert (!size || stack_alignment_needed);
10144  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
10145  gcc_assert (preferred_alignment <= stack_alignment_needed);
10146
10147  /* For SEH we have to limit the amount of code movement into the prologue.
10148     At present we do this via a BLOCKAGE, at which point there's very little
10149     scheduling that can be done, which means that there's very little point
10150     in doing anything except PUSHs.  */
10151  if (TARGET_SEH)
10152    cfun->machine->use_fast_prologue_epilogue = false;
10153
  /* During a reload iteration the number of saved registers can change.
     Recompute the value as needed.  Do not recompute when the number of
     registers didn't change, as reload makes multiple calls to this
     function and does not expect the decision to change within a single
     iteration.  */
10158  else if (!optimize_bb_for_size_p (ENTRY_BLOCK_PTR_FOR_FN (cfun))
10159           && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
10160    {
10161      int count = frame->nregs;
10162      struct cgraph_node *node = cgraph_node::get (current_function_decl);
10163
10164      cfun->machine->use_fast_prologue_epilogue_nregs = count;
10165
10166      /* The fast prologue uses move instead of push to save registers.  This
10167         is significantly longer, but also executes faster as modern hardware
10168         can execute the moves in parallel, but can't do that for push/pop.
10169
	 Be careful about choosing which prologue to emit:  when the function
	 takes many instructions to execute we may use the slow version, and
	 likewise when the function is known to be outside a hot spot (this
	 is known with feedback only).  Weight the size of the function by
	 the number of registers to save, as it is cheap to use one or two
	 push instructions but very slow to use many of them.  */
10176      if (count)
10177	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
10178      if (node->frequency < NODE_FREQUENCY_NORMAL
10179	  || (flag_branch_probabilities
10180	      && node->frequency < NODE_FREQUENCY_HOT))
10181        cfun->machine->use_fast_prologue_epilogue = false;
10182      else
10183        cfun->machine->use_fast_prologue_epilogue
10184	   = !expensive_function_p (count);
10185    }
10186
10187  frame->save_regs_using_mov
10188    = (TARGET_PROLOGUE_USING_MOVE && cfun->machine->use_fast_prologue_epilogue
10189       /* If static stack checking is enabled and done with probes,
10190	  the registers need to be saved before allocating the frame.  */
10191       && flag_stack_check != STATIC_BUILTIN_STACK_CHECK);
10192
10193  /* Skip return address.  */
10194  offset = UNITS_PER_WORD;
10195
10196  /* Skip pushed static chain.  */
10197  if (ix86_static_chain_on_stack)
10198    offset += UNITS_PER_WORD;
10199
10200  /* Skip saved base pointer.  */
10201  if (frame_pointer_needed)
10202    offset += UNITS_PER_WORD;
10203  frame->hfp_save_offset = offset;
10204
10205  /* The traditional frame pointer location is at the top of the frame.  */
10206  frame->hard_frame_pointer_offset = offset;
10207
10208  /* Register save area */
10209  offset += frame->nregs * UNITS_PER_WORD;
10210  frame->reg_save_offset = offset;
10211
10212  /* On SEH target, registers are pushed just before the frame pointer
10213     location.  */
10214  if (TARGET_SEH)
10215    frame->hard_frame_pointer_offset = offset;
10216
10217  /* Align and set SSE register save area.  */
10218  if (frame->nsseregs)
10219    {
      /* The only ABI that has saved SSE registers (Win64) also has a
	 16-byte aligned default stack, and thus we don't need to be
	 within the re-aligned local stack frame to save them.  In case
	 the incoming stack boundary is aligned to less than 16 bytes,
	 an unaligned move of the SSE register will be emitted, so there
	 is no point in rounding up the SSE register save area outside
	 the re-aligned local stack frame to 16 bytes.  */
10227      if (ix86_incoming_stack_boundary >= 128)
10228	offset = (offset + 16 - 1) & -16;
10229      offset += frame->nsseregs * 16;
10230    }
10231  frame->sse_reg_save_offset = offset;
10232
10233  /* The re-aligned stack starts here.  Values before this point are not
10234     directly comparable with values below this point.  In order to make
10235     sure that no value happens to be the same before and after, force
10236     the alignment computation below to add a non-zero value.  */
10237  if (stack_realign_fp)
10238    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10239
10240  /* Va-arg area */
10241  frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
10242  offset += frame->va_arg_size;
10243
10244  /* Align start of frame for local function.  */
10245  if (stack_realign_fp
10246      || offset != frame->sse_reg_save_offset
10247      || size != 0
10248      || !crtl->is_leaf
10249      || cfun->calls_alloca
10250      || ix86_current_function_calls_tls_descriptor)
10251    offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
10252
10253  /* Frame pointer points here.  */
10254  frame->frame_pointer_offset = offset;
10255
10256  offset += size;
10257
  /* Add the outgoing arguments area.  This can be skipped if we eliminated
     all the function calls as dead code.
     Skipping is however impossible when the function calls alloca, as the
     alloca expander assumes that the last crtl->outgoing_args_size bytes
     of the stack frame are unused.  */
10263  if (ACCUMULATE_OUTGOING_ARGS
10264      && (!crtl->is_leaf || cfun->calls_alloca
10265	  || ix86_current_function_calls_tls_descriptor))
10266    {
10267      offset += crtl->outgoing_args_size;
10268      frame->outgoing_arguments_size = crtl->outgoing_args_size;
10269    }
10270  else
10271    frame->outgoing_arguments_size = 0;
10272
10273  /* Align stack boundary.  Only needed if we're calling another function
10274     or using alloca.  */
10275  if (!crtl->is_leaf || cfun->calls_alloca
10276      || ix86_current_function_calls_tls_descriptor)
10277    offset = (offset + preferred_alignment - 1) & -preferred_alignment;
10278
10279  /* We've reached end of stack frame.  */
10280  frame->stack_pointer_offset = offset;
10281
  /* The size the prologue needs to allocate.  */
10283  to_allocate = offset - frame->sse_reg_save_offset;
10284
10285  if ((!to_allocate && frame->nregs <= 1)
10286      || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
10287    frame->save_regs_using_mov = false;
10288
10289  if (ix86_using_red_zone ()
10290      && crtl->sp_is_unchanging
10291      && crtl->is_leaf
10292      && !ix86_current_function_calls_tls_descriptor)
10293    {
10294      frame->red_zone_size = to_allocate;
10295      if (frame->save_regs_using_mov)
10296	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
10297      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
10298	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
10299    }
10300  else
10301    frame->red_zone_size = 0;
10302  frame->stack_pointer_offset -= frame->red_zone_size;
10303
10304  /* The SEH frame pointer location is near the bottom of the frame.
10305     This is enforced by the fact that the difference between the
10306     stack pointer and the frame pointer is limited to 240 bytes in
10307     the unwind data structure.  */
10308  if (TARGET_SEH)
10309    {
10310      HOST_WIDE_INT diff;
10311
      /* If we can leave the frame pointer where it is, do so.  This also
	 returns the establisher frame for __builtin_frame_address (0).  */
10314      diff = frame->stack_pointer_offset - frame->hard_frame_pointer_offset;
10315      if (diff <= SEH_MAX_FRAME_SIZE
10316	  && (diff > 240 || (diff & 15) != 0)
10317	  && !crtl->accesses_prior_frames)
10318	{
10319	  /* Ideally we'd determine what portion of the local stack frame
10320	     (within the constraint of the lowest 240) is most heavily used.
10321	     But without that complication, simply bias the frame pointer
10322	     by 128 bytes so as to maximize the amount of the local stack
10323	     frame that is addressable with 8-bit offsets.  */
10324	  frame->hard_frame_pointer_offset = frame->stack_pointer_offset - 128;
10325	}
10326    }
10327}
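
/* A note on the rounding idiom used repeatedly above:
   (offset + align - 1) & -align rounds OFFSET up to the next multiple of
   ALIGN, which must be a power of two.  For example, with offset = 20 and
   align = 16, (20 + 15) & -16 = 35 & ~15 = 32.  */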
10328
10329/* This is semi-inlined memory_address_length, but simplified
10330   since we know that we're always dealing with reg+offset, and
10331   to avoid having to create and discard all that rtl.  */
10332
10333static inline int
10334choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
10335{
10336  int len = 4;
10337
10338  if (offset == 0)
10339    {
10340      /* EBP and R13 cannot be encoded without an offset.  */
10341      len = (regno == BP_REG || regno == R13_REG);
10342    }
10343  else if (IN_RANGE (offset, -128, 127))
10344    len = 1;
10345
10346  /* ESP and R12 must be encoded with a SIB byte.  */
10347  if (regno == SP_REG || regno == R12_REG)
10348    len++;
10349
10350  return len;
10351}
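
/* A few examples of the displacement lengths computed above:
     choose_baseaddr_len (AX_REG, 0)   -> 0  (no displacement needed)
     choose_baseaddr_len (BP_REG, 0)   -> 1  (EBP needs an explicit disp8 of 0)
     choose_baseaddr_len (SP_REG, 8)   -> 2  (disp8 plus the SIB byte)
     choose_baseaddr_len (AX_REG, 300) -> 4  (disp32)  */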
10352
10353/* Return an RTX that points to CFA_OFFSET within the stack frame.
10354   The valid base registers are taken from CFUN->MACHINE->FS.  */
10355
10356static rtx
10357choose_baseaddr (HOST_WIDE_INT cfa_offset)
10358{
10359  const struct machine_function *m = cfun->machine;
10360  rtx base_reg = NULL;
10361  HOST_WIDE_INT base_offset = 0;
10362
10363  if (m->use_fast_prologue_epilogue)
10364    {
      /* Choose the base register most likely to allow the most scheduling
         opportunities.  Generally FP is valid throughout the function,
         while DRAP must be reloaded within the epilogue.  But choose either
         of them over the SP, whose addresses have a larger encoding.  */
10369
10370      if (m->fs.fp_valid)
10371	{
10372	  base_reg = hard_frame_pointer_rtx;
10373	  base_offset = m->fs.fp_offset - cfa_offset;
10374	}
10375      else if (m->fs.drap_valid)
10376	{
10377	  base_reg = crtl->drap_reg;
10378	  base_offset = 0 - cfa_offset;
10379	}
10380      else if (m->fs.sp_valid)
10381	{
10382	  base_reg = stack_pointer_rtx;
10383	  base_offset = m->fs.sp_offset - cfa_offset;
10384	}
10385    }
10386  else
10387    {
10388      HOST_WIDE_INT toffset;
10389      int len = 16, tlen;
10390
10391      /* Choose the base register with the smallest address encoding.
10392         With a tie, choose FP > DRAP > SP.  */
10393      if (m->fs.sp_valid)
10394	{
10395	  base_reg = stack_pointer_rtx;
10396	  base_offset = m->fs.sp_offset - cfa_offset;
10397          len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
10398	}
10399      if (m->fs.drap_valid)
10400	{
10401	  toffset = 0 - cfa_offset;
10402	  tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
10403	  if (tlen <= len)
10404	    {
10405	      base_reg = crtl->drap_reg;
10406	      base_offset = toffset;
10407	      len = tlen;
10408	    }
10409	}
10410      if (m->fs.fp_valid)
10411	{
10412	  toffset = m->fs.fp_offset - cfa_offset;
10413	  tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
10414	  if (tlen <= len)
10415	    {
10416	      base_reg = hard_frame_pointer_rtx;
10417	      base_offset = toffset;
10418	      len = tlen;
10419	    }
10420	}
10421    }
10422  gcc_assert (base_reg != NULL);
10423
10424  return plus_constant (Pmode, base_reg, base_offset);
10425}
10426
10427/* Emit code to save registers in the prologue.  */
10428
10429static void
10430ix86_emit_save_regs (void)
10431{
10432  unsigned int regno;
10433  rtx insn;
10434
10435  for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
10436    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10437      {
10438	insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno)));
10439	RTX_FRAME_RELATED_P (insn) = 1;
10440      }
10441}
10442
10443/* Emit a single register save at CFA - CFA_OFFSET.  */
10444
10445static void
10446ix86_emit_save_reg_using_mov (machine_mode mode, unsigned int regno,
10447			      HOST_WIDE_INT cfa_offset)
10448{
10449  struct machine_function *m = cfun->machine;
10450  rtx reg = gen_rtx_REG (mode, regno);
10451  rtx unspec = NULL_RTX;
10452  rtx mem, addr, base, insn;
10453  unsigned int align;
10454
10455  addr = choose_baseaddr (cfa_offset);
10456  mem = gen_frame_mem (mode, addr);
10457
10458  /* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
10459  align = MIN (GET_MODE_ALIGNMENT (mode), INCOMING_STACK_BOUNDARY);
10460  set_mem_align (mem, align);
10461
  /* SSE saves are not within the re-aligned local stack frame.  In case
     the incoming stack boundary is less than 128 bits, we have to emit
     an unaligned store.  */
10465  if (mode == V4SFmode && align < 128)
10466    unspec = gen_rtx_UNSPEC (mode, gen_rtvec (1, reg), UNSPEC_STOREU);
10467
10468  insn = emit_insn (gen_rtx_SET (VOIDmode, mem, unspec ? unspec : reg));
10469  RTX_FRAME_RELATED_P (insn) = 1;
10470
10471  base = addr;
10472  if (GET_CODE (base) == PLUS)
10473    base = XEXP (base, 0);
10474  gcc_checking_assert (REG_P (base));
10475
10476  /* When saving registers into a re-aligned local stack frame, avoid
10477     any tricky guessing by dwarf2out.  */
10478  if (m->fs.realigned)
10479    {
10480      gcc_checking_assert (stack_realign_drap);
10481
10482      if (regno == REGNO (crtl->drap_reg))
10483	{
10484	  /* A bit of a hack.  We force the DRAP register to be saved in
10485	     the re-aligned stack frame, which provides us with a copy
10486	     of the CFA that will last past the prologue.  Install it.  */
10487	  gcc_checking_assert (cfun->machine->fs.fp_valid);
10488	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10489				cfun->machine->fs.fp_offset - cfa_offset);
10490	  mem = gen_rtx_MEM (mode, addr);
10491	  add_reg_note (insn, REG_CFA_DEF_CFA, mem);
10492	}
10493      else
10494	{
10495	  /* The frame pointer is a stable reference within the
10496	     aligned frame.  Use it.  */
10497	  gcc_checking_assert (cfun->machine->fs.fp_valid);
10498	  addr = plus_constant (Pmode, hard_frame_pointer_rtx,
10499				cfun->machine->fs.fp_offset - cfa_offset);
10500	  mem = gen_rtx_MEM (mode, addr);
10501	  add_reg_note (insn, REG_CFA_EXPRESSION,
10502			gen_rtx_SET (VOIDmode, mem, reg));
10503	}
10504    }
10505
10506  /* The memory may not be relative to the current CFA register,
10507     which means that we may need to generate a new pattern for
10508     use by the unwind info.  */
10509  else if (base != m->fs.cfa_reg)
10510    {
10511      addr = plus_constant (Pmode, m->fs.cfa_reg,
10512			    m->fs.cfa_offset - cfa_offset);
10513      mem = gen_rtx_MEM (mode, addr);
10514      add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
10515    }
10516  else if (unspec)
10517    add_reg_note (insn, REG_CFA_EXPRESSION,
10518		  gen_rtx_SET (VOIDmode, mem, reg));
10519}
10520
10521/* Emit code to save registers using MOV insns.
10522   First register is stored at CFA - CFA_OFFSET.  */
10523static void
10524ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
10525{
10526  unsigned int regno;
10527
10528  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10529    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10530      {
10531        ix86_emit_save_reg_using_mov (word_mode, regno, cfa_offset);
10532	cfa_offset -= UNITS_PER_WORD;
10533      }
10534}
10535
10536/* Emit code to save SSE registers using MOV insns.
10537   First register is stored at CFA - CFA_OFFSET.  */
10538static void
10539ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
10540{
10541  unsigned int regno;
10542
10543  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
10544    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
10545      {
10546	ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
10547	cfa_offset -= 16;
10548      }
10549}
10550
10551static GTY(()) rtx queued_cfa_restores;
10552
/* Add a REG_CFA_RESTORE note for REG to INSN, or queue it until the next
   stack manipulation insn.  The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red zone until return, as unwinders can find the same
   value in the register and on the stack.  */
10558
10559static void
10560ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
10561{
10562  if (!crtl->shrink_wrapped
10563      && cfa_offset <= cfun->machine->fs.red_zone_offset)
10564    return;
10565
10566  if (insn)
10567    {
10568      add_reg_note (insn, REG_CFA_RESTORE, reg);
10569      RTX_FRAME_RELATED_P (insn) = 1;
10570    }
10571  else
10572    queued_cfa_restores
10573      = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
10574}
10575
10576/* Add queued REG_CFA_RESTORE notes if any to INSN.  */
10577
10578static void
10579ix86_add_queued_cfa_restore_notes (rtx insn)
10580{
10581  rtx last;
10582  if (!queued_cfa_restores)
10583    return;
10584  for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
10585    ;
10586  XEXP (last, 1) = REG_NOTES (insn);
10587  REG_NOTES (insn) = queued_cfa_restores;
10588  queued_cfa_restores = NULL_RTX;
10589  RTX_FRAME_RELATED_P (insn) = 1;
10590}
10591
/* Expand a prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise.  */
10597
10598static void
10599pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
10600			   int style, bool set_cfa)
10601{
10602  struct machine_function *m = cfun->machine;
10603  rtx insn;
10604  bool add_frame_related_expr = false;
10605
10606  if (Pmode == SImode)
10607    insn = gen_pro_epilogue_adjust_stack_si_add (dest, src, offset);
10608  else if (x86_64_immediate_operand (offset, DImode))
10609    insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, offset);
10610  else
10611    {
10612      rtx tmp;
10613      /* r11 is used by indirect sibcall return as well, set before the
10614	 epilogue and used after the epilogue.  */
10615      if (style)
10616        tmp = gen_rtx_REG (DImode, R11_REG);
10617      else
10618	{
10619	  gcc_assert (src != hard_frame_pointer_rtx
10620		      && dest != hard_frame_pointer_rtx);
10621	  tmp = hard_frame_pointer_rtx;
10622	}
10623      insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
10624      if (style < 0)
10625	add_frame_related_expr = true;
10626
10627      insn = gen_pro_epilogue_adjust_stack_di_add (dest, src, tmp);
10628    }
10629
10630  insn = emit_insn (insn);
10631  if (style >= 0)
10632    ix86_add_queued_cfa_restore_notes (insn);
10633
10634  if (set_cfa)
10635    {
10636      rtx r;
10637
10638      gcc_assert (m->fs.cfa_reg == src);
10639      m->fs.cfa_offset += INTVAL (offset);
10640      m->fs.cfa_reg = dest;
10641
10642      r = gen_rtx_PLUS (Pmode, src, offset);
10643      r = gen_rtx_SET (VOIDmode, dest, r);
10644      add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10645      RTX_FRAME_RELATED_P (insn) = 1;
10646    }
10647  else if (style < 0)
10648    {
10649      RTX_FRAME_RELATED_P (insn) = 1;
10650      if (add_frame_related_expr)
10651	{
10652	  rtx r = gen_rtx_PLUS (Pmode, src, offset);
10653	  r = gen_rtx_SET (VOIDmode, dest, r);
10654	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, r);
10655	}
10656    }
10657
10658  if (dest == stack_pointer_rtx)
10659    {
10660      HOST_WIDE_INT ooffset = m->fs.sp_offset;
10661      bool valid = m->fs.sp_valid;
10662
10663      if (src == hard_frame_pointer_rtx)
10664	{
10665	  valid = m->fs.fp_valid;
10666	  ooffset = m->fs.fp_offset;
10667	}
10668      else if (src == crtl->drap_reg)
10669	{
10670	  valid = m->fs.drap_valid;
10671	  ooffset = 0;
10672	}
10673      else
10674	{
	  /* Else there are two possibilities: SP itself, which we set
	     up as the default above, or EH_RETURN_STACKADJ_RTX, which is
	     handled by hand along the eh_return path.  */
10678	  gcc_checking_assert (src == stack_pointer_rtx
10679			       || offset == const0_rtx);
10680	}
10681
10682      m->fs.sp_offset = ooffset - INTVAL (offset);
10683      m->fs.sp_valid = valid;
10684    }
10685}
10686
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be:
	1. a parameter-passing register.
	2. the GOT pointer.
10692   We reuse static-chain register if it is available.  Otherwise, we
10693   use DI for i386 and R13 for x86-64.  We chose R13 since it has
10694   shorter encoding.
10695
10696   Return: the regno of chosen register.  */
10697
10698static unsigned int
10699find_drap_reg (void)
10700{
10701  tree decl = cfun->decl;
10702
10703  if (TARGET_64BIT)
10704    {
      /* Use R13 for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in that case.  */
10709      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10710	return R13_REG;
10711
10712      return R10_REG;
10713    }
10714  else
10715    {
      /* Use DI for a nested function or a function that needs a static
	 chain.  Since a function with a tail call may use any caller-saved
	 register in the epilogue, DRAP must not use a caller-saved
	 register in that case.  */
10720      if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
10721	return DI_REG;
10722
10723      /* Reuse static chain register if it isn't used for parameter
10724         passing.  */
10725      if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2)
10726	{
10727	  unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
10728	  if ((ccvt & (IX86_CALLCVT_FASTCALL | IX86_CALLCVT_THISCALL)) == 0)
10729	    return CX_REG;
10730	}
10731      return DI_REG;
10732    }
10733}
10734
10735/* Handle a "force_align_arg_pointer" attribute.  */
10736
10737static tree
10738ix86_handle_force_align_arg_pointer_attribute (tree *node, tree name,
10739					       tree, int, bool *no_add_attrs)
10740{
10741  if (TREE_CODE (*node) != FUNCTION_TYPE
10742      && TREE_CODE (*node) != METHOD_TYPE
10743      && TREE_CODE (*node) != FIELD_DECL
10744      && TREE_CODE (*node) != TYPE_DECL)
10745    {
10746      warning (OPT_Wattributes, "%qE attribute only applies to functions",
10747	       name);
10748      *no_add_attrs = true;
10749    }
10750
10751  return NULL_TREE;
10752}
10753
10754/* Return minimum incoming stack alignment.  */
10755
10756static unsigned int
10757ix86_minimum_incoming_stack_boundary (bool sibcall)
10758{
10759  unsigned int incoming_stack_boundary;
10760
10761  /* Prefer the one specified at command line. */
10762  if (ix86_user_incoming_stack_boundary)
10763    incoming_stack_boundary = ix86_user_incoming_stack_boundary;
  /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
     if -mstackrealign is used, we are not checking a sibcall, and the
     estimated stack alignment is 128 bits.  */
10767  else if (!sibcall
10768	   && ix86_force_align_arg_pointer
10769	   && crtl->stack_alignment_estimated == 128)
10770    incoming_stack_boundary = MIN_STACK_BOUNDARY;
10771  else
10772    incoming_stack_boundary = ix86_default_incoming_stack_boundary;
10773
10774  /* Incoming stack alignment can be changed on individual functions
10775     via force_align_arg_pointer attribute.  We use the smallest
10776     incoming stack boundary.  */
10777  if (incoming_stack_boundary > MIN_STACK_BOUNDARY
10778      && lookup_attribute (ix86_force_align_arg_pointer_string,
10779			   TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
10780    incoming_stack_boundary = MIN_STACK_BOUNDARY;
10781
10782  /* The incoming stack frame has to be aligned at least at
10783     parm_stack_boundary.  */
10784  if (incoming_stack_boundary < crtl->parm_stack_boundary)
10785    incoming_stack_boundary = crtl->parm_stack_boundary;
10786
  /* The stack at the entry of main is aligned by the runtime.  We use the
     smallest incoming stack boundary. */
10789  if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
10790      && DECL_NAME (current_function_decl)
10791      && MAIN_NAME_P (DECL_NAME (current_function_decl))
10792      && DECL_FILE_SCOPE_P (current_function_decl))
10793    incoming_stack_boundary = MAIN_STACK_BOUNDARY;
10794
10795  return incoming_stack_boundary;
10796}
10797
10798/* Update incoming stack boundary and estimated stack alignment.  */
10799
10800static void
10801ix86_update_stack_boundary (void)
10802{
10803  ix86_incoming_stack_boundary
10804    = ix86_minimum_incoming_stack_boundary (false);
10805
  /* x86_64 varargs need 16-byte stack alignment for the register save
     area.  */
10808  if (TARGET_64BIT
10809      && cfun->stdarg
10810      && crtl->stack_alignment_estimated < 128)
10811    crtl->stack_alignment_estimated = 128;
10812
10813  /* __tls_get_addr needs to be called with 16-byte aligned stack.  */
10814  if (ix86_tls_descriptor_calls_expanded_in_cfun
10815      && crtl->preferred_stack_boundary < 128)
10816    crtl->preferred_stack_boundary = 128;
10817}
10818
10819/* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
10820   needed or an rtx for DRAP otherwise.  */
10821
10822static rtx
10823ix86_get_drap_rtx (void)
10824{
10825  if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
10826    crtl->need_drap = true;
10827
10828  if (stack_realign_drap)
10829    {
      /* Assign DRAP to vDRAP and return vDRAP.  */
10831      unsigned int regno = find_drap_reg ();
10832      rtx drap_vreg;
10833      rtx arg_ptr;
10834      rtx_insn *seq, *insn;
10835
10836      arg_ptr = gen_rtx_REG (Pmode, regno);
10837      crtl->drap_reg = arg_ptr;
10838
10839      start_sequence ();
10840      drap_vreg = copy_to_reg (arg_ptr);
10841      seq = get_insns ();
10842      end_sequence ();
10843
10844      insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
10845      if (!optimize)
10846	{
10847	  add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
10848	  RTX_FRAME_RELATED_P (insn) = 1;
10849	}
10850      return drap_vreg;
10851    }
10852  else
10853    return NULL;
10854}
10855
10856/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
10857
10858static rtx
10859ix86_internal_arg_pointer (void)
10860{
10861  return virtual_incoming_args_rtx;
10862}
10863
10864struct scratch_reg {
10865  rtx reg;
10866  bool saved;
10867};
10868
10869/* Return a short-lived scratch register for use on function entry.
10870   In 32-bit mode, it is valid only after the registers are saved
10871   in the prologue.  This register must be released by means of
10872   release_scratch_register_on_entry once it is dead.  */
10873
10874static void
10875get_scratch_register_on_entry (struct scratch_reg *sr)
10876{
10877  int regno;
10878
10879  sr->saved = false;
10880
10881  if (TARGET_64BIT)
10882    {
10883      /* We always use R11 in 64-bit mode.  */
10884      regno = R11_REG;
10885    }
10886  else
10887    {
10888      tree decl = current_function_decl, fntype = TREE_TYPE (decl);
10889      bool fastcall_p
10890	= lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10891      bool thiscall_p
10892	= lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
10893      bool static_chain_p = DECL_STATIC_CHAIN (decl);
10894      int regparm = ix86_function_regparm (fntype, decl);
10895      int drap_regno
10896	= crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
10897
10898      /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
10899	  for the static chain register.  */
10900      if ((regparm < 1 || (fastcall_p && !static_chain_p))
10901	  && drap_regno != AX_REG)
10902	regno = AX_REG;
10903      /* 'thiscall' sets regparm to 1, uses ecx for arguments and edx
10904	  for the static chain register.  */
10905      else if (thiscall_p && !static_chain_p && drap_regno != AX_REG)
10906        regno = AX_REG;
10907      else if (regparm < 2 && !thiscall_p && drap_regno != DX_REG)
10908	regno = DX_REG;
10909      /* ecx is the static chain register.  */
10910      else if (regparm < 3 && !fastcall_p && !thiscall_p
10911	       && !static_chain_p
10912	       && drap_regno != CX_REG)
10913	regno = CX_REG;
10914      else if (ix86_save_reg (BX_REG, true))
10915	regno = BX_REG;
10916      /* esi is the static chain register.  */
10917      else if (!(regparm == 3 && static_chain_p)
10918	       && ix86_save_reg (SI_REG, true))
10919	regno = SI_REG;
10920      else if (ix86_save_reg (DI_REG, true))
10921	regno = DI_REG;
10922      else
10923	{
10924	  regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
10925	  sr->saved = true;
10926	}
10927    }
10928
10929  sr->reg = gen_rtx_REG (Pmode, regno);
10930  if (sr->saved)
10931    {
10932      rtx insn = emit_insn (gen_push (sr->reg));
10933      RTX_FRAME_RELATED_P (insn) = 1;
10934    }
10935}
10936
10937/* Release a scratch register obtained from the preceding function.  */
10938
10939static void
10940release_scratch_register_on_entry (struct scratch_reg *sr)
10941{
10942  if (sr->saved)
10943    {
10944      struct machine_function *m = cfun->machine;
10945      rtx x, insn = emit_insn (gen_pop (sr->reg));
10946
10947      /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop.  */
10948      RTX_FRAME_RELATED_P (insn) = 1;
10949      x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
10950      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
10951      add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
10952      m->fs.sp_offset -= UNITS_PER_WORD;
10953    }
10954}
10955
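/* Size of one stack probe interval.  STACK_CHECK_PROBE_INTERVAL_EXP
   defaults to 12 (see defaults.h), so this is normally 4096 bytes,
   i.e. one page, unless the target overrides it.  */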
10956#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
10957
10958/* Emit code to adjust the stack pointer by SIZE bytes while probing it.  */
10959
10960static void
10961ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
10962{
10963  /* We skip the probe for the first interval + a small dope of 4 words and
10964     probe that many bytes past the specified size to maintain a protection
10965     area at the bottom of the stack.  */
10966  const int dope = 4 * UNITS_PER_WORD;
10967  rtx size_rtx = GEN_INT (size), last;
10968
10969  /* See if we have a constant small number of probes to generate.  If so,
10970     that's the easy case.  The run-time loop is made up of 11 insns in the
10971     generic case while the compile-time loop is made up of 3+2*(n-1) insns
10972     for n # of intervals.  */
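  /* For illustration only (assuming the typical 4 KB probe interval and
     a 64-bit target, so the dope above is 4 * 8 == 32 bytes): a constant
     SIZE of 10240 takes the small-count path below and emits

	 SP -= 8224   (2 * 4096 + 32), probe
	 SP -= 4096,  probe
	 SP -= 2048   (10240 + 4096 - 12288), probe
	 SP += 4128   (4096 + 32)

     which nets out to SP -= 10240, i.e. exactly SIZE bytes, with each
     probe placed one interval plus the dope beyond the region it
     guarantees, as described above.  */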
10973  if (size <= 5 * PROBE_INTERVAL)
10974    {
10975      HOST_WIDE_INT i, adjust;
10976      bool first_probe = true;
10977
10978      /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
10979	 values of N from 1 until it exceeds SIZE.  If only one probe is
10980	 needed, this will not generate any code.  Then adjust and probe
10981	 to PROBE_INTERVAL + SIZE.  */
10982      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
10983	{
10984	  if (first_probe)
10985	    {
10986	      adjust = 2 * PROBE_INTERVAL + dope;
10987	      first_probe = false;
10988	    }
10989	  else
10990	    adjust = PROBE_INTERVAL;
10991
10992	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10993				  plus_constant (Pmode, stack_pointer_rtx,
10994						 -adjust)));
10995	  emit_stack_probe (stack_pointer_rtx);
10996	}
10997
10998      if (first_probe)
10999	adjust = size + PROBE_INTERVAL + dope;
11000      else
11001        adjust = size + PROBE_INTERVAL - i;
11002
11003      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11004			      plus_constant (Pmode, stack_pointer_rtx,
11005					     -adjust)));
11006      emit_stack_probe (stack_pointer_rtx);
11007
11008      /* Adjust back to account for the additional first interval.  */
11009      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11010				     plus_constant (Pmode, stack_pointer_rtx,
11011						    PROBE_INTERVAL + dope)));
11012    }
11013
11014  /* Otherwise, do the same as above, but in a loop.  Note that we must be
11015     extra careful with variables wrapping around because we might be at
11016     the very top (or the very bottom) of the address space and we have
11017     to be able to handle this case properly; in particular, we use an
11018     equality test for the loop condition.  */
11019  else
11020    {
11021      HOST_WIDE_INT rounded_size;
11022      struct scratch_reg sr;
11023
11024      get_scratch_register_on_entry (&sr);
11025
11026
11027      /* Step 1: round SIZE to the previous multiple of the interval.  */
11028
11029      rounded_size = size & -PROBE_INTERVAL;
11030
11031
11032      /* Step 2: compute initial and final value of the loop counter.  */
11033
11034      /* SP = SP_0 + PROBE_INTERVAL.  */
11035      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11036			      plus_constant (Pmode, stack_pointer_rtx,
11037					     - (PROBE_INTERVAL + dope))));
11038
11039      /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE.  */
11040      emit_move_insn (sr.reg, GEN_INT (-rounded_size));
11041      emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
11042			      gen_rtx_PLUS (Pmode, sr.reg,
11043					    stack_pointer_rtx)));
11044
11045
11046      /* Step 3: the loop
11047
11048	 while (SP != LAST_ADDR)
11049	   {
11050	     SP = SP + PROBE_INTERVAL
11051	     probe at SP
11052	   }
11053
11054	 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
11055	 values of N from 1 until it is equal to ROUNDED_SIZE.  */
11056
11057      emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
11058
11059
11060      /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
11061	 assert at compile-time that SIZE is equal to ROUNDED_SIZE.  */
11062
11063      if (size != rounded_size)
11064	{
11065	  emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11066			          plus_constant (Pmode, stack_pointer_rtx,
11067						 rounded_size - size)));
11068	  emit_stack_probe (stack_pointer_rtx);
11069	}
11070
11071      /* Adjust back to account for the additional first interval.  */
11072      last = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11073				     plus_constant (Pmode, stack_pointer_rtx,
11074						    PROBE_INTERVAL + dope)));
11075
11076      release_scratch_register_on_entry (&sr);
11077    }
11078
11079  /* Even if the stack pointer isn't the CFA register, we need to correctly
11080     describe the adjustments made to it, in particular differentiate the
11081     frame-related ones from the frame-unrelated ones.  */
11082  if (size > 0)
11083    {
11084      rtx expr = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (2));
11085      XVECEXP (expr, 0, 0)
11086	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11087		       plus_constant (Pmode, stack_pointer_rtx, -size));
11088      XVECEXP (expr, 0, 1)
11089	= gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11090		       plus_constant (Pmode, stack_pointer_rtx,
11091				      PROBE_INTERVAL + dope + size));
11092      add_reg_note (last, REG_FRAME_RELATED_EXPR, expr);
11093      RTX_FRAME_RELATED_P (last) = 1;
11094
11095      cfun->machine->fs.sp_offset += size;
11096    }
11097
11098  /* Make sure nothing is scheduled before we are done.  */
11099  emit_insn (gen_blockage ());
11100}
11101
11102/* Adjust the stack pointer up to REG while probing it.  */
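/* A rough sketch of the sequence emitted below, shown for a 32-bit
   target in AT&T syntax (operand sizes, register names and label
   numbers vary with the target and function):

	.LPSRL0:	cmpl	%REG, %esp
			je	.LPSRE0
			subl	$PROBE_INTERVAL, %esp
			orl	$0, (%esp)
			jmp	.LPSRL0
	.LPSRE0:  */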
11103
11104const char *
11105output_adjust_stack_and_probe (rtx reg)
11106{
11107  static int labelno = 0;
11108  char loop_lab[32], end_lab[32];
11109  rtx xops[2];
11110
11111  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11112  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11113
11114  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11115
11116  /* Jump to END_LAB if SP == LAST_ADDR.  */
11117  xops[0] = stack_pointer_rtx;
11118  xops[1] = reg;
11119  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11120  fputs ("\tje\t", asm_out_file);
11121  assemble_name_raw (asm_out_file, end_lab);
11122  fputc ('\n', asm_out_file);
11123
11124  /* SP = SP + PROBE_INTERVAL.  */
11125  xops[1] = GEN_INT (PROBE_INTERVAL);
11126  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11127
11128  /* Probe at SP.  */
11129  xops[1] = const0_rtx;
11130  output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
11131
11132  fprintf (asm_out_file, "\tjmp\t");
11133  assemble_name_raw (asm_out_file, loop_lab);
11134  fputc ('\n', asm_out_file);
11135
11136  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11137
11138  return "";
11139}
11140
11141/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
11142   inclusive.  These are offsets from the current stack pointer.  */
11143
11144static void
11145ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
11146{
11147  /* See if we have a constant small number of probes to generate.  If so,
11148     that's the easy case.  The run-time loop is made up of 7 insns in the
11149     generic case while the compile-time loop is made up of n insns for n #
11150     of intervals.  */
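  /* For illustration only (assuming the typical 4 KB probe interval):
     FIRST == 8192 and SIZE == 12288 take the small-count path below and
     emit probes at SP - 12288, SP - 16384 and SP - 20480, i.e. one probe
     per interval of the requested range below the protected first part.  */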
11151  if (size <= 7 * PROBE_INTERVAL)
11152    {
11153      HOST_WIDE_INT i;
11154
11155      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
11156	 it exceeds SIZE.  If only one probe is needed, this will not
11157	 generate any code.  Then probe at FIRST + SIZE.  */
11158      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
11159	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11160					 -(first + i)));
11161
11162      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
11163				       -(first + size)));
11164    }
11165
11166  /* Otherwise, do the same as above, but in a loop.  Note that we must be
11167     extra careful with variables wrapping around because we might be at
11168     the very top (or the very bottom) of the address space and we have
11169     to be able to handle this case properly; in particular, we use an
11170     equality test for the loop condition.  */
11171  else
11172    {
11173      HOST_WIDE_INT rounded_size, last;
11174      struct scratch_reg sr;
11175
11176      get_scratch_register_on_entry (&sr);
11177
11178
11179      /* Step 1: round SIZE to the previous multiple of the interval.  */
11180
11181      rounded_size = size & -PROBE_INTERVAL;
11182
11183
11184      /* Step 2: compute initial and final value of the loop counter.  */
11185
11186      /* TEST_OFFSET = FIRST.  */
11187      emit_move_insn (sr.reg, GEN_INT (-first));
11188
11189      /* LAST_OFFSET = FIRST + ROUNDED_SIZE.  */
11190      last = first + rounded_size;
11191
11192
11193      /* Step 3: the loop
11194
11195	 while (TEST_ADDR != LAST_ADDR)
11196	   {
11197	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
11198	     probe at TEST_ADDR
11199	   }
11200
11201         probes at FIRST + N * PROBE_INTERVAL for values of N from 1
11202         until it is equal to ROUNDED_SIZE.  */
11203
11204      emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
11205
11206
11207      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
11208	 that SIZE is equal to ROUNDED_SIZE.  */
11209
11210      if (size != rounded_size)
11211	emit_stack_probe (plus_constant (Pmode,
11212					 gen_rtx_PLUS (Pmode,
11213						       stack_pointer_rtx,
11214						       sr.reg),
11215					 rounded_size - size));
11216
11217      release_scratch_register_on_entry (&sr);
11218    }
11219
11220  /* Make sure nothing is scheduled before we are done.  */
11221  emit_insn (gen_blockage ());
11222}
11223
11224/* Probe a range of stack addresses from REG to END, inclusive.  These are
11225   offsets from the current stack pointer.  */
11226
11227const char *
11228output_probe_stack_range (rtx reg, rtx end)
11229{
11230  static int labelno = 0;
11231  char loop_lab[32], end_lab[32];
11232  rtx xops[3];
11233
11234  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
11235  ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
11236
11237  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
11238
11239  /* Jump to END_LAB if TEST_ADDR == LAST_ADDR.  */
11240  xops[0] = reg;
11241  xops[1] = end;
11242  output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
11243  fputs ("\tje\t", asm_out_file);
11244  assemble_name_raw (asm_out_file, end_lab);
11245  fputc ('\n', asm_out_file);
11246
11247  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
11248  xops[1] = GEN_INT (PROBE_INTERVAL);
11249  output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
11250
11251  /* Probe at TEST_ADDR.  */
11252  xops[0] = stack_pointer_rtx;
11253  xops[1] = reg;
11254  xops[2] = const0_rtx;
11255  output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
11256
11257  fprintf (asm_out_file, "\tjmp\t");
11258  assemble_name_raw (asm_out_file, loop_lab);
11259  fputc ('\n', asm_out_file);
11260
11261  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
11262
11263  return "";
11264}
11265
11266/* Finalize the stack_realign_needed flag, which will guide the prologue
11267   and epilogue so that they are generated in the correct form.  */
11268static void
11269ix86_finalize_stack_realign_flags (void)
11270{
11271  /* Check if stack realign is really needed after reload, and
11272     store the result in cfun.  */
11273  unsigned int incoming_stack_boundary
11274    = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
11275       ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
11276  unsigned int stack_realign
11277    = (incoming_stack_boundary
11278       < (crtl->is_leaf && !ix86_current_function_calls_tls_descriptor
11279	  ? crtl->max_used_stack_slot_alignment
11280	  : crtl->stack_alignment_needed));
11281
11282  if (crtl->stack_realign_finalized)
11283    {
11284      /* After stack_realign_needed is finalized, we can no longer
11285	 change it.  */
11286      gcc_assert (crtl->stack_realign_needed == stack_realign);
11287      return;
11288    }
11289
11290  /* If the only reason for frame_pointer_needed is that we conservatively
11291     assumed stack realignment might be needed, but in the end nothing that
11292     needed the stack alignment had been spilled, clear frame_pointer_needed
11293     and say we don't need stack realignment.  */
11294  if (stack_realign
11295      && frame_pointer_needed
11296      && crtl->is_leaf
11297      && flag_omit_frame_pointer
11298      && crtl->sp_is_unchanging
11299      && !ix86_current_function_calls_tls_descriptor
11300      && !crtl->accesses_prior_frames
11301      && !cfun->calls_alloca
11302      && !crtl->calls_eh_return
11303      /* See ira_setup_eliminable_regset for the rationale.  */
11304      && !(STACK_CHECK_MOVING_SP
11305	   && flag_stack_check
11306	   && flag_exceptions
11307	   && cfun->can_throw_non_call_exceptions)
11308      && !ix86_frame_pointer_required ()
11309      && get_frame_size () == 0
11310      && ix86_nsaved_sseregs () == 0
11311      && ix86_varargs_gpr_size + ix86_varargs_fpr_size == 0)
11312    {
11313      HARD_REG_SET set_up_by_prologue, prologue_used;
11314      basic_block bb;
11315
11316      CLEAR_HARD_REG_SET (prologue_used);
11317      CLEAR_HARD_REG_SET (set_up_by_prologue);
11318      add_to_hard_reg_set (&set_up_by_prologue, Pmode, STACK_POINTER_REGNUM);
11319      add_to_hard_reg_set (&set_up_by_prologue, Pmode, ARG_POINTER_REGNUM);
11320      add_to_hard_reg_set (&set_up_by_prologue, Pmode,
11321			   HARD_FRAME_POINTER_REGNUM);
11322      FOR_EACH_BB_FN (bb, cfun)
11323        {
11324          rtx_insn *insn;
11325	  FOR_BB_INSNS (bb, insn)
11326	    if (NONDEBUG_INSN_P (insn)
11327		&& requires_stack_frame_p (insn, prologue_used,
11328					   set_up_by_prologue))
11329	      {
11330		crtl->stack_realign_needed = stack_realign;
11331		crtl->stack_realign_finalized = true;
11332		return;
11333	      }
11334	}
11335
11336      /* If drap has been set, but it actually isn't live at the start
11337	 of the function, there is no reason to set it up.  */
11338      if (crtl->drap_reg)
11339	{
11340	  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11341	  if (! REGNO_REG_SET_P (DF_LR_IN (bb), REGNO (crtl->drap_reg)))
11342	    {
11343	      crtl->drap_reg = NULL_RTX;
11344	      crtl->need_drap = false;
11345	    }
11346	}
11347      else
11348	cfun->machine->no_drap_save_restore = true;
11349
11350      frame_pointer_needed = false;
11351      stack_realign = false;
11352      crtl->max_used_stack_slot_alignment = incoming_stack_boundary;
11353      crtl->stack_alignment_needed = incoming_stack_boundary;
11354      crtl->stack_alignment_estimated = incoming_stack_boundary;
11355      if (crtl->preferred_stack_boundary > incoming_stack_boundary)
11356	crtl->preferred_stack_boundary = incoming_stack_boundary;
11357      df_finish_pass (true);
11358      df_scan_alloc (NULL);
11359      df_scan_blocks ();
11360      df_compute_regs_ever_live (true);
11361      df_analyze ();
11362    }
11363
11364  crtl->stack_realign_needed = stack_realign;
11365  crtl->stack_realign_finalized = true;
11366}
11367
11368/* Delete SET_GOT right after entry block if it is allocated to reg.  */
11369
11370static void
11371ix86_elim_entry_set_got (rtx reg)
11372{
11373  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
11374  rtx_insn *c_insn = BB_HEAD (bb);
11375  if (!NONDEBUG_INSN_P (c_insn))
11376    c_insn = next_nonnote_nondebug_insn (c_insn);
11377  if (c_insn && NONJUMP_INSN_P (c_insn))
11378    {
11379      rtx pat = PATTERN (c_insn);
11380      if (GET_CODE (pat) == PARALLEL)
11381	{
11382	  rtx vec = XVECEXP (pat, 0, 0);
11383	  if (GET_CODE (vec) == SET
11384	      && XINT (XEXP (vec, 1), 1) == UNSPEC_SET_GOT
11385	      && REGNO (XEXP (vec, 0)) == REGNO (reg))
11386	    delete_insn (c_insn);
11387	}
11388    }
11389}
11390
11391/* Expand the prologue into a bunch of separate insns.  */
11392
11393void
11394ix86_expand_prologue (void)
11395{
11396  struct machine_function *m = cfun->machine;
11397  rtx insn, t;
11398  struct ix86_frame frame;
11399  HOST_WIDE_INT allocate;
11400  bool int_registers_saved;
11401  bool sse_registers_saved;
11402
11403  ix86_finalize_stack_realign_flags ();
11404
11405  /* DRAP should not coexist with stack_realign_fp.  */
11406  gcc_assert (!(crtl->drap_reg && stack_realign_fp));
11407
11408  memset (&m->fs, 0, sizeof (m->fs));
11409
11410  /* Initialize CFA state for before the prologue.  */
11411  m->fs.cfa_reg = stack_pointer_rtx;
11412  m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
11413
11414  /* Track SP offset to the CFA.  We continue tracking this after we've
11415     swapped the CFA register away from SP.  In the case of re-alignment
11416     this is fudged; we're interested in offsets within the local frame.  */
11417  m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11418  m->fs.sp_valid = true;
11419
11420  ix86_compute_frame_layout (&frame);
11421
11422  if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
11423    {
11424      /* We should have already generated an error for any use of
11425         ms_hook on a nested function.  */
11426      gcc_checking_assert (!ix86_static_chain_on_stack);
11427
11428      /* Check if profiling is active and whether we shall use the
11429         profiling-before-prologue variant.  If so, sorry.  */
11430      if (crtl->profile && flag_fentry != 0)
11431        sorry ("ms_hook_prologue attribute isn%'t compatible "
11432	       "with -mfentry for 32-bit");
11433
11434      /* In ix86_asm_output_function_label we emitted:
11435	 8b ff     movl.s %edi,%edi
11436	 55        push   %ebp
11437	 8b ec     movl.s %esp,%ebp
11438
11439	 This matches the hookable function prologue in Win32 API
11440	 functions in Microsoft Windows XP Service Pack 2 and newer.
11441	 Wine uses this to enable Windows apps to hook the Win32 API
11442	 functions provided by Wine.
11443
11444	 What that means is that we've already set up the frame pointer.  */
11445
11446      if (frame_pointer_needed
11447	  && !(crtl->drap_reg && crtl->stack_realign_needed))
11448	{
11449	  rtx push, mov;
11450
11451	  /* We've decided to use the frame pointer already set up.
11452	     Describe this to the unwinder by pretending that both
11453	     push and mov insns happen right here.
11454
11455	     Putting the unwind info here at the end of the ms_hook
11456	     is done so that we can make absolutely certain we get
11457	     the required byte sequence at the start of the function,
11458	     rather than relying on an assembler that can produce
11459	     the exact encoding required.
11460
11461	     However it does mean (in the unpatched case) that we have
11462	     a 1 insn window where the asynchronous unwind info is
11463	     incorrect.  However, if we placed the unwind info at
11464	     its correct location we would have incorrect unwind info
11465	     in the patched case.  Which is probably all moot since
11466	     I don't expect Wine generates dwarf2 unwind info for the
11467	     system libraries that use this feature.  */
11468
11469	  insn = emit_insn (gen_blockage ());
11470
11471	  push = gen_push (hard_frame_pointer_rtx);
11472	  mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
11473			     stack_pointer_rtx);
11474	  RTX_FRAME_RELATED_P (push) = 1;
11475	  RTX_FRAME_RELATED_P (mov) = 1;
11476
11477	  RTX_FRAME_RELATED_P (insn) = 1;
11478	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11479			gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
11480
11481	  /* Note that gen_push incremented m->fs.cfa_offset, even
11482	     though we didn't emit the push insn here.  */
11483	  m->fs.cfa_reg = hard_frame_pointer_rtx;
11484	  m->fs.fp_offset = m->fs.cfa_offset;
11485	  m->fs.fp_valid = true;
11486	}
11487      else
11488	{
11489	  /* The frame pointer is not needed so pop %ebp again.
11490	     This leaves us with a pristine state.  */
11491	  emit_insn (gen_pop (hard_frame_pointer_rtx));
11492	}
11493    }
11494
11495  /* The first insn of a function that accepts its static chain on the
11496     stack is to push the register that would be filled in by a direct
11497     call.  This insn will be skipped by the trampoline.  */
11498  else if (ix86_static_chain_on_stack)
11499    {
11500      insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
11501      emit_insn (gen_blockage ());
11502
11503      /* We don't want to interpret this push insn as a register save,
11504	 only as a stack adjustment.  The real copy of the register as
11505	 a save will be done later, if needed.  */
11506      t = plus_constant (Pmode, stack_pointer_rtx, -UNITS_PER_WORD);
11507      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
11508      add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
11509      RTX_FRAME_RELATED_P (insn) = 1;
11510    }
11511
11512  /* Emit prologue code to adjust stack alignment and set up DRAP, in case
11513     DRAP is needed and stack realignment is really needed after reload.  */
11514  if (stack_realign_drap)
11515    {
11516      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11517
11518      /* Only need to push parameter pointer reg if it is caller saved.  */
11519      if (!call_used_regs[REGNO (crtl->drap_reg)])
11520	{
11521	  /* Push the arg pointer reg.  */
11522	  insn = emit_insn (gen_push (crtl->drap_reg));
11523	  RTX_FRAME_RELATED_P (insn) = 1;
11524	}
11525
11526      /* Grab the argument pointer.  */
11527      t = plus_constant (Pmode, stack_pointer_rtx, m->fs.sp_offset);
11528      insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11529      RTX_FRAME_RELATED_P (insn) = 1;
11530      m->fs.cfa_reg = crtl->drap_reg;
11531      m->fs.cfa_offset = 0;
11532
11533      /* Align the stack.  */
11534      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11535					stack_pointer_rtx,
11536					GEN_INT (-align_bytes)));
11537      RTX_FRAME_RELATED_P (insn) = 1;
11538
11539      /* Replicate the return address on the stack so that the return
11540	 address can be reached via the (argp - 1) slot.  This is needed
11541	 to implement the macro RETURN_ADDR_RTX and the intrinsic function
11542	 expand_builtin_return_addr, etc.  */
11543      t = plus_constant (Pmode, crtl->drap_reg, -UNITS_PER_WORD);
11544      t = gen_frame_mem (word_mode, t);
11545      insn = emit_insn (gen_push (t));
11546      RTX_FRAME_RELATED_P (insn) = 1;
11547
11548      /* For the purposes of frame and register save area addressing,
11549	 we've started over with a new frame.  */
11550      m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
11551      m->fs.realigned = true;
11552    }
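  /* With DRAP-based realignment the stack now looks roughly like this
     (an illustration only; higher addresses first, and the bracketed
     slots are present only when the corresponding push above happened):

	incoming arguments
	return address			<- DRAP - UNITS_PER_WORD
	[static chain slot]
	[saved DRAP register]
	... padding down to the requested alignment ...
	copy of the return address	<- realigned SP

     and frame addressing starts over from the realigned SP.  */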
11553
11554  int_registers_saved = (frame.nregs == 0);
11555  sse_registers_saved = (frame.nsseregs == 0);
11556
11557  if (frame_pointer_needed && !m->fs.fp_valid)
11558    {
11559      /* Note: AT&T enter does NOT have reversed args.  Enter is probably
11560         slower on all targets.  Also sdb doesn't like it.  */
11561      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
11562      RTX_FRAME_RELATED_P (insn) = 1;
11563
11564      /* Push registers now, before setting the frame pointer
11565	 on SEH target.  */
11566      if (!int_registers_saved
11567	  && TARGET_SEH
11568	  && !frame.save_regs_using_mov)
11569	{
11570	  ix86_emit_save_regs ();
11571	  int_registers_saved = true;
11572	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11573	}
11574
11575      if (m->fs.sp_offset == frame.hard_frame_pointer_offset)
11576	{
11577	  insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11578	  RTX_FRAME_RELATED_P (insn) = 1;
11579
11580	  if (m->fs.cfa_reg == stack_pointer_rtx)
11581	    m->fs.cfa_reg = hard_frame_pointer_rtx;
11582	  m->fs.fp_offset = m->fs.sp_offset;
11583	  m->fs.fp_valid = true;
11584	}
11585    }
11586
11587  if (!int_registers_saved)
11588    {
11589      /* If saving registers via PUSH, do so now.  */
11590      if (!frame.save_regs_using_mov)
11591	{
11592	  ix86_emit_save_regs ();
11593	  int_registers_saved = true;
11594	  gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
11595	}
11596
11597      /* When using the red zone we may start register saving before allocating
11598	 the stack frame, saving one cycle of the prologue.  However, avoid
11599	 doing this if we have to probe the stack; at least on x86_64 the
11600	 stack probe can turn into a call that clobbers a red zone location. */
11601      else if (ix86_using_red_zone ()
11602	       && (! TARGET_STACK_PROBE
11603		   || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
11604	{
11605	  ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11606	  int_registers_saved = true;
11607	}
11608    }
11609
11610  if (stack_realign_fp)
11611    {
11612      int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
11613      gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
11614
11615      /* The computation of the size of the re-aligned stack frame means
11616	 that we must allocate the size of the register save area before
11617	 performing the actual alignment.  Otherwise we cannot guarantee
11618	 that there's enough storage above the realignment point.  */
11619      if (m->fs.sp_offset != frame.sse_reg_save_offset)
11620        pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11621				   GEN_INT (m->fs.sp_offset
11622					    - frame.sse_reg_save_offset),
11623				   -1, false);
11624
11625      /* Align the stack.  */
11626      insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
11627					stack_pointer_rtx,
11628					GEN_INT (-align_bytes)));
11629
11630      /* For the purposes of register save area addressing, the stack
11631         pointer is no longer valid.  As for the value of sp_offset,
11632	 see ix86_compute_frame_layout, which we need to match in order
11633	 to pass verification of stack_pointer_offset at the end.  */
11634      m->fs.sp_offset = (m->fs.sp_offset + align_bytes - 1) & -align_bytes;
11635      m->fs.sp_valid = false;
11636    }
11637
11638  allocate = frame.stack_pointer_offset - m->fs.sp_offset;
11639
11640  if (flag_stack_usage_info)
11641    {
11642      /* We start to count from ARG_POINTER.  */
11643      HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
11644
11645      /* If it was realigned, take into account the fake frame.  */
11646      if (stack_realign_drap)
11647	{
11648	  if (ix86_static_chain_on_stack)
11649	    stack_size += UNITS_PER_WORD;
11650
11651	  if (!call_used_regs[REGNO (crtl->drap_reg)])
11652	    stack_size += UNITS_PER_WORD;
11653
11654	  /* This over-estimates by 1 minimal-stack-alignment-unit but
11655	     mitigates that by counting in the new return address slot.  */
11656	  current_function_dynamic_stack_size
11657	    += crtl->stack_alignment_needed / BITS_PER_UNIT;
11658	}
11659
11660      current_function_static_stack_size = stack_size;
11661    }
11662
11663  /* On SEH target with very large frame size, allocate an area to save
11664     SSE registers (as the very large allocation won't be described).  */
11665  if (TARGET_SEH
11666      && frame.stack_pointer_offset > SEH_MAX_FRAME_SIZE
11667      && !sse_registers_saved)
11668    {
11669      HOST_WIDE_INT sse_size =
11670	frame.sse_reg_save_offset - frame.reg_save_offset;
11671
11672      gcc_assert (int_registers_saved);
11673
11674      /* No need to do stack checking as the area will be immediately
11675	 written.  */
11676      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11677			         GEN_INT (-sse_size), -1,
11678				 m->fs.cfa_reg == stack_pointer_rtx);
11679      allocate -= sse_size;
11680      ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11681      sse_registers_saved = true;
11682    }
11683
11684  /* The stack has already been decremented by the instruction calling us
11685     so probe if the size is non-negative to preserve the protection area.  */
11686  if (allocate >= 0 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
11687    {
11688      /* We expect the registers to be saved when probes are used.  */
11689      gcc_assert (int_registers_saved);
11690
11691      if (STACK_CHECK_MOVING_SP)
11692	{
11693	  if (!(crtl->is_leaf && !cfun->calls_alloca
11694		&& allocate <= PROBE_INTERVAL))
11695	    {
11696	      ix86_adjust_stack_and_probe (allocate);
11697	      allocate = 0;
11698	    }
11699	}
11700      else
11701	{
11702	  HOST_WIDE_INT size = allocate;
11703
11704	  if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
11705	    size = 0x80000000 - STACK_CHECK_PROTECT - 1;
11706
11707	  if (TARGET_STACK_PROBE)
11708	    {
11709	      if (crtl->is_leaf && !cfun->calls_alloca)
11710		{
11711		  if (size > PROBE_INTERVAL)
11712		    ix86_emit_probe_stack_range (0, size);
11713		}
11714	      else
11715		ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
11716	    }
11717	  else
11718	    {
11719	      if (crtl->is_leaf && !cfun->calls_alloca)
11720		{
11721		  if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
11722		    ix86_emit_probe_stack_range (STACK_CHECK_PROTECT,
11723						 size - STACK_CHECK_PROTECT);
11724		}
11725	      else
11726		ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
11727	    }
11728	}
11729    }
11730
11731  if (allocate == 0)
11732    ;
11733  else if (!ix86_target_stack_probe ()
11734	   || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
11735    {
11736      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
11737			         GEN_INT (-allocate), -1,
11738			         m->fs.cfa_reg == stack_pointer_rtx);
11739    }
11740  else
11741    {
11742      rtx eax = gen_rtx_REG (Pmode, AX_REG);
11743      rtx r10 = NULL;
11744      rtx (*adjust_stack_insn)(rtx, rtx, rtx);
11745      const bool sp_is_cfa_reg = (m->fs.cfa_reg == stack_pointer_rtx);
11746      bool eax_live = ix86_eax_live_at_start_p ();
11747      bool r10_live = false;
11748
11749      if (TARGET_64BIT)
11750        r10_live = (DECL_STATIC_CHAIN (current_function_decl) != 0);
11751
11752      if (eax_live)
11753	{
11754	  insn = emit_insn (gen_push (eax));
11755	  allocate -= UNITS_PER_WORD;
11756	  /* Note that SEH directives need to continue tracking the stack
11757	     pointer even after the frame pointer has been set up.  */
11758	  if (sp_is_cfa_reg || TARGET_SEH)
11759	    {
11760	      if (sp_is_cfa_reg)
11761		m->fs.cfa_offset += UNITS_PER_WORD;
11762	      RTX_FRAME_RELATED_P (insn) = 1;
11763	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11764			    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11765					 plus_constant (Pmode, stack_pointer_rtx,
11766							-UNITS_PER_WORD)));
11767	    }
11768	}
11769
11770      if (r10_live)
11771	{
11772	  r10 = gen_rtx_REG (Pmode, R10_REG);
11773	  insn = emit_insn (gen_push (r10));
11774	  allocate -= UNITS_PER_WORD;
11775	  if (sp_is_cfa_reg || TARGET_SEH)
11776	    {
11777	      if (sp_is_cfa_reg)
11778		m->fs.cfa_offset += UNITS_PER_WORD;
11779	      RTX_FRAME_RELATED_P (insn) = 1;
11780	      add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11781			    gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11782					 plus_constant (Pmode, stack_pointer_rtx,
11783							-UNITS_PER_WORD)));
11784	    }
11785	}
11786
11787      emit_move_insn (eax, GEN_INT (allocate));
11788      emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
11789
11790      /* Use the fact that AX still contains ALLOCATE.  */
11791      adjust_stack_insn = (Pmode == DImode
11792			   ? gen_pro_epilogue_adjust_stack_di_sub
11793			   : gen_pro_epilogue_adjust_stack_si_sub);
11794
11795      insn = emit_insn (adjust_stack_insn (stack_pointer_rtx,
11796					   stack_pointer_rtx, eax));
11797
11798      if (sp_is_cfa_reg || TARGET_SEH)
11799	{
11800	  if (sp_is_cfa_reg)
11801	    m->fs.cfa_offset += allocate;
11802	  RTX_FRAME_RELATED_P (insn) = 1;
11803	  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11804			gen_rtx_SET (VOIDmode, stack_pointer_rtx,
11805				     plus_constant (Pmode, stack_pointer_rtx,
11806						    -allocate)));
11807	}
11808      m->fs.sp_offset += allocate;
11809
11810      /* Use stack_pointer_rtx for relative addressing so that code
11811	 works for realigned stack, too.  */
11812      if (r10_live && eax_live)
11813        {
11814	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11815	  emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
11816			  gen_frame_mem (word_mode, t));
11817	  t = plus_constant (Pmode, t, UNITS_PER_WORD);
11818	  emit_move_insn (gen_rtx_REG (word_mode, AX_REG),
11819			  gen_frame_mem (word_mode, t));
11820	}
11821      else if (eax_live || r10_live)
11822	{
11823	  t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, eax);
11824	  emit_move_insn (gen_rtx_REG (word_mode,
11825				       (eax_live ? AX_REG : R10_REG)),
11826			  gen_frame_mem (word_mode, t));
11827	}
11828    }
11829  gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
11830
11831  /* If we haven't already set up the frame pointer, do so now.  */
11832  if (frame_pointer_needed && !m->fs.fp_valid)
11833    {
11834      insn = ix86_gen_add3 (hard_frame_pointer_rtx, stack_pointer_rtx,
11835			    GEN_INT (frame.stack_pointer_offset
11836				     - frame.hard_frame_pointer_offset));
11837      insn = emit_insn (insn);
11838      RTX_FRAME_RELATED_P (insn) = 1;
11839      add_reg_note (insn, REG_CFA_ADJUST_CFA, NULL);
11840
11841      if (m->fs.cfa_reg == stack_pointer_rtx)
11842	m->fs.cfa_reg = hard_frame_pointer_rtx;
11843      m->fs.fp_offset = frame.hard_frame_pointer_offset;
11844      m->fs.fp_valid = true;
11845    }
11846
11847  if (!int_registers_saved)
11848    ix86_emit_save_regs_using_mov (frame.reg_save_offset);
11849  if (!sse_registers_saved)
11850    ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
11851
11852  /* For mcount profiling in 32-bit PIC mode we need to emit SET_GOT
11853     in the prologue.  */
11854  if (!TARGET_64BIT && pic_offset_table_rtx && crtl->profile && !flag_fentry)
11855    {
11856      rtx pic = gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM);
11857      insn = emit_insn (gen_set_got (pic));
11858      RTX_FRAME_RELATED_P (insn) = 1;
11859      add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
11860      emit_insn (gen_prologue_use (pic));
11861      /* Delete an already emitted SET_GOT if it exists and is allocated
11862	 to REAL_PIC_OFFSET_TABLE_REGNUM.  */
11863      ix86_elim_entry_set_got (pic);
11864    }
11865
11866  if (crtl->drap_reg && !crtl->stack_realign_needed)
11867    {
11868      /* vDRAP is set up, but after reload it turns out stack realign
11869         isn't necessary; here we emit the prologue to set up DRAP
11870         without the stack realign adjustment.  */
11871      t = choose_baseaddr (0);
11872      emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
11873    }
11874
11875  /* Prevent instructions from being scheduled into register save push
11876     sequence when access to the redzone area is done through frame pointer.
11877     The offset between the frame pointer and the stack pointer is calculated
11878     relative to the value of the stack pointer at the end of the function
11879     prologue, and moving instructions that access redzone area via frame
11880     pointer inside push sequence violates this assumption.  */
11881  if (frame_pointer_needed && frame.red_zone_size)
11882    emit_insn (gen_memory_blockage ());
11883
11884  /* Emit cld instruction if stringops are used in the function.  */
11885  if (TARGET_CLD && ix86_current_function_needs_cld)
11886    emit_insn (gen_cld ());
11887
11888  /* SEH requires that the prologue end within 256 bytes of the start of
11889     the function.  Prevent instruction schedules that would extend that.
11890     Further, prevent alloca modifications to the stack pointer from being
11891     combined with prologue modifications.  */
11892  if (TARGET_SEH)
11893    emit_insn (gen_prologue_use (stack_pointer_rtx));
11894}
11895
11896/* Emit code to restore REG using a POP insn.  */
11897
11898static void
11899ix86_emit_restore_reg_using_pop (rtx reg)
11900{
11901  struct machine_function *m = cfun->machine;
11902  rtx insn = emit_insn (gen_pop (reg));
11903
11904  ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
11905  m->fs.sp_offset -= UNITS_PER_WORD;
11906
11907  if (m->fs.cfa_reg == crtl->drap_reg
11908      && REGNO (reg) == REGNO (crtl->drap_reg))
11909    {
11910      /* Previously we'd represented the CFA as an expression
11911	 like *(%ebp - 8).  We've just popped that value from
11912	 the stack, which means we need to reset the CFA to
11913	 the drap register.  This will remain until we restore
11914	 the stack pointer.  */
11915      add_reg_note (insn, REG_CFA_DEF_CFA, reg);
11916      RTX_FRAME_RELATED_P (insn) = 1;
11917
11918      /* This means that the DRAP register is valid for addressing too.  */
11919      m->fs.drap_valid = true;
11920      return;
11921    }
11922
11923  if (m->fs.cfa_reg == stack_pointer_rtx)
11924    {
11925      rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
11926      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
11927      add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
11928      RTX_FRAME_RELATED_P (insn) = 1;
11929
11930      m->fs.cfa_offset -= UNITS_PER_WORD;
11931    }
11932
11933  /* When the frame pointer is the CFA, and we pop it, we are
11934     swapping back to the stack pointer as the CFA.  This happens
11935     for stack frames that don't allocate other data, so we assume
11936     the stack pointer is now pointing at the return address, i.e.
11937     the function entry state, which makes the offset be 1 word.  */
11938  if (reg == hard_frame_pointer_rtx)
11939    {
11940      m->fs.fp_valid = false;
11941      if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11942	{
11943	  m->fs.cfa_reg = stack_pointer_rtx;
11944	  m->fs.cfa_offset -= UNITS_PER_WORD;
11945
11946	  add_reg_note (insn, REG_CFA_DEF_CFA,
11947			gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11948				      GEN_INT (m->fs.cfa_offset)));
11949	  RTX_FRAME_RELATED_P (insn) = 1;
11950	}
11951    }
11952}
11953
11954/* Emit code to restore saved registers using POP insns.  */
11955
11956static void
11957ix86_emit_restore_regs_using_pop (void)
11958{
11959  unsigned int regno;
11960
11961  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
11962    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
11963      ix86_emit_restore_reg_using_pop (gen_rtx_REG (word_mode, regno));
11964}
11965
11966/* Emit code and notes for the LEAVE instruction.  */
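/* (In 32-bit terms, "leave" behaves like "movl %ebp, %esp" followed by
   "popl %ebp", so afterwards the stack pointer is valid again and sits
   one word above the slot where the frame pointer was saved.)  */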
11967
11968static void
11969ix86_emit_leave (void)
11970{
11971  struct machine_function *m = cfun->machine;
11972  rtx insn = emit_insn (ix86_gen_leave ());
11973
11974  ix86_add_queued_cfa_restore_notes (insn);
11975
11976  gcc_assert (m->fs.fp_valid);
11977  m->fs.sp_valid = true;
11978  m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
11979  m->fs.fp_valid = false;
11980
11981  if (m->fs.cfa_reg == hard_frame_pointer_rtx)
11982    {
11983      m->fs.cfa_reg = stack_pointer_rtx;
11984      m->fs.cfa_offset = m->fs.sp_offset;
11985
11986      add_reg_note (insn, REG_CFA_DEF_CFA,
11987		    plus_constant (Pmode, stack_pointer_rtx,
11988				   m->fs.sp_offset));
11989      RTX_FRAME_RELATED_P (insn) = 1;
11990    }
11991  ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
11992			     m->fs.fp_offset);
11993}
11994
11995/* Emit code to restore saved registers using MOV insns.
11996   First register is restored from CFA - CFA_OFFSET.  */
11997static void
11998ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
11999				  bool maybe_eh_return)
12000{
12001  struct machine_function *m = cfun->machine;
12002  unsigned int regno;
12003
12004  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12005    if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12006      {
12007	rtx reg = gen_rtx_REG (word_mode, regno);
12008	rtx insn, mem;
12009
12010	mem = choose_baseaddr (cfa_offset);
12011	mem = gen_frame_mem (word_mode, mem);
12012	insn = emit_move_insn (reg, mem);
12013
12014        if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
12015	  {
12016	    /* Previously we'd represented the CFA as an expression
12017	       like *(%ebp - 8).  We've just popped that value from
12018	       the stack, which means we need to reset the CFA to
12019	       the drap register.  This will remain until we restore
12020	       the stack pointer.  */
12021	    add_reg_note (insn, REG_CFA_DEF_CFA, reg);
12022	    RTX_FRAME_RELATED_P (insn) = 1;
12023
12024	    /* This means that the DRAP register is valid for addressing.  */
12025	    m->fs.drap_valid = true;
12026	  }
12027	else
12028	  ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12029
12030	cfa_offset -= UNITS_PER_WORD;
12031      }
12032}
12033
12034/* Emit code to restore saved SSE registers using MOV insns.
12035   First register is restored from CFA - CFA_OFFSET.  */
12036static void
12037ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
12038				      bool maybe_eh_return)
12039{
12040  unsigned int regno;
12041
12042  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12043    if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
12044      {
12045	rtx reg = gen_rtx_REG (V4SFmode, regno);
12046	rtx mem;
12047 	unsigned int align;
12048
12049	mem = choose_baseaddr (cfa_offset);
12050	mem = gen_rtx_MEM (V4SFmode, mem);
12051
12052 	/* The location is aligned up to INCOMING_STACK_BOUNDARY.  */
12053	align = MIN (GET_MODE_ALIGNMENT (V4SFmode), INCOMING_STACK_BOUNDARY);
12054 	set_mem_align (mem, align);
12055
12056 	/* SSE saves are not within the re-aligned local stack frame.
12057 	   In case INCOMING_STACK_BOUNDARY is misaligned, we have
12058 	   to emit an unaligned load.  */
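 	/* (UNSPEC_LOADU is normally expanded to an unaligned, movups-style
 	   load rather than an aligned one.)  */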
12059 	if (align < 128)
12060 	  {
12061 	    rtx unspec = gen_rtx_UNSPEC (V4SFmode, gen_rtvec (1, mem),
12062 					 UNSPEC_LOADU);
12063 	    emit_insn (gen_rtx_SET (VOIDmode, reg, unspec));
12064 	  }
12065 	else
12066 	  emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
12067
12068	ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
12069
12070	cfa_offset -= 16;
12071      }
12072}
12073
12074/* Restore function stack, frame, and registers.  */
12075
12076void
12077ix86_expand_epilogue (int style)
12078{
12079  struct machine_function *m = cfun->machine;
12080  struct machine_frame_state frame_state_save = m->fs;
12081  struct ix86_frame frame;
12082  bool restore_regs_via_mov;
12083  bool using_drap;
12084
12085  ix86_finalize_stack_realign_flags ();
12086  ix86_compute_frame_layout (&frame);
12087
12088  m->fs.sp_valid = (!frame_pointer_needed
12089		    || (crtl->sp_is_unchanging
12090			&& !stack_realign_fp));
12091  gcc_assert (!m->fs.sp_valid
12092	      || m->fs.sp_offset == frame.stack_pointer_offset);
12093
12094  /* The FP must be valid if the frame pointer is present.  */
12095  gcc_assert (frame_pointer_needed == m->fs.fp_valid);
12096  gcc_assert (!m->fs.fp_valid
12097	      || m->fs.fp_offset == frame.hard_frame_pointer_offset);
12098
12099  /* We must have *some* valid pointer to the stack frame.  */
12100  gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
12101
12102  /* The DRAP is never valid at this point.  */
12103  gcc_assert (!m->fs.drap_valid);
12104
12105  /* See the comment about red zone and frame
12106     pointer usage in ix86_expand_prologue.  */
12107  if (frame_pointer_needed && frame.red_zone_size)
12108    emit_insn (gen_memory_blockage ());
12109
12110  using_drap = crtl->drap_reg && crtl->stack_realign_needed;
12111  gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
12112
12113  /* Determine the CFA offset of the end of the red-zone.  */
12114  m->fs.red_zone_offset = 0;
12115  if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
12116    {
12117      /* The red-zone begins below the return address.  */
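      /* (In the x86-64 SysV ABI the red zone is the 128 bytes just below
	 the incoming stack pointer, hence its end is RED_ZONE_SIZE
	 + UNITS_PER_WORD bytes below the CFA.)  */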
12118      m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
12119
12120      /* When the register save area is in the aligned portion of
12121         the stack, determine the maximum runtime displacement that
12122	 matches up with the aligned frame.  */
12123      if (stack_realign_drap)
12124	m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
12125				  + UNITS_PER_WORD);
12126    }
12127
12128  /* Special care must be taken for the normal return case of a function
12129     using eh_return: the eax and edx registers are marked as saved, but
12130     not restored along this path.  Adjust the save location to match.  */
12131  if (crtl->calls_eh_return && style != 2)
12132    frame.reg_save_offset -= 2 * UNITS_PER_WORD;
12133
12134  /* EH_RETURN requires the use of moves to function properly.  */
12135  if (crtl->calls_eh_return)
12136    restore_regs_via_mov = true;
12137  /* SEH requires the use of pops to identify the epilogue.  */
12138  else if (TARGET_SEH)
12139    restore_regs_via_mov = false;
12140  /* If we're only restoring one register and sp is not valid then
12141     use a move instruction to restore the register, since it's
12142     less work than reloading sp and popping the register.  */
12143  else if (!m->fs.sp_valid && frame.nregs <= 1)
12144    restore_regs_via_mov = true;
12145  else if (TARGET_EPILOGUE_USING_MOVE
12146	   && cfun->machine->use_fast_prologue_epilogue
12147	   && (frame.nregs > 1
12148	       || m->fs.sp_offset != frame.reg_save_offset))
12149    restore_regs_via_mov = true;
12150  else if (frame_pointer_needed
12151	   && !frame.nregs
12152	   && m->fs.sp_offset != frame.reg_save_offset)
12153    restore_regs_via_mov = true;
12154  else if (frame_pointer_needed
12155	   && TARGET_USE_LEAVE
12156	   && cfun->machine->use_fast_prologue_epilogue
12157	   && frame.nregs == 1)
12158    restore_regs_via_mov = true;
12159  else
12160    restore_regs_via_mov = false;
12161
12162  if (restore_regs_via_mov || frame.nsseregs)
12163    {
12164      /* Ensure that the entire register save area is addressable via
12165	 the stack pointer, if we will restore via sp.  */
12166      if (TARGET_64BIT
12167	  && m->fs.sp_offset > 0x7fffffff
12168	  && !(m->fs.fp_valid || m->fs.drap_valid)
12169	  && (frame.nsseregs + frame.nregs) != 0)
12170	{
12171	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12172				     GEN_INT (m->fs.sp_offset
12173					      - frame.sse_reg_save_offset),
12174				     style,
12175				     m->fs.cfa_reg == stack_pointer_rtx);
12176	}
12177    }
12178
12179  /* If there are any SSE registers to restore, then we have to do it
12180     via moves, since there's obviously no pop for SSE regs.  */
12181  if (frame.nsseregs)
12182    ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
12183					  style == 2);
12184
12185  if (restore_regs_via_mov)
12186    {
12187      rtx t;
12188
12189      if (frame.nregs)
12190	ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
12191
12192      /* eh_return epilogues need %ecx added to the stack pointer.  */
12193      if (style == 2)
12194	{
12195	  rtx insn, sa = EH_RETURN_STACKADJ_RTX;
12196
12197	  /* Stack align doesn't work with eh_return.  */
12198	  gcc_assert (!stack_realign_drap);
12199	  /* Neither do regparm nested functions.  */
12200	  gcc_assert (!ix86_static_chain_on_stack);
12201
12202	  if (frame_pointer_needed)
12203	    {
12204	      t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
12205	      t = plus_constant (Pmode, t, m->fs.fp_offset - UNITS_PER_WORD);
12206	      emit_insn (gen_rtx_SET (VOIDmode, sa, t));
12207
12208	      t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
12209	      insn = emit_move_insn (hard_frame_pointer_rtx, t);
12210
12211	      /* Note that we use SA as a temporary CFA, as the return
12212		 address is at the proper place relative to it.  We
12213		 pretend this happens at the FP restore insn because
12214		 prior to this insn the FP would be stored at the wrong
12215		 offset relative to SA, and after this insn we have no
12216		 other reasonable register to use for the CFA.  We don't
12217		 bother resetting the CFA to the SP for the duration of
12218		 the return insn.  */
12219	      add_reg_note (insn, REG_CFA_DEF_CFA,
12220			    plus_constant (Pmode, sa, UNITS_PER_WORD));
12221	      ix86_add_queued_cfa_restore_notes (insn);
12222	      add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
12223	      RTX_FRAME_RELATED_P (insn) = 1;
12224
12225	      m->fs.cfa_reg = sa;
12226	      m->fs.cfa_offset = UNITS_PER_WORD;
12227	      m->fs.fp_valid = false;
12228
12229	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
12230					 const0_rtx, style, false);
12231	    }
12232	  else
12233	    {
12234	      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
12235	      t = plus_constant (Pmode, t, m->fs.sp_offset - UNITS_PER_WORD);
12236	      insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
12237	      ix86_add_queued_cfa_restore_notes (insn);
12238
12239	      gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
12240	      if (m->fs.cfa_offset != UNITS_PER_WORD)
12241		{
12242		  m->fs.cfa_offset = UNITS_PER_WORD;
12243		  add_reg_note (insn, REG_CFA_DEF_CFA,
12244				plus_constant (Pmode, stack_pointer_rtx,
12245					       UNITS_PER_WORD));
12246		  RTX_FRAME_RELATED_P (insn) = 1;
12247		}
12248	    }
12249	  m->fs.sp_offset = UNITS_PER_WORD;
12250	  m->fs.sp_valid = true;
12251	}
12252    }
12253  else
12254    {
12255      /* SEH requires that the function end with (1) a stack adjustment
12256	 if necessary, (2) a sequence of pops, and (3) a return or
12257	 jump instruction.  Prevent insns from the function body from
12258	 being scheduled into this sequence.  */
12259      if (TARGET_SEH)
12260	{
12261	  /* Prevent a catch region from being adjacent to the standard
12262	     epilogue sequence.  Unfortunately, neither crtl->uses_eh_lsda nor
12263	     several other flags that would be interesting to test are
12264	     yet set up.  */
12265	  if (flag_non_call_exceptions)
12266	    emit_insn (gen_nops (const1_rtx));
12267	  else
12268	    emit_insn (gen_blockage ());
12269	}
12270
12271      /* The first step is to deallocate the stack frame so that we can
12272	 pop the registers.  Also do it on the SEH target for very large
12273	 frames, as the emitted instructions aren't allowed by the ABI in
12274	 epilogues.  */
12275      if (!m->fs.sp_valid
12276 	  || (TARGET_SEH
12277	      && (m->fs.sp_offset - frame.reg_save_offset
12278		  >= SEH_MAX_FRAME_SIZE)))
12279	{
12280	  pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
12281				     GEN_INT (m->fs.fp_offset
12282					      - frame.reg_save_offset),
12283				     style, false);
12284	}
12285      else if (m->fs.sp_offset != frame.reg_save_offset)
12286	{
12287	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12288				     GEN_INT (m->fs.sp_offset
12289					      - frame.reg_save_offset),
12290				     style,
12291				     m->fs.cfa_reg == stack_pointer_rtx);
12292	}
12293
12294      ix86_emit_restore_regs_using_pop ();
12295    }
12296
12297  /* If we used a frame pointer and haven't already got rid of it,
12298     then do so now.  */
12299  if (m->fs.fp_valid)
12300    {
12301      /* If the stack pointer is valid and pointing at the frame
12302	 pointer store address, then we only need a pop.  */
12303      if (m->fs.sp_valid && m->fs.sp_offset == frame.hfp_save_offset)
12304	ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12305      /* Leave results in shorter dependency chains on CPUs that are
12306	 able to grok it fast.  */
12307      else if (TARGET_USE_LEAVE
12308	       || optimize_bb_for_size_p (EXIT_BLOCK_PTR_FOR_FN (cfun))
12309	       || !cfun->machine->use_fast_prologue_epilogue)
12310	ix86_emit_leave ();
12311      else
12312        {
12313	  pro_epilogue_adjust_stack (stack_pointer_rtx,
12314				     hard_frame_pointer_rtx,
12315				     const0_rtx, style, !using_drap);
12316	  ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
12317        }
12318    }
12319
12320  if (using_drap)
12321    {
12322      int param_ptr_offset = UNITS_PER_WORD;
12323      rtx insn;
12324
12325      gcc_assert (stack_realign_drap);
12326
12327      if (ix86_static_chain_on_stack)
12328	param_ptr_offset += UNITS_PER_WORD;
12329      if (!call_used_regs[REGNO (crtl->drap_reg)])
12330	param_ptr_offset += UNITS_PER_WORD;
12331
12332      insn = emit_insn (gen_rtx_SET
12333			(VOIDmode, stack_pointer_rtx,
12334			 gen_rtx_PLUS (Pmode,
12335				       crtl->drap_reg,
12336				       GEN_INT (-param_ptr_offset))));
12337      m->fs.cfa_reg = stack_pointer_rtx;
12338      m->fs.cfa_offset = param_ptr_offset;
12339      m->fs.sp_offset = param_ptr_offset;
12340      m->fs.realigned = false;
12341
12342      add_reg_note (insn, REG_CFA_DEF_CFA,
12343		    gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12344				  GEN_INT (param_ptr_offset)));
12345      RTX_FRAME_RELATED_P (insn) = 1;
12346
12347      if (!call_used_regs[REGNO (crtl->drap_reg)])
12348	ix86_emit_restore_reg_using_pop (crtl->drap_reg);
12349    }
12350
12351  /* At this point the stack pointer must be valid, and we must have
12352     restored all of the registers.  We may not have deallocated the
12353     entire stack frame.  We've delayed this until now because it may
12354     be possible to merge the local stack deallocation with the
12355     deallocation forced by ix86_static_chain_on_stack.   */
12356  gcc_assert (m->fs.sp_valid);
12357  gcc_assert (!m->fs.fp_valid);
12358  gcc_assert (!m->fs.realigned);
12359  if (m->fs.sp_offset != UNITS_PER_WORD)
12360    {
12361      pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12362				 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
12363				 style, true);
12364    }
12365  else
12366    ix86_add_queued_cfa_restore_notes (get_last_insn ());
12367
12368  /* Sibcall epilogues don't want a return instruction.  */
12369  if (style == 0)
12370    {
12371      m->fs = frame_state_save;
12372      return;
12373    }
12374
12375  if (crtl->args.pops_args && crtl->args.size)
12376    {
12377      rtx popc = GEN_INT (crtl->args.pops_args);
12378
12379      /* i386 can only pop 64K bytes.  If asked to pop more, pop return
12380	 address, do explicit add, and jump indirectly to the caller.  */
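      /* Roughly, for that >= 64K case (illustration only; 32-bit, AT&T
	 syntax, $N standing for the argument-pop size):

	     popl	%ecx		# return address -> ecx
	     addl	$N, %esp	# pop the N (>= 64K) argument bytes
	     jmp	*%ecx
      */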
12381
12382      if (crtl->args.pops_args >= 65536)
12383	{
12384	  rtx ecx = gen_rtx_REG (SImode, CX_REG);
12385	  rtx insn;
12386
12387	  /* There is no "pascal" calling convention in any 64-bit ABI.  */
12388	  gcc_assert (!TARGET_64BIT);
12389
12390	  insn = emit_insn (gen_pop (ecx));
12391	  m->fs.cfa_offset -= UNITS_PER_WORD;
12392	  m->fs.sp_offset -= UNITS_PER_WORD;
12393
12394	  rtx x = plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD);
12395	  x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
12396	  add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
12397	  add_reg_note (insn, REG_CFA_REGISTER,
12398			gen_rtx_SET (VOIDmode, ecx, pc_rtx));
12399	  RTX_FRAME_RELATED_P (insn) = 1;
12400
12401	  pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
12402				     popc, -1, true);
12403	  emit_jump_insn (gen_simple_return_indirect_internal (ecx));
12404	}
12405      else
12406	emit_jump_insn (gen_simple_return_pop_internal (popc));
12407    }
12408  else
12409    emit_jump_insn (gen_simple_return_internal ());
12410
12411  /* Restore the state back to the state from the prologue,
12412     so that it's correct for the next epilogue.  */
12413  m->fs = frame_state_save;
12414}
12415
/* Reset state that the function's body may have modified, so that the
   next function starts from a clean slate.  */
12417
12418static void
12419ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, HOST_WIDE_INT)
12420{
12421  if (pic_offset_table_rtx
12422      && !ix86_use_pseudo_pic_reg ())
12423    SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
12424#if TARGET_MACHO
12425  /* Mach-O doesn't support labels at the end of objects, so if
12426     it looks like we might want one, insert a NOP.  */
12427  {
12428    rtx_insn *insn = get_last_insn ();
12429    rtx_insn *deleted_debug_label = NULL;
12430    while (insn
12431	   && NOTE_P (insn)
12432	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
12433      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only; instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   between -g and -g0.  */
12438	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12439	  deleted_debug_label = insn;
12440	insn = PREV_INSN (insn);
12441      }
12442    if (insn
12443	&& (LABEL_P (insn)
12444	    || (NOTE_P (insn)
12445		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
12446      fputs ("\tnop\n", file);
12447    else if (deleted_debug_label)
12448      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
12449	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
12450	  CODE_LABEL_NUMBER (insn) = -1;
12451  }
12452#endif
12453
12454}
12455
/* Return a scratch register to use in the split stack prologue.  The
   split stack prologue is used for -fsplit-stack.  It consists of the
   first instructions in the function, even before the regular prologue.
   The scratch register can be any caller-saved register which is not
   used for parameters or for the static chain.  */
12461
12462static unsigned int
12463split_stack_prologue_scratch_regno (void)
12464{
12465  if (TARGET_64BIT)
12466    return R11_REG;
12467  else
12468    {
12469      bool is_fastcall, is_thiscall;
12470      int regparm;
12471
12472      is_fastcall = (lookup_attribute ("fastcall",
12473				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12474		     != NULL);
12475      is_thiscall = (lookup_attribute ("thiscall",
12476				       TYPE_ATTRIBUTES (TREE_TYPE (cfun->decl)))
12477		     != NULL);
12478      regparm = ix86_function_regparm (TREE_TYPE (cfun->decl), cfun->decl);
12479
12480      if (is_fastcall)
12481	{
12482	  if (DECL_STATIC_CHAIN (cfun->decl))
12483	    {
12484	      sorry ("-fsplit-stack does not support fastcall with "
12485		     "nested function");
12486	      return INVALID_REGNUM;
12487	    }
12488	  return AX_REG;
12489	}
12490      else if (is_thiscall)
12491        {
12492	  if (!DECL_STATIC_CHAIN (cfun->decl))
12493	    return DX_REG;
12494	  return AX_REG;
12495	}
12496      else if (regparm < 3)
12497	{
12498	  if (!DECL_STATIC_CHAIN (cfun->decl))
12499	    return CX_REG;
12500	  else
12501	    {
12502	      if (regparm >= 2)
12503		{
12504		  sorry ("-fsplit-stack does not support 2 register "
12505			 "parameters for a nested function");
12506		  return INVALID_REGNUM;
12507		}
12508	      return DX_REG;
12509	    }
12510	}
12511      else
12512	{
12513	  /* FIXME: We could make this work by pushing a register
12514	     around the addition and comparison.  */
12515	  sorry ("-fsplit-stack does not support 3 register parameters");
12516	  return INVALID_REGNUM;
12517	}
12518    }
12519}
12520
/* A SYMBOL_REF for the function which allocates new stack space for
   -fsplit-stack.  */
12523
12524static GTY(()) rtx split_stack_fn;
12525
/* A SYMBOL_REF for the function which allocates new stack space when
   using the large code model.  */
12528
12529static GTY(()) rtx split_stack_fn_large;
12530
12531/* Handle -fsplit-stack.  These are the first instructions in the
12532   function, even before the regular prologue.  */
12533
12534void
12535ix86_expand_split_stack_prologue (void)
12536{
12537  struct ix86_frame frame;
12538  HOST_WIDE_INT allocate;
12539  unsigned HOST_WIDE_INT args_size;
12540  rtx_code_label *label;
12541  rtx limit, current, jump_insn, allocate_rtx, call_insn, call_fusage;
12542  rtx scratch_reg = NULL_RTX;
12543  rtx_code_label *varargs_label = NULL;
12544  rtx fn;
12545
12546  gcc_assert (flag_split_stack && reload_completed);
12547
12548  ix86_finalize_stack_realign_flags ();
12549  ix86_compute_frame_layout (&frame);
12550  allocate = frame.stack_pointer_offset - INCOMING_FRAME_SP_OFFSET;
12551
12552  /* This is the label we will branch to if we have enough stack
12553     space.  We expect the basic block reordering pass to reverse this
12554     branch if optimizing, so that we branch in the unlikely case.  */
12555  label = gen_label_rtx ();
12556
12557  /* We need to compare the stack pointer minus the frame size with
12558     the stack boundary in the TCB.  The stack boundary always gives
12559     us SPLIT_STACK_AVAILABLE bytes, so if we need less than that we
12560     can compare directly.  Otherwise we need to do an addition.  */
12561
12562  limit = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
12563			  UNSPEC_STACK_CHECK);
12564  limit = gen_rtx_CONST (Pmode, limit);
12565  limit = gen_rtx_MEM (Pmode, limit);
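  /* The UNSPEC_STACK_CHECK memory reference built above is later printed
     as a thread-pointer-relative access (%fs on 64-bit, %gs on 32-bit
     GNU/Linux) to the stack boundary field that the -fsplit-stack
     runtime keeps in the TCB.  */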
12566  if (allocate < SPLIT_STACK_AVAILABLE)
12567    current = stack_pointer_rtx;
12568  else
12569    {
12570      unsigned int scratch_regno;
12571      rtx offset;
12572
12573      /* We need a scratch register to hold the stack pointer minus
12574	 the required frame size.  Since this is the very start of the
12575	 function, the scratch register can be any caller-saved
12576	 register which is not used for parameters.  */
12577      offset = GEN_INT (- allocate);
12578      scratch_regno = split_stack_prologue_scratch_regno ();
12579      if (scratch_regno == INVALID_REGNUM)
12580	return;
12581      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12582      if (!TARGET_64BIT || x86_64_immediate_operand (offset, Pmode))
12583	{
12584	  /* We don't use ix86_gen_add3 in this case because it will
12585	     want to split to lea, but when not optimizing the insn
12586	     will not be split after this point.  */
12587	  emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12588				  gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12589						offset)));
12590	}
12591      else
12592	{
12593	  emit_move_insn (scratch_reg, offset);
12594	  emit_insn (ix86_gen_add3 (scratch_reg, scratch_reg,
12595				    stack_pointer_rtx));
12596	}
12597      current = scratch_reg;
12598    }
12599
12600  ix86_expand_branch (GEU, current, limit, label);
12601  jump_insn = get_last_insn ();
12602  JUMP_LABEL (jump_insn) = label;
12603
12604  /* Mark the jump as very likely to be taken.  */
12605  add_int_reg_note (jump_insn, REG_BR_PROB,
12606		    REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
12607
12608  if (split_stack_fn == NULL_RTX)
12609    {
12610      split_stack_fn = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
12611      SYMBOL_REF_FLAGS (split_stack_fn) |= SYMBOL_FLAG_LOCAL;
12612    }
12613  fn = split_stack_fn;
12614
12615  /* Get more stack space.  We pass in the desired stack space and the
12616     size of the arguments to copy to the new stack.  In 32-bit mode
12617     we push the parameters; __morestack will return on a new stack
12618     anyhow.  In 64-bit mode we pass the parameters in r10 and
12619     r11.  */
12620  allocate_rtx = GEN_INT (allocate);
12621  args_size = crtl->args.size >= 0 ? crtl->args.size : 0;
12622  call_fusage = NULL_RTX;
12623  if (TARGET_64BIT)
12624    {
12625      rtx reg10, reg11;
12626
12627      reg10 = gen_rtx_REG (Pmode, R10_REG);
12628      reg11 = gen_rtx_REG (Pmode, R11_REG);
12629
12630      /* If this function uses a static chain, it will be in %r10.
12631	 Preserve it across the call to __morestack.  */
12632      if (DECL_STATIC_CHAIN (cfun->decl))
12633	{
12634	  rtx rax;
12635
12636	  rax = gen_rtx_REG (word_mode, AX_REG);
12637	  emit_move_insn (rax, gen_rtx_REG (word_mode, R10_REG));
12638	  use_reg (&call_fusage, rax);
12639	}
12640
12641      if ((ix86_cmodel == CM_LARGE || ix86_cmodel == CM_LARGE_PIC)
12642          && !TARGET_PECOFF)
12643	{
12644	  HOST_WIDE_INT argval;
12645
12646	  gcc_assert (Pmode == DImode);
12647	  /* When using the large model we need to load the address
12648	     into a register, and we've run out of registers.  So we
12649	     switch to a different calling convention, and we call a
12650	     different function: __morestack_large.  We pass the
12651	     argument size in the upper 32 bits of r10 and pass the
12652	     frame size in the lower 32 bits.  */
12653	  gcc_assert ((allocate & (HOST_WIDE_INT) 0xffffffff) == allocate);
12654	  gcc_assert ((args_size & 0xffffffff) == args_size);
12655
12656	  if (split_stack_fn_large == NULL_RTX)
12657	    {
12658	      split_stack_fn_large =
12659	        gen_rtx_SYMBOL_REF (Pmode, "__morestack_large_model");
12660	      SYMBOL_REF_FLAGS (split_stack_fn_large) |= SYMBOL_FLAG_LOCAL;
12661	    }
12662	  if (ix86_cmodel == CM_LARGE_PIC)
12663	    {
12664	      rtx_code_label *label;
12665	      rtx x;
12666
12667	      label = gen_label_rtx ();
12668	      emit_label (label);
12669	      LABEL_PRESERVE_P (label) = 1;
12670	      emit_insn (gen_set_rip_rex64 (reg10, label));
12671	      emit_insn (gen_set_got_offset_rex64 (reg11, label));
12672	      emit_insn (ix86_gen_add3 (reg10, reg10, reg11));
12673	      x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, split_stack_fn_large),
12674				  UNSPEC_GOT);
12675	      x = gen_rtx_CONST (Pmode, x);
12676	      emit_move_insn (reg11, x);
12677	      x = gen_rtx_PLUS (Pmode, reg10, reg11);
12678	      x = gen_const_mem (Pmode, x);
12679	      emit_move_insn (reg11, x);
12680	    }
12681	  else
12682	    emit_move_insn (reg11, split_stack_fn_large);
12683
12684	  fn = reg11;
12685
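	  /* That is, %r10 = (args_size << 32) + allocate, the packing
	     convention for __morestack_large_model described above.  */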
12686	  argval = ((args_size << 16) << 16) + allocate;
12687	  emit_move_insn (reg10, GEN_INT (argval));
12688	}
12689      else
12690	{
12691	  emit_move_insn (reg10, allocate_rtx);
12692	  emit_move_insn (reg11, GEN_INT (args_size));
12693	  use_reg (&call_fusage, reg11);
12694	}
12695
12696      use_reg (&call_fusage, reg10);
12697    }
12698  else
12699    {
12700      emit_insn (gen_push (GEN_INT (args_size)));
12701      emit_insn (gen_push (allocate_rtx));
12702    }
12703  call_insn = ix86_expand_call (NULL_RTX, gen_rtx_MEM (QImode, fn),
12704				GEN_INT (UNITS_PER_WORD), constm1_rtx,
12705				NULL_RTX, false);
12706  add_function_usage_to (call_insn, call_fusage);
12707
12708  /* In order to make call/return prediction work right, we now need
12709     to execute a return instruction.  See
12710     libgcc/config/i386/morestack.S for the details on how this works.
12711
12712     For flow purposes gcc must not see this as a return
12713     instruction--we need control flow to continue at the subsequent
12714     label.  Therefore, we use an unspec.  */
12715  gcc_assert (crtl->args.pops_args < 65536);
12716  emit_insn (gen_split_stack_return (GEN_INT (crtl->args.pops_args)));
12717
  /* If we are in 64-bit mode and this function uses a static chain,
     we saved %r10 in %rax before calling __morestack.  */
12720  if (TARGET_64BIT && DECL_STATIC_CHAIN (cfun->decl))
12721    emit_move_insn (gen_rtx_REG (word_mode, R10_REG),
12722		    gen_rtx_REG (word_mode, AX_REG));
12723
12724  /* If this function calls va_start, we need to store a pointer to
12725     the arguments on the old stack, because they may not have been
12726     all copied to the new stack.  At this point the old stack can be
12727     found at the frame pointer value used by __morestack, because
12728     __morestack has set that up before calling back to us.  Here we
12729     store that pointer in a scratch register, and in
12730     ix86_expand_prologue we store the scratch register in a stack
12731     slot.  */
12732  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12733    {
12734      unsigned int scratch_regno;
12735      rtx frame_reg;
12736      int words;
12737
12738      scratch_regno = split_stack_prologue_scratch_regno ();
12739      scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
12740      frame_reg = gen_rtx_REG (Pmode, BP_REG);
12741
12742      /* 64-bit:
12743	 fp -> old fp value
12744	       return address within this function
12745	       return address of caller of this function
12746	       stack arguments
12747	 So we add three words to get to the stack arguments.
12748
12749	 32-bit:
12750	 fp -> old fp value
12751	       return address within this function
12752               first argument to __morestack
12753               second argument to __morestack
12754               return address of caller of this function
12755               stack arguments
12756         So we add five words to get to the stack arguments.
12757      */
12758      words = TARGET_64BIT ? 3 : 5;
12759      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12760			      gen_rtx_PLUS (Pmode, frame_reg,
12761					    GEN_INT (words * UNITS_PER_WORD))));
12762
12763      varargs_label = gen_label_rtx ();
12764      emit_jump_insn (gen_jump (varargs_label));
12765      JUMP_LABEL (get_last_insn ()) = varargs_label;
12766
12767      emit_barrier ();
12768    }
12769
12770  emit_label (label);
12771  LABEL_NUSES (label) = 1;
12772
12773  /* If this function calls va_start, we now have to set the scratch
12774     register for the case where we do not call __morestack.  In this
12775     case we need to set it based on the stack pointer.  */
12776  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12777    {
12778      emit_insn (gen_rtx_SET (VOIDmode, scratch_reg,
12779			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12780					    GEN_INT (UNITS_PER_WORD))));
12781
12782      emit_label (varargs_label);
12783      LABEL_NUSES (varargs_label) = 1;
12784    }
12785}
12786
12787/* We may have to tell the dataflow pass that the split stack prologue
12788   is initializing a scratch register.  */
12789
12790static void
12791ix86_live_on_entry (bitmap regs)
12792{
12793  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12794    {
12795      gcc_assert (flag_split_stack);
12796      bitmap_set_bit (regs, split_stack_prologue_scratch_regno ());
12797    }
12798}
12799
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of an lea
   instruction.  */
12804
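/* For example, the address "4(%ebx,%esi,2)" decomposes into base = %ebx,
   index = %esi, scale = 2, disp = (const_int 4) and seg = SEG_DEFAULT.  */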
12805int
12806ix86_decompose_address (rtx addr, struct ix86_address *out)
12807{
12808  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
12809  rtx base_reg, index_reg;
12810  HOST_WIDE_INT scale = 1;
12811  rtx scale_rtx = NULL_RTX;
12812  rtx tmp;
12813  int retval = 1;
12814  enum ix86_address_seg seg = SEG_DEFAULT;
12815
  /* Allow zero-extended SImode addresses; they will be emitted
     with the addr32 prefix.  */
12818  if (TARGET_64BIT && GET_MODE (addr) == DImode)
12819    {
12820      if (GET_CODE (addr) == ZERO_EXTEND
12821	  && GET_MODE (XEXP (addr, 0)) == SImode)
12822	{
12823	  addr = XEXP (addr, 0);
12824	  if (CONST_INT_P (addr))
12825	    return 0;
12826	}
12827      else if (GET_CODE (addr) == AND
12828	       && const_32bit_mask (XEXP (addr, 1), DImode))
12829	{
12830	  addr = simplify_gen_subreg (SImode, XEXP (addr, 0), DImode, 0);
12831	  if (addr == NULL_RTX)
12832	    return 0;
12833
12834	  if (CONST_INT_P (addr))
12835	    return 0;
12836	}
12837    }
12838
  /* Allow SImode subregs of DImode addresses; they will be emitted
     with the addr32 prefix.  */
12841  if (TARGET_64BIT && GET_MODE (addr) == SImode)
12842    {
12843      if (GET_CODE (addr) == SUBREG
12844	  && GET_MODE (SUBREG_REG (addr)) == DImode)
12845	{
12846	  addr = SUBREG_REG (addr);
12847	  if (CONST_INT_P (addr))
12848	    return 0;
12849	}
12850    }
12851
12852  if (REG_P (addr))
12853    base = addr;
12854  else if (GET_CODE (addr) == SUBREG)
12855    {
12856      if (REG_P (SUBREG_REG (addr)))
12857	base = addr;
12858      else
12859	return 0;
12860    }
12861  else if (GET_CODE (addr) == PLUS)
12862    {
12863      rtx addends[4], op;
12864      int n = 0, i;
12865
12866      op = addr;
12867      do
12868	{
12869	  if (n >= 4)
12870	    return 0;
12871	  addends[n++] = XEXP (op, 1);
12872	  op = XEXP (op, 0);
12873	}
12874      while (GET_CODE (op) == PLUS);
12875      if (n >= 4)
12876	return 0;
12877      addends[n] = op;
12878
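      /* Classify each addend as the base, the scaled index, a segment
	 override (via UNSPEC_TP) or the displacement.  */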
12879      for (i = n; i >= 0; --i)
12880	{
12881	  op = addends[i];
12882	  switch (GET_CODE (op))
12883	    {
12884	    case MULT:
12885	      if (index)
12886		return 0;
12887	      index = XEXP (op, 0);
12888	      scale_rtx = XEXP (op, 1);
12889	      break;
12890
12891	    case ASHIFT:
12892	      if (index)
12893		return 0;
12894	      index = XEXP (op, 0);
12895	      tmp = XEXP (op, 1);
12896	      if (!CONST_INT_P (tmp))
12897		return 0;
12898	      scale = INTVAL (tmp);
12899	      if ((unsigned HOST_WIDE_INT) scale > 3)
12900		return 0;
12901	      scale = 1 << scale;
12902	      break;
12903
12904	    case ZERO_EXTEND:
12905	      op = XEXP (op, 0);
12906	      if (GET_CODE (op) != UNSPEC)
12907		return 0;
12908	      /* FALLTHRU */
12909
12910	    case UNSPEC:
12911	      if (XINT (op, 1) == UNSPEC_TP
12912	          && TARGET_TLS_DIRECT_SEG_REFS
12913	          && seg == SEG_DEFAULT)
12914		seg = DEFAULT_TLS_SEG_REG;
12915	      else
12916		return 0;
12917	      break;
12918
12919	    case SUBREG:
12920	      if (!REG_P (SUBREG_REG (op)))
12921		return 0;
12922	      /* FALLTHRU */
12923
12924	    case REG:
12925	      if (!base)
12926		base = op;
12927	      else if (!index)
12928		index = op;
12929	      else
12930		return 0;
12931	      break;
12932
12933	    case CONST:
12934	    case CONST_INT:
12935	    case SYMBOL_REF:
12936	    case LABEL_REF:
12937	      if (disp)
12938		return 0;
12939	      disp = op;
12940	      break;
12941
12942	    default:
12943	      return 0;
12944	    }
12945	}
12946    }
12947  else if (GET_CODE (addr) == MULT)
12948    {
12949      index = XEXP (addr, 0);		/* index*scale */
12950      scale_rtx = XEXP (addr, 1);
12951    }
12952  else if (GET_CODE (addr) == ASHIFT)
12953    {
12954      /* We're called for lea too, which implements ashift on occasion.  */
12955      index = XEXP (addr, 0);
12956      tmp = XEXP (addr, 1);
12957      if (!CONST_INT_P (tmp))
12958	return 0;
12959      scale = INTVAL (tmp);
12960      if ((unsigned HOST_WIDE_INT) scale > 3)
12961	return 0;
12962      scale = 1 << scale;
12963      retval = -1;
12964    }
12965  else
12966    disp = addr;			/* displacement */
12967
12968  if (index)
12969    {
12970      if (REG_P (index))
12971	;
12972      else if (GET_CODE (index) == SUBREG
12973	       && REG_P (SUBREG_REG (index)))
12974	;
12975      else
12976	return 0;
12977    }
12978
12979  /* Extract the integral value of scale.  */
12980  if (scale_rtx)
12981    {
12982      if (!CONST_INT_P (scale_rtx))
12983	return 0;
12984      scale = INTVAL (scale_rtx);
12985    }
12986
12987  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
12988  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
12989
12990  /* Avoid useless 0 displacement.  */
12991  if (disp == const0_rtx && (base || index))
12992    disp = NULL_RTX;
12993
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
12995  if (base_reg && index_reg && scale == 1
12996      && (index_reg == arg_pointer_rtx
12997	  || index_reg == frame_pointer_rtx
12998	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
12999    {
13000      std::swap (base, index);
13001      std::swap (base_reg, index_reg);
13002    }
13003
13004  /* Special case: %ebp cannot be encoded as a base without a displacement.
13005     Similarly %r13.  */
13006  if (!disp
13007      && base_reg
13008      && (base_reg == hard_frame_pointer_rtx
13009	  || base_reg == frame_pointer_rtx
13010	  || base_reg == arg_pointer_rtx
13011	  || (REG_P (base_reg)
13012	      && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
13013		  || REGNO (base_reg) == R13_REG))))
13014    disp = const0_rtx;
13015
  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming to [%esi+0].
     Reload calls address legitimization without cfun defined, so we need
     to test cfun for being non-NULL.  */
13020  if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
13021      && base_reg && !index_reg && !disp
13022      && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
13023    disp = const0_rtx;
13024
13025  /* Special case: encode reg+reg instead of reg*2.  */
13026  if (!base && index && scale == 2)
13027    base = index, base_reg = index_reg, scale = 1;
13028
13029  /* Special case: scaling cannot be encoded without base or displacement.  */
13030  if (!base && !disp && index && scale != 1)
13031    disp = const0_rtx;
13032
13033  out->base = base;
13034  out->index = index;
13035  out->disp = disp;
13036  out->scale = scale;
13037  out->seg = seg;
13038
13039  return retval;
13040}
13041
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
13047static int
13048ix86_address_cost (rtx x, machine_mode, addr_space_t, bool)
13049{
13050  struct ix86_address parts;
13051  int cost = 1;
13052  int ok = ix86_decompose_address (x, &parts);
13053
13054  gcc_assert (ok);
13055
13056  if (parts.base && GET_CODE (parts.base) == SUBREG)
13057    parts.base = SUBREG_REG (parts.base);
13058  if (parts.index && GET_CODE (parts.index) == SUBREG)
13059    parts.index = SUBREG_REG (parts.index);
13060
  /* Attempt to minimize the number of registers in the address by
     increasing the address cost for each register used.  We don't
     increase the address cost for "pic_offset_table_rtx".  When a memory
     operand using "pic_offset_table_rtx" is not invariant itself, it most
     likely means that the base or index is not invariant.  Therefore only
     "pic_offset_table_rtx" could be hoisted out, which is not profitable
     for x86.  */
13067  if (parts.base
13068      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
13069      && (current_pass->type == GIMPLE_PASS
13070	  || !pic_offset_table_rtx
13071	  || !REG_P (parts.base)
13072	  || REGNO (pic_offset_table_rtx) != REGNO (parts.base)))
13073    cost++;
13074
13075  if (parts.index
13076      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
13077      && (current_pass->type == GIMPLE_PASS
13078	  || !pic_offset_table_rtx
13079	  || !REG_P (parts.index)
13080	  || REGNO (pic_offset_table_rtx) != REGNO (parts.index)))
13081    cost++;
13082
  /* AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of instructions
     and it degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is at least 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them at all.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the
     zero in the memory address, but I don't have an AMD-K6 machine handy
     to check this theory.  */
13097
13098  if (TARGET_K6
13099      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
13100	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
13101	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
13102    cost += 10;
13103
13104  return cost;
13105}
13106
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
   this is used to form addresses to local data when -fPIC is in
   use.  */
13110
13111static bool
13112darwin_local_data_pic (rtx disp)
13113{
13114  return (GET_CODE (disp) == UNSPEC
13115	  && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
13116}
13117
13118/* Determine if a given RTX is a valid constant.  We already know this
13119   satisfies CONSTANT_P.  */
13120
13121static bool
13122ix86_legitimate_constant_p (machine_mode, rtx x)
13123{
13124  /* Pointer bounds constants are not valid.  */
13125  if (POINTER_BOUNDS_MODE_P (GET_MODE (x)))
13126    return false;
13127
13128  switch (GET_CODE (x))
13129    {
13130    case CONST:
13131      x = XEXP (x, 0);
13132
13133      if (GET_CODE (x) == PLUS)
13134	{
13135	  if (!CONST_INT_P (XEXP (x, 1)))
13136	    return false;
13137	  x = XEXP (x, 0);
13138	}
13139
13140      if (TARGET_MACHO && darwin_local_data_pic (x))
13141	return true;
13142
13143      /* Only some unspecs are valid as "constants".  */
13144      if (GET_CODE (x) == UNSPEC)
13145	switch (XINT (x, 1))
13146	  {
13147	  case UNSPEC_GOT:
13148	  case UNSPEC_GOTOFF:
13149	  case UNSPEC_PLTOFF:
13150	    return TARGET_64BIT;
13151	  case UNSPEC_TPOFF:
13152	  case UNSPEC_NTPOFF:
13153	    x = XVECEXP (x, 0, 0);
13154	    return (GET_CODE (x) == SYMBOL_REF
13155		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13156	  case UNSPEC_DTPOFF:
13157	    x = XVECEXP (x, 0, 0);
13158	    return (GET_CODE (x) == SYMBOL_REF
13159		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
13160	  default:
13161	    return false;
13162	  }
13163
13164      /* We must have drilled down to a symbol.  */
13165      if (GET_CODE (x) == LABEL_REF)
13166	return true;
13167      if (GET_CODE (x) != SYMBOL_REF)
13168	return false;
13169      /* FALLTHRU */
13170
13171    case SYMBOL_REF:
13172      /* TLS symbols are never valid.  */
13173      if (SYMBOL_REF_TLS_MODEL (x))
13174	return false;
13175
13176      /* DLLIMPORT symbols are never valid.  */
13177      if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13178	  && SYMBOL_REF_DLLIMPORT_P (x))
13179	return false;
13180
13181#if TARGET_MACHO
13182      /* mdynamic-no-pic */
13183      if (MACHO_DYNAMIC_NO_PIC_P)
13184	return machopic_symbol_defined_p (x);
13185#endif
13186      break;
13187
13188    case CONST_DOUBLE:
13189      if (GET_MODE (x) == TImode
13190	  && x != CONST0_RTX (TImode)
13191          && !TARGET_64BIT)
13192	return false;
13193      break;
13194
13195    case CONST_VECTOR:
13196      if (!standard_sse_constant_p (x))
13197	return false;
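      /* FALLTHRU */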
13198
13199    default:
13200      break;
13201    }
13202
13203  /* Otherwise we handle everything else in the move patterns.  */
13204  return true;
13205}
13206
13207/* Determine if it's legal to put X into the constant pool.  This
13208   is not possible for the address of thread-local symbols, which
13209   is checked above.  */
13210
13211static bool
13212ix86_cannot_force_const_mem (machine_mode mode, rtx x)
13213{
13214  /* We can always put integral constants and vectors in memory.  */
13215  switch (GET_CODE (x))
13216    {
13217    case CONST_INT:
13218    case CONST_DOUBLE:
13219    case CONST_VECTOR:
13220      return false;
13221
13222    default:
13223      break;
13224    }
13225  return !ix86_legitimate_constant_p (mode, x);
13226}
13227
/*  Return true if the symbol is marked as dllimport, or as a stub
    variable; otherwise return false.  */
13230
13231static bool
13232is_imported_p (rtx x)
13233{
13234  if (!TARGET_DLLIMPORT_DECL_ATTRIBUTES
13235      || GET_CODE (x) != SYMBOL_REF)
13236    return false;
13237
13238  return SYMBOL_REF_DLLIMPORT_P (x) || SYMBOL_REF_STUBVAR_P (x);
13239}
13240
13241
13242/* Nonzero if the constant value X is a legitimate general operand
13243   when generating PIC code.  It is given that flag_pic is on and
13244   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
13245
13246bool
13247legitimate_pic_operand_p (rtx x)
13248{
13249  rtx inner;
13250
13251  switch (GET_CODE (x))
13252    {
13253    case CONST:
13254      inner = XEXP (x, 0);
13255      if (GET_CODE (inner) == PLUS
13256	  && CONST_INT_P (XEXP (inner, 1)))
13257	inner = XEXP (inner, 0);
13258
13259      /* Only some unspecs are valid as "constants".  */
13260      if (GET_CODE (inner) == UNSPEC)
13261	switch (XINT (inner, 1))
13262	  {
13263	  case UNSPEC_GOT:
13264	  case UNSPEC_GOTOFF:
13265	  case UNSPEC_PLTOFF:
13266	    return TARGET_64BIT;
13267	  case UNSPEC_TPOFF:
13268	    x = XVECEXP (inner, 0, 0);
13269	    return (GET_CODE (x) == SYMBOL_REF
13270		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
13271	  case UNSPEC_MACHOPIC_OFFSET:
13272	    return legitimate_pic_address_disp_p (x);
13273	  default:
13274	    return false;
13275	  }
13276      /* FALLTHRU */
13277
13278    case SYMBOL_REF:
13279    case LABEL_REF:
13280      return legitimate_pic_address_disp_p (x);
13281
13282    default:
13283      return true;
13284    }
13285}
13286
13287/* Determine if a given CONST RTX is a valid memory displacement
13288   in PIC mode.  */
13289
13290bool
13291legitimate_pic_address_disp_p (rtx disp)
13292{
13293  bool saw_plus;
13294
13295  /* In 64bit mode we can allow direct addresses of symbols and labels
13296     when they are not dynamic symbols.  */
13297  if (TARGET_64BIT)
13298    {
13299      rtx op0 = disp, op1;
13300
13301      switch (GET_CODE (disp))
13302	{
13303	case LABEL_REF:
13304	  return true;
13305
13306	case CONST:
13307	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
13308	    break;
13309	  op0 = XEXP (XEXP (disp, 0), 0);
13310	  op1 = XEXP (XEXP (disp, 0), 1);
13311	  if (!CONST_INT_P (op1)
13312	      || INTVAL (op1) >= 16*1024*1024
13313	      || INTVAL (op1) < -16*1024*1024)
13314            break;
13315	  if (GET_CODE (op0) == LABEL_REF)
13316	    return true;
13317	  if (GET_CODE (op0) == CONST
13318	      && GET_CODE (XEXP (op0, 0)) == UNSPEC
13319	      && XINT (XEXP (op0, 0), 1) == UNSPEC_PCREL)
13320	    return true;
13321	  if (GET_CODE (op0) == UNSPEC
13322	      && XINT (op0, 1) == UNSPEC_PCREL)
13323	    return true;
13324	  if (GET_CODE (op0) != SYMBOL_REF)
13325	    break;
13326	  /* FALLTHRU */
13327
13328	case SYMBOL_REF:
	  /* TLS references should always be enclosed in UNSPEC.
	     A dllimported symbol always needs to be resolved.  */
13331	  if (SYMBOL_REF_TLS_MODEL (op0)
13332	      || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && SYMBOL_REF_DLLIMPORT_P (op0)))
13333	    return false;
13334
13335	  if (TARGET_PECOFF)
13336	    {
13337	      if (is_imported_p (op0))
13338		return true;
13339
13340	      if (SYMBOL_REF_FAR_ADDR_P (op0)
13341		  || !SYMBOL_REF_LOCAL_P (op0))
13342		break;
13343
	      /* Function symbols need to be resolved only for the
	         large model.  For the small model we don't need to
	         resolve anything here.  */
13348	      if ((ix86_cmodel != CM_LARGE_PIC
13349	           && SYMBOL_REF_FUNCTION_P (op0))
13350		  || ix86_cmodel == CM_SMALL_PIC)
13351		return true;
	      /* Non-external symbols don't need to be resolved for the
	         large and medium models.  */
13354	      if ((ix86_cmodel == CM_LARGE_PIC
13355		   || ix86_cmodel == CM_MEDIUM_PIC)
13356		  && !SYMBOL_REF_EXTERNAL_P (op0))
13357		return true;
13358	    }
13359	  else if (!SYMBOL_REF_FAR_ADDR_P (op0)
13360		   && (SYMBOL_REF_LOCAL_P (op0)
13361		       || (HAVE_LD_PIE_COPYRELOC
13362			   && flag_pie
13363			   && !SYMBOL_REF_WEAK (op0)
13364			   && !SYMBOL_REF_FUNCTION_P (op0)))
13365		   && ix86_cmodel != CM_LARGE_PIC)
13366	    return true;
13367	  break;
13368
13369	default:
13370	  break;
13371	}
13372    }
13373  if (GET_CODE (disp) != CONST)
13374    return false;
13375  disp = XEXP (disp, 0);
13376
13377  if (TARGET_64BIT)
13378    {
      /* It is unsafe to allow PLUS expressions here.  This limits the
	 allowed distance of GOT table references.  We should not need
	 these anyway.  */
13381      if (GET_CODE (disp) != UNSPEC
13382	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
13383	      && XINT (disp, 1) != UNSPEC_GOTOFF
13384	      && XINT (disp, 1) != UNSPEC_PCREL
13385	      && XINT (disp, 1) != UNSPEC_PLTOFF))
13386	return false;
13387
13388      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
13389	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
13390	return false;
13391      return true;
13392    }
13393
13394  saw_plus = false;
13395  if (GET_CODE (disp) == PLUS)
13396    {
13397      if (!CONST_INT_P (XEXP (disp, 1)))
13398	return false;
13399      disp = XEXP (disp, 0);
13400      saw_plus = true;
13401    }
13402
13403  if (TARGET_MACHO && darwin_local_data_pic (disp))
13404    return true;
13405
13406  if (GET_CODE (disp) != UNSPEC)
13407    return false;
13408
13409  switch (XINT (disp, 1))
13410    {
13411    case UNSPEC_GOT:
13412      if (saw_plus)
13413	return false;
13414      /* We need to check for both symbols and labels because VxWorks loads
13415	 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
13416	 details.  */
13417      return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13418	      || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
13419    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
	 While the ABI also specifies a 32bit relocation, we don't produce
	 it in the small PIC model at all.  */
13423      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
13424	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
13425	  && !TARGET_64BIT)
13426        return !TARGET_PECOFF && gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
13427      return false;
13428    case UNSPEC_GOTTPOFF:
13429    case UNSPEC_GOTNTPOFF:
13430    case UNSPEC_INDNTPOFF:
13431      if (saw_plus)
13432	return false;
13433      disp = XVECEXP (disp, 0, 0);
13434      return (GET_CODE (disp) == SYMBOL_REF
13435	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
13436    case UNSPEC_NTPOFF:
13437      disp = XVECEXP (disp, 0, 0);
13438      return (GET_CODE (disp) == SYMBOL_REF
13439	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
13440    case UNSPEC_DTPOFF:
13441      disp = XVECEXP (disp, 0, 0);
13442      return (GET_CODE (disp) == SYMBOL_REF
13443	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
13444    }
13445
13446  return false;
13447}
13448
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Return true if we
   have pushed reloads for (part of) the address X, in which case the
   calling macro should goto WIN; return false if no replacement is
   called for.  */
13453
13454bool
13455ix86_legitimize_reload_address (rtx x, machine_mode, int opnum, int type,
13456			       	int)
13457{
13458  /* Reload can generate:
13459
13460     (plus:DI (plus:DI (unspec:DI [(const_int 0 [0])] UNSPEC_TP)
13461		       (reg:DI 97))
13462	      (reg:DI 2 cx))
13463
13464     This RTX is rejected from ix86_legitimate_address_p due to
13465     non-strictness of base register 97.  Following this rejection,
13466     reload pushes all three components into separate registers,
13467     creating invalid memory address RTX.
13468
13469     Following code reloads only the invalid part of the
13470     memory address RTX.  */
13471
13472  if (GET_CODE (x) == PLUS
13473      && REG_P (XEXP (x, 1))
13474      && GET_CODE (XEXP (x, 0)) == PLUS
13475      && REG_P (XEXP (XEXP (x, 0), 1)))
13476    {
13477      rtx base, index;
13478      bool something_reloaded = false;
13479
13480      base = XEXP (XEXP (x, 0), 1);
13481      if (!REG_OK_FOR_BASE_STRICT_P (base))
13482	{
13483	  push_reload (base, NULL_RTX, &XEXP (XEXP (x, 0), 1), NULL,
13484		       BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13485		       opnum, (enum reload_type) type);
13486	  something_reloaded = true;
13487	}
13488
13489      index = XEXP (x, 1);
13490      if (!REG_OK_FOR_INDEX_STRICT_P (index))
13491	{
13492	  push_reload (index, NULL_RTX, &XEXP (x, 1), NULL,
13493		       INDEX_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
13494		       opnum, (enum reload_type) type);
13495	  something_reloaded = true;
13496	}
13497
13498      gcc_assert (something_reloaded);
13499      return true;
13500    }
13501
13502  return false;
13503}
13504
/* Determine if OP is a suitable RTX for an address register.
   Return the naked register if a register or a register subreg is
   found, otherwise return NULL_RTX.  */
13508
13509static rtx
13510ix86_validate_address_register (rtx op)
13511{
13512  machine_mode mode = GET_MODE (op);
13513
13514  /* Only SImode or DImode registers can form the address.  */
13515  if (mode != SImode && mode != DImode)
13516    return NULL_RTX;
13517
13518  if (REG_P (op))
13519    return op;
13520  else if (GET_CODE (op) == SUBREG)
13521    {
13522      rtx reg = SUBREG_REG (op);
13523
13524      if (!REG_P (reg))
13525	return NULL_RTX;
13526
13527      mode = GET_MODE (reg);
13528
13529      /* Don't allow SUBREGs that span more than a word.  It can
13530	 lead to spill failures when the register is one word out
13531	 of a two word structure.  */
13532      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
13533	return NULL_RTX;
13534
13535      /* Allow only SUBREGs of non-eliminable hard registers.  */
13536      if (register_no_elim_operand (reg, mode))
13537	return reg;
13538    }
13539
13540  /* Op is not a register.  */
13541  return NULL_RTX;
13542}
13543
13544/* Recognizes RTL expressions that are valid memory addresses for an
13545   instruction.  The MODE argument is the machine mode for the MEM
13546   expression that wants to use this address.
13547
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */
13551
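/* For example, the canonical RTL for the address "4(%ebx,%esi,2)" is
   (plus:SI (plus:SI (mult:SI (reg:SI si) (const_int 2)) (reg:SI bx))
	    (const_int 4)).  */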
13552static bool
13553ix86_legitimate_address_p (machine_mode, rtx addr, bool strict)
13554{
13555  struct ix86_address parts;
13556  rtx base, index, disp;
13557  HOST_WIDE_INT scale;
13558  enum ix86_address_seg seg;
13559
13560  if (ix86_decompose_address (addr, &parts) <= 0)
13561    /* Decomposition failed.  */
13562    return false;
13563
13564  base = parts.base;
13565  index = parts.index;
13566  disp = parts.disp;
13567  scale = parts.scale;
13568  seg = parts.seg;
13569
13570  /* Validate base register.  */
13571  if (base)
13572    {
13573      rtx reg = ix86_validate_address_register (base);
13574
13575      if (reg == NULL_RTX)
13576	return false;
13577
13578      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
13579	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
13580	/* Base is not valid.  */
13581	return false;
13582    }
13583
13584  /* Validate index register.  */
13585  if (index)
13586    {
13587      rtx reg = ix86_validate_address_register (index);
13588
13589      if (reg == NULL_RTX)
13590	return false;
13591
13592      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
13593	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
13594	/* Index is not valid.  */
13595	return false;
13596    }
13597
13598  /* Index and base should have the same mode.  */
13599  if (base && index
13600      && GET_MODE (base) != GET_MODE (index))
13601    return false;
13602
13603  /* Address override works only on the (%reg) part of %fs:(%reg).  */
13604  if (seg != SEG_DEFAULT
13605      && ((base && GET_MODE (base) != word_mode)
13606	  || (index && GET_MODE (index) != word_mode)))
13607    return false;
13608
13609  /* Validate scale factor.  */
13610  if (scale != 1)
13611    {
13612      if (!index)
13613	/* Scale without index.  */
13614	return false;
13615
13616      if (scale != 2 && scale != 4 && scale != 8)
13617	/* Scale is not a valid multiplier.  */
13618	return false;
13619    }
13620
13621  /* Validate displacement.  */
13622  if (disp)
13623    {
13624      if (GET_CODE (disp) == CONST
13625	  && GET_CODE (XEXP (disp, 0)) == UNSPEC
13626	  && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
13627	switch (XINT (XEXP (disp, 0), 1))
13628	  {
	  /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit
	     when used.  While the ABI also specifies 32bit relocations, we
	     don't produce them at all and use IP-relative addressing
	     instead.  */
13632	  case UNSPEC_GOT:
13633	  case UNSPEC_GOTOFF:
13634	    gcc_assert (flag_pic);
13635	    if (!TARGET_64BIT)
13636	      goto is_legitimate_pic;
13637
13638	    /* 64bit address unspec.  */
13639	    return false;
13640
13641	  case UNSPEC_GOTPCREL:
13642	  case UNSPEC_PCREL:
13643	    gcc_assert (flag_pic);
13644	    goto is_legitimate_pic;
13645
13646	  case UNSPEC_GOTTPOFF:
13647	  case UNSPEC_GOTNTPOFF:
13648	  case UNSPEC_INDNTPOFF:
13649	  case UNSPEC_NTPOFF:
13650	  case UNSPEC_DTPOFF:
13651	    break;
13652
13653	  case UNSPEC_STACK_CHECK:
13654	    gcc_assert (flag_split_stack);
13655	    break;
13656
13657	  default:
13658	    /* Invalid address unspec.  */
13659	    return false;
13660	  }
13661
13662      else if (SYMBOLIC_CONST (disp)
13663	       && (flag_pic
13664		   || (TARGET_MACHO
13665#if TARGET_MACHO
13666		       && MACHOPIC_INDIRECT
13667		       && !machopic_operand_p (disp)
13668#endif
13669	       )))
13670	{
13671
13672	is_legitimate_pic:
13673	  if (TARGET_64BIT && (index || base))
13674	    {
13675	      /* foo@dtpoff(%rX) is ok.  */
13676	      if (GET_CODE (disp) != CONST
13677		  || GET_CODE (XEXP (disp, 0)) != PLUS
13678		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
13679		  || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
13680		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
13681		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
13682		/* Non-constant pic memory reference.  */
13683		return false;
13684	    }
13685	  else if ((!TARGET_MACHO || flag_pic)
13686		    && ! legitimate_pic_address_disp_p (disp))
13687	    /* Displacement is an invalid pic construct.  */
13688	    return false;
13689#if TARGET_MACHO
13690	  else if (MACHO_DYNAMIC_NO_PIC_P
13691		   && !ix86_legitimate_constant_p (Pmode, disp))
	    /* Displacement must be referenced via non_lazy_pointer.  */
13693	    return false;
13694#endif
13695
          /* This code used to verify that a symbolic pic displacement
	     includes the pic_offset_table_rtx register.

	     While this is a good idea, unfortunately these constructs may
	     be created by the "adds using lea" optimization for incorrect
	     code like:

	     int a;
	     int foo(int i)
	       {
	         return *(&a+i);
	       }

	     This code is nonsensical, but results in addressing the
	     GOT table with a pic_offset_table_rtx base.  We can't
	     just refuse it easily, since it gets matched by the
	     "addsi3" pattern, which later gets split to lea when the
	     output register differs from the input.  While this
	     could be handled by a separate addsi pattern for this case
	     that never results in lea, disabling this test seems to be
	     the easier and correct fix for the crash.  */
13717	}
13718      else if (GET_CODE (disp) != LABEL_REF
13719	       && !CONST_INT_P (disp)
13720	       && (GET_CODE (disp) != CONST
13721		   || !ix86_legitimate_constant_p (Pmode, disp))
13722	       && (GET_CODE (disp) != SYMBOL_REF
13723		   || !ix86_legitimate_constant_p (Pmode, disp)))
13724	/* Displacement is not constant.  */
13725	return false;
13726      else if (TARGET_64BIT
13727	       && !x86_64_immediate_operand (disp, VOIDmode))
13728	/* Displacement is out of range.  */
13729	return false;
      /* In x32 mode, constant addresses are sign extended to 64bit, so
	 we have to reject addresses from 0x80000000 to 0xffffffff.  */
13732      else if (TARGET_X32 && !(index || base)
13733	       && CONST_INT_P (disp)
13734	       && val_signbit_known_set_p (SImode, INTVAL (disp)))
13735	return false;
13736    }
13737
13738  /* Everything looks valid.  */
13739  return true;
13740}
13741
13742/* Determine if a given RTX is a valid constant address.  */
13743
13744bool
13745constant_address_p (rtx x)
13746{
13747  return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
13748}
13749
13750/* Return a unique alias set for the GOT.  */
13751
13752static alias_set_type
13753ix86_GOT_alias_set (void)
13754{
13755  static alias_set_type set = -1;
13756  if (set == -1)
13757    set = new_alias_set ();
13758  return set;
13759}
13760
13761/* Set regs_ever_live for PIC base address register
13762   to true if required.  */
13763static void
13764set_pic_reg_ever_live ()
13765{
13766  if (reload_in_progress)
13767    df_set_regs_ever_live (REGNO (pic_offset_table_rtx), true);
13768}
13769
13770/* Return a legitimate reference for ORIG (an address) using the
13771   register REG.  If REG is 0, a new pseudo is generated.
13772
13773   There are two types of references that must be handled:
13774
13775   1. Global data references must load the address from the GOT, via
13776      the PIC reg.  An insn is emitted to do this load, and the reg is
13777      returned.
13778
13779   2. Static data references, constant pool addresses, and code labels
13780      compute the address as an offset from the GOT, whose base is in
13781      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
13782      differentiate them from global data objects.  The returned
13783      address is the PIC reg + an unspec constant.
13784
13785   TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
13786   reg also appears in the address.  */
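/* For example, in 32-bit PIC code a reference to global data "foo" is
   loaded from the GOT as "movl foo@GOT(%ebx), %reg", while local data
   "bar" is addressed as "bar@GOTOFF(%ebx)", an offset from the GOT base
   kept in the PIC register.  */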
13787
13788static rtx
13789legitimize_pic_address (rtx orig, rtx reg)
13790{
13791  rtx addr = orig;
13792  rtx new_rtx = orig;
13793
13794#if TARGET_MACHO
13795  if (TARGET_MACHO && !TARGET_64BIT)
13796    {
13797      if (reg == 0)
13798	reg = gen_reg_rtx (Pmode);
13799      /* Use the generic Mach-O PIC machinery.  */
13800      return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
13801    }
13802#endif
13803
13804  if (TARGET_64BIT && TARGET_DLLIMPORT_DECL_ATTRIBUTES)
13805    {
13806      rtx tmp = legitimize_pe_coff_symbol (addr, true);
13807      if (tmp)
13808        return tmp;
13809    }
13810
13811  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
13812    new_rtx = addr;
13813  else if (TARGET_64BIT && !TARGET_PECOFF
13814	   && ix86_cmodel != CM_SMALL_PIC && gotoff_operand (addr, Pmode))
13815    {
13816      rtx tmpreg;
13817      /* This symbol may be referenced via a displacement from the PIC
13818	 base address (@GOTOFF).  */
13819
13820      set_pic_reg_ever_live ();
13821      if (GET_CODE (addr) == CONST)
13822	addr = XEXP (addr, 0);
13823      if (GET_CODE (addr) == PLUS)
13824	  {
13825            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13826				      UNSPEC_GOTOFF);
13827	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13828	  }
13829	else
13830          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13831      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13832      if (!reg)
13833        tmpreg = gen_reg_rtx (Pmode);
13834      else
13835	tmpreg = reg;
13836      emit_move_insn (tmpreg, new_rtx);
13837
13838      if (reg != 0)
13839	{
13840	  new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
13841					 tmpreg, 1, OPTAB_DIRECT);
13842	  new_rtx = reg;
13843	}
13844      else
13845        new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
13846    }
13847  else if (!TARGET_64BIT && !TARGET_PECOFF && gotoff_operand (addr, Pmode))
13848    {
13849      /* This symbol may be referenced via a displacement from the PIC
13850	 base address (@GOTOFF).  */
13851
13852      set_pic_reg_ever_live ();
13853      if (GET_CODE (addr) == CONST)
13854	addr = XEXP (addr, 0);
13855      if (GET_CODE (addr) == PLUS)
13856	  {
13857            new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
13858				      UNSPEC_GOTOFF);
13859	    new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
13860	  }
13861	else
13862          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
13863      new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13864      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13865
13866      if (reg != 0)
13867	{
13868	  emit_move_insn (reg, new_rtx);
13869	  new_rtx = reg;
13870	}
13871    }
13872  else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
13873	   /* We can't use @GOTOFF for text labels on VxWorks;
13874	      see gotoff_operand.  */
13875	   || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
13876    {
13877      rtx tmp = legitimize_pe_coff_symbol (addr, true);
13878      if (tmp)
13879        return tmp;
13880
      /* For x64 PE-COFF there is no GOT table, so we use the address
	 directly.  */
13883      if (TARGET_64BIT && TARGET_PECOFF)
13884	{
13885	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_PCREL);
13886	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13887
13888	  if (reg == 0)
13889	    reg = gen_reg_rtx (Pmode);
13890	  emit_move_insn (reg, new_rtx);
13891	  new_rtx = reg;
13892	}
13893      else if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
13894	{
13895	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
13896	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13897	  new_rtx = gen_const_mem (Pmode, new_rtx);
13898	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13899
13900	  if (reg == 0)
13901	    reg = gen_reg_rtx (Pmode);
	  /* Use gen_movsi directly, otherwise the address is loaded
	     into a register for CSE.  We don't want to CSE these
	     addresses; instead we CSE addresses loaded from the GOT
	     table, so skip this.  */
13905	  emit_insn (gen_movsi (reg, new_rtx));
13906	  new_rtx = reg;
13907	}
13908      else
13909	{
13910	  /* This symbol must be referenced via a load from the
13911	     Global Offset Table (@GOT).  */
13912
13913	  set_pic_reg_ever_live ();
13914	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
13915	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13916	  if (TARGET_64BIT)
13917	    new_rtx = force_reg (Pmode, new_rtx);
13918	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13919	  new_rtx = gen_const_mem (Pmode, new_rtx);
13920	  set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
13921
13922	  if (reg == 0)
13923	    reg = gen_reg_rtx (Pmode);
13924	  emit_move_insn (reg, new_rtx);
13925	  new_rtx = reg;
13926	}
13927    }
13928  else
13929    {
13930      if (CONST_INT_P (addr)
13931	  && !x86_64_immediate_operand (addr, VOIDmode))
13932	{
13933	  if (reg)
13934	    {
13935	      emit_move_insn (reg, addr);
13936	      new_rtx = reg;
13937	    }
13938	  else
13939	    new_rtx = force_reg (Pmode, addr);
13940	}
13941      else if (GET_CODE (addr) == CONST)
13942	{
13943	  addr = XEXP (addr, 0);
13944
13945	  /* We must match stuff we generate before.  Assume the only
13946	     unspecs that can get here are ours.  Not that we could do
13947	     anything with them anyway....  */
13948	  if (GET_CODE (addr) == UNSPEC
13949	      || (GET_CODE (addr) == PLUS
13950		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
13951	    return orig;
13952	  gcc_assert (GET_CODE (addr) == PLUS);
13953	}
13954      if (GET_CODE (addr) == PLUS)
13955	{
13956	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
13957
13958	  /* Check first to see if this is a constant offset from a @GOTOFF
13959	     symbol reference.  */
13960	  if (!TARGET_PECOFF && gotoff_operand (op0, Pmode)
13961	      && CONST_INT_P (op1))
13962	    {
13963	      if (!TARGET_64BIT)
13964		{
13965		  set_pic_reg_ever_live ();
13966		  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
13967					    UNSPEC_GOTOFF);
13968		  new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
13969		  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
13970		  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
13971
13972		  if (reg != 0)
13973		    {
13974		      emit_move_insn (reg, new_rtx);
13975		      new_rtx = reg;
13976		    }
13977		}
13978	      else
13979		{
13980		  if (INTVAL (op1) < -16*1024*1024
13981		      || INTVAL (op1) >= 16*1024*1024)
13982		    {
13983		      if (!x86_64_immediate_operand (op1, Pmode))
13984			op1 = force_reg (Pmode, op1);
13985		      new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
13986		    }
13987		}
13988	    }
13989	  else
13990	    {
13991	      rtx base = legitimize_pic_address (op0, reg);
13992	      machine_mode mode = GET_MODE (base);
13993	      new_rtx
13994	        = legitimize_pic_address (op1, base == reg ? NULL_RTX : reg);
13995
13996	      if (CONST_INT_P (new_rtx))
13997		{
13998		  if (INTVAL (new_rtx) < -16*1024*1024
13999		      || INTVAL (new_rtx) >= 16*1024*1024)
14000		    {
14001		      if (!x86_64_immediate_operand (new_rtx, mode))
14002			new_rtx = force_reg (mode, new_rtx);
14003		      new_rtx
14004		        = gen_rtx_PLUS (mode, force_reg (mode, base), new_rtx);
14005		    }
14006		  else
14007		    new_rtx = plus_constant (mode, base, INTVAL (new_rtx));
14008		}
14009	      else
14010		{
		  /* For %rip addressing, we have to use just disp32,
		     neither base nor index.  */
14013		  if (TARGET_64BIT
14014		      && (GET_CODE (base) == SYMBOL_REF
14015			  || GET_CODE (base) == LABEL_REF))
14016		    base = force_reg (mode, base);
14017		  if (GET_CODE (new_rtx) == PLUS
14018		      && CONSTANT_P (XEXP (new_rtx, 1)))
14019		    {
14020		      base = gen_rtx_PLUS (mode, base, XEXP (new_rtx, 0));
14021		      new_rtx = XEXP (new_rtx, 1);
14022		    }
14023		  new_rtx = gen_rtx_PLUS (mode, base, new_rtx);
14024		}
14025	    }
14026	}
14027    }
14028  return new_rtx;
14029}
14030
14031/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
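/* When the thread pointer ends up in a register, the UNSPEC_TP below is
   emitted as a load of %fs:0 (64-bit) or %gs:0 (32-bit), the TCB's
   pointer to itself; used directly as an address it simply becomes a
   segment override (see the UNSPEC_TP handling in
   ix86_decompose_address).  */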
14032
14033static rtx
14034get_thread_pointer (machine_mode tp_mode, bool to_reg)
14035{
14036  rtx tp = gen_rtx_UNSPEC (ptr_mode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
14037
14038  if (GET_MODE (tp) != tp_mode)
14039    {
14040      gcc_assert (GET_MODE (tp) == SImode);
14041      gcc_assert (tp_mode == DImode);
14042
14043      tp = gen_rtx_ZERO_EXTEND (tp_mode, tp);
14044    }
14045
14046  if (to_reg)
14047    tp = copy_to_mode_reg (tp_mode, tp);
14048
14049  return tp;
14050}
14051
14052/* Construct the SYMBOL_REF for the tls_get_addr function.  */
14053
14054static GTY(()) rtx ix86_tls_symbol;
14055
14056static rtx
14057ix86_tls_get_addr (void)
14058{
14059  if (!ix86_tls_symbol)
14060    {
14061      const char *sym
14062	= ((TARGET_ANY_GNU_TLS && !TARGET_64BIT)
14063	   ? "___tls_get_addr" : "__tls_get_addr");
14064
14065      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, sym);
14066    }
14067
14068  if (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF)
14069    {
14070      rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, ix86_tls_symbol),
14071				   UNSPEC_PLTOFF);
14072      return gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
14073			   gen_rtx_CONST (Pmode, unspec));
14074    }
14075
14076  return ix86_tls_symbol;
14077}
14078
14079/* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol.  */
14080
14081static GTY(()) rtx ix86_tls_module_base_symbol;
14082
14083rtx
14084ix86_tls_module_base (void)
14085{
14086  if (!ix86_tls_module_base_symbol)
14087    {
14088      ix86_tls_module_base_symbol
14089	= gen_rtx_SYMBOL_REF (Pmode, "_TLS_MODULE_BASE_");
14090
14091      SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14092	|= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14093    }
14094
14095  return ix86_tls_module_base_symbol;
14096}
14097
14098/* A subroutine of ix86_legitimize_address and ix86_expand_move.  FOR_MOV is
14099   false if we expect this to be used for a memory address and true if
14100   we expect to load the address into a register.  */
14101
14102static rtx
14103legitimize_tls_address (rtx x, enum tls_model model, bool for_mov)
14104{
14105  rtx dest, base, off;
14106  rtx pic = NULL_RTX, tp = NULL_RTX;
14107  machine_mode tp_mode = Pmode;
14108  int type;
14109
14110  /* Fall back to global dynamic model if tool chain cannot support local
14111     dynamic.  */
14112  if (TARGET_SUN_TLS && !TARGET_64BIT
14113      && !HAVE_AS_IX86_TLSLDMPLT && !HAVE_AS_IX86_TLSLDM
14114      && model == TLS_MODEL_LOCAL_DYNAMIC)
14115    model = TLS_MODEL_GLOBAL_DYNAMIC;
14116
14117  switch (model)
14118    {
14119    case TLS_MODEL_GLOBAL_DYNAMIC:
14120      dest = gen_reg_rtx (Pmode);
14121
14122      if (!TARGET_64BIT)
14123	{
14124	  if (flag_pic && !TARGET_PECOFF)
14125	    pic = pic_offset_table_rtx;
14126	  else
14127	    {
14128	      pic = gen_reg_rtx (Pmode);
14129	      emit_insn (gen_set_got (pic));
14130	    }
14131	}
14132
14133      if (TARGET_GNU2_TLS)
14134	{
14135	  if (TARGET_64BIT)
14136	    emit_insn (gen_tls_dynamic_gnu2_64 (dest, x));
14137	  else
14138	    emit_insn (gen_tls_dynamic_gnu2_32 (dest, x, pic));
14139
14140	  tp = get_thread_pointer (Pmode, true);
14141	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
14142
14143	  if (GET_MODE (x) != Pmode)
14144	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
14145
14146	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14147	}
14148      else
14149	{
14150	  rtx caddr = ix86_tls_get_addr ();
14151
14152	  if (TARGET_64BIT)
14153	    {
14154	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
14155	      rtx_insn *insns;
14156
14157	      start_sequence ();
14158	      emit_call_insn
14159		(ix86_gen_tls_global_dynamic_64 (rax, x, caddr));
14160	      insns = get_insns ();
14161	      end_sequence ();
14162
14163	      if (GET_MODE (x) != Pmode)
14164		x = gen_rtx_ZERO_EXTEND (Pmode, x);
14165
14166	      RTL_CONST_CALL_P (insns) = 1;
14167	      emit_libcall_block (insns, dest, rax, x);
14168	    }
14169	  else
14170	    emit_insn (gen_tls_global_dynamic_32 (dest, x, pic, caddr));
14171	}
14172      break;
14173
14174    case TLS_MODEL_LOCAL_DYNAMIC:
14175      base = gen_reg_rtx (Pmode);
14176
14177      if (!TARGET_64BIT)
14178	{
14179	  if (flag_pic)
14180	    pic = pic_offset_table_rtx;
14181	  else
14182	    {
14183	      pic = gen_reg_rtx (Pmode);
14184	      emit_insn (gen_set_got (pic));
14185	    }
14186	}
14187
14188      if (TARGET_GNU2_TLS)
14189	{
14190	  rtx tmp = ix86_tls_module_base ();
14191
14192	  if (TARGET_64BIT)
14193	    emit_insn (gen_tls_dynamic_gnu2_64 (base, tmp));
14194	  else
14195	    emit_insn (gen_tls_dynamic_gnu2_32 (base, tmp, pic));
14196
14197	  tp = get_thread_pointer (Pmode, true);
14198	  set_unique_reg_note (get_last_insn (), REG_EQUAL,
14199			       gen_rtx_MINUS (Pmode, tmp, tp));
14200	}
14201      else
14202	{
14203	  rtx caddr = ix86_tls_get_addr ();
14204
14205	  if (TARGET_64BIT)
14206	    {
14207	      rtx rax = gen_rtx_REG (Pmode, AX_REG);
14208	      rtx_insn *insns;
14209	      rtx eqv;
14210
14211	      start_sequence ();
14212	      emit_call_insn
14213		(ix86_gen_tls_local_dynamic_base_64 (rax, caddr));
14214	      insns = get_insns ();
14215	      end_sequence ();
14216
14217	      /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
14218		 share the LD_BASE result with other LD model accesses.  */
14219	      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
14220				    UNSPEC_TLS_LD_BASE);
14221
14222	      RTL_CONST_CALL_P (insns) = 1;
14223	      emit_libcall_block (insns, base, rax, eqv);
14224	    }
14225	  else
14226	    emit_insn (gen_tls_local_dynamic_base_32 (base, pic, caddr));
14227	}
14228
14229      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
14230      off = gen_rtx_CONST (Pmode, off);
14231
14232      dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
14233
14234      if (TARGET_GNU2_TLS)
14235	{
14236	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
14237
14238	  if (GET_MODE (x) != Pmode)
14239	    x = gen_rtx_ZERO_EXTEND (Pmode, x);
14240
14241	  set_unique_reg_note (get_last_insn (), REG_EQUAL, x);
14242	}
14243      break;
14244
14245    case TLS_MODEL_INITIAL_EXEC:
14246      if (TARGET_64BIT)
14247	{
14248	  if (TARGET_SUN_TLS && !TARGET_X32)
14249	    {
	      /* The Sun linker took the AMD64 TLS spec literally
		 and can only handle %rax as the destination of the
		 initial-exec code sequence.  */
14253
14254	      dest = gen_reg_rtx (DImode);
14255	      emit_insn (gen_tls_initial_exec_64_sun (dest, x));
14256	      return dest;
14257	    }
14258
14259	  /* Generate DImode references to avoid %fs:(%reg32)
14260	     problems and linker IE->LE relaxation bug.  */
14261	  tp_mode = DImode;
14262	  pic = NULL;
14263	  type = UNSPEC_GOTNTPOFF;
14264	}
14265      else if (flag_pic)
14266	{
14267	  set_pic_reg_ever_live ();
14268	  pic = pic_offset_table_rtx;
14269	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
14270	}
14271      else if (!TARGET_ANY_GNU_TLS)
14272	{
14273	  pic = gen_reg_rtx (Pmode);
14274	  emit_insn (gen_set_got (pic));
14275	  type = UNSPEC_GOTTPOFF;
14276	}
14277      else
14278	{
14279	  pic = NULL;
14280	  type = UNSPEC_INDNTPOFF;
14281	}
14282
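      /* Build a reference to the thread-pointer offset of X and load it
	 from the GOT (or from the @indntpoff location when no PIC
	 register is needed).  */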
14283      off = gen_rtx_UNSPEC (tp_mode, gen_rtvec (1, x), type);
14284      off = gen_rtx_CONST (tp_mode, off);
14285      if (pic)
14286	off = gen_rtx_PLUS (tp_mode, pic, off);
14287      off = gen_const_mem (tp_mode, off);
14288      set_mem_alias_set (off, ix86_GOT_alias_set ());
14289
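      /* The GNU TLS and 64-bit sequences load a negative offset that is
	 added to the thread pointer; the Sun @gottpoff value is positive
	 and is subtracted instead.  */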
14290      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14291	{
14292	  base = get_thread_pointer (tp_mode,
14293				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14294	  off = force_reg (tp_mode, off);
14295	  return gen_rtx_PLUS (tp_mode, base, off);
14296	}
14297      else
14298	{
14299	  base = get_thread_pointer (Pmode, true);
14300	  dest = gen_reg_rtx (Pmode);
14301	  emit_insn (ix86_gen_sub3 (dest, base, off));
14302	}
14303      break;
14304
14305    case TLS_MODEL_LOCAL_EXEC:
14306      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
14307			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14308			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
14309      off = gen_rtx_CONST (Pmode, off);
14310
14311      if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
14312	{
14313	  base = get_thread_pointer (Pmode,
14314				     for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
14315	  return gen_rtx_PLUS (Pmode, base, off);
14316	}
14317      else
14318	{
14319	  base = get_thread_pointer (Pmode, true);
14320	  dest = gen_reg_rtx (Pmode);
14321	  emit_insn (ix86_gen_sub3 (dest, base, off));
14322	}
14323      break;
14324
14325    default:
14326      gcc_unreachable ();
14327    }
14328
14329  return dest;
14330}
14331
14332/* Create or return the unique __imp_DECL dllimport symbol corresponding
14333   to symbol DECL if BEIMPORT is true.  Otherwise create or return the
14334   unique refptr-DECL symbol corresponding to symbol DECL.  */
14335
14336struct dllimport_hasher : ggc_cache_hasher<tree_map *>
14337{
14338  static inline hashval_t hash (tree_map *m) { return m->hash; }
14339  static inline bool
14340  equal (tree_map *a, tree_map *b)
14341  {
14342    return a->base.from == b->base.from;
14343  }
14344
14345  static void
14346  handle_cache_entry (tree_map *&m)
14347  {
14348    extern void gt_ggc_mx (tree_map *&);
14349    if (m == HTAB_EMPTY_ENTRY || m == HTAB_DELETED_ENTRY)
14350      return;
14351    else if (ggc_marked_p (m->base.from))
14352      gt_ggc_mx (m);
14353    else
14354      m = static_cast<tree_map *> (HTAB_DELETED_ENTRY);
14355  }
14356};
14357
14358static GTY((cache)) hash_table<dllimport_hasher> *dllimport_map;
14359
14360static tree
14361get_dllimport_decl (tree decl, bool beimport)
14362{
14363  struct tree_map *h, in;
14364  const char *name;
14365  const char *prefix;
14366  size_t namelen, prefixlen;
14367  char *imp_name;
14368  tree to;
14369  rtx rtl;
14370
14371  if (!dllimport_map)
14372    dllimport_map = hash_table<dllimport_hasher>::create_ggc (512);
14373
14374  in.hash = htab_hash_pointer (decl);
14375  in.base.from = decl;
14376  tree_map **loc = dllimport_map->find_slot_with_hash (&in, in.hash, INSERT);
14377  h = *loc;
14378  if (h)
14379    return h->to;
14380
14381  *loc = h = ggc_alloc<tree_map> ();
14382  h->hash = in.hash;
14383  h->base.from = decl;
14384  h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
14385			   VAR_DECL, NULL, ptr_type_node);
14386  DECL_ARTIFICIAL (to) = 1;
14387  DECL_IGNORED_P (to) = 1;
14388  DECL_EXTERNAL (to) = 1;
14389  TREE_READONLY (to) = 1;
14390
14391  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
14392  name = targetm.strip_name_encoding (name);
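  /* The leading '*' marks the names below as already encoded, so they are
     emitted verbatim without prepending user_label_prefix again.  */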
14393  if (beimport)
14394    prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
14395      ? "*__imp_" : "*__imp__";
14396  else
14397    prefix = user_label_prefix[0] == 0 ? "*.refptr." : "*refptr.";
14398  namelen = strlen (name);
14399  prefixlen = strlen (prefix);
14400  imp_name = (char *) alloca (namelen + prefixlen + 1);
14401  memcpy (imp_name, prefix, prefixlen);
14402  memcpy (imp_name + prefixlen, name, namelen + 1);
14403
14404  name = ggc_alloc_string (imp_name, namelen + prefixlen);
14405  rtl = gen_rtx_SYMBOL_REF (Pmode, name);
14406  SET_SYMBOL_REF_DECL (rtl, to);
14407  SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL | SYMBOL_FLAG_STUBVAR;
14408  if (!beimport)
14409    {
14410      SYMBOL_REF_FLAGS (rtl) |= SYMBOL_FLAG_EXTERNAL;
14411#ifdef SUB_TARGET_RECORD_STUB
14412      SUB_TARGET_RECORD_STUB (name);
14413#endif
14414    }
14415
14416  rtl = gen_const_mem (Pmode, rtl);
14417  set_mem_alias_set (rtl, ix86_GOT_alias_set ());
14418
14419  SET_DECL_RTL (to, rtl);
14420  SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
14421
14422  return to;
14423}
14424
/* Expand SYMBOL into its corresponding far-address symbol.
   WANT_REG is true if we require the result to be a register.  */
14427
14428static rtx
14429legitimize_pe_coff_extern_decl (rtx symbol, bool want_reg)
14430{
14431  tree imp_decl;
14432  rtx x;
14433
14434  gcc_assert (SYMBOL_REF_DECL (symbol));
14435  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), false);
14436
14437  x = DECL_RTL (imp_decl);
14438  if (want_reg)
14439    x = force_reg (Pmode, x);
14440  return x;
14441}
14442
14443/* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
   true if we require the result to be a register.  */
14445
14446static rtx
14447legitimize_dllimport_symbol (rtx symbol, bool want_reg)
14448{
14449  tree imp_decl;
14450  rtx x;
14451
14452  gcc_assert (SYMBOL_REF_DECL (symbol));
14453  imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol), true);
14454
14455  x = DECL_RTL (imp_decl);
14456  if (want_reg)
14457    x = force_reg (Pmode, x);
14458  return x;
14459}
14460
/* Expand ADDR into its corresponding dllimport or refptr symbol.  INREG
   is true if we require the result to be a register.  */
14463
14464static rtx
14465legitimize_pe_coff_symbol (rtx addr, bool inreg)
14466{
14467  if (!TARGET_PECOFF)
14468    return NULL_RTX;
14469
14470  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14471    {
14472      if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
14473	return legitimize_dllimport_symbol (addr, inreg);
14474      if (GET_CODE (addr) == CONST
14475	  && GET_CODE (XEXP (addr, 0)) == PLUS
14476	  && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14477	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
14478	{
14479	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), inreg);
14480	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14481	}
14482    }
14483
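  /* Beyond dllimport handling, external decls only need the refptr
     indirection for the medium and large PIC code models.  */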
14484  if (ix86_cmodel != CM_LARGE_PIC && ix86_cmodel != CM_MEDIUM_PIC)
14485    return NULL_RTX;
14486  if (GET_CODE (addr) == SYMBOL_REF
14487      && !is_imported_p (addr)
14488      && SYMBOL_REF_EXTERNAL_P (addr)
14489      && SYMBOL_REF_DECL (addr))
14490    return legitimize_pe_coff_extern_decl (addr, inreg);
14491
14492  if (GET_CODE (addr) == CONST
14493      && GET_CODE (XEXP (addr, 0)) == PLUS
14494      && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
14495      && !is_imported_p (XEXP (XEXP (addr, 0), 0))
14496      && SYMBOL_REF_EXTERNAL_P (XEXP (XEXP (addr, 0), 0))
14497      && SYMBOL_REF_DECL (XEXP (XEXP (addr, 0), 0)))
14498    {
14499      rtx t = legitimize_pe_coff_extern_decl (XEXP (XEXP (addr, 0), 0), inreg);
14500      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
14501    }
14502  return NULL_RTX;
14503}
14504
14505/* Try machine-dependent ways of modifying an illegitimate address
14506   to be legitimate.  If we find one, return the new, valid address.
   This function is used in only one place: `memory_address' in explow.c.
14508
14509   OLDX is the address as it was before break_out_memory_refs was called.
14510   In some cases it is useful to look at this to decide what needs to be done.
14511
   It is always safe for this function to do nothing.  It exists to recognize
14513   opportunities to optimize the output.
14514
14515   For the 80386, we handle X+REG by loading X into a register R and
14516   using R+REG.  R will go in a general reg and indexing will be used.
14517   However, if REG is a broken-out memory address or multiplication,
14518   nothing needs to be done because REG can certainly go in a general reg.
14519
14520   When -fpic is used, special handling is needed for symbolic references.
14521   See comments by legitimize_pic_address in i386.c for details.  */
14522
14523static rtx
14524ix86_legitimize_address (rtx x, rtx, machine_mode mode)
14525{
14526  bool changed = false;
14527  unsigned log;
14528
14529  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
14530  if (log)
14531    return legitimize_tls_address (x, (enum tls_model) log, false);
14532  if (GET_CODE (x) == CONST
14533      && GET_CODE (XEXP (x, 0)) == PLUS
14534      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
14535      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
14536    {
14537      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
14538				      (enum tls_model) log, false);
14539      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
14540    }
14541
14542  if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
14543    {
14544      rtx tmp = legitimize_pe_coff_symbol (x, true);
14545      if (tmp)
14546        return tmp;
14547    }
14548
14549  if (flag_pic && SYMBOLIC_CONST (x))
14550    return legitimize_pic_address (x, 0);
14551
14552#if TARGET_MACHO
14553  if (MACHO_DYNAMIC_NO_PIC_P && SYMBOLIC_CONST (x))
14554    return machopic_indirect_data_reference (x, 0);
14555#endif
14556
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
14558  if (GET_CODE (x) == ASHIFT
14559      && CONST_INT_P (XEXP (x, 1))
14560      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
14561    {
14562      changed = true;
14563      log = INTVAL (XEXP (x, 1));
14564      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
14565			GEN_INT (1 << log));
14566    }
14567
14568  if (GET_CODE (x) == PLUS)
14569    {
14570      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
14571
14572      if (GET_CODE (XEXP (x, 0)) == ASHIFT
14573	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
14574	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
14575	{
14576	  changed = true;
14577	  log = INTVAL (XEXP (XEXP (x, 0), 1));
14578	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
14579				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
14580				      GEN_INT (1 << log));
14581	}
14582
14583      if (GET_CODE (XEXP (x, 1)) == ASHIFT
14584	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
14585	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
14586	{
14587	  changed = true;
14588	  log = INTVAL (XEXP (XEXP (x, 1), 1));
14589	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
14590				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
14591				      GEN_INT (1 << log));
14592	}
14593
14594      /* Put multiply first if it isn't already.  */
14595      if (GET_CODE (XEXP (x, 1)) == MULT)
14596	{
14597	  std::swap (XEXP (x, 0), XEXP (x, 1));
14598	  changed = true;
14599	}
14600
14601      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
14602	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
14603	 created by virtual register instantiation, register elimination, and
14604	 similar optimizations.  */
14605      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
14606	{
14607	  changed = true;
14608	  x = gen_rtx_PLUS (Pmode,
14609			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
14610					  XEXP (XEXP (x, 1), 0)),
14611			    XEXP (XEXP (x, 1), 1));
14612	}
14613
14614      /* Canonicalize
14615	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
14616	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
14617      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
14618	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
14619	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
14620	       && CONSTANT_P (XEXP (x, 1)))
14621	{
14622	  rtx constant;
14623	  rtx other = NULL_RTX;
14624
14625	  if (CONST_INT_P (XEXP (x, 1)))
14626	    {
14627	      constant = XEXP (x, 1);
14628	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
14629	    }
14630	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
14631	    {
14632	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
14633	      other = XEXP (x, 1);
14634	    }
14635	  else
14636	    constant = 0;
14637
14638	  if (constant)
14639	    {
14640	      changed = true;
14641	      x = gen_rtx_PLUS (Pmode,
14642				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
14643					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
14644				plus_constant (Pmode, other,
14645					       INTVAL (constant)));
14646	    }
14647	}
14648
14649      if (changed && ix86_legitimate_address_p (mode, x, false))
14650	return x;
14651
14652      if (GET_CODE (XEXP (x, 0)) == MULT)
14653	{
14654	  changed = true;
14655	  XEXP (x, 0) = copy_addr_to_reg (XEXP (x, 0));
14656	}
14657
14658      if (GET_CODE (XEXP (x, 1)) == MULT)
14659	{
14660	  changed = true;
14661	  XEXP (x, 1) = copy_addr_to_reg (XEXP (x, 1));
14662	}
14663
14664      if (changed
14665	  && REG_P (XEXP (x, 1))
14666	  && REG_P (XEXP (x, 0)))
14667	return x;
14668
14669      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
14670	{
14671	  changed = true;
14672	  x = legitimize_pic_address (x, 0);
14673	}
14674
14675      if (changed && ix86_legitimate_address_p (mode, x, false))
14676	return x;
14677
14678      if (REG_P (XEXP (x, 0)))
14679	{
14680	  rtx temp = gen_reg_rtx (Pmode);
14681	  rtx val  = force_operand (XEXP (x, 1), temp);
14682	  if (val != temp)
14683	    {
14684	      val = convert_to_mode (Pmode, val, 1);
14685	      emit_move_insn (temp, val);
14686	    }
14687
14688	  XEXP (x, 1) = temp;
14689	  return x;
14690	}
14691
14692      else if (REG_P (XEXP (x, 1)))
14693	{
14694	  rtx temp = gen_reg_rtx (Pmode);
14695	  rtx val  = force_operand (XEXP (x, 0), temp);
14696	  if (val != temp)
14697	    {
14698	      val = convert_to_mode (Pmode, val, 1);
14699	      emit_move_insn (temp, val);
14700	    }
14701
14702	  XEXP (x, 0) = temp;
14703	  return x;
14704	}
14705    }
14706
14707  return x;
14708}
14709
14710/* Print an integer constant expression in assembler syntax.  Addition
14711   and subtraction are the only arithmetic that may appear in these
14712   expressions.  FILE is the stdio stream to write to, X is the rtx, and
14713   CODE is the operand print code from the output string.  */
14714
14715static void
14716output_pic_addr_const (FILE *file, rtx x, int code)
14717{
14718  char buf[256];
14719
14720  switch (GET_CODE (x))
14721    {
14722    case PC:
14723      gcc_assert (flag_pic);
14724      putc ('.', file);
14725      break;
14726
14727    case SYMBOL_REF:
14728      if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
14729	output_addr_const (file, x);
14730      else
14731	{
14732	  const char *name = XSTR (x, 0);
14733
14734	  /* Mark the decl as referenced so that cgraph will
14735	     output the function.  */
14736	  if (SYMBOL_REF_DECL (x))
14737	    mark_decl_referenced (SYMBOL_REF_DECL (x));
14738
14739#if TARGET_MACHO
14740	  if (MACHOPIC_INDIRECT
14741	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14742	    name = machopic_indirection_name (x, /*stub_p=*/true);
14743#endif
14744	  assemble_name (file, name);
14745	}
14746      if (!TARGET_MACHO && !(TARGET_64BIT && TARGET_PECOFF)
14747	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
14748	fputs ("@PLT", file);
14749      break;
14750
14751    case LABEL_REF:
14752      x = XEXP (x, 0);
14753      /* FALLTHRU */
14754    case CODE_LABEL:
14755      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
14756      assemble_name (asm_out_file, buf);
14757      break;
14758
14759    case CONST_INT:
14760      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
14761      break;
14762
14763    case CONST:
14764      /* This used to output parentheses around the expression,
14765	 but that does not work on the 386 (either ATT or BSD assembler).  */
14766      output_pic_addr_const (file, XEXP (x, 0), code);
14767      break;
14768
14769    case CONST_DOUBLE:
14770      if (GET_MODE (x) == VOIDmode)
14771	{
14772	  /* We can use %d if the number is <32 bits and positive.  */
14773	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
14774	    fprintf (file, "0x%lx%08lx",
14775		     (unsigned long) CONST_DOUBLE_HIGH (x),
14776		     (unsigned long) CONST_DOUBLE_LOW (x));
14777	  else
14778	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
14779	}
14780      else
14781	/* We can't handle floating point constants;
14782	   TARGET_PRINT_OPERAND must handle them.  */
14783	output_operand_lossage ("floating constant misused");
14784      break;
14785
14786    case PLUS:
14787      /* Some assemblers need integer constants to appear first.  */
14788      if (CONST_INT_P (XEXP (x, 0)))
14789	{
14790	  output_pic_addr_const (file, XEXP (x, 0), code);
14791	  putc ('+', file);
14792	  output_pic_addr_const (file, XEXP (x, 1), code);
14793	}
14794      else
14795	{
14796	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
14797	  output_pic_addr_const (file, XEXP (x, 1), code);
14798	  putc ('+', file);
14799	  output_pic_addr_const (file, XEXP (x, 0), code);
14800	}
14801      break;
14802
14803    case MINUS:
14804      if (!TARGET_MACHO)
14805	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
14806      output_pic_addr_const (file, XEXP (x, 0), code);
14807      putc ('-', file);
14808      output_pic_addr_const (file, XEXP (x, 1), code);
14809      if (!TARGET_MACHO)
14810	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
14811      break;
14812
14813     case UNSPEC:
14814       if (XINT (x, 1) == UNSPEC_STACK_CHECK)
14815	 {
14816	   bool f = i386_asm_output_addr_const_extra (file, x);
14817	   gcc_assert (f);
14818	   break;
14819	 }
14820
14821       gcc_assert (XVECLEN (x, 0) == 1);
14822       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
14823       switch (XINT (x, 1))
14824	{
14825	case UNSPEC_GOT:
14826	  fputs ("@GOT", file);
14827	  break;
14828	case UNSPEC_GOTOFF:
14829	  fputs ("@GOTOFF", file);
14830	  break;
14831	case UNSPEC_PLTOFF:
14832	  fputs ("@PLTOFF", file);
14833	  break;
14834	case UNSPEC_PCREL:
14835	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14836		 "(%rip)" : "[rip]", file);
14837	  break;
14838	case UNSPEC_GOTPCREL:
14839	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14840		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
14841	  break;
14842	case UNSPEC_GOTTPOFF:
14843	  /* FIXME: This might be @TPOFF in Sun ld too.  */
14844	  fputs ("@gottpoff", file);
14845	  break;
14846	case UNSPEC_TPOFF:
14847	  fputs ("@tpoff", file);
14848	  break;
14849	case UNSPEC_NTPOFF:
14850	  if (TARGET_64BIT)
14851	    fputs ("@tpoff", file);
14852	  else
14853	    fputs ("@ntpoff", file);
14854	  break;
14855	case UNSPEC_DTPOFF:
14856	  fputs ("@dtpoff", file);
14857	  break;
14858	case UNSPEC_GOTNTPOFF:
14859	  if (TARGET_64BIT)
14860	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
14861		   "@gottpoff(%rip)": "@gottpoff[rip]", file);
14862	  else
14863	    fputs ("@gotntpoff", file);
14864	  break;
14865	case UNSPEC_INDNTPOFF:
14866	  fputs ("@indntpoff", file);
14867	  break;
14868#if TARGET_MACHO
14869	case UNSPEC_MACHOPIC_OFFSET:
14870	  putc ('-', file);
14871	  machopic_output_function_base_name (file);
14872	  break;
14873#endif
14874	default:
14875	  output_operand_lossage ("invalid UNSPEC as operand");
14876	  break;
14877	}
14878       break;
14879
14880    default:
14881      output_operand_lossage ("invalid expression as operand");
14882    }
14883}
14884
14885/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
14886   We need to emit DTP-relative relocations.  */
14887
14888static void ATTRIBUTE_UNUSED
14889i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
14890{
14891  fputs (ASM_LONG, file);
14892  output_addr_const (file, x);
14893  fputs ("@dtpoff", file);
14894  switch (size)
14895    {
14896    case 4:
14897      break;
14898    case 8:
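      /* The second zero word supplies the upper half of the 64-bit
	 DTP-relative offset.  */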
14899      fputs (", 0", file);
14900      break;
14901    default:
14902      gcc_unreachable ();
14903   }
14904}
14905
14906/* Return true if X is a representation of the PIC register.  This copes
14907   with calls from ix86_find_base_term, where the register might have
14908   been replaced by a cselib value.  */
14909
14910static bool
14911ix86_pic_register_p (rtx x)
14912{
14913  if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
14914    return (pic_offset_table_rtx
14915	    && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
14916  else if (!REG_P (x))
14917    return false;
14918  else if (pic_offset_table_rtx)
14919    {
14920      if (REGNO (x) == REGNO (pic_offset_table_rtx))
14921	return true;
14922      if (HARD_REGISTER_P (x)
14923	  && !HARD_REGISTER_P (pic_offset_table_rtx)
14924	  && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
14925	return true;
14926      return false;
14927    }
14928  else
14929    return REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
14930}
14931
14932/* Helper function for ix86_delegitimize_address.
14933   Attempt to delegitimize TLS local-exec accesses.  */
14934
14935static rtx
14936ix86_delegitimize_tls_address (rtx orig_x)
14937{
14938  rtx x = orig_x, unspec;
14939  struct ix86_address addr;
14940
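  /* Look for a %fs:/%gs:-based address whose displacement wraps an
     @ntpoff UNSPEC, and rebuild the plain SYMBOL_REF together with any
     base, index and constant offset.  */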
14941  if (!TARGET_TLS_DIRECT_SEG_REFS)
14942    return orig_x;
14943  if (MEM_P (x))
14944    x = XEXP (x, 0);
14945  if (GET_CODE (x) != PLUS || GET_MODE (x) != Pmode)
14946    return orig_x;
14947  if (ix86_decompose_address (x, &addr) == 0
14948      || addr.seg != DEFAULT_TLS_SEG_REG
14949      || addr.disp == NULL_RTX
14950      || GET_CODE (addr.disp) != CONST)
14951    return orig_x;
14952  unspec = XEXP (addr.disp, 0);
14953  if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
14954    unspec = XEXP (unspec, 0);
14955  if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NTPOFF)
14956    return orig_x;
14957  x = XVECEXP (unspec, 0, 0);
14958  gcc_assert (GET_CODE (x) == SYMBOL_REF);
14959  if (unspec != XEXP (addr.disp, 0))
14960    x = gen_rtx_PLUS (Pmode, x, XEXP (XEXP (addr.disp, 0), 1));
14961  if (addr.index)
14962    {
14963      rtx idx = addr.index;
14964      if (addr.scale != 1)
14965	idx = gen_rtx_MULT (Pmode, idx, GEN_INT (addr.scale));
14966      x = gen_rtx_PLUS (Pmode, idx, x);
14967    }
14968  if (addr.base)
14969    x = gen_rtx_PLUS (Pmode, addr.base, x);
14970  if (MEM_P (orig_x))
14971    x = replace_equiv_address_nv (orig_x, x);
14972  return x;
14973}
14974
14975/* In the name of slightly smaller debug output, and to cater to
14976   general assembler lossage, recognize PIC+GOTOFF and turn it back
14977   into a direct symbol reference.
14978
14979   On Darwin, this is necessary to avoid a crash, because Darwin
14980   has a different PIC label for each routine but the DWARF debugging
14981   information is not associated with any particular routine, so it's
14982   necessary to remove references to the PIC label from RTL stored by
14983   the DWARF output code.  */
14984
14985static rtx
14986ix86_delegitimize_address (rtx x)
14987{
14988  rtx orig_x = delegitimize_mem_from_attrs (x);
14989  /* addend is NULL or some rtx if x is something+GOTOFF where
14990     something doesn't include the PIC register.  */
14991  rtx addend = NULL_RTX;
14992  /* reg_addend is NULL or a multiple of some register.  */
14993  rtx reg_addend = NULL_RTX;
14994  /* const_addend is NULL or a const_int.  */
14995  rtx const_addend = NULL_RTX;
14996  /* This is the result, or NULL.  */
14997  rtx result = NULL_RTX;
14998
14999  x = orig_x;
15000
15001  if (MEM_P (x))
15002    x = XEXP (x, 0);
15003
15004  if (TARGET_64BIT)
15005    {
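      /* (const (plus (unspec [symbol] UNSPEC_PCREL) const_int)) folds
	 back to symbol + const_int.  */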
15006      if (GET_CODE (x) == CONST
15007          && GET_CODE (XEXP (x, 0)) == PLUS
15008          && GET_MODE (XEXP (x, 0)) == Pmode
15009          && CONST_INT_P (XEXP (XEXP (x, 0), 1))
15010          && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
15011          && XINT (XEXP (XEXP (x, 0), 0), 1) == UNSPEC_PCREL)
15012        {
15013	  rtx x2 = XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
15014	  x = gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 1), x2);
15015	  if (MEM_P (orig_x))
15016	    x = replace_equiv_address_nv (orig_x, x);
15017	  return x;
15018	}
15019
15020      if (GET_CODE (x) == CONST
15021	  && GET_CODE (XEXP (x, 0)) == UNSPEC
15022	  && (XINT (XEXP (x, 0), 1) == UNSPEC_GOTPCREL
15023	      || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL)
15024	  && (MEM_P (orig_x) || XINT (XEXP (x, 0), 1) == UNSPEC_PCREL))
15025	{
15026	  x = XVECEXP (XEXP (x, 0), 0, 0);
15027	  if (GET_MODE (orig_x) != GET_MODE (x) && MEM_P (orig_x))
15028	    {
15029	      x = simplify_gen_subreg (GET_MODE (orig_x), x,
15030				       GET_MODE (x), 0);
15031	      if (x == NULL_RTX)
15032		return orig_x;
15033	    }
15034	  return x;
15035	}
15036
15037      if (ix86_cmodel != CM_MEDIUM_PIC && ix86_cmodel != CM_LARGE_PIC)
15038	return ix86_delegitimize_tls_address (orig_x);
15039
15040      /* Fall thru into the code shared with -m32 for -mcmodel=large -fpic
15041	 and -mcmodel=medium -fpic.  */
15042    }
15043
15044  if (GET_CODE (x) != PLUS
15045      || GET_CODE (XEXP (x, 1)) != CONST)
15046    return ix86_delegitimize_tls_address (orig_x);
15047
15048  if (ix86_pic_register_p (XEXP (x, 0)))
15049    /* %ebx + GOT/GOTOFF */
15050    ;
15051  else if (GET_CODE (XEXP (x, 0)) == PLUS)
15052    {
15053      /* %ebx + %reg * scale + GOT/GOTOFF */
15054      reg_addend = XEXP (x, 0);
15055      if (ix86_pic_register_p (XEXP (reg_addend, 0)))
15056	reg_addend = XEXP (reg_addend, 1);
15057      else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
15058	reg_addend = XEXP (reg_addend, 0);
15059      else
15060	{
15061	  reg_addend = NULL_RTX;
15062	  addend = XEXP (x, 0);
15063	}
15064    }
15065  else
15066    addend = XEXP (x, 0);
15067
15068  x = XEXP (XEXP (x, 1), 0);
15069  if (GET_CODE (x) == PLUS
15070      && CONST_INT_P (XEXP (x, 1)))
15071    {
15072      const_addend = XEXP (x, 1);
15073      x = XEXP (x, 0);
15074    }
15075
15076  if (GET_CODE (x) == UNSPEC
15077      && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
15078	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))
15079	  || (XINT (x, 1) == UNSPEC_PLTOFF && ix86_cmodel == CM_LARGE_PIC
15080	      && !MEM_P (orig_x) && !addend)))
15081    result = XVECEXP (x, 0, 0);
15082
15083  if (!TARGET_64BIT && TARGET_MACHO && darwin_local_data_pic (x)
15084      && !MEM_P (orig_x))
15085    result = XVECEXP (x, 0, 0);
15086
15087  if (! result)
15088    return ix86_delegitimize_tls_address (orig_x);
15089
15090  if (const_addend)
15091    result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
15092  if (reg_addend)
15093    result = gen_rtx_PLUS (Pmode, reg_addend, result);
15094  if (addend)
15095    {
15096      /* If the rest of original X doesn't involve the PIC register, add
15097	 addend and subtract pic_offset_table_rtx.  This can happen e.g.
15098	 for code like:
15099	 leal (%ebx, %ecx, 4), %ecx
15100	 ...
15101	 movl foo@GOTOFF(%ecx), %edx
15102	 in which case we return (%ecx - %ebx) + foo
15103	 or (%ecx - _GLOBAL_OFFSET_TABLE_) + foo if pseudo_pic_reg
15104	 and reload has completed.  */
15105      if (pic_offset_table_rtx
15106	  && (!reload_completed || !ix86_use_pseudo_pic_reg ()))
15107        result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
15108						     pic_offset_table_rtx),
15109			       result);
15110      else if (pic_offset_table_rtx && !TARGET_MACHO && !TARGET_VXWORKS_RTP)
15111	{
15112	  rtx tmp = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
15113	  tmp = gen_rtx_MINUS (Pmode, copy_rtx (addend), tmp);
15114	  result = gen_rtx_PLUS (Pmode, tmp, result);
15115	}
15116      else
15117	return orig_x;
15118    }
15119  if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
15120    {
15121      result = simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
15122      if (result == NULL_RTX)
15123	return orig_x;
15124    }
15125  return result;
15126}
15127
15128/* If X is a machine specific address (i.e. a symbol or label being
15129   referenced as a displacement from the GOT implemented using an
15130   UNSPEC), then return the base term.  Otherwise return X.  */
15131
15132rtx
15133ix86_find_base_term (rtx x)
15134{
15135  rtx term;
15136
15137  if (TARGET_64BIT)
15138    {
15139      if (GET_CODE (x) != CONST)
15140	return x;
15141      term = XEXP (x, 0);
15142      if (GET_CODE (term) == PLUS
15143	  && (CONST_INT_P (XEXP (term, 1))
15144	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
15145	term = XEXP (term, 0);
15146      if (GET_CODE (term) != UNSPEC
15147	  || (XINT (term, 1) != UNSPEC_GOTPCREL
15148	      && XINT (term, 1) != UNSPEC_PCREL))
15149	return x;
15150
15151      return XVECEXP (term, 0, 0);
15152    }
15153
15154  return ix86_delegitimize_address (x);
15155}
15156
15157static void
15158put_condition_code (enum rtx_code code, machine_mode mode, bool reverse,
15159		    bool fp, FILE *file)
15160{
15161  const char *suffix;
15162
15163  if (mode == CCFPmode || mode == CCFPUmode)
15164    {
15165      code = ix86_fp_compare_code_to_integer (code);
15166      mode = CCmode;
15167    }
15168  if (reverse)
15169    code = reverse_condition (code);
15170
15171  switch (code)
15172    {
15173    case EQ:
15174      switch (mode)
15175	{
15176	case CCAmode:
15177	  suffix = "a";
15178	  break;
15179
15180	case CCCmode:
15181	  suffix = "c";
15182	  break;
15183
15184	case CCOmode:
15185	  suffix = "o";
15186	  break;
15187
15188	case CCSmode:
15189	  suffix = "s";
15190	  break;
15191
15192	default:
15193	  suffix = "e";
15194	}
15195      break;
15196    case NE:
15197      switch (mode)
15198	{
15199	case CCAmode:
15200	  suffix = "na";
15201	  break;
15202
15203	case CCCmode:
15204	  suffix = "nc";
15205	  break;
15206
15207	case CCOmode:
15208	  suffix = "no";
15209	  break;
15210
15211	case CCSmode:
15212	  suffix = "ns";
15213	  break;
15214
15215	default:
15216	  suffix = "ne";
15217	}
15218      break;
15219    case GT:
15220      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
15221      suffix = "g";
15222      break;
15223    case GTU:
15224      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
15225	 Those same assemblers have the same but opposite lossage on cmov.  */
15226      if (mode == CCmode)
15227	suffix = fp ? "nbe" : "a";
15228      else
15229	gcc_unreachable ();
15230      break;
15231    case LT:
15232      switch (mode)
15233	{
15234	case CCNOmode:
15235	case CCGOCmode:
15236	  suffix = "s";
15237	  break;
15238
15239	case CCmode:
15240	case CCGCmode:
15241	  suffix = "l";
15242	  break;
15243
15244	default:
15245	  gcc_unreachable ();
15246	}
15247      break;
15248    case LTU:
15249      if (mode == CCmode)
15250	suffix = "b";
15251      else if (mode == CCCmode)
15252	suffix = fp ? "b" : "c";
15253      else
15254	gcc_unreachable ();
15255      break;
15256    case GE:
15257      switch (mode)
15258	{
15259	case CCNOmode:
15260	case CCGOCmode:
15261	  suffix = "ns";
15262	  break;
15263
15264	case CCmode:
15265	case CCGCmode:
15266	  suffix = "ge";
15267	  break;
15268
15269	default:
15270	  gcc_unreachable ();
15271	}
15272      break;
15273    case GEU:
15274      if (mode == CCmode)
15275	suffix = "nb";
15276      else if (mode == CCCmode)
15277	suffix = fp ? "nb" : "nc";
15278      else
15279	gcc_unreachable ();
15280      break;
15281    case LE:
15282      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
15283      suffix = "le";
15284      break;
15285    case LEU:
15286      if (mode == CCmode)
15287	suffix = "be";
15288      else
15289	gcc_unreachable ();
15290      break;
15291    case UNORDERED:
15292      suffix = fp ? "u" : "p";
15293      break;
15294    case ORDERED:
15295      suffix = fp ? "nu" : "np";
15296      break;
15297    default:
15298      gcc_unreachable ();
15299    }
15300  fputs (suffix, file);
15301}
15302
15303/* Print the name of register X to FILE based on its machine mode and number.
15304   If CODE is 'w', pretend the mode is HImode.
15305   If CODE is 'b', pretend the mode is QImode.
15306   If CODE is 'k', pretend the mode is SImode.
15307   If CODE is 'q', pretend the mode is DImode.
15308   If CODE is 'x', pretend the mode is V4SFmode.
15309   If CODE is 't', pretend the mode is V8SFmode.
15310   If CODE is 'g', pretend the mode is V16SFmode.
15311   If CODE is 'h', pretend the reg is the 'high' byte register.
15312   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
15313   If CODE is 'd', duplicate the operand for AVX instruction.
15314 */
15315
15316void
15317print_reg (rtx x, int code, FILE *file)
15318{
15319  const char *reg;
15320  unsigned int regno;
15321  bool duplicated = code == 'd' && TARGET_AVX;
15322
15323  if (ASSEMBLER_DIALECT == ASM_ATT)
15324    putc ('%', file);
15325
15326  if (x == pc_rtx)
15327    {
15328      gcc_assert (TARGET_64BIT);
15329      fputs ("rip", file);
15330      return;
15331    }
15332
15333  regno = true_regnum (x);
15334  gcc_assert (regno != ARG_POINTER_REGNUM
15335	      && regno != FRAME_POINTER_REGNUM
15336	      && regno != FLAGS_REG
15337	      && regno != FPSR_REG
15338	      && regno != FPCR_REG);
15339
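  /* From here on CODE holds the operand size in bytes, with 0 standing
     for a high QImode register and 3 for an x87 stack register.  */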
15340  if (code == 'w' || MMX_REG_P (x))
15341    code = 2;
15342  else if (code == 'b')
15343    code = 1;
15344  else if (code == 'k')
15345    code = 4;
15346  else if (code == 'q')
15347    code = 8;
15348  else if (code == 'y')
15349    code = 3;
15350  else if (code == 'h')
15351    code = 0;
15352  else if (code == 'x')
15353    code = 16;
15354  else if (code == 't')
15355    code = 32;
15356  else if (code == 'g')
15357    code = 64;
15358  else
15359    code = GET_MODE_SIZE (GET_MODE (x));
15360
  /* Irritatingly, AMD extended registers use a different naming convention
     from the normal registers: "r%d[bwd]".  */
15363  if (REX_INT_REGNO_P (regno))
15364    {
15365      gcc_assert (TARGET_64BIT);
15366      putc ('r', file);
15367      fprint_ul (file, regno - FIRST_REX_INT_REG + 8);
15368      switch (code)
15369	{
15370	  case 0:
15371	    error ("extended registers have no high halves");
15372	    break;
15373	  case 1:
15374	    putc ('b', file);
15375	    break;
15376	  case 2:
15377	    putc ('w', file);
15378	    break;
15379	  case 4:
15380	    putc ('d', file);
15381	    break;
15382	  case 8:
15383	    /* no suffix */
15384	    break;
15385	  default:
15386	    error ("unsupported operand size for extended register");
15387	    break;
15388	}
15389      return;
15390    }
15391
15392  reg = NULL;
15393  switch (code)
15394    {
15395    case 3:
15396      if (STACK_TOP_P (x))
15397	{
15398	  reg = "st(0)";
15399	  break;
15400	}
15401      /* FALLTHRU */
15402    case 8:
15403    case 4:
15404    case 12:
15405      if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x) && ! ANY_BND_REG_P (x))
15406	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
15407      /* FALLTHRU */
15408    case 16:
15409    case 2:
15410    normal:
15411      reg = hi_reg_name[regno];
15412      break;
15413    case 1:
15414      if (regno >= ARRAY_SIZE (qi_reg_name))
15415	goto normal;
15416      reg = qi_reg_name[regno];
15417      break;
15418    case 0:
15419      if (regno >= ARRAY_SIZE (qi_high_reg_name))
15420	goto normal;
15421      reg = qi_high_reg_name[regno];
15422      break;
15423    case 32:
15424      if (SSE_REG_P (x))
15425	{
15426	  gcc_assert (!duplicated);
15427	  putc ('y', file);
15428	  fputs (hi_reg_name[regno] + 1, file);
15429	  return;
15430	}
15431    case 64:
15432      if (SSE_REG_P (x))
15433        {
15434          gcc_assert (!duplicated);
15435          putc ('z', file);
15436          fputs (hi_reg_name[REGNO (x)] + 1, file);
15437          return;
15438        }
15439      break;
15440    default:
15441      gcc_unreachable ();
15442    }
15443
15444  fputs (reg, file);
15445  if (duplicated)
15446    {
15447      if (ASSEMBLER_DIALECT == ASM_ATT)
15448	fprintf (file, ", %%%s", reg);
15449      else
15450	fprintf (file, ", %s", reg);
15451    }
15452}
15453
15454/* Meaning of CODE:
15455   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
15456   C -- print opcode suffix for set/cmov insn.
15457   c -- like C, but print reversed condition
15458   F,f -- likewise, but for floating-point.
15459   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
15460	otherwise nothing
   R -- print embedded rounding and sae.
15462   r -- print only sae.
15463   z -- print the opcode suffix for the size of the current operand.
15464   Z -- likewise, with special suffixes for x87 instructions.
15465   * -- print a star (in certain assembler syntax)
15466   A -- print an absolute memory reference.
15467   E -- print address with DImode register names if TARGET_64BIT.
15468   w -- print the operand as if it's a "word" (HImode) even if it isn't.
   s -- print a shift double count, followed by the assembler's argument
15470	delimiter.
15471   b -- print the QImode name of the register for the indicated operand.
15472	%b0 would print %al if operands[0] is reg 0.
15473   w --  likewise, print the HImode name of the register.
15474   k --  likewise, print the SImode name of the register.
15475   q --  likewise, print the DImode name of the register.
15476   x --  likewise, print the V4SFmode name of the register.
15477   t --  likewise, print the V8SFmode name of the register.
15478   g --  likewise, print the V16SFmode name of the register.
15479   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
15480   y -- print "st(0)" instead of "st" as a register.
15481   d -- print duplicated register operand for AVX instruction.
15482   D -- print condition for SSE cmp instruction.
15483   P -- if PIC, print an @PLT suffix.
15484   p -- print raw symbol name.
15485   X -- don't print any sort of PIC '@' suffix for a symbol.
15486   & -- print some in-use local-dynamic symbol name.
15487   H -- print a memory address offset by 8; used for sse high-parts
15488   Y -- print condition for XOP pcom* instruction.
15489   + -- print a branch hint as 'cs' or 'ds' prefix
15490   ; -- print a semicolon (after prefixes due to bug in older gas).
15491   ~ -- print "i" if TARGET_AVX2, "f" otherwise.
15492   @ -- print a segment register of thread base pointer load
15493   ^ -- print addr32 prefix if TARGET_64BIT and Pmode != word_mode
15494   ! -- print MPX prefix for jxx/call/ret instructions if required.
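   K -- print an HLE xacquire/xrelease prefix when the operand requests one.
   N -- print "{z}" (AVX-512 zeroing-masking) when the operand is zero.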
15495 */
15496
15497void
15498ix86_print_operand (FILE *file, rtx x, int code)
15499{
15500  if (code)
15501    {
15502      switch (code)
15503	{
15504	case 'A':
15505	  switch (ASSEMBLER_DIALECT)
15506	    {
15507	    case ASM_ATT:
15508	      putc ('*', file);
15509	      break;
15510
15511	    case ASM_INTEL:
	      /* Intel syntax.  For absolute addresses, registers should not
		 be surrounded by brackets.  */
15514	      if (!REG_P (x))
15515		{
15516		  putc ('[', file);
15517		  ix86_print_operand (file, x, 0);
15518		  putc (']', file);
15519		  return;
15520		}
15521	      break;
15522
15523	    default:
15524	      gcc_unreachable ();
15525	    }
15526
15527	  ix86_print_operand (file, x, 0);
15528	  return;
15529
15530	case 'E':
15531	  /* Wrap address in an UNSPEC to declare special handling.  */
15532	  if (TARGET_64BIT)
15533	    x = gen_rtx_UNSPEC (DImode, gen_rtvec (1, x), UNSPEC_LEA_ADDR);
15534
15535	  output_address (x);
15536	  return;
15537
15538	case 'L':
15539	  if (ASSEMBLER_DIALECT == ASM_ATT)
15540	    putc ('l', file);
15541	  return;
15542
15543	case 'W':
15544	  if (ASSEMBLER_DIALECT == ASM_ATT)
15545	    putc ('w', file);
15546	  return;
15547
15548	case 'B':
15549	  if (ASSEMBLER_DIALECT == ASM_ATT)
15550	    putc ('b', file);
15551	  return;
15552
15553	case 'Q':
15554	  if (ASSEMBLER_DIALECT == ASM_ATT)
15555	    putc ('l', file);
15556	  return;
15557
15558	case 'S':
15559	  if (ASSEMBLER_DIALECT == ASM_ATT)
15560	    putc ('s', file);
15561	  return;
15562
15563	case 'T':
15564	  if (ASSEMBLER_DIALECT == ASM_ATT)
15565	    putc ('t', file);
15566	  return;
15567
15568	case 'O':
15569#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15570	  if (ASSEMBLER_DIALECT != ASM_ATT)
15571	    return;
15572
15573	  switch (GET_MODE_SIZE (GET_MODE (x)))
15574	    {
15575	    case 2:
15576	      putc ('w', file);
15577	      break;
15578
15579	    case 4:
15580	      putc ('l', file);
15581	      break;
15582
15583	    case 8:
15584	      putc ('q', file);
15585	      break;
15586
15587	    default:
15588	      output_operand_lossage
15589		("invalid operand size for operand code 'O'");
15590	      return;
15591	    }
15592
15593	  putc ('.', file);
15594#endif
15595	  return;
15596
15597	case 'z':
15598	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15599	    {
	      /* Opcodes don't get size suffixes if using Intel syntax.  */
15601	      if (ASSEMBLER_DIALECT == ASM_INTEL)
15602		return;
15603
15604	      switch (GET_MODE_SIZE (GET_MODE (x)))
15605		{
15606		case 1:
15607		  putc ('b', file);
15608		  return;
15609
15610		case 2:
15611		  putc ('w', file);
15612		  return;
15613
15614		case 4:
15615		  putc ('l', file);
15616		  return;
15617
15618		case 8:
15619		  putc ('q', file);
15620		  return;
15621
15622		default:
15623		  output_operand_lossage
15624		    ("invalid operand size for operand code 'z'");
15625		  return;
15626		}
15627	    }
15628
15629	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15630	    warning
15631	      (0, "non-integer operand used with operand code 'z'");
15632	  /* FALLTHRU */
15633
15634	case 'Z':
	  /* 387 opcodes don't get size suffixes if using Intel syntax.  */
15636	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15637	    return;
15638
15639	  if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
15640	    {
15641	      switch (GET_MODE_SIZE (GET_MODE (x)))
15642		{
15643		case 2:
15644#ifdef HAVE_AS_IX86_FILDS
15645		  putc ('s', file);
15646#endif
15647		  return;
15648
15649		case 4:
15650		  putc ('l', file);
15651		  return;
15652
15653		case 8:
15654#ifdef HAVE_AS_IX86_FILDQ
15655		  putc ('q', file);
15656#else
15657		  fputs ("ll", file);
15658#endif
15659		  return;
15660
15661		default:
15662		  break;
15663		}
15664	    }
15665	  else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15666	    {
15667	      /* 387 opcodes don't get size suffixes
15668		 if the operands are registers.  */
15669	      if (STACK_REG_P (x))
15670		return;
15671
15672	      switch (GET_MODE_SIZE (GET_MODE (x)))
15673		{
15674		case 4:
15675		  putc ('s', file);
15676		  return;
15677
15678		case 8:
15679		  putc ('l', file);
15680		  return;
15681
15682		case 12:
15683		case 16:
15684		  putc ('t', file);
15685		  return;
15686
15687		default:
15688		  break;
15689		}
15690	    }
15691	  else
15692	    {
15693	      output_operand_lossage
15694		("invalid operand type used with operand code 'Z'");
15695	      return;
15696	    }
15697
15698	  output_operand_lossage
15699	    ("invalid operand size for operand code 'Z'");
15700	  return;
15701
15702	case 'd':
15703	case 'b':
15704	case 'w':
15705	case 'k':
15706	case 'q':
15707	case 'h':
15708	case 't':
15709	case 'g':
15710	case 'y':
15711	case 'x':
15712	case 'X':
15713	case 'P':
15714	case 'p':
15715	  break;
15716
15717	case 's':
15718	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
15719	    {
15720	      ix86_print_operand (file, x, 0);
15721	      fputs (", ", file);
15722	    }
15723	  return;
15724
15725	case 'Y':
15726	  switch (GET_CODE (x))
15727	    {
15728	    case NE:
15729	      fputs ("neq", file);
15730	      break;
15731	    case EQ:
15732	      fputs ("eq", file);
15733	      break;
15734	    case GE:
15735	    case GEU:
15736	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
15737	      break;
15738	    case GT:
15739	    case GTU:
15740	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
15741	      break;
15742	    case LE:
15743	    case LEU:
15744	      fputs ("le", file);
15745	      break;
15746	    case LT:
15747	    case LTU:
15748	      fputs ("lt", file);
15749	      break;
15750	    case UNORDERED:
15751	      fputs ("unord", file);
15752	      break;
15753	    case ORDERED:
15754	      fputs ("ord", file);
15755	      break;
15756	    case UNEQ:
15757	      fputs ("ueq", file);
15758	      break;
15759	    case UNGE:
15760	      fputs ("nlt", file);
15761	      break;
15762	    case UNGT:
15763	      fputs ("nle", file);
15764	      break;
15765	    case UNLE:
15766	      fputs ("ule", file);
15767	      break;
15768	    case UNLT:
15769	      fputs ("ult", file);
15770	      break;
15771	    case LTGT:
15772	      fputs ("une", file);
15773	      break;
15774	    default:
15775	      output_operand_lossage ("operand is not a condition code, "
15776				      "invalid operand code 'Y'");
15777	      return;
15778	    }
15779	  return;
15780
15781	case 'D':
	  /* A little bit of braindamage here.  The SSE compare instructions
	     use completely different names for the comparisons than the
	     fp conditional moves do.  */
15785	  switch (GET_CODE (x))
15786	    {
15787	    case UNEQ:
15788	      if (TARGET_AVX)
15789		{
15790		  fputs ("eq_us", file);
15791		  break;
15792		}
15793	    case EQ:
15794	      fputs ("eq", file);
15795	      break;
15796	    case UNLT:
15797	      if (TARGET_AVX)
15798		{
15799		  fputs ("nge", file);
15800		  break;
15801		}
15802	    case LT:
15803	      fputs ("lt", file);
15804	      break;
15805	    case UNLE:
15806	      if (TARGET_AVX)
15807		{
15808		  fputs ("ngt", file);
15809		  break;
15810		}
15811	    case LE:
15812	      fputs ("le", file);
15813	      break;
15814	    case UNORDERED:
15815	      fputs ("unord", file);
15816	      break;
15817	    case LTGT:
15818	      if (TARGET_AVX)
15819		{
15820		  fputs ("neq_oq", file);
15821		  break;
15822		}
15823	    case NE:
15824	      fputs ("neq", file);
15825	      break;
15826	    case GE:
15827	      if (TARGET_AVX)
15828		{
15829		  fputs ("ge", file);
15830		  break;
15831		}
15832	    case UNGE:
15833	      fputs ("nlt", file);
15834	      break;
15835	    case GT:
15836	      if (TARGET_AVX)
15837		{
15838		  fputs ("gt", file);
15839		  break;
15840		}
15841	    case UNGT:
15842	      fputs ("nle", file);
15843	      break;
15844	    case ORDERED:
15845	      fputs ("ord", file);
15846	      break;
15847	    default:
15848	      output_operand_lossage ("operand is not a condition code, "
15849				      "invalid operand code 'D'");
15850	      return;
15851	    }
15852	  return;
15853
15854	case 'F':
15855	case 'f':
15856#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
15857	  if (ASSEMBLER_DIALECT == ASM_ATT)
15858	    putc ('.', file);
15859#endif
15860
15861	case 'C':
15862	case 'c':
15863	  if (!COMPARISON_P (x))
15864	    {
15865	      output_operand_lossage ("operand is not a condition code, "
15866				      "invalid operand code '%c'", code);
15867	      return;
15868	    }
15869	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)),
15870			      code == 'c' || code == 'f',
15871			      code == 'F' || code == 'f',
15872			      file);
15873	  return;
15874
15875	case 'H':
15876	  if (!offsettable_memref_p (x))
15877	    {
15878	      output_operand_lossage ("operand is not an offsettable memory "
15879				      "reference, invalid operand code 'H'");
15880	      return;
15881	    }
15882	  /* It doesn't actually matter what mode we use here, as we're
15883	     only going to use this for printing.  */
15884	  x = adjust_address_nv (x, DImode, 8);
15885	  /* Output 'qword ptr' for intel assembler dialect.  */
15886	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15887	    code = 'q';
15888	  break;
15889
15890	case 'K':
15891	  gcc_assert (CONST_INT_P (x));
15892
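	  /* Emit the HLE hint; if the assembler does not understand
	     xacquire/xrelease, fall back to emitting the raw 0xf2/0xf3
	     prefix bytes.  */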
15893	  if (INTVAL (x) & IX86_HLE_ACQUIRE)
15894#ifdef HAVE_AS_IX86_HLE
15895	    fputs ("xacquire ", file);
15896#else
15897	    fputs ("\n" ASM_BYTE "0xf2\n\t", file);
15898#endif
15899	  else if (INTVAL (x) & IX86_HLE_RELEASE)
15900#ifdef HAVE_AS_IX86_HLE
15901	    fputs ("xrelease ", file);
15902#else
15903	    fputs ("\n" ASM_BYTE "0xf3\n\t", file);
15904#endif
	  /* We do not want to print the value of the operand.  */
15906	  return;
15907
15908	case 'N':
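	  /* An all-zero merge operand selects EVEX zeroing-masking.  */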
15909	  if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
15910	    fputs ("{z}", file);
15911	  return;
15912
15913	case 'r':
15914	  gcc_assert (CONST_INT_P (x));
15915	  gcc_assert (INTVAL (x) == ROUND_SAE);
15916
15917	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15918	    fputs (", ", file);
15919
15920	  fputs ("{sae}", file);
15921
15922	  if (ASSEMBLER_DIALECT == ASM_ATT)
15923	    fputs (", ", file);
15924
15925	  return;
15926
15927	case 'R':
15928	  gcc_assert (CONST_INT_P (x));
15929
15930	  if (ASSEMBLER_DIALECT == ASM_INTEL)
15931	    fputs (", ", file);
15932
15933	  switch (INTVAL (x))
15934	    {
15935	    case ROUND_NEAREST_INT | ROUND_SAE:
15936	      fputs ("{rn-sae}", file);
15937	      break;
15938	    case ROUND_NEG_INF | ROUND_SAE:
15939	      fputs ("{rd-sae}", file);
15940	      break;
15941	    case ROUND_POS_INF | ROUND_SAE:
15942	      fputs ("{ru-sae}", file);
15943	      break;
15944	    case ROUND_ZERO | ROUND_SAE:
15945	      fputs ("{rz-sae}", file);
15946	      break;
15947	    default:
15948	      gcc_unreachable ();
15949	    }
15950
15951	  if (ASSEMBLER_DIALECT == ASM_ATT)
15952	    fputs (", ", file);
15953
15954	  return;
15955
15956	case '*':
15957	  if (ASSEMBLER_DIALECT == ASM_ATT)
15958	    putc ('*', file);
15959	  return;
15960
15961	case '&':
15962	  {
15963	    const char *name = get_some_local_dynamic_name ();
15964	    if (name == NULL)
15965	      output_operand_lossage ("'%%&' used without any "
15966				      "local dynamic TLS references");
15967	    else
15968	      assemble_name (file, name);
15969	    return;
15970	  }
15971
15972	case '+':
15973	  {
15974	    rtx x;
15975
15976	    if (!optimize
15977	        || optimize_function_for_size_p (cfun)
15978		|| !TARGET_BRANCH_PREDICTION_HINTS)
15979	      return;
15980
15981	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
15982	    if (x)
15983	      {
15984		int pred_val = XINT (x, 0);
15985
15986		if (pred_val < REG_BR_PROB_BASE * 45 / 100
15987		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
15988		  {
15989		    bool taken = pred_val > REG_BR_PROB_BASE / 2;
15990		    bool cputaken
15991		      = final_forward_branch_p (current_output_insn) == 0;
15992
		    /* Emit hints only when the default branch prediction
		       heuristics would fail.  */
15995		    if (taken != cputaken)
15996		      {
15997			/* We use 3e (DS) prefix for taken branches and
15998			   2e (CS) prefix for not taken branches.  */
15999			if (taken)
16000			  fputs ("ds ; ", file);
16001			else
16002			  fputs ("cs ; ", file);
16003		      }
16004		  }
16005	      }
16006	    return;
16007	  }
16008
16009	case ';':
16010#ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
16011	  putc (';', file);
16012#endif
16013	  return;
16014
16015	case '@':
16016	  if (ASSEMBLER_DIALECT == ASM_ATT)
16017	    putc ('%', file);
16018
16019	  /* The kernel uses a different segment register for performance
16020	     reasons; a system call would not have to trash the userspace
16021	     segment register, which would be expensive.  */
16022	  if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
16023	    fputs ("fs", file);
16024	  else
16025	    fputs ("gs", file);
16026	  return;
16027
16028	case '~':
16029	  putc (TARGET_AVX2 ? 'i' : 'f', file);
16030	  return;
16031
16032	case '^':
16033	  if (TARGET_64BIT && Pmode != word_mode)
16034	    fputs ("addr32 ", file);
16035	  return;
16036
16037	case '!':
16038	  if (ix86_bnd_prefixed_insn_p (current_output_insn))
16039	    fputs ("bnd ", file);
16040	  return;
16041
16042	default:
16043	    output_operand_lossage ("invalid operand code '%c'", code);
16044	}
16045    }
16046
16047  if (REG_P (x))
16048    print_reg (x, code, file);
16049
16050  else if (MEM_P (x))
16051    {
16052      /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
16053      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
16054	  && GET_MODE (x) != BLKmode)
16055	{
16056	  const char * size;
16057	  switch (GET_MODE_SIZE (GET_MODE (x)))
16058	    {
16059	    case 1: size = "BYTE"; break;
16060	    case 2: size = "WORD"; break;
16061	    case 4: size = "DWORD"; break;
16062	    case 8: size = "QWORD"; break;
16063	    case 12: size = "TBYTE"; break;
16064	    case 16:
16065	      if (GET_MODE (x) == XFmode)
16066		size = "TBYTE";
16067              else
16068		size = "XMMWORD";
16069              break;
16070	    case 32: size = "YMMWORD"; break;
16071	    case 64: size = "ZMMWORD"; break;
16072	    default:
16073	      gcc_unreachable ();
16074	    }
16075
16076	  /* Check for explicit size override (codes 'b', 'w', 'k',
16077	     'q' and 'x')  */
16078	  if (code == 'b')
16079	    size = "BYTE";
16080	  else if (code == 'w')
16081	    size = "WORD";
16082	  else if (code == 'k')
16083	    size = "DWORD";
16084	  else if (code == 'q')
16085	    size = "QWORD";
16086	  else if (code == 'x')
16087	    size = "XMMWORD";
16088
16089	  fputs (size, file);
16090	  fputs (" PTR ", file);
16091	}
16092
16093      x = XEXP (x, 0);
16094      /* Avoid (%rip) for call operands.  */
16095      if (CONSTANT_ADDRESS_P (x) && code == 'P'
16096	  && !CONST_INT_P (x))
16097	output_addr_const (file, x);
16098      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
16099	output_operand_lossage ("invalid constraints for operand");
16100      else
16101	output_address (x);
16102    }
16103
16104  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
16105    {
16106      REAL_VALUE_TYPE r;
16107      long l;
16108
16109      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16110      REAL_VALUE_TO_TARGET_SINGLE (r, l);
16111
16112      if (ASSEMBLER_DIALECT == ASM_ATT)
16113	putc ('$', file);
16114      /* Sign extend 32bit SFmode immediate to 8 bytes.  */
16115      if (code == 'q')
16116	fprintf (file, "0x%08" HOST_LONG_LONG_FORMAT "x",
16117		 (unsigned long long) (int) l);
16118      else
16119	fprintf (file, "0x%08x", (unsigned int) l);
16120    }
16121
16122  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
16123    {
16124      REAL_VALUE_TYPE r;
16125      long l[2];
16126
16127      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
16128      REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16129
16130      if (ASSEMBLER_DIALECT == ASM_ATT)
16131	putc ('$', file);
16132      fprintf (file, "0x%lx%08lx", l[1] & 0xffffffff, l[0] & 0xffffffff);
16133    }
16134
16135  /* These float cases don't actually occur as immediate operands.  */
16136  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == XFmode)
16137    {
16138      char dstr[30];
16139
16140      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
16141      fputs (dstr, file);
16142    }
16143
16144  else
16145    {
16146      /* We have patterns that allow zero sets of memory, for instance.
16147	 In 64-bit mode, we should probably support all 8-byte vectors,
16148	 since we can in fact encode that into an immediate.  */
16149      if (GET_CODE (x) == CONST_VECTOR)
16150	{
16151	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
16152	  x = const0_rtx;
16153	}
16154
16155      if (code != 'P' && code != 'p')
16156	{
16157	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
16158	    {
16159	      if (ASSEMBLER_DIALECT == ASM_ATT)
16160		putc ('$', file);
16161	    }
16162	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
16163		   || GET_CODE (x) == LABEL_REF)
16164	    {
16165	      if (ASSEMBLER_DIALECT == ASM_ATT)
16166		putc ('$', file);
16167	      else
16168		fputs ("OFFSET FLAT:", file);
16169	    }
16170	}
16171      if (CONST_INT_P (x))
16172	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
16173      else if (flag_pic || MACHOPIC_INDIRECT)
16174	output_pic_addr_const (file, x, code);
16175      else
16176	output_addr_const (file, x);
16177    }
16178}
16179
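/* Implementation of TARGET_PRINT_OPERAND_PUNCT_VALID_P: return true if CODE
   is a punctuation character handled specially by ix86_print_operand.  */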
16180static bool
16181ix86_print_operand_punct_valid_p (unsigned char code)
16182{
16183  return (code == '@' || code == '*' || code == '+' || code == '&'
16184	  || code == ';' || code == '~' || code == '^' || code == '!');
16185}
16186
16187/* Print a memory operand whose address is ADDR.  */
16188
16189static void
16190ix86_print_operand_address (FILE *file, rtx addr)
16191{
16192  struct ix86_address parts;
16193  rtx base, index, disp;
16194  int scale;
16195  int ok;
16196  bool vsib = false;
16197  int code = 0;
16198
16199  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_VSIBADDR)
16200    {
16201      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16202      gcc_assert (parts.index == NULL_RTX);
16203      parts.index = XVECEXP (addr, 0, 1);
16204      parts.scale = INTVAL (XVECEXP (addr, 0, 2));
16205      addr = XVECEXP (addr, 0, 0);
16206      vsib = true;
16207    }
16208  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LEA_ADDR)
16209    {
16210      gcc_assert (TARGET_64BIT);
16211      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16212      code = 'q';
16213    }
16214  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDMK_ADDR)
16215    {
16216      ok = ix86_decompose_address (XVECEXP (addr, 0, 1), &parts);
16217      gcc_assert (parts.base == NULL_RTX || parts.index == NULL_RTX);
16218      if (parts.base != NULL_RTX)
16219	{
16220	  parts.index = parts.base;
16221	  parts.scale = 1;
16222	}
16223      parts.base = XVECEXP (addr, 0, 0);
16224      addr = XVECEXP (addr, 0, 0);
16225    }
16226  else if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_BNDLDX_ADDR)
16227    {
16228      ok = ix86_decompose_address (XVECEXP (addr, 0, 0), &parts);
16229      gcc_assert (parts.index == NULL_RTX);
16230      parts.index = XVECEXP (addr, 0, 1);
16231      addr = XVECEXP (addr, 0, 0);
16232    }
16233  else
16234    ok = ix86_decompose_address (addr, &parts);
16235
16236  gcc_assert (ok);
16237
16238  base = parts.base;
16239  index = parts.index;
16240  disp = parts.disp;
16241  scale = parts.scale;
16242
16243  switch (parts.seg)
16244    {
16245    case SEG_DEFAULT:
16246      break;
16247    case SEG_FS:
16248    case SEG_GS:
16249      if (ASSEMBLER_DIALECT == ASM_ATT)
16250	putc ('%', file);
16251      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
16252      break;
16253    default:
16254      gcc_unreachable ();
16255    }
16256
16257  /* Use one byte shorter RIP relative addressing for 64bit mode.  */
16258  if (TARGET_64BIT && !base && !index)
16259    {
16260      rtx symbol = disp;
16261
16262      if (GET_CODE (disp) == CONST
16263	  && GET_CODE (XEXP (disp, 0)) == PLUS
16264	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16265	symbol = XEXP (XEXP (disp, 0), 0);
16266
16267      if (GET_CODE (symbol) == LABEL_REF
16268	  || (GET_CODE (symbol) == SYMBOL_REF
16269	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
16270	base = pc_rtx;
16271    }
16272  if (!base && !index)
16273    {
16274      /* Displacement only requires special attention.  */
16275
16276      if (CONST_INT_P (disp))
16277	{
16278	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
16279	    fputs ("ds:", file);
16280	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
16281	}
16282      else if (flag_pic)
16283	output_pic_addr_const (file, disp, 0);
16284      else
16285	output_addr_const (file, disp);
16286    }
16287  else
16288    {
16289      /* Print SImode register names to force addr32 prefix.  */
16290      if (SImode_address_operand (addr, VOIDmode))
16291	{
16292#ifdef ENABLE_CHECKING
16293	  gcc_assert (TARGET_64BIT);
16294	  switch (GET_CODE (addr))
16295	    {
16296	    case SUBREG:
16297	      gcc_assert (GET_MODE (addr) == SImode);
16298	      gcc_assert (GET_MODE (SUBREG_REG (addr)) == DImode);
16299	      break;
16300	    case ZERO_EXTEND:
16301	    case AND:
16302	      gcc_assert (GET_MODE (addr) == DImode);
16303	      break;
16304	    default:
16305	      gcc_unreachable ();
16306	    }
16307#endif
16308	  gcc_assert (!code);
16309	  code = 'k';
16310	}
16311      else if (code == 0
16312	       && TARGET_X32
16313	       && disp
16314	       && CONST_INT_P (disp)
16315	       && INTVAL (disp) < -16*1024*1024)
16316	{
16317	  /* X32 runs in 64-bit mode, where displacement, DISP, in
16318	     address DISP(%r64), is encoded as 32-bit immediate sign-
16319	     extended from 32-bit to 64-bit.  For -0x40000300(%r64),
16320	     address is %r64 + 0xffffffffbffffd00.  When %r64 <
16321	     0x40000300, like 0x37ffe064, address is 0xfffffffff7ffdd64,
16322	     which is invalid for x32.  The correct address is %r64
16323	     - 0x40000300 == 0xf7ffdd64.  To properly encode
16324	     -0x40000300(%r64) for x32, we zero-extend negative
16325	     displacement by forcing addr32 prefix which truncates
16326	     0xfffffffff7ffdd64 to 0xf7ffdd64.  In theory, we should
16327	     zero-extend all negative displacements, including -1(%rsp).
16328	     However, for small negative displacements, sign-extension
16329	     won't cause overflow.  We only zero-extend negative
16330	     displacements if they are less than -16*1024*1024, which is also the
16331	     threshold used to check legitimate address displacements for PIC.  */
16332	  code = 'k';
16333	}
16334
16335      if (ASSEMBLER_DIALECT == ASM_ATT)
16336	{
16337	  if (disp)
16338	    {
16339	      if (flag_pic)
16340		output_pic_addr_const (file, disp, 0);
16341	      else if (GET_CODE (disp) == LABEL_REF)
16342		output_asm_label (disp);
16343	      else
16344		output_addr_const (file, disp);
16345	    }
16346
16347	  putc ('(', file);
16348	  if (base)
16349	    print_reg (base, code, file);
16350	  if (index)
16351	    {
16352	      putc (',', file);
16353	      print_reg (index, vsib ? 0 : code, file);
16354	      if (scale != 1 || vsib)
16355		fprintf (file, ",%d", scale);
16356	    }
16357	  putc (')', file);
16358	}
16359      else
16360	{
16361	  rtx offset = NULL_RTX;
16362
16363	  if (disp)
16364	    {
16365	      /* Pull out the offset of a symbol; print any symbol itself.  */
16366	      if (GET_CODE (disp) == CONST
16367		  && GET_CODE (XEXP (disp, 0)) == PLUS
16368		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
16369		{
16370		  offset = XEXP (XEXP (disp, 0), 1);
16371		  disp = gen_rtx_CONST (VOIDmode,
16372					XEXP (XEXP (disp, 0), 0));
16373		}
16374
16375	      if (flag_pic)
16376		output_pic_addr_const (file, disp, 0);
16377	      else if (GET_CODE (disp) == LABEL_REF)
16378		output_asm_label (disp);
16379	      else if (CONST_INT_P (disp))
16380		offset = disp;
16381	      else
16382		output_addr_const (file, disp);
16383	    }
16384
16385	  putc ('[', file);
16386	  if (base)
16387	    {
16388	      print_reg (base, code, file);
16389	      if (offset)
16390		{
16391		  if (INTVAL (offset) >= 0)
16392		    putc ('+', file);
16393		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16394		}
16395	    }
16396	  else if (offset)
16397	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
16398	  else
16399	    putc ('0', file);
16400
16401	  if (index)
16402	    {
16403	      putc ('+', file);
16404	      print_reg (index, vsib ? 0 : code, file);
16405	      if (scale != 1 || vsib)
16406		fprintf (file, "*%d", scale);
16407	    }
16408	  putc (']', file);
16409	}
16410    }
16411}
16412
16413/* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
16414
16415static bool
16416i386_asm_output_addr_const_extra (FILE *file, rtx x)
16417{
16418  rtx op;
16419
16420  if (GET_CODE (x) != UNSPEC)
16421    return false;
16422
16423  op = XVECEXP (x, 0, 0);
16424  switch (XINT (x, 1))
16425    {
16426    case UNSPEC_GOTTPOFF:
16427      output_addr_const (file, op);
16428      /* FIXME: This might be @TPOFF in Sun ld.  */
16429      fputs ("@gottpoff", file);
16430      break;
16431    case UNSPEC_TPOFF:
16432      output_addr_const (file, op);
16433      fputs ("@tpoff", file);
16434      break;
16435    case UNSPEC_NTPOFF:
16436      output_addr_const (file, op);
16437      if (TARGET_64BIT)
16438	fputs ("@tpoff", file);
16439      else
16440	fputs ("@ntpoff", file);
16441      break;
16442    case UNSPEC_DTPOFF:
16443      output_addr_const (file, op);
16444      fputs ("@dtpoff", file);
16445      break;
16446    case UNSPEC_GOTNTPOFF:
16447      output_addr_const (file, op);
16448      if (TARGET_64BIT)
16449	fputs (ASSEMBLER_DIALECT == ASM_ATT ?
16450	       "@gottpoff(%rip)" : "@gottpoff[rip]", file);
16451      else
16452	fputs ("@gotntpoff", file);
16453      break;
16454    case UNSPEC_INDNTPOFF:
16455      output_addr_const (file, op);
16456      fputs ("@indntpoff", file);
16457      break;
16458#if TARGET_MACHO
16459    case UNSPEC_MACHOPIC_OFFSET:
16460      output_addr_const (file, op);
16461      putc ('-', file);
16462      machopic_output_function_base_name (file);
16463      break;
16464#endif
16465
16466    case UNSPEC_STACK_CHECK:
16467      {
16468	int offset;
16469
16470	gcc_assert (flag_split_stack);
16471
16472#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
16473	offset = TARGET_THREAD_SPLIT_STACK_OFFSET;
16474#else
16475	gcc_unreachable ();
16476#endif
16477
16478	fprintf (file, "%s:%d", TARGET_64BIT ? "%fs" : "%gs", offset);
16479      }
16480      break;
16481
16482    default:
16483      return false;
16484    }
16485
16486  return true;
16487}
16488
16489/* Split one or more double-mode RTL references into pairs of half-mode
16490   references.  The RTL can be REG, offsettable MEM, integer constant, or
16491   CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
16492   split and "num" is its length.  lo_half and hi_half are output arrays
16493   that parallel "operands".  */
16494
16495void
16496split_double_mode (machine_mode mode, rtx operands[],
16497		   int num, rtx lo_half[], rtx hi_half[])
16498{
16499  machine_mode half_mode;
16500  unsigned int byte;
16501
16502  switch (mode)
16503    {
16504    case TImode:
16505      half_mode = DImode;
16506      break;
16507    case DImode:
16508      half_mode = SImode;
16509      break;
16510    default:
16511      gcc_unreachable ();
16512    }
16513
16514  byte = GET_MODE_SIZE (half_mode);
16515
16516  while (num--)
16517    {
16518      rtx op = operands[num];
16519
16520	      /* simplify_subreg refuses to split volatile memory addresses,
16521	         but we still have to handle them.  */
16522      if (MEM_P (op))
16523	{
16524	  lo_half[num] = adjust_address (op, half_mode, 0);
16525	  hi_half[num] = adjust_address (op, half_mode, byte);
16526	}
16527      else
16528	{
16529	  lo_half[num] = simplify_gen_subreg (half_mode, op,
16530					      GET_MODE (op) == VOIDmode
16531					      ? mode : GET_MODE (op), 0);
16532	  hi_half[num] = simplify_gen_subreg (half_mode, op,
16533					      GET_MODE (op) == VOIDmode
16534					      ? mode : GET_MODE (op), byte);
16535	}
16536    }
16537}
16538
16539/* Output code to perform a 387 binary operation in INSN, one of PLUS,
16540   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
16541   is the expression of the binary operation.  The output may either be
16542   emitted here, or returned to the caller, like all output_* functions.
16543
16544   There is no guarantee that the operands are the same mode, as they
16545   might be within FLOAT or FLOAT_EXTEND expressions.  */
16546
16547#ifndef SYSV386_COMPAT
16548/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
16549   wants to fix the assemblers because that causes incompatibility
16550   with gcc.  No-one wants to fix gcc because that causes
16551   incompatibility with assemblers...  You can use the option of
16552   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
16553#define SYSV386_COMPAT 1
16554#endif
16555
16556const char *
16557output_387_binary_op (rtx insn, rtx *operands)
16558{
16559  static char buf[40];
16560  const char *p;
16561  const char *ssep;
16562  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
16563
16564#ifdef ENABLE_CHECKING
16565	  /* Even if we do not want to check the inputs, this documents the input
16566	     constraints, which helps in understanding the following code.  */
16567  if (STACK_REG_P (operands[0])
16568      && ((REG_P (operands[1])
16569	   && REGNO (operands[0]) == REGNO (operands[1])
16570	   && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
16571	  || (REG_P (operands[2])
16572	      && REGNO (operands[0]) == REGNO (operands[2])
16573	      && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
16574      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
16575    ; /* ok */
16576  else
16577    gcc_assert (is_sse);
16578#endif
16579
16580  switch (GET_CODE (operands[3]))
16581    {
16582    case PLUS:
16583      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16584	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16585	p = "fiadd";
16586      else
16587	p = "fadd";
16588      ssep = "vadd";
16589      break;
16590
16591    case MINUS:
16592      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16593	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16594	p = "fisub";
16595      else
16596	p = "fsub";
16597      ssep = "vsub";
16598      break;
16599
16600    case MULT:
16601      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16602	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16603	p = "fimul";
16604      else
16605	p = "fmul";
16606      ssep = "vmul";
16607      break;
16608
16609    case DIV:
16610      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
16611	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
16612	p = "fidiv";
16613      else
16614	p = "fdiv";
16615      ssep = "vdiv";
16616      break;
16617
16618    default:
16619      gcc_unreachable ();
16620    }
16621
16622  if (is_sse)
16623   {
16624     if (TARGET_AVX)
16625       {
16626	 strcpy (buf, ssep);
16627	 if (GET_MODE (operands[0]) == SFmode)
16628	   strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
16629	 else
16630	   strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
16631       }
16632     else
16633       {
16634	 strcpy (buf, ssep + 1);
16635	 if (GET_MODE (operands[0]) == SFmode)
16636	   strcat (buf, "ss\t{%2, %0|%0, %2}");
16637	 else
16638	   strcat (buf, "sd\t{%2, %0|%0, %2}");
16639       }
16640      return buf;
16641   }
16642  strcpy (buf, p);
16643
16644  switch (GET_CODE (operands[3]))
16645    {
16646    case MULT:
16647    case PLUS:
16648      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
16649	std::swap (operands[1], operands[2]);
16650
16651	      /* We now know operands[0] == operands[1].  */
16652
16653      if (MEM_P (operands[2]))
16654	{
16655	  p = "%Z2\t%2";
16656	  break;
16657	}
16658
16659      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16660	{
16661	  if (STACK_TOP_P (operands[0]))
16662	    /* How is it that we are storing to a dead operand[2]?
16663	       Well, presumably operands[1] is dead too.  We can't
16664	       store the result to st(0) as st(0) gets popped on this
16665	       instruction.  Instead store to operands[2] (which I
16666	       think has to be st(1)).  st(1) will be popped later.
16667	       gcc <= 2.8.1 didn't have this check and generated
16668	       assembly code that the Unixware assembler rejected.  */
16669	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
16670	  else
16671	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
16672	  break;
16673	}
16674
16675      if (STACK_TOP_P (operands[0]))
16676	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
16677      else
16678	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
16679      break;
16680
16681    case MINUS:
16682    case DIV:
16683      if (MEM_P (operands[1]))
16684	{
16685	  p = "r%Z1\t%1";
16686	  break;
16687	}
16688
16689      if (MEM_P (operands[2]))
16690	{
16691	  p = "%Z2\t%2";
16692	  break;
16693	}
16694
16695      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
16696	{
16697#if SYSV386_COMPAT
16698	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
16699	     derived assemblers, confusingly reverse the direction of
16700	     the operation for fsub{r} and fdiv{r} when the
16701	     destination register is not st(0).  The Intel assembler
16702	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
16703	     figure out what the hardware really does.  */
16704	  if (STACK_TOP_P (operands[0]))
16705	    p = "{p\t%0, %2|rp\t%2, %0}";
16706	  else
16707	    p = "{rp\t%2, %0|p\t%0, %2}";
16708#else
16709	  if (STACK_TOP_P (operands[0]))
16710	    /* As above for fmul/fadd, we can't store to st(0).  */
16711	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
16712	  else
16713	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
16714#endif
16715	  break;
16716	}
16717
16718      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
16719	{
16720#if SYSV386_COMPAT
16721	  if (STACK_TOP_P (operands[0]))
16722	    p = "{rp\t%0, %1|p\t%1, %0}";
16723	  else
16724	    p = "{p\t%1, %0|rp\t%0, %1}";
16725#else
16726	  if (STACK_TOP_P (operands[0]))
16727	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
16728	  else
16729	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
16730#endif
16731	  break;
16732	}
16733
16734      if (STACK_TOP_P (operands[0]))
16735	{
16736	  if (STACK_TOP_P (operands[1]))
16737	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
16738	  else
16739	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
16740	  break;
16741	}
16742      else if (STACK_TOP_P (operands[1]))
16743	{
16744#if SYSV386_COMPAT
16745	  p = "{\t%1, %0|r\t%0, %1}";
16746#else
16747	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
16748#endif
16749	}
16750      else
16751	{
16752#if SYSV386_COMPAT
16753	  p = "{r\t%2, %0|\t%0, %2}";
16754#else
16755	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
16756#endif
16757	}
16758      break;
16759
16760    default:
16761      gcc_unreachable ();
16762    }
16763
16764  strcat (buf, p);
16765  return buf;
16766}
16767
16768/* Check if a 256bit AVX register is referenced inside of EXP.   */
16769
16770static bool
16771ix86_check_avx256_register (const_rtx exp)
16772{
16773  if (GET_CODE (exp) == SUBREG)
16774    exp = SUBREG_REG (exp);
16775
16776  return (REG_P (exp)
16777	  && VALID_AVX256_REG_OR_OI_MODE (GET_MODE (exp)));
16778}
16779
16780/* Return needed mode for entity in optimize_mode_switching pass.  */
16781
16782static int
16783ix86_avx_u128_mode_needed (rtx_insn *insn)
16784{
16785  if (CALL_P (insn))
16786    {
16787      rtx link;
16788
16789      /* Needed mode is set to AVX_U128_CLEAN if there are
16790	 no 256bit modes used in function arguments.  */
16791      for (link = CALL_INSN_FUNCTION_USAGE (insn);
16792	   link;
16793	   link = XEXP (link, 1))
16794	{
16795	  if (GET_CODE (XEXP (link, 0)) == USE)
16796	    {
16797	      rtx arg = XEXP (XEXP (link, 0), 0);
16798
16799	      if (ix86_check_avx256_register (arg))
16800		return AVX_U128_DIRTY;
16801	    }
16802	}
16803
16804      return AVX_U128_CLEAN;
16805    }
16806
16807  /* Require DIRTY mode if a 256bit AVX register is referenced.  Hardware
16808     changes state only when a 256bit register is written to, but we need
16809	     to prevent the compiler from moving the optimal insertion point above
16810	     an eventual read from a 256bit register.  */
16811  subrtx_iterator::array_type array;
16812  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
16813    if (ix86_check_avx256_register (*iter))
16814      return AVX_U128_DIRTY;
16815
16816  return AVX_U128_ANY;
16817}
16818
16819/* Return mode that i387 must be switched into
16820   prior to the execution of insn.  */
16821
16822static int
16823ix86_i387_mode_needed (int entity, rtx_insn *insn)
16824{
16825  enum attr_i387_cw mode;
16826
16827  /* The mode UNINITIALIZED is used to store control word after a
16828	     function call or ASM pattern.  The mode ANY specifies that the function
16829	     has no requirements on the control word and makes no changes in the
16830     bits we are interested in.  */
16831
16832  if (CALL_P (insn)
16833      || (NONJUMP_INSN_P (insn)
16834	  && (asm_noperands (PATTERN (insn)) >= 0
16835	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
16836    return I387_CW_UNINITIALIZED;
16837
16838  if (recog_memoized (insn) < 0)
16839    return I387_CW_ANY;
16840
16841  mode = get_attr_i387_cw (insn);
16842
16843  switch (entity)
16844    {
16845    case I387_TRUNC:
16846      if (mode == I387_CW_TRUNC)
16847	return mode;
16848      break;
16849
16850    case I387_FLOOR:
16851      if (mode == I387_CW_FLOOR)
16852	return mode;
16853      break;
16854
16855    case I387_CEIL:
16856      if (mode == I387_CW_CEIL)
16857	return mode;
16858      break;
16859
16860    case I387_MASK_PM:
16861      if (mode == I387_CW_MASK_PM)
16862	return mode;
16863      break;
16864
16865    default:
16866      gcc_unreachable ();
16867    }
16868
16869  return I387_CW_ANY;
16870}
16871
16872/* Return mode that entity must be switched into
16873   prior to the execution of insn.  */
16874
16875static int
16876ix86_mode_needed (int entity, rtx_insn *insn)
16877{
16878  switch (entity)
16879    {
16880    case AVX_U128:
16881      return ix86_avx_u128_mode_needed (insn);
16882    case I387_TRUNC:
16883    case I387_FLOOR:
16884    case I387_CEIL:
16885    case I387_MASK_PM:
16886      return ix86_i387_mode_needed (entity, insn);
16887    default:
16888      gcc_unreachable ();
16889    }
16890  return 0;
16891}
16892
16893/* Check if a 256bit AVX register is referenced in stores.   */
16894
16895static void
16896ix86_check_avx256_stores (rtx dest, const_rtx, void *data)
16897	{
16898	  if (ix86_check_avx256_register (dest))
16899    {
16900      bool *used = (bool *) data;
16901      *used = true;
16902    }
16903	}
16904
16905/* Calculate mode of upper 128bit AVX registers after the insn.  */
16906
16907static int
16908ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
16909{
16910  rtx pat = PATTERN (insn);
16911
16912  if (vzeroupper_operation (pat, VOIDmode)
16913      || vzeroall_operation (pat, VOIDmode))
16914    return AVX_U128_CLEAN;
16915
16916	  /* We know that the state is clean after a CALL insn if no 256bit
16917	     register is used for the function return value.  */
16918  if (CALL_P (insn))
16919    {
16920      bool avx_reg256_found = false;
16921      note_stores (pat, ix86_check_avx256_stores, &avx_reg256_found);
16922
16923      return avx_reg256_found ? AVX_U128_DIRTY : AVX_U128_CLEAN;
16924    }
16925
16926  /* Otherwise, return current mode.  Remember that if insn
16927     references AVX 256bit registers, the mode was already changed
16928     to DIRTY from MODE_NEEDED.  */
16929  return mode;
16930}
16931
16932/* Return the mode that an insn results in.  */
16933
16934static int
16935ix86_mode_after (int entity, int mode, rtx_insn *insn)
16936{
16937  switch (entity)
16938    {
16939    case AVX_U128:
16940      return ix86_avx_u128_mode_after (mode, insn);
16941    case I387_TRUNC:
16942    case I387_FLOOR:
16943    case I387_CEIL:
16944    case I387_MASK_PM:
16945      return mode;
16946    default:
16947      gcc_unreachable ();
16948    }
16949}
16950
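/* Return the upper-128bit AVX state (AVX_U128 entity mode) assumed at
   function entry, based on the incoming argument registers.  */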
16951static int
16952ix86_avx_u128_mode_entry (void)
16953{
16954  tree arg;
16955
16956  /* Entry mode is set to AVX_U128_DIRTY if there are
16957     256bit modes used in function arguments.  */
16958  for (arg = DECL_ARGUMENTS (current_function_decl); arg;
16959       arg = TREE_CHAIN (arg))
16960    {
16961      rtx incoming = DECL_INCOMING_RTL (arg);
16962
16963      if (incoming && ix86_check_avx256_register (incoming))
16964	return AVX_U128_DIRTY;
16965    }
16966
16967  return AVX_U128_CLEAN;
16968}
16969
16970/* Return a mode that ENTITY is assumed to be
16971   switched to at function entry.  */
16972
16973static int
16974ix86_mode_entry (int entity)
16975{
16976  switch (entity)
16977    {
16978    case AVX_U128:
16979      return ix86_avx_u128_mode_entry ();
16980    case I387_TRUNC:
16981    case I387_FLOOR:
16982    case I387_CEIL:
16983    case I387_MASK_PM:
16984      return I387_CW_ANY;
16985    default:
16986      gcc_unreachable ();
16987    }
16988}
16989
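/* Return the upper-128bit AVX state (AVX_U128 entity mode) assumed at
   function exit, based on the function return register.  */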
16990static int
16991ix86_avx_u128_mode_exit (void)
16992{
16993  rtx reg = crtl->return_rtx;
16994
16995  /* Exit mode is set to AVX_U128_DIRTY if there are
16996     256bit modes used in the function return register.  */
16997  if (reg && ix86_check_avx256_register (reg))
16998    return AVX_U128_DIRTY;
16999
17000  return AVX_U128_CLEAN;
17001}
17002
17003/* Return a mode that ENTITY is assumed to be
17004   switched to at function exit.  */
17005
17006static int
17007ix86_mode_exit (int entity)
17008{
17009  switch (entity)
17010    {
17011    case AVX_U128:
17012      return ix86_avx_u128_mode_exit ();
17013    case I387_TRUNC:
17014    case I387_FLOOR:
17015    case I387_CEIL:
17016    case I387_MASK_PM:
17017      return I387_CW_ANY;
17018    default:
17019      gcc_unreachable ();
17020    }
17021}
17022
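/* Implementation of TARGET_MODE_PRIORITY: process the modes of each
   entity in their natural (numerical) order.  */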
17023static int
17024ix86_mode_priority (int, int n)
17025{
17026  return n;
17027}
17028
17029/* Output code to initialize control word copies used by trunc?f?i and
17030	   rounding patterns.  The current control word is saved to a stack slot,
17031	   while the control word for MODE is computed and stored in MODE's slot.  */
17032
17033static void
17034emit_i387_cw_initialization (int mode)
17035{
17036  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
17037  rtx new_mode;
17038
17039  enum ix86_stack_slot slot;
17040
17041  rtx reg = gen_reg_rtx (HImode);
17042
17043  emit_insn (gen_x86_fnstcw_1 (stored_mode));
17044  emit_move_insn (reg, copy_rtx (stored_mode));
17045
17046  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
17047      || optimize_insn_for_size_p ())
17048    {
17049      switch (mode)
17050	{
17051	case I387_CW_TRUNC:
17052	  /* round toward zero (truncate) */
17053	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
17054	  slot = SLOT_CW_TRUNC;
17055	  break;
17056
17057	case I387_CW_FLOOR:
17058	  /* round down toward -oo */
17059	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17060	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
17061	  slot = SLOT_CW_FLOOR;
17062	  break;
17063
17064	case I387_CW_CEIL:
17065	  /* round up toward +oo */
17066	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
17067	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
17068	  slot = SLOT_CW_CEIL;
17069	  break;
17070
17071	case I387_CW_MASK_PM:
17072	  /* mask precision exception for nearbyint() */
17073	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17074	  slot = SLOT_CW_MASK_PM;
17075	  break;
17076
17077	default:
17078	  gcc_unreachable ();
17079	}
17080    }
17081  else
17082    {
17083      switch (mode)
17084	{
17085	case I387_CW_TRUNC:
17086	  /* round toward zero (truncate) */
17087	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
17088	  slot = SLOT_CW_TRUNC;
17089	  break;
17090
17091	case I387_CW_FLOOR:
17092	  /* round down toward -oo */
17093	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
17094	  slot = SLOT_CW_FLOOR;
17095	  break;
17096
17097	case I387_CW_CEIL:
17098	  /* round up toward +oo */
17099	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
17100	  slot = SLOT_CW_CEIL;
17101	  break;
17102
17103	case I387_CW_MASK_PM:
17104	  /* mask precision exception for nearbyint() */
17105	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
17106	  slot = SLOT_CW_MASK_PM;
17107	  break;
17108
17109	default:
17110	  gcc_unreachable ();
17111	}
17112    }
17113
17114  gcc_assert (slot < MAX_386_STACK_LOCALS);
17115
17116  new_mode = assign_386_stack_local (HImode, slot);
17117  emit_move_insn (new_mode, reg);
17118}
17119
17120/* Emit vzeroupper.  */
17121
17122void
17123ix86_avx_emit_vzeroupper (HARD_REG_SET regs_live)
17124{
17125  int i;
17126
17127  /* Cancel automatic vzeroupper insertion if there are
17128     live call-saved SSE registers at the insertion point.  */
17129
17130  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
17131    if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17132      return;
17133
17134  if (TARGET_64BIT)
17135    for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
17136      if (TEST_HARD_REG_BIT (regs_live, i) && !call_used_regs[i])
17137	return;
17138
17139  emit_insn (gen_avx_vzeroupper ());
17140}
17141
17144	/* Generate one or more insns to set ENTITY to MODE.  REGS_LIVE
17145   is the set of hard registers live at the point where the insn(s)
17146   are to be inserted.  */
17147
17148static void
17149ix86_emit_mode_set (int entity, int mode, int prev_mode ATTRIBUTE_UNUSED,
17150		    HARD_REG_SET regs_live)
17151{
17152  switch (entity)
17153    {
17154    case AVX_U128:
17155      if (mode == AVX_U128_CLEAN)
17156	ix86_avx_emit_vzeroupper (regs_live);
17157      break;
17158    case I387_TRUNC:
17159    case I387_FLOOR:
17160    case I387_CEIL:
17161    case I387_MASK_PM:
17162      if (mode != I387_CW_ANY
17163	  && mode != I387_CW_UNINITIALIZED)
17164	emit_i387_cw_initialization (mode);
17165      break;
17166    default:
17167      gcc_unreachable ();
17168    }
17169}
17170
17171/* Output code for INSN to convert a float to a signed int.  OPERANDS
17172   are the insn operands.  The output may be [HSD]Imode and the input
17173   operand may be [SDX]Fmode.  */
17174
17175const char *
17176output_fix_trunc (rtx_insn *insn, rtx *operands, bool fisttp)
17177{
17178  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17179  int dimode_p = GET_MODE (operands[0]) == DImode;
17180  int round_mode = get_attr_i387_cw (insn);
17181
17182  /* Jump through a hoop or two for DImode, since the hardware has no
17183     non-popping instruction.  We used to do this a different way, but
17184     that was somewhat fragile and broke with post-reload splitters.  */
17185  if ((dimode_p || fisttp) && !stack_top_dies)
17186    output_asm_insn ("fld\t%y1", operands);
17187
17188  gcc_assert (STACK_TOP_P (operands[1]));
17189  gcc_assert (MEM_P (operands[0]));
17190  gcc_assert (GET_MODE (operands[1]) != TFmode);
17191
17192  if (fisttp)
17193	    output_asm_insn ("fisttp%Z0\t%0", operands);
17194  else
17195    {
17196      if (round_mode != I387_CW_ANY)
17197	output_asm_insn ("fldcw\t%3", operands);
17198      if (stack_top_dies || dimode_p)
17199	output_asm_insn ("fistp%Z0\t%0", operands);
17200      else
17201	output_asm_insn ("fist%Z0\t%0", operands);
17202      if (round_mode != I387_CW_ANY)
17203	output_asm_insn ("fldcw\t%2", operands);
17204    }
17205
17206  return "";
17207}
17208
17209/* Output code for x87 ffreep insn.  The OPNO argument, which may only
17210   have the values zero or one, indicates the ffreep insn's operand
17211   from the OPERANDS array.  */
17212
17213static const char *
17214output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
17215{
17216  if (TARGET_USE_FFREEP)
17217#ifdef HAVE_AS_IX86_FFREEP
17218    return opno ? "ffreep\t%y1" : "ffreep\t%y0";
17219#else
17220    {
17221      static char retval[32];
17222      int regno = REGNO (operands[opno]);
17223
17224      gcc_assert (STACK_REGNO_P (regno));
17225
17226      regno -= FIRST_STACK_REG;
17227
17228      snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
17229      return retval;
17230    }
17231#endif
17232
17233  return opno ? "fstp\t%y1" : "fstp\t%y0";
17234}
17235
17236
17237/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
17238   should be used.  UNORDERED_P is true when fucom should be used.  */
17239
17240const char *
17241output_fp_compare (rtx insn, rtx *operands, bool eflags_p, bool unordered_p)
17242{
17243  int stack_top_dies;
17244  rtx cmp_op0, cmp_op1;
17245  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
17246
17247  if (eflags_p)
17248    {
17249      cmp_op0 = operands[0];
17250      cmp_op1 = operands[1];
17251    }
17252  else
17253    {
17254      cmp_op0 = operands[1];
17255      cmp_op1 = operands[2];
17256    }
17257
17258  if (is_sse)
17259    {
17260      if (GET_MODE (operands[0]) == SFmode)
17261	if (unordered_p)
17262	  return "%vucomiss\t{%1, %0|%0, %1}";
17263	else
17264	  return "%vcomiss\t{%1, %0|%0, %1}";
17265      else
17266	if (unordered_p)
17267	  return "%vucomisd\t{%1, %0|%0, %1}";
17268	else
17269	  return "%vcomisd\t{%1, %0|%0, %1}";
17270    }
17271
17272  gcc_assert (STACK_TOP_P (cmp_op0));
17273
17274  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
17275
17276  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
17277    {
17278      if (stack_top_dies)
17279	{
17280	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
17281	  return output_387_ffreep (operands, 1);
17282	}
17283      else
17284	return "ftst\n\tfnstsw\t%0";
17285    }
17286
17287  if (STACK_REG_P (cmp_op1)
17288      && stack_top_dies
17289      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
17290      && REGNO (cmp_op1) != FIRST_STACK_REG)
17291    {
17292	      /* If the top of the 387 stack dies, and the other operand is
17293		 also a stack register that dies, then this must be a
17294		 `fcompp' float compare.  */
17295
17296      if (eflags_p)
17297	{
17298	  /* There is no double popping fcomi variant.  Fortunately,
17299	     eflags is immune from the fstp's cc clobbering.  */
17300	  if (unordered_p)
17301	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
17302	  else
17303	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
17304	  return output_387_ffreep (operands, 0);
17305	}
17306      else
17307	{
17308	  if (unordered_p)
17309	    return "fucompp\n\tfnstsw\t%0";
17310	  else
17311	    return "fcompp\n\tfnstsw\t%0";
17312	}
17313    }
17314  else
17315    {
17316      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
17317
17318      static const char * const alt[16] =
17319      {
17320	"fcom%Z2\t%y2\n\tfnstsw\t%0",
17321	"fcomp%Z2\t%y2\n\tfnstsw\t%0",
17322	"fucom%Z2\t%y2\n\tfnstsw\t%0",
17323	"fucomp%Z2\t%y2\n\tfnstsw\t%0",
17324
17325	"ficom%Z2\t%y2\n\tfnstsw\t%0",
17326	"ficomp%Z2\t%y2\n\tfnstsw\t%0",
17327	NULL,
17328	NULL,
17329
17330	"fcomi\t{%y1, %0|%0, %y1}",
17331	"fcomip\t{%y1, %0|%0, %y1}",
17332	"fucomi\t{%y1, %0|%0, %y1}",
17333	"fucomip\t{%y1, %0|%0, %y1}",
17334
17335	NULL,
17336	NULL,
17337	NULL,
17338	NULL
17339      };
17340
17341      int mask;
17342      const char *ret;
17343
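      /* For example, a register fucomip compare (eflags_p = 1, non-integer
	 operand, unordered_p = 1, stack_top_dies = 1) gives mask 11 and
	 selects "fucomip" above.  */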
17344      mask  = eflags_p << 3;
17345      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
17346      mask |= unordered_p << 1;
17347      mask |= stack_top_dies;
17348
17349      gcc_assert (mask < 16);
17350      ret = alt[mask];
17351      gcc_assert (ret);
17352
17353      return ret;
17354    }
17355}
17356
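/* Output an absolute jump-table element: the address of the code label with
   number VALUE (used for ASM_OUTPUT_ADDR_VEC_ELT).  */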
17357void
17358ix86_output_addr_vec_elt (FILE *file, int value)
17359{
17360  const char *directive = ASM_LONG;
17361
17362#ifdef ASM_QUAD
17363  if (TARGET_LP64)
17364    directive = ASM_QUAD;
17365#else
17366  gcc_assert (!TARGET_64BIT);
17367#endif
17368
17369  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
17370}
17371
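/* Output a relative jump-table element: the difference between the code
   labels numbered VALUE and REL, or an equivalent PIC-safe expression
   (used for ASM_OUTPUT_ADDR_DIFF_ELT).  */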
17372void
17373ix86_output_addr_diff_elt (FILE *file, int value, int rel)
17374{
17375  const char *directive = ASM_LONG;
17376
17377#ifdef ASM_QUAD
17378  if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
17379    directive = ASM_QUAD;
17380#else
17381  gcc_assert (!TARGET_64BIT);
17382#endif
17383  /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand.  */
17384  if (TARGET_64BIT || TARGET_VXWORKS_RTP)
17385    fprintf (file, "%s%s%d-%s%d\n",
17386	     directive, LPREFIX, value, LPREFIX, rel);
17387  else if (HAVE_AS_GOTOFF_IN_DATA)
17388    fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
17389#if TARGET_MACHO
17390  else if (TARGET_MACHO)
17391    {
17392      fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
17393      machopic_output_function_base_name (file);
17394      putc ('\n', file);
17395    }
17396#endif
17397  else
17398    asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
17399		 GOT_SYMBOL_NAME, LPREFIX, value);
17400}
17401
17402/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
17403   for the target.  */
17404
17405void
17406ix86_expand_clear (rtx dest)
17407{
17408  rtx tmp;
17409
17410  /* We play register width games, which are only valid after reload.  */
17411  gcc_assert (reload_completed);
17412
17413  /* Avoid HImode and its attendant prefix byte.  */
17414  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
17415    dest = gen_rtx_REG (SImode, REGNO (dest));
17416  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
17417
17418  if (!TARGET_USE_MOV0 || optimize_insn_for_size_p ())
17419    {
17420      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
17421      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
17422    }
17423
17424  emit_insn (tmp);
17425}
17426
17427/* X is an unchanging MEM.  If it is a constant pool reference, return
17428   the constant pool rtx, else NULL.  */
17429
17430rtx
17431maybe_get_pool_constant (rtx x)
17432{
17433  x = ix86_delegitimize_address (XEXP (x, 0));
17434
17435  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
17436    return get_pool_constant (x);
17437
17438  return NULL_RTX;
17439}
17440
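/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS, PIC and PE-COFF symbol references and forcing operands
   into registers or memory where the move patterns require it.  */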
17441void
17442ix86_expand_move (machine_mode mode, rtx operands[])
17443{
17444  rtx op0, op1;
17445  enum tls_model model;
17446
17447  op0 = operands[0];
17448  op1 = operands[1];
17449
17450  if (GET_CODE (op1) == SYMBOL_REF)
17451    {
17452      rtx tmp;
17453
17454      model = SYMBOL_REF_TLS_MODEL (op1);
17455      if (model)
17456	{
17457	  op1 = legitimize_tls_address (op1, model, true);
17458	  op1 = force_operand (op1, op0);
17459	  if (op1 == op0)
17460	    return;
17461	  op1 = convert_to_mode (mode, op1, 1);
17462	}
17463      else if ((tmp = legitimize_pe_coff_symbol (op1, false)) != NULL_RTX)
17464	op1 = tmp;
17465    }
17466  else if (GET_CODE (op1) == CONST
17467	   && GET_CODE (XEXP (op1, 0)) == PLUS
17468	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
17469    {
17470      rtx addend = XEXP (XEXP (op1, 0), 1);
17471      rtx symbol = XEXP (XEXP (op1, 0), 0);
17472      rtx tmp;
17473
17474      model = SYMBOL_REF_TLS_MODEL (symbol);
17475      if (model)
17476	tmp = legitimize_tls_address (symbol, model, true);
17477      else
17478        tmp = legitimize_pe_coff_symbol (symbol, true);
17479
17480      if (tmp)
17481	{
17482	  tmp = force_operand (tmp, NULL);
17483	  tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
17484				     op0, 1, OPTAB_DIRECT);
17485	  if (tmp == op0)
17486	    return;
17487	  op1 = convert_to_mode (mode, tmp, 1);
17488	}
17489    }
17490
17491  if ((flag_pic || MACHOPIC_INDIRECT)
17492      && symbolic_operand (op1, mode))
17493    {
17494      if (TARGET_MACHO && !TARGET_64BIT)
17495	{
17496#if TARGET_MACHO
17497	  /* dynamic-no-pic */
17498	  if (MACHOPIC_INDIRECT)
17499	    {
17500	      rtx temp = ((reload_in_progress
17501			   || ((op0 && REG_P (op0))
17502			       && mode == Pmode))
17503			  ? op0 : gen_reg_rtx (Pmode));
17504	      op1 = machopic_indirect_data_reference (op1, temp);
17505	      if (MACHOPIC_PURE)
17506		op1 = machopic_legitimize_pic_address (op1, mode,
17507						       temp == op1 ? 0 : temp);
17508	    }
17509	  if (op0 != op1 && GET_CODE (op0) != MEM)
17510	    {
17511	      rtx insn = gen_rtx_SET (VOIDmode, op0, op1);
17512	      emit_insn (insn);
17513	      return;
17514	    }
17515	  if (GET_CODE (op0) == MEM)
17516	    op1 = force_reg (Pmode, op1);
17517	  else
17518	    {
17519	      rtx temp = op0;
17520	      if (GET_CODE (temp) != REG)
17521		temp = gen_reg_rtx (Pmode);
17522	      temp = legitimize_pic_address (op1, temp);
17523	      if (temp == op0)
17524		return;
17525	      op1 = temp;
17526	    }
17527      /* dynamic-no-pic */
17528#endif
17529	}
17530      else
17531	{
17532	  if (MEM_P (op0))
17533	    op1 = force_reg (mode, op1);
17534	  else if (!(TARGET_64BIT && x86_64_movabs_operand (op1, DImode)))
17535	    {
17536	      rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
17537	      op1 = legitimize_pic_address (op1, reg);
17538	      if (op0 == op1)
17539		return;
17540	      op1 = convert_to_mode (mode, op1, 1);
17541	    }
17542	}
17543    }
17544  else
17545    {
17546      if (MEM_P (op0)
17547	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
17548	      || !push_operand (op0, mode))
17549	  && MEM_P (op1))
17550	op1 = force_reg (mode, op1);
17551
17552      if (push_operand (op0, mode)
17553	  && ! general_no_elim_operand (op1, mode))
17554	op1 = copy_to_mode_reg (mode, op1);
17555
17556      /* Force large constants in 64bit compilation into register
17557	 to get them CSEed.  */
17558      if (can_create_pseudo_p ()
17559	  && (mode == DImode) && TARGET_64BIT
17560	  && immediate_operand (op1, mode)
17561	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
17562	  && !register_operand (op0, mode)
17563	  && optimize)
17564	op1 = copy_to_mode_reg (mode, op1);
17565
17566      if (can_create_pseudo_p ()
17567	  && FLOAT_MODE_P (mode)
17568	  && GET_CODE (op1) == CONST_DOUBLE)
17569	{
17570	  /* If we are loading a floating point constant to a register,
17571	     force the value to memory now, since we'll get better code
17572	     out the back end.  */
17573
17574	  op1 = validize_mem (force_const_mem (mode, op1));
17575	  if (!register_operand (op0, mode))
17576	    {
17577	      rtx temp = gen_reg_rtx (mode);
17578	      emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
17579	      emit_move_insn (op0, temp);
17580	      return;
17581	    }
17582	}
17583    }
17584
17585  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17586}
17587
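/* Expand a vector move of MODE from operands[1] to operands[0], forcing
   constants other than standard SSE constants into memory and handling
   misaligned SSE memory operands.  */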
17588void
17589ix86_expand_vector_move (machine_mode mode, rtx operands[])
17590{
17591  rtx op0 = operands[0], op1 = operands[1];
17592  unsigned int align = GET_MODE_ALIGNMENT (mode);
17593
17594  if (push_operand (op0, VOIDmode))
17595    op0 = emit_move_resolve_push (mode, op0);
17596
17597  /* Force constants other than zero into memory.  We do not know how
17598     the instructions used to build constants modify the upper 64 bits
17599	     of the register; once we have that information we may be able
17600     to handle some of them more efficiently.  */
17601  if (can_create_pseudo_p ()
17602      && register_operand (op0, mode)
17603      && (CONSTANT_P (op1)
17604	  || (GET_CODE (op1) == SUBREG
17605	      && CONSTANT_P (SUBREG_REG (op1))))
17606      && !standard_sse_constant_p (op1))
17607    op1 = validize_mem (force_const_mem (mode, op1));
17608
17609	  /* We need to check memory alignment for SSE modes since an attribute
17610     can make operands unaligned.  */
17611  if (can_create_pseudo_p ()
17612      && SSE_REG_MODE_P (mode)
17613      && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
17614	  || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
17615    {
17616      rtx tmp[2];
17617
17618      /* ix86_expand_vector_move_misalign() does not like constants ... */
17619      if (CONSTANT_P (op1)
17620	  || (GET_CODE (op1) == SUBREG
17621	      && CONSTANT_P (SUBREG_REG (op1))))
17622	op1 = validize_mem (force_const_mem (mode, op1));
17623
17624      /* ... nor both arguments in memory.  */
17625      if (!register_operand (op0, mode)
17626	  && !register_operand (op1, mode))
17627	op1 = force_reg (mode, op1);
17628
17629      tmp[0] = op0; tmp[1] = op1;
17630      ix86_expand_vector_move_misalign (mode, tmp);
17631      return;
17632    }
17633
17634  /* Make operand1 a register if it isn't already.  */
17635  if (can_create_pseudo_p ()
17636      && !register_operand (op0, mode)
17637      && !register_operand (op1, mode))
17638    {
17639      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
17640      return;
17641    }
17642
17643  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17644}
17645
17646/* Split 32-byte AVX unaligned load and store if needed.  */
17647
17648static void
17649ix86_avx256_split_vector_move_misalign (rtx op0, rtx op1)
17650{
17651  rtx m;
17652  rtx (*extract) (rtx, rtx, rtx);
17653  rtx (*load_unaligned) (rtx, rtx);
17654  rtx (*store_unaligned) (rtx, rtx);
17655  machine_mode mode;
17656
17657  switch (GET_MODE (op0))
17658    {
17659    default:
17660      gcc_unreachable ();
17661    case V32QImode:
17662      extract = gen_avx_vextractf128v32qi;
17663      load_unaligned = gen_avx_loaddquv32qi;
17664      store_unaligned = gen_avx_storedquv32qi;
17665      mode = V16QImode;
17666      break;
17667    case V8SFmode:
17668      extract = gen_avx_vextractf128v8sf;
17669      load_unaligned = gen_avx_loadups256;
17670      store_unaligned = gen_avx_storeups256;
17671      mode = V4SFmode;
17672      break;
17673    case V4DFmode:
17674      extract = gen_avx_vextractf128v4df;
17675      load_unaligned = gen_avx_loadupd256;
17676      store_unaligned = gen_avx_storeupd256;
17677      mode = V2DFmode;
17678      break;
17679    }
17680
17681  if (MEM_P (op1))
17682    {
17683      if (TARGET_AVX256_SPLIT_UNALIGNED_LOAD
17684	  && optimize_insn_for_speed_p ())
17685	{
17686	  rtx r = gen_reg_rtx (mode);
17687	  m = adjust_address (op1, mode, 0);
17688	  emit_move_insn (r, m);
17689	  m = adjust_address (op1, mode, 16);
17690	  r = gen_rtx_VEC_CONCAT (GET_MODE (op0), r, m);
17691	  emit_move_insn (op0, r);
17692	}
17693      /* Normal *mov<mode>_internal pattern will handle
17694	 unaligned loads just fine if misaligned_operand
17695	 is true, and without the UNSPEC it can be combined
17696	 with arithmetic instructions.  */
17697      else if (misaligned_operand (op1, GET_MODE (op1)))
17698	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17699      else
17700	emit_insn (load_unaligned (op0, op1));
17701    }
17702  else if (MEM_P (op0))
17703    {
17704      if (TARGET_AVX256_SPLIT_UNALIGNED_STORE
17705	  && optimize_insn_for_speed_p ())
17706	{
17707	  m = adjust_address (op0, mode, 0);
17708	  emit_insn (extract (m, op1, const0_rtx));
17709	  m = adjust_address (op0, mode, 16);
17710	  emit_insn (extract (m, op1, const1_rtx));
17711	}
17712      else
17713	emit_insn (store_unaligned (op0, op1));
17714    }
17715  else
17716    gcc_unreachable ();
17717}
17718
17719/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
17720   straight to ix86_expand_vector_move.  */
17721/* Code generation for scalar reg-reg moves of single and double precision data:
17722     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
17723       movaps reg, reg
17724     else
17725       movss reg, reg
17726     if (x86_sse_partial_reg_dependency == true)
17727       movapd reg, reg
17728     else
17729       movsd reg, reg
17730
17731   Code generation for scalar loads of double precision data:
17732     if (x86_sse_split_regs == true)
17733       movlpd mem, reg      (gas syntax)
17734     else
17735       movsd mem, reg
17736
17737   Code generation for unaligned packed loads of single precision data
17738   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
17739     if (x86_sse_unaligned_move_optimal)
17740       movups mem, reg
17741
17742     if (x86_sse_partial_reg_dependency == true)
17743       {
17744         xorps  reg, reg
17745         movlps mem, reg
17746         movhps mem+8, reg
17747       }
17748     else
17749       {
17750         movlps mem, reg
17751         movhps mem+8, reg
17752       }
17753
17754   Code generation for unaligned packed loads of double precision data
17755   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
17756     if (x86_sse_unaligned_move_optimal)
17757       movupd mem, reg
17758
17759     if (x86_sse_split_regs == true)
17760       {
17761         movlpd mem, reg
17762         movhpd mem+8, reg
17763       }
17764     else
17765       {
17766         movsd  mem, reg
17767         movhpd mem+8, reg
17768       }
17769 */
17770
17771void
17772ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
17773{
17774  rtx op0, op1, orig_op0 = NULL_RTX, m;
17775  rtx (*load_unaligned) (rtx, rtx);
17776  rtx (*store_unaligned) (rtx, rtx);
17777
17778  op0 = operands[0];
17779  op1 = operands[1];
17780
17781  if (GET_MODE_SIZE (mode) == 64)
17782    {
17783      switch (GET_MODE_CLASS (mode))
17784	{
17785	case MODE_VECTOR_INT:
17786	case MODE_INT:
17787	  if (GET_MODE (op0) != V16SImode)
17788	    {
17789	      if (!MEM_P (op0))
17790		{
17791		  orig_op0 = op0;
17792		  op0 = gen_reg_rtx (V16SImode);
17793		}
17794	      else
17795		op0 = gen_lowpart (V16SImode, op0);
17796	    }
17797	  op1 = gen_lowpart (V16SImode, op1);
17798	  /* FALLTHRU */
17799
17800	case MODE_VECTOR_FLOAT:
17801	  switch (GET_MODE (op0))
17802	    {
17803	    default:
17804	      gcc_unreachable ();
17805	    case V16SImode:
17806	      load_unaligned = gen_avx512f_loaddquv16si;
17807	      store_unaligned = gen_avx512f_storedquv16si;
17808	      break;
17809	    case V16SFmode:
17810	      load_unaligned = gen_avx512f_loadups512;
17811	      store_unaligned = gen_avx512f_storeups512;
17812	      break;
17813	    case V8DFmode:
17814	      load_unaligned = gen_avx512f_loadupd512;
17815	      store_unaligned = gen_avx512f_storeupd512;
17816	      break;
17817	    }
17818
17819	  if (MEM_P (op1))
17820	    emit_insn (load_unaligned (op0, op1));
17821	  else if (MEM_P (op0))
17822	    emit_insn (store_unaligned (op0, op1));
17823	  else
17824	    gcc_unreachable ();
17825	  if (orig_op0)
17826	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17827	  break;
17828
17829	default:
17830	  gcc_unreachable ();
17831	}
17832
17833      return;
17834    }
17835
17836  if (TARGET_AVX
17837      && GET_MODE_SIZE (mode) == 32)
17838    {
17839      switch (GET_MODE_CLASS (mode))
17840	{
17841	case MODE_VECTOR_INT:
17842	case MODE_INT:
17843	  if (GET_MODE (op0) != V32QImode)
17844	    {
17845	      if (!MEM_P (op0))
17846		{
17847		  orig_op0 = op0;
17848		  op0 = gen_reg_rtx (V32QImode);
17849		}
17850	      else
17851		op0 = gen_lowpart (V32QImode, op0);
17852	    }
17853	  op1 = gen_lowpart (V32QImode, op1);
17854	  /* FALLTHRU */
17855
17856	case MODE_VECTOR_FLOAT:
17857	  ix86_avx256_split_vector_move_misalign (op0, op1);
17858	  if (orig_op0)
17859	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17860	  break;
17861
17862	default:
17863	  gcc_unreachable ();
17864	}
17865
17866      return;
17867    }
17868
17869  if (MEM_P (op1))
17870    {
17871      /* Normal *mov<mode>_internal pattern will handle
17872	 unaligned loads just fine if misaligned_operand
17873	 is true, and without the UNSPEC it can be combined
17874	 with arithmetic instructions.  */
17875      if (TARGET_AVX
17876	  && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
17877	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
17878	  && misaligned_operand (op1, GET_MODE (op1)))
17879	emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17880      /* ??? If we have typed data, then it would appear that using
17881	 movdqu is the only way to get unaligned data loaded with
17882	 integer type.  */
17883      else if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17884	{
17885	  if (GET_MODE (op0) != V16QImode)
17886	    {
17887	      orig_op0 = op0;
17888	      op0 = gen_reg_rtx (V16QImode);
17889	    }
17890	  op1 = gen_lowpart (V16QImode, op1);
17891	  /* We will eventually emit movups based on insn attributes.  */
17892	  emit_insn (gen_sse2_loaddquv16qi (op0, op1));
17893	  if (orig_op0)
17894	    emit_move_insn (orig_op0, gen_lowpart (GET_MODE (orig_op0), op0));
17895	}
17896      else if (TARGET_SSE2 && mode == V2DFmode)
17897        {
17898          rtx zero;
17899
17900	  if (TARGET_AVX
17901	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17902	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17903	      || optimize_insn_for_size_p ())
17904	    {
17905	      /* We will eventually emit movups based on insn attributes.  */
17906	      emit_insn (gen_sse2_loadupd (op0, op1));
17907	      return;
17908	    }
17909
17910	  /* When SSE registers are split into halves, we can avoid
17911	     writing to the top half twice.  */
17912	  if (TARGET_SSE_SPLIT_REGS)
17913	    {
17914	      emit_clobber (op0);
17915	      zero = op0;
17916	    }
17917	  else
17918	    {
17919	      /* ??? Not sure about the best option for the Intel chips.
17920		 The following would seem to satisfy; the register is
17921		 entirely cleared, breaking the dependency chain.  We
17922		 then store to the upper half, with a dependency depth
17923		 of one.  A rumor has it that Intel recommends two movsd
17924		 followed by an unpacklpd, but this is unconfirmed.  And
17925		 given that the dependency depth of the unpacklpd would
17926		 still be one, I'm not sure why this would be better.  */
17927	      zero = CONST0_RTX (V2DFmode);
17928	    }
17929
17930	  m = adjust_address (op1, DFmode, 0);
17931	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
17932	  m = adjust_address (op1, DFmode, 8);
17933	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
17934	}
17935      else
17936        {
17937	  rtx t;
17938
17939	  if (TARGET_AVX
17940	      || TARGET_SSE_UNALIGNED_LOAD_OPTIMAL
17941	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17942	      || optimize_insn_for_size_p ())
17943	    {
17944	      if (GET_MODE (op0) != V4SFmode)
17945		{
17946		  orig_op0 = op0;
17947		  op0 = gen_reg_rtx (V4SFmode);
17948		}
17949	      op1 = gen_lowpart (V4SFmode, op1);
17950	      emit_insn (gen_sse_loadups (op0, op1));
17951	      if (orig_op0)
17952		emit_move_insn (orig_op0,
17953				gen_lowpart (GET_MODE (orig_op0), op0));
17954	      return;
17955            }
17956
17957	  if (mode != V4SFmode)
17958	    t = gen_reg_rtx (V4SFmode);
17959	  else
17960	    t = op0;
17961
17962	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
17963	    emit_move_insn (t, CONST0_RTX (V4SFmode));
17964	  else
17965	    emit_clobber (t);
17966
17967	  m = adjust_address (op1, V2SFmode, 0);
17968	  emit_insn (gen_sse_loadlps (t, t, m));
17969	  m = adjust_address (op1, V2SFmode, 8);
17970	  emit_insn (gen_sse_loadhps (t, t, m));
17971	  if (mode != V4SFmode)
17972	    emit_move_insn (op0, gen_lowpart (mode, t));
17973	}
17974    }
17975  else if (MEM_P (op0))
17976    {
17977      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
17978        {
17979	  op0 = gen_lowpart (V16QImode, op0);
17980	  op1 = gen_lowpart (V16QImode, op1);
17981	  /* We will eventually emit movups based on insn attributes.  */
17982	  emit_insn (gen_sse2_storedquv16qi (op0, op1));
17983	}
17984      else if (TARGET_SSE2 && mode == V2DFmode)
17985	{
17986	  if (TARGET_AVX
17987	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
17988	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
17989	      || optimize_insn_for_size_p ())
17990	    /* We will eventually emit movups based on insn attributes.  */
17991	    emit_insn (gen_sse2_storeupd (op0, op1));
17992	  else
17993	    {
17994	      m = adjust_address (op0, DFmode, 0);
17995	      emit_insn (gen_sse2_storelpd (m, op1));
17996	      m = adjust_address (op0, DFmode, 8);
17997	      emit_insn (gen_sse2_storehpd (m, op1));
17998	    }
17999	}
18000      else
18001	{
18002	  if (mode != V4SFmode)
18003	    op1 = gen_lowpart (V4SFmode, op1);
18004
18005	  if (TARGET_AVX
18006	      || TARGET_SSE_UNALIGNED_STORE_OPTIMAL
18007	      || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18008	      || optimize_insn_for_size_p ())
18009	    {
18010	      op0 = gen_lowpart (V4SFmode, op0);
18011	      emit_insn (gen_sse_storeups (op0, op1));
18012	    }
18013	  else
18014	    {
18015	      m = adjust_address (op0, V2SFmode, 0);
18016	      emit_insn (gen_sse_storelps (m, op1));
18017	      m = adjust_address (op0, V2SFmode, 8);
18018	      emit_insn (gen_sse_storehps (m, op1));
18019	    }
18020	}
18021    }
18022  else
18023    gcc_unreachable ();
18024}
18025
18026/* Helper function of ix86_fixup_binary_operands to canonicalize
18027   operand order.  Returns true if the operands should be swapped.  */
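/* An illustrative example (not from the original sources): for a
   commutative PLUS with operands (dst = r1, src1 = r2, src2 = r1),
   swapping src1 and src2 yields the matching-destination form
   "r1 = r1 + r2", so the destructive two-operand machine instruction
   can be used without an extra move.  */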
18028
18029static bool
18030ix86_swap_binary_operands_p (enum rtx_code code, machine_mode mode,
18031			     rtx operands[])
18032{
18033  rtx dst = operands[0];
18034  rtx src1 = operands[1];
18035  rtx src2 = operands[2];
18036
18037  /* If the operation is not commutative, we can't do anything.  */
18038  if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
18039    return false;
18040
18041  /* Highest priority is that src1 should match dst.  */
18042  if (rtx_equal_p (dst, src1))
18043    return false;
18044  if (rtx_equal_p (dst, src2))
18045    return true;
18046
18047  /* Next highest priority is that immediate constants come second.  */
18048  if (immediate_operand (src2, mode))
18049    return false;
18050  if (immediate_operand (src1, mode))
18051    return true;
18052
18053  /* Lowest priority is that memory references should come second.  */
18054  if (MEM_P (src2))
18055    return false;
18056  if (MEM_P (src1))
18057    return true;
18058
18059  return false;
18060}
18061
18062
18063/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
18064   destination to use for the operation.  If different from the true
18065   destination in operands[0], a copy operation will be required.  */
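/* For example (illustrative only): given "mem1 = mem2 + mem3", both
   sources are in memory, so one of them is forced into a register;
   and because the memory destination no longer matches src1, a fresh
   pseudo is returned as the destination and the caller emits a final
   move back to operands[0].  */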
18066
18067rtx
18068ix86_fixup_binary_operands (enum rtx_code code, machine_mode mode,
18069			    rtx operands[])
18070{
18071  rtx dst = operands[0];
18072  rtx src1 = operands[1];
18073  rtx src2 = operands[2];
18074
18075  /* Canonicalize operand order.  */
18076  if (ix86_swap_binary_operands_p (code, mode, operands))
18077    {
18078      /* It is invalid to swap operands of different modes.  */
18079      gcc_assert (GET_MODE (src1) == GET_MODE (src2));
18080
18081      std::swap (src1, src2);
18082    }
18083
18084  /* Both source operands cannot be in memory.  */
18085  if (MEM_P (src1) && MEM_P (src2))
18086    {
18087      /* Optimization: Only read from memory once.  */
18088      if (rtx_equal_p (src1, src2))
18089	{
18090	  src2 = force_reg (mode, src2);
18091	  src1 = src2;
18092	}
18093      else if (rtx_equal_p (dst, src1))
18094	src2 = force_reg (mode, src2);
18095      else
18096	src1 = force_reg (mode, src1);
18097    }
18098
18099  /* If the destination is memory, and we do not have matching source
18100     operands, do things in registers.  */
18101  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18102    dst = gen_reg_rtx (mode);
18103
18104  /* Source 1 cannot be a constant.  */
18105  if (CONSTANT_P (src1))
18106    src1 = force_reg (mode, src1);
18107
18108  /* Source 1 cannot be a non-matching memory.  */
18109  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18110    src1 = force_reg (mode, src1);
18111
18112  /* Improve address combine.  */
18113  if (code == PLUS
18114      && GET_MODE_CLASS (mode) == MODE_INT
18115      && MEM_P (src2))
18116    src2 = force_reg (mode, src2);
18117
18118  operands[1] = src1;
18119  operands[2] = src2;
18120  return dst;
18121}
18122
18123/* Similarly, but assume that the destination has already been
18124   set up properly.  */
18125
18126void
18127ix86_fixup_binary_operands_no_copy (enum rtx_code code,
18128				    machine_mode mode, rtx operands[])
18129{
18130  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
18131  gcc_assert (dst == operands[0]);
18132}
18133
18134	/* Attempt to expand a binary operator.  Make the expansion closer to the
18135	   actual machine than just general_operand, which would allow 3 separate
18136	   memory references (one output, two input) in a single insn.  */
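/* Sketch of the RTL emitted below (illustrative): outside of reload,
   the operation is wrapped together with a flags clobber, e.g.

     (parallel [(set (reg:SI dst)
		     (plus:SI (reg:SI dst) (reg:SI src)))
		(clobber (reg:CC FLAGS_REG))])

   so that later passes know the arithmetic insn clobbers the flags.  */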
18137
18138void
18139ix86_expand_binary_operator (enum rtx_code code, machine_mode mode,
18140			     rtx operands[])
18141{
18142  rtx src1, src2, dst, op, clob;
18143
18144  dst = ix86_fixup_binary_operands (code, mode, operands);
18145  src1 = operands[1];
18146  src2 = operands[2];
18147
18148	  /* Emit the instruction.  */
18149
18150  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
18151  if (reload_in_progress)
18152    {
18153      /* Reload doesn't know about the flags register, and doesn't know that
18154         it doesn't want to clobber it.  We can only do this with PLUS.  */
18155      gcc_assert (code == PLUS);
18156      emit_insn (op);
18157    }
18158  else if (reload_completed
18159	   && code == PLUS
18160	   && !rtx_equal_p (dst, src1))
18161    {
18162      /* This is going to be an LEA; avoid splitting it later.  */
18163      emit_insn (op);
18164    }
18165  else
18166    {
18167      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18168      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18169    }
18170
18171  /* Fix up the destination if needed.  */
18172  if (dst != operands[0])
18173    emit_move_insn (operands[0], dst);
18174}
18175
18176/* Expand vector logical operation CODE (AND, IOR, XOR) in MODE with
18177   the given OPERANDS.  */
18178
18179void
18180ix86_expand_vector_logical_operator (enum rtx_code code, machine_mode mode,
18181				     rtx operands[])
18182{
18183  rtx op1 = NULL_RTX, op2 = NULL_RTX;
18184  if (GET_CODE (operands[1]) == SUBREG)
18185    {
18186      op1 = operands[1];
18187      op2 = operands[2];
18188    }
18189  else if (GET_CODE (operands[2]) == SUBREG)
18190    {
18191      op1 = operands[2];
18192      op2 = operands[1];
18193    }
18194  /* Optimize (__m128i) d | (__m128i) e and similar code
18195     when d and e are float vectors into float vector logical
18196     insn.  In C/C++ without using intrinsics there is no other way
18197     to express vector logical operation on float vectors than
18198     to cast them temporarily to integer vectors.  */
18199  if (op1
18200      && !TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL
18201      && ((GET_CODE (op2) == SUBREG || GET_CODE (op2) == CONST_VECTOR))
18202      && GET_MODE_CLASS (GET_MODE (SUBREG_REG (op1))) == MODE_VECTOR_FLOAT
18203      && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op1))) == GET_MODE_SIZE (mode)
18204      && SUBREG_BYTE (op1) == 0
18205      && (GET_CODE (op2) == CONST_VECTOR
18206	  || (GET_MODE (SUBREG_REG (op1)) == GET_MODE (SUBREG_REG (op2))
18207	      && SUBREG_BYTE (op2) == 0))
18208      && can_create_pseudo_p ())
18209    {
18210      rtx dst;
18211      switch (GET_MODE (SUBREG_REG (op1)))
18212	{
18213	case V4SFmode:
18214	case V8SFmode:
18215	case V16SFmode:
18216	case V2DFmode:
18217	case V4DFmode:
18218	case V8DFmode:
18219	  dst = gen_reg_rtx (GET_MODE (SUBREG_REG (op1)));
18220	  if (GET_CODE (op2) == CONST_VECTOR)
18221	    {
18222	      op2 = gen_lowpart (GET_MODE (dst), op2);
18223	      op2 = force_reg (GET_MODE (dst), op2);
18224	    }
18225	  else
18226	    {
18227	      op1 = operands[1];
18228	      op2 = SUBREG_REG (operands[2]);
18229	      if (!nonimmediate_operand (op2, GET_MODE (dst)))
18230		op2 = force_reg (GET_MODE (dst), op2);
18231	    }
18232	  op1 = SUBREG_REG (op1);
18233	  if (!nonimmediate_operand (op1, GET_MODE (dst)))
18234	    op1 = force_reg (GET_MODE (dst), op1);
18235	  emit_insn (gen_rtx_SET (VOIDmode, dst,
18236				  gen_rtx_fmt_ee (code, GET_MODE (dst),
18237						  op1, op2)));
18238	  emit_move_insn (operands[0], gen_lowpart (mode, dst));
18239	  return;
18240	default:
18241	  break;
18242	}
18243    }
18244  if (!nonimmediate_operand (operands[1], mode))
18245    operands[1] = force_reg (mode, operands[1]);
18246  if (!nonimmediate_operand (operands[2], mode))
18247    operands[2] = force_reg (mode, operands[2]);
18248  ix86_fixup_binary_operands_no_copy (code, mode, operands);
18249  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
18250			  gen_rtx_fmt_ee (code, mode, operands[1],
18251					  operands[2])));
18252}
18253
18254/* Return TRUE or FALSE depending on whether the binary operator meets the
18255   appropriate constraints.  */
18256
18257bool
18258ix86_binary_operator_ok (enum rtx_code code, machine_mode mode,
18259			 rtx operands[3])
18260{
18261  rtx dst = operands[0];
18262  rtx src1 = operands[1];
18263  rtx src2 = operands[2];
18264
18265  /* Both source operands cannot be in memory.  */
18266  if (MEM_P (src1) && MEM_P (src2))
18267    return false;
18268
18269  /* Canonicalize operand order for commutative operators.  */
18270  if (ix86_swap_binary_operands_p (code, mode, operands))
18271    std::swap (src1, src2);
18272
18273  /* If the destination is memory, we must have a matching source operand.  */
18274  if (MEM_P (dst) && !rtx_equal_p (dst, src1))
18275      return false;
18276
18277  /* Source 1 cannot be a constant.  */
18278  if (CONSTANT_P (src1))
18279    return false;
18280
18281  /* Source 1 cannot be a non-matching memory.  */
18282  if (MEM_P (src1) && !rtx_equal_p (dst, src1))
18283    /* Support "andhi/andsi/anddi" as a zero-extending move.  */
18284    return (code == AND
18285	    && (mode == HImode
18286		|| mode == SImode
18287		|| (TARGET_64BIT && mode == DImode))
18288	    && satisfies_constraint_L (src2));
18289
18290  return true;
18291}
18292
18293	/* Attempt to expand a unary operator.  Make the expansion closer to the
18294	   actual machine than just general_operand, which would allow 2 separate
18295	   memory references (one output, one input) in a single insn.  */
18296
18297void
18298ix86_expand_unary_operator (enum rtx_code code, machine_mode mode,
18299			    rtx operands[])
18300{
18301  bool matching_memory = false;
18302  rtx src, dst, op, clob;
18303
18304  dst = operands[0];
18305  src = operands[1];
18306
18307  /* If the destination is memory, and we do not have matching source
18308     operands, do things in registers.  */
18309  if (MEM_P (dst))
18310    {
18311      if (rtx_equal_p (dst, src))
18312	matching_memory = true;
18313      else
18314	dst = gen_reg_rtx (mode);
18315    }
18316
18317  /* When source operand is memory, destination must match.  */
18318  if (MEM_P (src) && !matching_memory)
18319    src = force_reg (mode, src);
18320
18321  /* Emit the instruction.  */
18322
18323  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
18324  if (reload_in_progress || code == NOT)
18325    {
18326      /* Reload doesn't know about the flags register, and doesn't know that
18327         it doesn't want to clobber it.  */
18328      gcc_assert (code == NOT);
18329      emit_insn (op);
18330    }
18331  else
18332    {
18333      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
18334      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
18335    }
18336
18337  /* Fix up the destination if needed.  */
18338  if (dst != operands[0])
18339    emit_move_insn (operands[0], dst);
18340}
18341
18342	/* Split 32-bit/64-bit divmod with 8-bit unsigned divmod if the dividend
18343	   and divisor are both within the range [0, 255].  */
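/* Rough shape of the code emitted below (an illustrative sketch, not
   the exact insns):

     scratch = dividend | divisor;
     if ((scratch & ~0xff) == 0)        (both values fit in 8 bits)
       goto qimode;
     full-width signed/unsigned divide; goto done;
   qimode:
     16-by-8 bit unsigned divide        (AL = quotient, AH = remainder)
   done:  */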
18344
18345void
18346ix86_split_idivmod (machine_mode mode, rtx operands[],
18347		    bool signed_p)
18348{
18349  rtx_code_label *end_label, *qimode_label;
18350  rtx insn, div, mod;
18351  rtx scratch, tmp0, tmp1, tmp2;
18352  rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx);
18353  rtx (*gen_zero_extend) (rtx, rtx);
18354  rtx (*gen_test_ccno_1) (rtx, rtx);
18355
18356  switch (mode)
18357    {
18358    case SImode:
18359      gen_divmod4_1 = signed_p ? gen_divmodsi4_1 : gen_udivmodsi4_1;
18360      gen_test_ccno_1 = gen_testsi_ccno_1;
18361      gen_zero_extend = gen_zero_extendqisi2;
18362      break;
18363    case DImode:
18364      gen_divmod4_1 = signed_p ? gen_divmoddi4_1 : gen_udivmoddi4_1;
18365      gen_test_ccno_1 = gen_testdi_ccno_1;
18366      gen_zero_extend = gen_zero_extendqidi2;
18367      break;
18368    default:
18369      gcc_unreachable ();
18370    }
18371
18372  end_label = gen_label_rtx ();
18373  qimode_label = gen_label_rtx ();
18374
18375  scratch = gen_reg_rtx (mode);
18376
18377	  /* Use 8-bit unsigned divmod if the dividend and divisor are within
18378	     the range [0, 255].  */
18379  emit_move_insn (scratch, operands[2]);
18380  scratch = expand_simple_binop (mode, IOR, scratch, operands[3],
18381				 scratch, 1, OPTAB_DIRECT);
18382  emit_insn (gen_test_ccno_1 (scratch, GEN_INT (-0x100)));
18383  tmp0 = gen_rtx_REG (CCNOmode, FLAGS_REG);
18384  tmp0 = gen_rtx_EQ (VOIDmode, tmp0, const0_rtx);
18385  tmp0 = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp0,
18386			       gen_rtx_LABEL_REF (VOIDmode, qimode_label),
18387			       pc_rtx);
18388  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp0));
18389  predict_jump (REG_BR_PROB_BASE * 50 / 100);
18390  JUMP_LABEL (insn) = qimode_label;
18391
18392	  /* Generate the original signed/unsigned divmod.  */
18393  div = gen_divmod4_1 (operands[0], operands[1],
18394		       operands[2], operands[3]);
18395  emit_insn (div);
18396
18397  /* Branch to the end.  */
18398  emit_jump_insn (gen_jump (end_label));
18399  emit_barrier ();
18400
18401  /* Generate 8bit unsigned divide.  */
18402  emit_label (qimode_label);
18403  /* Don't use operands[0] for result of 8bit divide since not all
18404     registers support QImode ZERO_EXTRACT.  */
18405  tmp0 = simplify_gen_subreg (HImode, scratch, mode, 0);
18406  tmp1 = simplify_gen_subreg (HImode, operands[2], mode, 0);
18407  tmp2 = simplify_gen_subreg (QImode, operands[3], mode, 0);
18408  emit_insn (gen_udivmodhiqi3 (tmp0, tmp1, tmp2));
18409
18410  if (signed_p)
18411    {
18412      div = gen_rtx_DIV (SImode, operands[2], operands[3]);
18413      mod = gen_rtx_MOD (SImode, operands[2], operands[3]);
18414    }
18415  else
18416    {
18417      div = gen_rtx_UDIV (SImode, operands[2], operands[3]);
18418      mod = gen_rtx_UMOD (SImode, operands[2], operands[3]);
18419    }
18420
18421  /* Extract remainder from AH.  */
18422  tmp1 = gen_rtx_ZERO_EXTRACT (mode, tmp0, GEN_INT (8), GEN_INT (8));
18423  if (REG_P (operands[1]))
18424    insn = emit_move_insn (operands[1], tmp1);
18425  else
18426    {
18427      /* Need a new scratch register since the old one has result
18428	 of 8bit divide.  */
18429      scratch = gen_reg_rtx (mode);
18430      emit_move_insn (scratch, tmp1);
18431      insn = emit_move_insn (operands[1], scratch);
18432    }
18433  set_unique_reg_note (insn, REG_EQUAL, mod);
18434
18435  /* Zero extend quotient from AL.  */
18436  tmp1 = gen_lowpart (QImode, tmp0);
18437  insn = emit_insn (gen_zero_extend (operands[0], tmp1));
18438  set_unique_reg_note (insn, REG_EQUAL, div);
18439
18440  emit_label (end_label);
18441}
18442
18443#define LEA_MAX_STALL (3)
18444#define LEA_SEARCH_THRESHOLD (LEA_MAX_STALL << 1)
18445
18446	/* Increase the given DISTANCE in half-cycles according to
18447	   dependencies between the PREV and NEXT instructions.
18448	   Add 1 half-cycle if there is no dependency and
18449	   go to the next cycle if there is some dependency.  */
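/* Illustrative reading of the bookkeeping below (an assumption based
   on the code, not a documented contract): independent instructions
   cost one half-cycle each, so two of them can share a cycle, while
   a dependent pair rounds the distance up to a cycle boundary and
   then adds a full cycle.  */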
18450
18451static unsigned int
18452increase_distance (rtx_insn *prev, rtx_insn *next, unsigned int distance)
18453{
18454  df_ref def, use;
18455
18456  if (!prev || !next)
18457    return distance + (distance & 1) + 2;
18458
18459  if (!DF_INSN_USES (next) || !DF_INSN_DEFS (prev))
18460    return distance + 1;
18461
18462  FOR_EACH_INSN_USE (use, next)
18463    FOR_EACH_INSN_DEF (def, prev)
18464      if (!DF_REF_IS_ARTIFICIAL (def)
18465	  && DF_REF_REGNO (use) == DF_REF_REGNO (def))
18466	return distance + (distance & 1) + 2;
18467
18468  return distance + 1;
18469}
18470
18471/* Function checks if instruction INSN defines register number
18472   REGNO1 or REGNO2.  */
18473
18474static bool
18475insn_defines_reg (unsigned int regno1, unsigned int regno2,
18476		  rtx insn)
18477{
18478  df_ref def;
18479
18480  FOR_EACH_INSN_DEF (def, insn)
18481    if (DF_REF_REG_DEF_P (def)
18482	&& !DF_REF_IS_ARTIFICIAL (def)
18483	&& (regno1 == DF_REF_REGNO (def)
18484	    || regno2 == DF_REF_REGNO (def)))
18485      return true;
18486
18487  return false;
18488}
18489
18490/* Function checks if instruction INSN uses register number
18491   REGNO as a part of address expression.  */
18492
18493static bool
18494insn_uses_reg_mem (unsigned int regno, rtx insn)
18495{
18496  df_ref use;
18497
18498  FOR_EACH_INSN_USE (use, insn)
18499    if (DF_REF_REG_MEM_P (use) && regno == DF_REF_REGNO (use))
18500      return true;
18501
18502  return false;
18503}
18504
18505/* Search backward for non-agu definition of register number REGNO1
18506   or register number REGNO2 in basic block starting from instruction
18507   START up to head of basic block or instruction INSN.
18508
18509   Function puts true value into *FOUND var if definition was found
18510   and false otherwise.
18511
18512   Distance in half-cycles between START and found instruction or head
18513   of BB is added to DISTANCE and returned.  */
18514
18515static int
18516distance_non_agu_define_in_bb (unsigned int regno1, unsigned int regno2,
18517			       rtx_insn *insn, int distance,
18518			       rtx_insn *start, bool *found)
18519{
18520  basic_block bb = start ? BLOCK_FOR_INSN (start) : NULL;
18521  rtx_insn *prev = start;
18522  rtx_insn *next = NULL;
18523
18524  *found = false;
18525
18526  while (prev
18527	 && prev != insn
18528	 && distance < LEA_SEARCH_THRESHOLD)
18529    {
18530      if (NONDEBUG_INSN_P (prev) && NONJUMP_INSN_P (prev))
18531	{
18532	  distance = increase_distance (prev, next, distance);
18533	  if (insn_defines_reg (regno1, regno2, prev))
18534	    {
18535	      if (recog_memoized (prev) < 0
18536		  || get_attr_type (prev) != TYPE_LEA)
18537		{
18538		  *found = true;
18539		  return distance;
18540		}
18541	    }
18542
18543	  next = prev;
18544	}
18545      if (prev == BB_HEAD (bb))
18546	break;
18547
18548      prev = PREV_INSN (prev);
18549    }
18550
18551  return distance;
18552}
18553
18554	/* Search backward for a non-agu definition of register number REGNO1
18555	   or register number REGNO2 in INSN's basic block until we
18556	   1. pass LEA_SEARCH_THRESHOLD instructions, or
18557	   2. reach a neighbouring BB's boundary, or
18558	   3. reach an agu definition.
18559	   Returns the distance between the non-agu definition point and INSN.
18560	   If there is no definition point, returns -1.  */
18561
18562static int
18563distance_non_agu_define (unsigned int regno1, unsigned int regno2,
18564			 rtx_insn *insn)
18565{
18566  basic_block bb = BLOCK_FOR_INSN (insn);
18567  int distance = 0;
18568  bool found = false;
18569
18570  if (insn != BB_HEAD (bb))
18571    distance = distance_non_agu_define_in_bb (regno1, regno2, insn,
18572					      distance, PREV_INSN (insn),
18573					      &found);
18574
18575  if (!found && distance < LEA_SEARCH_THRESHOLD)
18576    {
18577      edge e;
18578      edge_iterator ei;
18579      bool simple_loop = false;
18580
18581      FOR_EACH_EDGE (e, ei, bb->preds)
18582	if (e->src == bb)
18583	  {
18584	    simple_loop = true;
18585	    break;
18586	  }
18587
18588      if (simple_loop)
18589	distance = distance_non_agu_define_in_bb (regno1, regno2,
18590						  insn, distance,
18591						  BB_END (bb), &found);
18592      else
18593	{
18594	  int shortest_dist = -1;
18595	  bool found_in_bb = false;
18596
18597	  FOR_EACH_EDGE (e, ei, bb->preds)
18598	    {
18599	      int bb_dist
18600		= distance_non_agu_define_in_bb (regno1, regno2,
18601						 insn, distance,
18602						 BB_END (e->src),
18603						 &found_in_bb);
18604	      if (found_in_bb)
18605		{
18606		  if (shortest_dist < 0)
18607		    shortest_dist = bb_dist;
18608		  else if (bb_dist > 0)
18609		    shortest_dist = MIN (bb_dist, shortest_dist);
18610
18611		  found = true;
18612		}
18613	    }
18614
18615	  distance = shortest_dist;
18616	}
18617    }
18618
18619  /* get_attr_type may modify recog data.  We want to make sure
18620     that recog data is valid for instruction INSN, on which
18621     distance_non_agu_define is called.  INSN is unchanged here.  */
18622  extract_insn_cached (insn);
18623
18624  if (!found)
18625    return -1;
18626
18627  return distance >> 1;
18628}
18629
18630	/* Return the distance in half-cycles between INSN and the next
18631	   insn that uses register number REGNO in a memory address, added
18632	   to DISTANCE.  Return -1 if REGNO is set.
18633
18634	   Put a true value into *FOUND if a register usage was found and
18635	   false otherwise.
18636	   Put a true value into *REDEFINED if a register redefinition was
18637	   found and false otherwise.  */
18638
18639static int
18640distance_agu_use_in_bb (unsigned int regno,
18641			rtx_insn *insn, int distance, rtx_insn *start,
18642			bool *found, bool *redefined)
18643{
18644  basic_block bb = NULL;
18645  rtx_insn *next = start;
18646  rtx_insn *prev = NULL;
18647
18648  *found = false;
18649  *redefined = false;
18650
18651  if (start != NULL_RTX)
18652    {
18653      bb = BLOCK_FOR_INSN (start);
18654      if (start != BB_HEAD (bb))
18655	/* If insn and start belong to the same bb, set prev to insn,
18656	   so the call to increase_distance will increase the distance
18657	   between insns by 1.  */
18658	prev = insn;
18659    }
18660
18661  while (next
18662	 && next != insn
18663	 && distance < LEA_SEARCH_THRESHOLD)
18664    {
18665      if (NONDEBUG_INSN_P (next) && NONJUMP_INSN_P (next))
18666	{
18667		  distance = increase_distance (prev, next, distance);
18668	  if (insn_uses_reg_mem (regno, next))
18669	    {
18670	      /* Return DISTANCE if OP0 is used in memory
18671		 address in NEXT.  */
18672	      *found = true;
18673	      return distance;
18674	    }
18675
18676	  if (insn_defines_reg (regno, INVALID_REGNUM, next))
18677	    {
18678	      /* Return -1 if OP0 is set in NEXT.  */
18679	      *redefined = true;
18680	      return -1;
18681	    }
18682
18683	  prev = next;
18684	}
18685
18686      if (next == BB_END (bb))
18687	break;
18688
18689      next = NEXT_INSN (next);
18690    }
18691
18692  return distance;
18693}
18694
18695	/* Return the distance between INSN and the next insn that uses
18696	   register number REGNO0 in a memory address.  Return -1 if no such
18697	   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
18698
18699static int
18700distance_agu_use (unsigned int regno0, rtx_insn *insn)
18701{
18702  basic_block bb = BLOCK_FOR_INSN (insn);
18703  int distance = 0;
18704  bool found = false;
18705  bool redefined = false;
18706
18707  if (insn != BB_END (bb))
18708    distance = distance_agu_use_in_bb (regno0, insn, distance,
18709				       NEXT_INSN (insn),
18710				       &found, &redefined);
18711
18712  if (!found && !redefined && distance < LEA_SEARCH_THRESHOLD)
18713    {
18714      edge e;
18715      edge_iterator ei;
18716      bool simple_loop = false;
18717
18718      FOR_EACH_EDGE (e, ei, bb->succs)
18719        if (e->dest == bb)
18720	  {
18721	    simple_loop = true;
18722	    break;
18723	  }
18724
18725      if (simple_loop)
18726	distance = distance_agu_use_in_bb (regno0, insn,
18727					   distance, BB_HEAD (bb),
18728					   &found, &redefined);
18729      else
18730	{
18731	  int shortest_dist = -1;
18732	  bool found_in_bb = false;
18733	  bool redefined_in_bb = false;
18734
18735	  FOR_EACH_EDGE (e, ei, bb->succs)
18736	    {
18737	      int bb_dist
18738		= distance_agu_use_in_bb (regno0, insn,
18739					  distance, BB_HEAD (e->dest),
18740					  &found_in_bb, &redefined_in_bb);
18741	      if (found_in_bb)
18742		{
18743		  if (shortest_dist < 0)
18744		    shortest_dist = bb_dist;
18745		  else if (bb_dist > 0)
18746		    shortest_dist = MIN (bb_dist, shortest_dist);
18747
18748		  found = true;
18749		}
18750	    }
18751
18752	  distance = shortest_dist;
18753	}
18754    }
18755
18756  if (!found || redefined)
18757    return -1;
18758
18759  return distance >> 1;
18760}
18761
18762	/* Define this macro to tune LEA priority vs ADD; it takes effect when
18763	   there is a choice between LEA and ADD.
18764	   Negative value: ADD is preferred over LEA
18765	   Zero: Neutral
18766	   Positive value: LEA is preferred over ADD.  */
18767#define IX86_LEA_PRIORITY 0
18768
18769	/* Return true if using lea INSN has a performance advantage
18770	   over a sequence of instructions.  The instruction sequence has
18771	   SPLIT_COST cycles higher latency than the lea latency.  */
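/* Illustrative summary of the heuristic (not authoritative): the
   distance from the closest non-AGU definition of the address
   registers back to the lea, increased by SPLIT_COST and
   IX86_LEA_PRIORITY, is compared against the distance from the lea
   forward to the next use of its result in an address.  When the
   adjusted definition distance is at least as large as the use
   distance, the lea is considered to outperform the split sequence.  */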
18772
18773static bool
18774ix86_lea_outperforms (rtx_insn *insn, unsigned int regno0, unsigned int regno1,
18775		      unsigned int regno2, int split_cost, bool has_scale)
18776{
18777  int dist_define, dist_use;
18778
18779	  /* For Silvermont, if a 2-source or 3-source LEA is used for a
18780	     non-destructive destination, or for its ability to use a
18781	     SCALE, the use of LEA is justified.  */
18782  if (TARGET_SILVERMONT || TARGET_INTEL)
18783    {
18784      if (has_scale)
18785	return true;
18786      if (split_cost < 1)
18787	return false;
18788      if (regno0 == regno1 || regno0 == regno2)
18789	return false;
18790      return true;
18791    }
18792
18793  dist_define = distance_non_agu_define (regno1, regno2, insn);
18794  dist_use = distance_agu_use (regno0, insn);
18795
18796  if (dist_define < 0 || dist_define >= LEA_MAX_STALL)
18797    {
18798	      /* If there is no non-AGU operand definition, no AGU
18799		 operand usage and the split cost is 0, then both the lea
18800		 and non-lea variants have the same priority.  Currently
18801		 we prefer lea for 64-bit code and non-lea for 32-bit
18802		 code.  */
18803      if (dist_use < 0 && split_cost == 0)
18804	return TARGET_64BIT || IX86_LEA_PRIORITY;
18805      else
18806	return true;
18807    }
18808
18809	  /* The longer the distance to the definition, the more preferable
18810	     lea becomes.  Adjust the distance to take the splitting cost
18811	     and lea priority into account.  */
18812  dist_define += split_cost + IX86_LEA_PRIORITY;
18813
18814	  /* If there is no use in a memory address then we just check
18815	     that the split cost exceeds the AGU stall.  */
18816  if (dist_use < 0)
18817    return dist_define > LEA_MAX_STALL;
18818
18819	  /* If this insn has both a backward non-agu dependence and a forward
18820	     agu dependence, the one with the shorter distance takes effect.  */
18821  return dist_define >= dist_use;
18822}
18823
18824/* Return true if it is legal to clobber flags by INSN and
18825   false otherwise.  */
18826
18827static bool
18828ix86_ok_to_clobber_flags (rtx_insn *insn)
18829{
18830  basic_block bb = BLOCK_FOR_INSN (insn);
18831  df_ref use;
18832  bitmap live;
18833
18834  while (insn)
18835    {
18836      if (NONDEBUG_INSN_P (insn))
18837	{
18838	  FOR_EACH_INSN_USE (use, insn)
18839	    if (DF_REF_REG_USE_P (use) && DF_REF_REGNO (use) == FLAGS_REG)
18840	      return false;
18841
18842	  if (insn_defines_reg (FLAGS_REG, INVALID_REGNUM, insn))
18843	    return true;
18844	}
18845
18846      if (insn == BB_END (bb))
18847	break;
18848
18849      insn = NEXT_INSN (insn);
18850    }
18851
18852	  live = df_get_live_out (bb);
18853  return !REGNO_REG_SET_P (live, FLAGS_REG);
18854}
18855
18856/* Return true if we need to split op0 = op1 + op2 into a sequence of
18857   move and add to avoid AGU stalls.  */
18858
18859bool
18860ix86_avoid_lea_for_add (rtx_insn *insn, rtx operands[])
18861{
18862  unsigned int regno0, regno1, regno2;
18863
18864  /* Check if we need to optimize.  */
18865  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18866    return false;
18867
18868	  /* Check that it is correct to split here.  */
18869	  if (!ix86_ok_to_clobber_flags (insn))
18870    return false;
18871
18872  regno0 = true_regnum (operands[0]);
18873  regno1 = true_regnum (operands[1]);
18874  regno2 = true_regnum (operands[2]);
18875
18876	  /* We only need to split adds with a non-destructive
18877	     destination operand.  */
18878  if (regno0 == regno1 || regno0 == regno2)
18879    return false;
18880  else
18881    return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
18882}
18883
18884	/* Return true if we should emit an lea instruction instead of a mov
18885	   instruction.  */
18886
18887bool
18888ix86_use_lea_for_mov (rtx_insn *insn, rtx operands[])
18889{
18890  unsigned int regno0, regno1;
18891
18892  /* Check if we need to optimize.  */
18893  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
18894    return false;
18895
18896  /* Use lea for reg to reg moves only.  */
18897  if (!REG_P (operands[0]) || !REG_P (operands[1]))
18898    return false;
18899
18900  regno0 = true_regnum (operands[0]);
18901  regno1 = true_regnum (operands[1]);
18902
18903  return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
18904}
18905
18906	/* Return true if we need to split the lea into a sequence of
18907	   instructions to avoid AGU stalls.  */
18908
18909bool
18910ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
18911{
18912  unsigned int regno0, regno1, regno2;
18913  int split_cost;
18914  struct ix86_address parts;
18915  int ok;
18916
18917	  /* Check if we need to optimize.  */
18918  if (!TARGET_AVOID_LEA_FOR_ADDR || optimize_function_for_size_p (cfun))
18919    return false;
18920
18921  /* The "at least two components" test below might not catch simple
18922     move or zero extension insns if parts.base is non-NULL and parts.disp
18923     is const0_rtx as the only components in the address, e.g. if the
18924     register is %rbp or %r13.  As this test is much cheaper and moves or
18925     zero extensions are the common case, do this check first.  */
18926  if (REG_P (operands[1])
18927      || (SImode_address_operand (operands[1], VOIDmode)
18928	  && REG_P (XEXP (operands[1], 0))))
18929    return false;
18930
18931  /* Check if it is OK to split here.  */
18932  if (!ix86_ok_to_clobber_flags (insn))
18933    return false;
18934
18935  ok = ix86_decompose_address (operands[1], &parts);
18936  gcc_assert (ok);
18937
18938  /* There should be at least two components in the address.  */
18939  if ((parts.base != NULL_RTX) + (parts.index != NULL_RTX)
18940      + (parts.disp != NULL_RTX) + (parts.scale > 1) < 2)
18941    return false;
18942
18943	  /* We should not split into add if a non-legitimate PIC
18944	     operand is used as the displacement.  */
18945  if (parts.disp && flag_pic && !LEGITIMATE_PIC_OPERAND_P (parts.disp))
18946    return false;
18947
18948	  regno0 = true_regnum (operands[0]);
18949  regno1 = INVALID_REGNUM;
18950  regno2 = INVALID_REGNUM;
18951
18952  if (parts.base)
18953    regno1 = true_regnum (parts.base);
18954  if (parts.index)
18955    regno2 = true_regnum (parts.index);
18956
18957  split_cost = 0;
18958
18959	  /* Compute how many cycles we will add to the execution time
18960	     if we split the lea into a sequence of instructions.  */
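  /* Worked example (illustrative): for "lea (%rbx,%rcx,4), %rax" the
     split needs a mov (+1), a shift for the scale (+1) and an add of
     the base (+1); subtracting the lea itself (-1) gives a split
     cost of 2 extra ALU instructions.  */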
18961  if (parts.base || parts.index)
18962    {
18963	      /* Have to use a mov instruction if the non-destructive
18964		 destination form is used.  */
18965      if (regno1 != regno0 && regno2 != regno0)
18966	split_cost += 1;
18967
18968      /* Have to add index to base if both exist.  */
18969      if (parts.base && parts.index)
18970	split_cost += 1;
18971
18972      /* Have to use shift and adds if scale is 2 or greater.  */
18973      if (parts.scale > 1)
18974	{
18975	  if (regno0 != regno1)
18976	    split_cost += 1;
18977	  else if (regno2 == regno0)
18978	    split_cost += 4;
18979	  else
18980	    split_cost += parts.scale;
18981	}
18982
18983	      /* Have to use an add instruction with an immediate if
18984		 disp is nonzero.  */
18985      if (parts.disp && parts.disp != const0_rtx)
18986	split_cost += 1;
18987
18988      /* Subtract the price of lea.  */
18989      split_cost -= 1;
18990    }
18991
18992  return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
18993				parts.scale > 1);
18994}
18995
18996	/* Emit the x86 binary operator CODE in mode MODE, where the first operand
18997	   matches the destination.  The emitted RTX includes a clobber of FLAGS_REG.  */
18998
18999static void
19000ix86_emit_binop (enum rtx_code code, machine_mode mode,
19001		 rtx dst, rtx src)
19002{
19003  rtx op, clob;
19004
19005  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, dst, src));
19006  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19007
19008  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
19009}
19010
19011/* Return true if regno1 def is nearest to the insn.  */
19012
19013static bool
19014find_nearest_reg_def (rtx_insn *insn, int regno1, int regno2)
19015{
19016  rtx_insn *prev = insn;
19017  rtx_insn *start = BB_HEAD (BLOCK_FOR_INSN (insn));
19018
19019  if (insn == start)
19020    return false;
19021  while (prev && prev != start)
19022    {
19023      if (!INSN_P (prev) || !NONDEBUG_INSN_P (prev))
19024	{
19025	  prev = PREV_INSN (prev);
19026	  continue;
19027	}
19028      if (insn_defines_reg (regno1, INVALID_REGNUM, prev))
19029	return true;
19030      else if (insn_defines_reg (regno2, INVALID_REGNUM, prev))
19031	return false;
19032      prev = PREV_INSN (prev);
19033    }
19034
19035  /* None of the regs is defined in the bb.  */
19036  return false;
19037}
19038
19039	/* Split a lea instruction into a sequence of instructions
19040	   which are executed on the ALU to avoid AGU stalls.
19041	   It is assumed that it is allowed to clobber the flags register
19042	   at the lea position.  */
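/* For example (illustrative): "lea 16(%rbx,%rcx), %rax" can become
   "mov %rbx, %rax; add $16, %rax; add %rcx, %rax" (or with base and
   index swapped, depending on which definition is nearer), keeping
   all of the work on the ALU instead of the AGU.  */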
19043
19044void
19045ix86_split_lea_for_addr (rtx_insn *insn, rtx operands[], machine_mode mode)
19046{
19047  unsigned int regno0, regno1, regno2;
19048  struct ix86_address parts;
19049  rtx target, tmp;
19050  int ok, adds;
19051
19052  ok = ix86_decompose_address (operands[1], &parts);
19053  gcc_assert (ok);
19054
19055  target = gen_lowpart (mode, operands[0]);
19056
19057  regno0 = true_regnum (target);
19058  regno1 = INVALID_REGNUM;
19059  regno2 = INVALID_REGNUM;
19060
19061  if (parts.base)
19062    {
19063      parts.base = gen_lowpart (mode, parts.base);
19064      regno1 = true_regnum (parts.base);
19065    }
19066
19067  if (parts.index)
19068    {
19069      parts.index = gen_lowpart (mode, parts.index);
19070      regno2 = true_regnum (parts.index);
19071    }
19072
19073  if (parts.disp)
19074    parts.disp = gen_lowpart (mode, parts.disp);
19075
19076  if (parts.scale > 1)
19077    {
19078      /* Case r1 = r1 + ...  */
19079      if (regno1 == regno0)
19080	{
19081	  /* If we have the case r1 = r1 + C * r2 then we
19082	     would have to use multiplication, which is very
19083	     expensive.  Assume the cost model is wrong if we
19084	     have such a case here.  */
19085	  gcc_assert (regno2 != regno0);
19086
19087	  for (adds = parts.scale; adds > 0; adds--)
19088	    ix86_emit_binop (PLUS, mode, target, parts.index);
19089	}
19090      else
19091	{
19092	  /* r1 = r2 + r3 * C case.  Need to move r3 into r1.  */
19093	  if (regno0 != regno2)
19094	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19095
19096	  /* Use shift for scaling.  */
19097	  ix86_emit_binop (ASHIFT, mode, target,
19098			   GEN_INT (exact_log2 (parts.scale)));
19099
19100	  if (parts.base)
19101	    ix86_emit_binop (PLUS, mode, target, parts.base);
19102
19103	  if (parts.disp && parts.disp != const0_rtx)
19104	    ix86_emit_binop (PLUS, mode, target, parts.disp);
19105	}
19106    }
19107  else if (!parts.base && !parts.index)
19108    {
19109	      gcc_assert (parts.disp);
19110      emit_insn (gen_rtx_SET (VOIDmode, target, parts.disp));
19111    }
19112  else
19113    {
19114      if (!parts.base)
19115	{
19116	  if (regno0 != regno2)
19117	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.index));
19118	}
19119      else if (!parts.index)
19120	{
19121	  if (regno0 != regno1)
19122	    emit_insn (gen_rtx_SET (VOIDmode, target, parts.base));
19123	}
19124      else
19125	{
19126	  if (regno0 == regno1)
19127	    tmp = parts.index;
19128	  else if (regno0 == regno2)
19129	    tmp = parts.base;
19130	  else
19131	    {
19132	      rtx tmp1;
19133
19134	      /* Find better operand for SET instruction, depending
19135		 on which definition is farther from the insn.  */
19136	      if (find_nearest_reg_def (insn, regno1, regno2))
19137		tmp = parts.index, tmp1 = parts.base;
19138	      else
19139		tmp = parts.base, tmp1 = parts.index;
19140
19141	      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19142
19143	      if (parts.disp && parts.disp != const0_rtx)
19144		ix86_emit_binop (PLUS, mode, target, parts.disp);
19145
19146	      ix86_emit_binop (PLUS, mode, target, tmp1);
19147	      return;
19148	    }
19149
19150	  ix86_emit_binop (PLUS, mode, target, tmp);
19151	}
19152
19153      if (parts.disp && parts.disp != const0_rtx)
19154	ix86_emit_binop (PLUS, mode, target, parts.disp);
19155    }
19156}
19157
19158	/* Return true if it is ok to optimize an ADD operation to an LEA
19159	   operation to avoid flag register consumption.  For most processors,
19160	   ADD is faster than LEA.  For processors like BONNELL, if the
19161	   destination register of the LEA holds an actual address which will be
19162	   used soon, LEA is better; otherwise ADD is better.  */
19163
19164bool
19165ix86_lea_for_add_ok (rtx_insn *insn, rtx operands[])
19166{
19167  unsigned int regno0 = true_regnum (operands[0]);
19168  unsigned int regno1 = true_regnum (operands[1]);
19169  unsigned int regno2 = true_regnum (operands[2]);
19170
19171	  /* If a = b + c and (a != b && a != c), we must use the lea form.  */
19172  if (regno0 != regno1 && regno0 != regno2)
19173    return true;
19174
19175  if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
19176    return false;
19177
19178  return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
19179}
19180
19181/* Return true if destination reg of SET_BODY is shift count of
19182   USE_BODY.  */
19183
19184static bool
19185ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
19186{
19187  rtx set_dest;
19188  rtx shift_rtx;
19189  int i;
19190
19191  /* Retrieve destination of SET_BODY.  */
19192  switch (GET_CODE (set_body))
19193    {
19194    case SET:
19195      set_dest = SET_DEST (set_body);
19196      if (!set_dest || !REG_P (set_dest))
19197	return false;
19198      break;
19199    case PARALLEL:
19200      for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
19201	if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
19202					  use_body))
19203	  return true;
19204	      /* FALLTHRU */
19205	    default:
19206	      return false;
19207    }
19208
19209  /* Retrieve shift count of USE_BODY.  */
19210  switch (GET_CODE (use_body))
19211    {
19212    case SET:
19213      shift_rtx = XEXP (use_body, 1);
19214      break;
19215    case PARALLEL:
19216      for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
19217	if (ix86_dep_by_shift_count_body (set_body,
19218					  XVECEXP (use_body, 0, i)))
19219	  return true;
19220	      /* FALLTHRU */
19221	    default:
19222	      return false;
19223    }
19224
19225  if (shift_rtx
19226      && (GET_CODE (shift_rtx) == ASHIFT
19227	  || GET_CODE (shift_rtx) == LSHIFTRT
19228	  || GET_CODE (shift_rtx) == ASHIFTRT
19229	  || GET_CODE (shift_rtx) == ROTATE
19230	  || GET_CODE (shift_rtx) == ROTATERT))
19231    {
19232      rtx shift_count = XEXP (shift_rtx, 1);
19233
19234      /* Return true if shift count is dest of SET_BODY.  */
19235      if (REG_P (shift_count))
19236	{
19237	  /* Add check since it can be invoked before register
19238	     allocation in pre-reload schedule.  */
19239	  if (reload_completed
19240	      && true_regnum (set_dest) == true_regnum (shift_count))
19241	    return true;
19242		  else if (REGNO (set_dest) == REGNO (shift_count))
19243	    return true;
19244	}
19245    }
19246
19247  return false;
19248}
19249
19250/* Return true if destination reg of SET_INSN is shift count of
19251   USE_INSN.  */
19252
19253bool
19254ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
19255{
19256  return ix86_dep_by_shift_count_body (PATTERN (set_insn),
19257				       PATTERN (use_insn));
19258}
19259
19260/* Return TRUE or FALSE depending on whether the unary operator meets the
19261   appropriate constraints.  */
19262
19263bool
19264ix86_unary_operator_ok (enum rtx_code,
19265			machine_mode,
19266			rtx operands[2])
19267{
19268  /* If one of operands is memory, source and destination must match.  */
19269  if ((MEM_P (operands[0])
19270       || MEM_P (operands[1]))
19271      && ! rtx_equal_p (operands[0], operands[1]))
19272    return false;
19273  return true;
19274}
19275
19276/* Return TRUE if the operands to a vec_interleave_{high,low}v2df
19277   are ok, keeping in mind the possible movddup alternative.  */
19278
19279bool
19280ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
19281{
19282  if (MEM_P (operands[0]))
19283    return rtx_equal_p (operands[0], operands[1 + high]);
19284  if (MEM_P (operands[1]) && MEM_P (operands[2]))
19285    return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
19286  return true;
19287}
19288
19289/* Post-reload splitter for converting an SF or DFmode value in an
19290   SSE register into an unsigned SImode.  */
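/* Sketch of the approach used below (illustrative): values below 2^31
   are converted directly with the signed cvtt* conversion; for values
   at or above 2^31, the constant 2^31 is subtracted first and the
   sign bit (0x80000000) is xored back into the integer result.  */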
19291
19292void
19293ix86_split_convert_uns_si_sse (rtx operands[])
19294{
19295  machine_mode vecmode;
19296  rtx value, large, zero_or_two31, input, two31, x;
19297
19298  large = operands[1];
19299  zero_or_two31 = operands[2];
19300  input = operands[3];
19301  two31 = operands[4];
19302  vecmode = GET_MODE (large);
19303  value = gen_rtx_REG (vecmode, REGNO (operands[0]));
19304
19305  /* Load up the value into the low element.  We must ensure that the other
19306     elements are valid floats -- zero is the easiest such value.  */
19307  if (MEM_P (input))
19308    {
19309      if (vecmode == V4SFmode)
19310	emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
19311      else
19312	emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
19313    }
19314  else
19315    {
19316      input = gen_rtx_REG (vecmode, REGNO (input));
19317      emit_move_insn (value, CONST0_RTX (vecmode));
19318      if (vecmode == V4SFmode)
19319	emit_insn (gen_sse_movss (value, value, input));
19320      else
19321	emit_insn (gen_sse2_movsd (value, value, input));
19322    }
19323
19324  emit_move_insn (large, two31);
19325  emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
19326
19327  x = gen_rtx_fmt_ee (LE, vecmode, large, value);
19328  emit_insn (gen_rtx_SET (VOIDmode, large, x));
19329
19330  x = gen_rtx_AND (vecmode, zero_or_two31, large);
19331  emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
19332
19333  x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
19334  emit_insn (gen_rtx_SET (VOIDmode, value, x));
19335
19336  large = gen_rtx_REG (V4SImode, REGNO (large));
19337  emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
19338
19339  x = gen_rtx_REG (V4SImode, REGNO (value));
19340  if (vecmode == V4SFmode)
19341    emit_insn (gen_fix_truncv4sfv4si2 (x, value));
19342  else
19343    emit_insn (gen_sse2_cvttpd2dq (x, value));
19344  value = x;
19345
19346  emit_insn (gen_xorv4si3 (value, value, large));
19347}
19348
19349/* Convert an unsigned DImode value into a DFmode, using only SSE.
19350   Expects the 64-bit DImode to be supplied in a pair of integral
19351   registers.  Requires SSE2; will use SSE3 if available.  For x86_32,
19352   -mfpmath=sse, !optimize_size only.  */
19353
19354void
19355ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
19356{
19357  REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
19358  rtx int_xmm, fp_xmm;
19359  rtx biases, exponents;
19360  rtx x;
19361
19362  int_xmm = gen_reg_rtx (V4SImode);
19363  if (TARGET_INTER_UNIT_MOVES_TO_VEC)
19364    emit_insn (gen_movdi_to_sse (int_xmm, input));
19365  else if (TARGET_SSE_SPLIT_REGS)
19366    {
19367      emit_clobber (int_xmm);
19368      emit_move_insn (gen_lowpart (DImode, int_xmm), input);
19369    }
19370  else
19371    {
19372      x = gen_reg_rtx (V2DImode);
19373      ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
19374      emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
19375    }
19376
19377  x = gen_rtx_CONST_VECTOR (V4SImode,
19378			    gen_rtvec (4, GEN_INT (0x43300000UL),
19379				       GEN_INT (0x45300000UL),
19380				       const0_rtx, const0_rtx));
19381  exponents = validize_mem (force_const_mem (V4SImode, x));
19382
19383  /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
19384  emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
19385
19386  /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
19387     yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
19388     Similarly (0x45300000UL ## fp_value_hi_xmm) yields
19389     (0x1.0p84 + double(fp_value_hi_xmm)).
19390     Note these exponents differ by 32.  */
19391
19392  fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
19393
19394  /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
19395     in [0,2**32-1] and [0]+[2**32,2**64-1] respectively.  */
19396  real_ldexp (&bias_lo_rvt, &dconst1, 52);
19397  real_ldexp (&bias_hi_rvt, &dconst1, 84);
19398  biases = const_double_from_real_value (bias_lo_rvt, DFmode);
19399  x = const_double_from_real_value (bias_hi_rvt, DFmode);
19400  biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
19401  biases = validize_mem (force_const_mem (V2DFmode, biases));
19402  emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
19403
19404  /* Add the upper and lower DFmode values together.  */
19405  if (TARGET_SSE3)
19406    emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
19407  else
19408    {
19409      x = copy_to_mode_reg (V2DFmode, fp_xmm);
19410      emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
19411      emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
19412    }
19413
19414  ix86_expand_vector_extract (false, target, fp_xmm, 0);
19415}
19416
19417/* Not used, but eases macroization of patterns.  */
19418void
19419ix86_expand_convert_uns_sixf_sse (rtx, rtx)
19420{
19421  gcc_unreachable ();
19422}
19423
19424/* Convert an unsigned SImode value into a DFmode.  Only currently used
19425   for SSE, but applicable anywhere.  */
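/* Illustrative summary of the trick used below: compute
   (double) (int) (x - 0x80000000) + 0x1p31, i.e. bias the unsigned
   value into the signed range, use the signed int->double conversion,
   and add 2^31 back as a DFmode constant.  */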
19426
19427void
19428ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
19429{
19430  REAL_VALUE_TYPE TWO31r;
19431  rtx x, fp;
19432
19433  x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
19434			   NULL, 1, OPTAB_DIRECT);
19435
19436  fp = gen_reg_rtx (DFmode);
19437  emit_insn (gen_floatsidf2 (fp, x));
19438
19439  real_ldexp (&TWO31r, &dconst1, 31);
19440  x = const_double_from_real_value (TWO31r, DFmode);
19441
19442  x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
19443  if (x != target)
19444    emit_move_insn (target, x);
19445}
19446
19447/* Convert a signed DImode value into a DFmode.  Only used for SSE in
19448   32-bit mode; otherwise we have a direct convert instruction.  */
19449
19450void
19451ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
19452{
19453  REAL_VALUE_TYPE TWO32r;
19454  rtx fp_lo, fp_hi, x;
19455
19456  fp_lo = gen_reg_rtx (DFmode);
19457  fp_hi = gen_reg_rtx (DFmode);
19458
19459  emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
19460
19461  real_ldexp (&TWO32r, &dconst1, 32);
19462  x = const_double_from_real_value (TWO32r, DFmode);
19463  fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
19464
19465  ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
19466
19467  x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
19468			   0, OPTAB_DIRECT);
19469  if (x != target)
19470    emit_move_insn (target, x);
19471}
19472
19473/* Convert an unsigned SImode value into a SFmode, using only SSE.
19474   For x86_32, -mfpmath=sse, !optimize_size only.  */
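/* Illustrative summary: split the value into 16-bit halves and
   compute (float) (x >> 16) * 0x1p16 + (float) (x & 0xffff); both
   halves are converted exactly by the signed conversion since they
   fit in 16 bits.  */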
19475void
19476ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
19477{
19478  REAL_VALUE_TYPE ONE16r;
19479  rtx fp_hi, fp_lo, int_hi, int_lo, x;
19480
19481  real_ldexp (&ONE16r, &dconst1, 16);
19482  x = const_double_from_real_value (ONE16r, SFmode);
19483  int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
19484				      NULL, 0, OPTAB_DIRECT);
19485  int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
19486				      NULL, 0, OPTAB_DIRECT);
19487  fp_hi = gen_reg_rtx (SFmode);
19488  fp_lo = gen_reg_rtx (SFmode);
19489  emit_insn (gen_floatsisf2 (fp_hi, int_hi));
19490  emit_insn (gen_floatsisf2 (fp_lo, int_lo));
19491  fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
19492			       0, OPTAB_DIRECT);
19493  fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
19494			       0, OPTAB_DIRECT);
19495  if (!rtx_equal_p (target, fp_hi))
19496    emit_move_insn (target, fp_hi);
19497}
19498
19499/* floatunsv{4,8}siv{4,8}sf2 expander.  Expand code to convert
19500   a vector of unsigned ints VAL to vector of floats TARGET.  */
19501
19502void
19503ix86_expand_vector_convert_uns_vsivsf (rtx target, rtx val)
19504{
19505  rtx tmp[8];
19506  REAL_VALUE_TYPE TWO16r;
19507  machine_mode intmode = GET_MODE (val);
19508  machine_mode fltmode = GET_MODE (target);
19509  rtx (*cvt) (rtx, rtx);
19510
19511  if (intmode == V4SImode)
19512    cvt = gen_floatv4siv4sf2;
19513  else
19514    cvt = gen_floatv8siv8sf2;
19515  tmp[0] = ix86_build_const_vector (intmode, 1, GEN_INT (0xffff));
19516  tmp[0] = force_reg (intmode, tmp[0]);
19517  tmp[1] = expand_simple_binop (intmode, AND, val, tmp[0], NULL_RTX, 1,
19518				OPTAB_DIRECT);
19519  tmp[2] = expand_simple_binop (intmode, LSHIFTRT, val, GEN_INT (16),
19520				NULL_RTX, 1, OPTAB_DIRECT);
19521  tmp[3] = gen_reg_rtx (fltmode);
19522  emit_insn (cvt (tmp[3], tmp[1]));
19523  tmp[4] = gen_reg_rtx (fltmode);
19524  emit_insn (cvt (tmp[4], tmp[2]));
19525  real_ldexp (&TWO16r, &dconst1, 16);
19526  tmp[5] = const_double_from_real_value (TWO16r, SFmode);
19527  tmp[5] = force_reg (fltmode, ix86_build_const_vector (fltmode, 1, tmp[5]));
19528  tmp[6] = expand_simple_binop (fltmode, MULT, tmp[4], tmp[5], NULL_RTX, 1,
19529				OPTAB_DIRECT);
19530  tmp[7] = expand_simple_binop (fltmode, PLUS, tmp[3], tmp[6], target, 1,
19531				OPTAB_DIRECT);
19532  if (tmp[7] != target)
19533    emit_move_insn (target, tmp[7]);
19534}
19535
19536/* Adjust a V*SFmode/V*DFmode value VAL so that *sfix_trunc* resp. fix_trunc*
19537   pattern can be used on it instead of *ufix_trunc* resp. fixuns_trunc*.
19538   This is done by doing just signed conversion if < 0x1p31, and otherwise by
19539   subtracting 0x1p31 first and xoring in 0x80000000 from *XORP afterwards.  */
19540
19541rtx
19542ix86_expand_adjust_ufix_to_sfix_si (rtx val, rtx *xorp)
19543{
19544  REAL_VALUE_TYPE TWO31r;
19545  rtx two31r, tmp[4];
19546  machine_mode mode = GET_MODE (val);
19547  machine_mode scalarmode = GET_MODE_INNER (mode);
19548  machine_mode intmode = GET_MODE_SIZE (mode) == 32 ? V8SImode : V4SImode;
19549  rtx (*cmp) (rtx, rtx, rtx, rtx);
19550  int i;
19551
19552  for (i = 0; i < 3; i++)
19553    tmp[i] = gen_reg_rtx (mode);
19554  real_ldexp (&TWO31r, &dconst1, 31);
19555  two31r = const_double_from_real_value (TWO31r, scalarmode);
19556  two31r = ix86_build_const_vector (mode, 1, two31r);
19557  two31r = force_reg (mode, two31r);
19558  switch (mode)
19559    {
19560    case V8SFmode: cmp = gen_avx_maskcmpv8sf3; break;
19561    case V4SFmode: cmp = gen_sse_maskcmpv4sf3; break;
19562    case V4DFmode: cmp = gen_avx_maskcmpv4df3; break;
19563    case V2DFmode: cmp = gen_sse2_maskcmpv2df3; break;
19564    default: gcc_unreachable ();
19565    }
19566  tmp[3] = gen_rtx_LE (mode, two31r, val);
19567  emit_insn (cmp (tmp[0], two31r, val, tmp[3]));
19568  tmp[1] = expand_simple_binop (mode, AND, tmp[0], two31r, tmp[1],
19569				0, OPTAB_DIRECT);
19570  if (intmode == V4SImode || TARGET_AVX2)
19571    *xorp = expand_simple_binop (intmode, ASHIFT,
19572				 gen_lowpart (intmode, tmp[0]),
19573				 GEN_INT (31), NULL_RTX, 0,
19574				 OPTAB_DIRECT);
19575  else
19576    {
19577      rtx two31 = GEN_INT ((unsigned HOST_WIDE_INT) 1 << 31);
19578      two31 = ix86_build_const_vector (intmode, 1, two31);
19579      *xorp = expand_simple_binop (intmode, AND,
19580				   gen_lowpart (intmode, tmp[0]),
19581				   two31, NULL_RTX, 0,
19582				   OPTAB_DIRECT);
19583    }
19584  return expand_simple_binop (mode, MINUS, val, tmp[1], tmp[2],
19585			      0, OPTAB_DIRECT);
19586}
19587
19588/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
19589   then replicate the value for all elements of the vector
19590   register.  */
19591
19592rtx
19593ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
19594{
19595  int i, n_elt;
19596  rtvec v;
19597  machine_mode scalar_mode;
19598
19599  switch (mode)
19600    {
19601    case V64QImode:
19602    case V32QImode:
19603    case V16QImode:
19604    case V32HImode:
19605    case V16HImode:
19606    case V8HImode:
19607    case V16SImode:
19608    case V8SImode:
19609    case V4SImode:
19610    case V8DImode:
19611    case V4DImode:
19612    case V2DImode:
19613      gcc_assert (vect);
19614    case V16SFmode:
19615    case V8SFmode:
19616    case V4SFmode:
19617    case V8DFmode:
19618    case V4DFmode:
19619    case V2DFmode:
19620      n_elt = GET_MODE_NUNITS (mode);
19621      v = rtvec_alloc (n_elt);
19622      scalar_mode = GET_MODE_INNER (mode);
19623
19624      RTVEC_ELT (v, 0) = value;
19625
19626      for (i = 1; i < n_elt; ++i)
19627	RTVEC_ELT (v, i) = vect ? value : CONST0_RTX (scalar_mode);
19628
19629      return gen_rtx_CONST_VECTOR (mode, v);
19630
19631    default:
19632      gcc_unreachable ();
19633    }
19634}
19635
19636/* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
19637   and ix86_expand_int_vcond.  Create a mask for the sign bit in MODE
19638   for an SSE register.  If VECT is true, then replicate the mask for
19639   all elements of the vector register.  If INVERT is true, then create
19640   a mask excluding the sign bit.  */
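/* For example, for V4SFmode with VECT true this yields the vector constant
   { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, or with INVERT the
   complement { 0x7fffffff, ... }, ready to be ANDed or XORed with an SSE
   register.  */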
19641
19642rtx
19643ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
19644{
19645  machine_mode vec_mode, imode;
19646  HOST_WIDE_INT hi, lo;
19647  int shift = 63;
19648  rtx v;
19649  rtx mask;
19650
19651  /* Find the sign bit, sign extended to 2*HWI.  */
19652  switch (mode)
19653    {
19654    case V16SImode:
19655    case V16SFmode:
19656    case V8SImode:
19657    case V4SImode:
19658    case V8SFmode:
19659    case V4SFmode:
19660      vec_mode = mode;
19661      mode = GET_MODE_INNER (mode);
19662      imode = SImode;
19663      lo = 0x80000000, hi = lo < 0;
19664      break;
19665
19666    case V8DImode:
19667    case V4DImode:
19668    case V2DImode:
19669    case V8DFmode:
19670    case V4DFmode:
19671    case V2DFmode:
19672      vec_mode = mode;
19673      mode = GET_MODE_INNER (mode);
19674      imode = DImode;
19675      if (HOST_BITS_PER_WIDE_INT >= 64)
19676	lo = (HOST_WIDE_INT)1 << shift, hi = -1;
19677      else
19678	lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19679      break;
19680
19681    case TImode:
19682    case TFmode:
19683      vec_mode = VOIDmode;
19684      if (HOST_BITS_PER_WIDE_INT >= 64)
19685	{
19686	  imode = TImode;
19687	  lo = 0, hi = (HOST_WIDE_INT)1 << shift;
19688	}
19689      else
19690	{
19691	  rtvec vec;
19692
19693	  imode = DImode;
19694	  lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
19695
19696	  if (invert)
19697	    {
19698	      lo = ~lo, hi = ~hi;
19699	      v = constm1_rtx;
19700	    }
19701	  else
19702	    v = const0_rtx;
19703
19704	  mask = immed_double_const (lo, hi, imode);
19705
19706	  vec = gen_rtvec (2, v, mask);
19707	  v = gen_rtx_CONST_VECTOR (V2DImode, vec);
19708	  v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
19709
19710	  return v;
19711	}
      break;
19713
19714    default:
19715      gcc_unreachable ();
19716    }
19717
19718  if (invert)
19719    lo = ~lo, hi = ~hi;
19720
19721  /* Force this value into the low part of a fp vector constant.  */
19722  mask = immed_double_const (lo, hi, imode);
19723  mask = gen_lowpart (mode, mask);
19724
19725  if (vec_mode == VOIDmode)
19726    return force_reg (mode, mask);
19727
19728  v = ix86_build_const_vector (vec_mode, vect, mask);
19729  return force_reg (vec_mode, v);
19730}
19731
19732/* Generate code for floating point ABS or NEG.  */
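/* With SSE this reduces to a single logical instruction on the value,
   e.g. for SFmode (illustrative assembly; register allocation and
   constant placement will differ):

     neg:  xorps  signmask, %xmm0
     abs:  andps  notsignmask, %xmm0

   where signmask / notsignmask stand for the constants built by
   ix86_build_signbit_mask.  Without SSE this expands to the x87
   fchs/fabs instructions and no mask is needed.  */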
19733
19734void
19735ix86_expand_fp_absneg_operator (enum rtx_code code, machine_mode mode,
19736				rtx operands[])
19737{
19738  rtx mask, set, dst, src;
19739  bool use_sse = false;
19740  bool vector_mode = VECTOR_MODE_P (mode);
19741  machine_mode vmode = mode;
19742
19743  if (vector_mode)
19744    use_sse = true;
19745  else if (mode == TFmode)
19746    use_sse = true;
19747  else if (TARGET_SSE_MATH)
19748    {
19749      use_sse = SSE_FLOAT_MODE_P (mode);
19750      if (mode == SFmode)
19751	vmode = V4SFmode;
19752      else if (mode == DFmode)
19753	vmode = V2DFmode;
19754    }
19755
19756  /* NEG and ABS performed with SSE use bitwise mask operations.
19757     Create the appropriate mask now.  */
19758  if (use_sse)
19759    mask = ix86_build_signbit_mask (vmode, vector_mode, code == ABS);
19760  else
19761    mask = NULL_RTX;
19762
19763  dst = operands[0];
19764  src = operands[1];
19765
19766  set = gen_rtx_fmt_e (code, mode, src);
19767  set = gen_rtx_SET (VOIDmode, dst, set);
19768
19769  if (mask)
19770    {
19771      rtx use, clob;
19772      rtvec par;
19773
19774      use = gen_rtx_USE (VOIDmode, mask);
19775      if (vector_mode)
19776	par = gen_rtvec (2, set, use);
19777      else
19778	{
19779          clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
19780	  par = gen_rtvec (3, set, use, clob);
19781        }
19782      emit_insn (gen_rtx_PARALLEL (VOIDmode, par));
19783    }
19784  else
19785    emit_insn (set);
19786}
19787
19788/* Expand a copysign operation.  Special case operand 0 being a constant.  */
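/* copysign (x, y) is computed as (x & ~signmask) | (y & signmask), where
   signmask has only the sign bit set in each element.  For DFmode this is
   roughly (illustrative sequence; the split routines below pick the exact
   operand placement):

     andpd  notsignmask, %xmm0
     andpd  signmask, %xmm1
     orpd   %xmm1, %xmm0

   with notsignmask / signmask standing in for the two mask constants.  */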
19789
19790void
19791ix86_expand_copysign (rtx operands[])
19792{
19793  machine_mode mode, vmode;
19794  rtx dest, op0, op1, mask, nmask;
19795
19796  dest = operands[0];
19797  op0 = operands[1];
19798  op1 = operands[2];
19799
19800  mode = GET_MODE (dest);
19801
19802  if (mode == SFmode)
19803    vmode = V4SFmode;
19804  else if (mode == DFmode)
19805    vmode = V2DFmode;
19806  else
19807    vmode = mode;
19808
19809  if (GET_CODE (op0) == CONST_DOUBLE)
19810    {
19811      rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
19812
19813      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
19814	op0 = simplify_unary_operation (ABS, mode, op0, mode);
19815
19816      if (mode == SFmode || mode == DFmode)
19817	{
19818	  if (op0 == CONST0_RTX (mode))
19819	    op0 = CONST0_RTX (vmode);
19820	  else
19821	    {
19822	      rtx v = ix86_build_const_vector (vmode, false, op0);
19823
19824	      op0 = force_reg (vmode, v);
19825	    }
19826	}
19827      else if (op0 != CONST0_RTX (mode))
19828	op0 = force_reg (mode, op0);
19829
19830      mask = ix86_build_signbit_mask (vmode, 0, 0);
19831
19832      if (mode == SFmode)
19833	copysign_insn = gen_copysignsf3_const;
19834      else if (mode == DFmode)
19835	copysign_insn = gen_copysigndf3_const;
19836      else
19837	copysign_insn = gen_copysigntf3_const;
19838
      emit_insn (copysign_insn (dest, op0, op1, mask));
19840    }
19841  else
19842    {
19843      rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
19844
19845      nmask = ix86_build_signbit_mask (vmode, 0, 1);
19846      mask = ix86_build_signbit_mask (vmode, 0, 0);
19847
19848      if (mode == SFmode)
19849	copysign_insn = gen_copysignsf3_var;
19850      else if (mode == DFmode)
19851	copysign_insn = gen_copysigndf3_var;
19852      else
19853	copysign_insn = gen_copysigntf3_var;
19854
19855      emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
19856    }
19857}
19858
19859/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
19860   be a constant, and so has already been expanded into a vector constant.  */
19861
19862void
19863ix86_split_copysign_const (rtx operands[])
19864{
19865  machine_mode mode, vmode;
19866  rtx dest, op0, mask, x;
19867
19868  dest = operands[0];
19869  op0 = operands[1];
19870  mask = operands[3];
19871
19872  mode = GET_MODE (dest);
19873  vmode = GET_MODE (mask);
19874
19875  dest = simplify_gen_subreg (vmode, dest, mode, 0);
19876  x = gen_rtx_AND (vmode, dest, mask);
19877  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19878
19879  if (op0 != CONST0_RTX (vmode))
19880    {
19881      x = gen_rtx_IOR (vmode, dest, op0);
19882      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19883    }
19884}
19885
19886/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
19887   so we have to do two masks.  */
19888
19889void
19890ix86_split_copysign_var (rtx operands[])
19891{
19892  machine_mode mode, vmode;
19893  rtx dest, scratch, op0, op1, mask, nmask, x;
19894
19895  dest = operands[0];
19896  scratch = operands[1];
19897  op0 = operands[2];
19898  op1 = operands[3];
19899  nmask = operands[4];
19900  mask = operands[5];
19901
19902  mode = GET_MODE (dest);
19903  vmode = GET_MODE (mask);
19904
19905  if (rtx_equal_p (op0, op1))
19906    {
19907      /* Shouldn't happen often (it's useless, obviously), but when it does
19908	 we'd generate incorrect code if we continue below.  */
19909      emit_move_insn (dest, op0);
19910      return;
19911    }
19912
19913  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
19914    {
19915      gcc_assert (REGNO (op1) == REGNO (scratch));
19916
19917      x = gen_rtx_AND (vmode, scratch, mask);
19918      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19919
19920      dest = mask;
19921      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19922      x = gen_rtx_NOT (vmode, dest);
19923      x = gen_rtx_AND (vmode, x, op0);
19924      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19925    }
19926  else
19927    {
19928      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
19929	{
19930	  x = gen_rtx_AND (vmode, scratch, mask);
19931	}
19932      else						/* alternative 2,4 */
19933	{
19934          gcc_assert (REGNO (mask) == REGNO (scratch));
19935          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
19936	  x = gen_rtx_AND (vmode, scratch, op1);
19937	}
19938      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
19939
19940      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
19941	{
19942	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
19943	  x = gen_rtx_AND (vmode, dest, nmask);
19944	}
19945      else						/* alternative 3,4 */
19946	{
19947          gcc_assert (REGNO (nmask) == REGNO (dest));
19948	  dest = nmask;
19949	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
19950	  x = gen_rtx_AND (vmode, dest, op0);
19951	}
19952      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19953    }
19954
19955  x = gen_rtx_IOR (vmode, dest, scratch);
19956  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
19957}
19958
19959/* Return TRUE or FALSE depending on whether the first SET in INSN
19960   has source and destination with matching CC modes, and that the
19961   CC mode is at least as constrained as REQ_MODE.  */
19962
19963bool
19964ix86_match_ccmode (rtx insn, machine_mode req_mode)
19965{
19966  rtx set;
19967  machine_mode set_mode;
19968
19969  set = PATTERN (insn);
19970  if (GET_CODE (set) == PARALLEL)
19971    set = XVECEXP (set, 0, 0);
19972  gcc_assert (GET_CODE (set) == SET);
19973  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
19974
19975  set_mode = GET_MODE (SET_DEST (set));
19976  switch (set_mode)
19977    {
19978    case CCNOmode:
19979      if (req_mode != CCNOmode
19980	  && (req_mode != CCmode
19981	      || XEXP (SET_SRC (set), 1) != const0_rtx))
19982	return false;
19983      break;
19984    case CCmode:
19985      if (req_mode == CCGCmode)
19986	return false;
19987      /* FALLTHRU */
19988    case CCGCmode:
19989      if (req_mode == CCGOCmode || req_mode == CCNOmode)
19990	return false;
19991      /* FALLTHRU */
19992    case CCGOCmode:
19993      if (req_mode == CCZmode)
19994	return false;
19995      /* FALLTHRU */
19996    case CCZmode:
19997      break;
19998
19999    case CCAmode:
20000    case CCCmode:
20001    case CCOmode:
20002    case CCSmode:
20003      if (set_mode != req_mode)
20004	return false;
20005      break;
20006
20007    default:
20008      gcc_unreachable ();
20009    }
20010
20011  return GET_MODE (SET_SRC (set)) == set_mode;
20012}
20013
20014/* Generate insn patterns to do an integer compare of OPERANDS.  */
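/* For example, for (eq a b) with SImode operands this emits

     (set (reg:CCZ FLAGS_REG) (compare:CCZ (reg:SI a) (reg:SI b)))

   and hands back (eq (reg:CCZ FLAGS_REG) (const_int 0)) for the caller to
   plug into a jump, setcc or cmov pattern.  */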
20015
20016static rtx
20017ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
20018{
20019  machine_mode cmpmode;
20020  rtx tmp, flags;
20021
20022  cmpmode = SELECT_CC_MODE (code, op0, op1);
20023  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
20024
20025  /* This is very simple, but making the interface the same as in the
20026     FP case makes the rest of the code easier.  */
20027  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
20028  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
20029
20030  /* Return the test that should be put into the flags user, i.e.
20031     the bcc, scc, or cmov instruction.  */
20032  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
20033}
20034
20035/* Figure out whether to use ordered or unordered fp comparisons.
20036   Return the appropriate mode to use.  */
20037
20038machine_mode
20039ix86_fp_compare_mode (enum rtx_code)
20040{
  /* ??? In order to make all comparisons reversible, we do all comparisons
     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
     all forms of trapping and nontrapping comparisons, we can make inequality
     comparisons trapping again, since it results in better code when using
     FCOM based compares.  */
20046  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
20047}
20048
20049machine_mode
20050ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
20051{
20052  machine_mode mode = GET_MODE (op0);
20053
20054  if (SCALAR_FLOAT_MODE_P (mode))
20055    {
20056      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20057      return ix86_fp_compare_mode (code);
20058    }
20059
20060  switch (code)
20061    {
20062      /* Only zero flag is needed.  */
20063    case EQ:			/* ZF=0 */
20064    case NE:			/* ZF!=0 */
20065      return CCZmode;
20066      /* Codes needing carry flag.  */
20067    case GEU:			/* CF=0 */
20068    case LTU:			/* CF=1 */
20069      /* Detect overflow checks.  They need just the carry flag.  */
20070      if (GET_CODE (op0) == PLUS
20071	  && rtx_equal_p (op1, XEXP (op0, 0)))
20072	return CCCmode;
20073      else
20074	return CCmode;
20075    case GTU:			/* CF=0 & ZF=0 */
20076    case LEU:			/* CF=1 | ZF=1 */
20077      return CCmode;
      /* Codes possibly doable only with the sign flag when
         comparing against zero.  */
20080    case GE:			/* SF=OF   or   SF=0 */
20081    case LT:			/* SF<>OF  or   SF=1 */
20082      if (op1 == const0_rtx)
20083	return CCGOCmode;
20084      else
20085	/* For other cases Carry flag is not required.  */
20086	return CCGCmode;
      /* Codes doable only with the sign flag when comparing
         against zero, but we lack a jump instruction for it,
         so we need to use relational tests against overflow,
         which thus needs to be zero.  */
20091    case GT:			/* ZF=0 & SF=OF */
20092    case LE:			/* ZF=1 | SF<>OF */
20093      if (op1 == const0_rtx)
20094	return CCNOmode;
20095      else
20096	return CCGCmode;
      /* The strcmp pattern does a (use flags), and combine may ask us for
	 the proper mode.  */
20099    case USE:
20100      return CCmode;
20101    default:
20102      gcc_unreachable ();
20103    }
20104}
20105
20106/* Return the fixed registers used for condition codes.  */
20107
20108static bool
20109ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
20110{
20111  *p1 = FLAGS_REG;
20112  *p2 = FPSR_REG;
20113  return true;
20114}
20115
20116/* If two condition code modes are compatible, return a condition code
20117   mode which is compatible with both.  Otherwise, return
20118   VOIDmode.  */
20119
20120static machine_mode
20121ix86_cc_modes_compatible (machine_mode m1, machine_mode m2)
20122{
20123  if (m1 == m2)
20124    return m1;
20125
20126  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
20127    return VOIDmode;
20128
20129  if ((m1 == CCGCmode && m2 == CCGOCmode)
20130      || (m1 == CCGOCmode && m2 == CCGCmode))
20131    return CCGCmode;
20132
20133  if (m1 == CCZmode && (m2 == CCGCmode || m2 == CCGOCmode))
20134    return m2;
20135  else if (m2 == CCZmode && (m1 == CCGCmode || m1 == CCGOCmode))
20136    return m1;
20137
20138  switch (m1)
20139    {
20140    default:
20141      gcc_unreachable ();
20142
20143    case CCmode:
20144    case CCGCmode:
20145    case CCGOCmode:
20146    case CCNOmode:
20147    case CCAmode:
20148    case CCCmode:
20149    case CCOmode:
20150    case CCSmode:
20151    case CCZmode:
20152      switch (m2)
20153	{
20154	default:
20155	  return VOIDmode;
20156
20157	case CCmode:
20158	case CCGCmode:
20159	case CCGOCmode:
20160	case CCNOmode:
20161	case CCAmode:
20162	case CCCmode:
20163	case CCOmode:
20164	case CCSmode:
20165	case CCZmode:
20166	  return CCmode;
20167	}
20168
20169    case CCFPmode:
20170    case CCFPUmode:
20171      /* These are only compatible with themselves, which we already
20172	 checked above.  */
20173      return VOIDmode;
20174    }
20175}
20176
20177
/* Return a comparison we can do that is equivalent to
   swap_condition (code), apart possibly from orderedness.
   Never change orderedness if TARGET_IEEE_FP; return
   UNKNOWN in that case if necessary.  */
20182
20183static enum rtx_code
20184ix86_fp_swap_condition (enum rtx_code code)
20185{
20186  switch (code)
20187    {
20188    case GT:                   /* GTU - CF=0 & ZF=0 */
20189      return TARGET_IEEE_FP ? UNKNOWN : UNLT;
20190    case GE:                   /* GEU - CF=0 */
20191      return TARGET_IEEE_FP ? UNKNOWN : UNLE;
20192    case UNLT:                 /* LTU - CF=1 */
20193      return TARGET_IEEE_FP ? UNKNOWN : GT;
20194    case UNLE:                 /* LEU - CF=1 | ZF=1 */
20195      return TARGET_IEEE_FP ? UNKNOWN : GE;
20196    default:
20197      return swap_condition (code);
20198    }
20199}
20200
/* Return the cost of comparison CODE using the best strategy for performance.
   All of the following functions use the number of instructions as the cost
   metric.  In the future this should be tweaked to compute bytes for
   optimize_size and take into account the performance of various
   instructions on various CPUs.  */
20205
20206static int
20207ix86_fp_comparison_cost (enum rtx_code code)
20208{
20209  int arith_cost;
20210
20211  /* The cost of code using bit-twiddling on %ah.  */
20212  switch (code)
20213    {
20214    case UNLE:
20215    case UNLT:
20216    case LTGT:
20217    case GT:
20218    case GE:
20219    case UNORDERED:
20220    case ORDERED:
20221    case UNEQ:
20222      arith_cost = 4;
20223      break;
20224    case LT:
20225    case NE:
20226    case EQ:
20227    case UNGE:
20228      arith_cost = TARGET_IEEE_FP ? 5 : 4;
20229      break;
20230    case LE:
20231    case UNGT:
20232      arith_cost = TARGET_IEEE_FP ? 6 : 4;
20233      break;
20234    default:
20235      gcc_unreachable ();
20236    }
20237
20238  switch (ix86_fp_comparison_strategy (code))
20239    {
20240    case IX86_FPCMP_COMI:
20241      return arith_cost > 4 ? 3 : 2;
20242    case IX86_FPCMP_SAHF:
20243      return arith_cost > 4 ? 4 : 3;
20244    default:
20245      return arith_cost;
20246    }
20247}
20248
/* Return the strategy to use for a floating-point comparison.  We assume
   that fcomi is always preferable where available, since that is also true
   when looking at size (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for
   fnstsw+test).  */
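/* Illustrative instruction sequences for the three strategies:

     IX86_FPCMP_COMI:   fcomip %st(1), %st
     IX86_FPCMP_SAHF:   fnstsw %ax; sahf
     IX86_FPCMP_ARITH:  fnstsw %ax; testb $0x45, %ah

   fcomip sets ZF/PF/CF directly, sahf copies C3/C2/C0 from AH into the
   flags, and the arithmetic variant bit-tests the status word by hand.  */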
20252
20253enum ix86_fpcmp_strategy
20254ix86_fp_comparison_strategy (enum rtx_code)
20255{
20256  /* Do fcomi/sahf based test when profitable.  */
20257
20258  if (TARGET_CMOVE)
20259    return IX86_FPCMP_COMI;
20260
20261  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
20262    return IX86_FPCMP_SAHF;
20263
20264  return IX86_FPCMP_ARITH;
20265}
20266
20267/* Swap, force into registers, or otherwise massage the two operands
20268   to a fp comparison.  The operands are updated in place; the new
20269   comparison code is returned.  */
20270
20271static enum rtx_code
20272ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
20273{
20274  machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
20275  rtx op0 = *pop0, op1 = *pop1;
20276  machine_mode op_mode = GET_MODE (op0);
20277  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
20278
20279  /* All of the unordered compare instructions only work on registers.
20280     The same is true of the fcomi compare instructions.  The XFmode
20281     compare instructions require registers except when comparing
20282     against zero or when converting operand 1 from fixed point to
20283     floating point.  */
20284
20285  if (!is_sse
20286      && (fpcmp_mode == CCFPUmode
20287	  || (op_mode == XFmode
20288	      && ! (standard_80387_constant_p (op0) == 1
20289		    || standard_80387_constant_p (op1) == 1)
20290	      && GET_CODE (op1) != FLOAT)
20291	  || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
20292    {
20293      op0 = force_reg (op_mode, op0);
20294      op1 = force_reg (op_mode, op1);
20295    }
20296  else
20297    {
20298      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
20299	 things around if they appear profitable, otherwise force op0
20300	 into a register.  */
20301
20302      if (standard_80387_constant_p (op0) == 0
20303	  || (MEM_P (op0)
20304	      && ! (standard_80387_constant_p (op1) == 0
20305		    || MEM_P (op1))))
20306	{
20307	  enum rtx_code new_code = ix86_fp_swap_condition (code);
20308	  if (new_code != UNKNOWN)
20309	    {
20310	      std::swap (op0, op1);
20311	      code = new_code;
20312	    }
20313	}
20314
20315      if (!REG_P (op0))
20316	op0 = force_reg (op_mode, op0);
20317
20318      if (CONSTANT_P (op1))
20319	{
20320	  int tmp = standard_80387_constant_p (op1);
20321	  if (tmp == 0)
20322	    op1 = validize_mem (force_const_mem (op_mode, op1));
20323	  else if (tmp == 1)
20324	    {
20325	      if (TARGET_CMOVE)
20326		op1 = force_reg (op_mode, op1);
20327	    }
20328	  else
20329	    op1 = force_reg (op_mode, op1);
20330	}
20331    }
20332
20333  /* Try to rearrange the comparison to make it cheaper.  */
20334  if (ix86_fp_comparison_cost (code)
20335      > ix86_fp_comparison_cost (swap_condition (code))
20336      && (REG_P (op1) || can_create_pseudo_p ()))
20337    {
20338      std::swap (op0, op1);
20339      code = swap_condition (code);
20340      if (!REG_P (op0))
20341	op0 = force_reg (op_mode, op0);
20342    }
20343
20344  *pop0 = op0;
20345  *pop1 = op1;
20346  return code;
20347}
20348
20349/* Convert comparison codes we use to represent FP comparison to integer
20350   code that will result in proper branch.  Return UNKNOWN if no such code
20351   is available.  */
20352
20353enum rtx_code
20354ix86_fp_compare_code_to_integer (enum rtx_code code)
20355{
20356  switch (code)
20357    {
20358    case GT:
20359      return GTU;
20360    case GE:
20361      return GEU;
    case ORDERED:
    case UNORDERED:
      return code;
    case UNEQ:
      return EQ;
    case UNLT:
      return LTU;
    case UNLE:
      return LEU;
    case LTGT:
      return NE;
20378    default:
20379      return UNKNOWN;
20380    }
20381}
20382
20383/* Generate insn patterns to do a floating point compare of OPERANDS.  */
20384
20385static rtx
20386ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
20387{
20388  machine_mode fpcmp_mode, intcmp_mode;
20389  rtx tmp, tmp2;
20390
20391  fpcmp_mode = ix86_fp_compare_mode (code);
20392  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
20393
20394  /* Do fcomi/sahf based test when profitable.  */
20395  switch (ix86_fp_comparison_strategy (code))
20396    {
20397    case IX86_FPCMP_COMI:
20398      intcmp_mode = fpcmp_mode;
20399      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20400      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20401			 tmp);
20402      emit_insn (tmp);
20403      break;
20404
20405    case IX86_FPCMP_SAHF:
20406      intcmp_mode = fpcmp_mode;
20407      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20408      tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
20409			 tmp);
20410
20411      if (!scratch)
20412	scratch = gen_reg_rtx (HImode);
20413      tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
20414      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
20415      break;
20416
20417    case IX86_FPCMP_ARITH:
20418      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
20419      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
20420      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
20421      if (!scratch)
20422	scratch = gen_reg_rtx (HImode);
20423      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
20424
20425      /* In the unordered case, we have to check C2 for NaN's, which
20426	 doesn't happen to work out to anything nice combination-wise.
20427	 So do some bit twiddling on the value we've got in AH to come
20428	 up with an appropriate set of condition codes.  */
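      /* For reference, the x87 condition bits land in AH as follows after
	 fnstsw: C0 is bit 0 (0x01), C2 is bit 2 (0x04) and C3 is bit 6
	 (0x40); thus 0x45 masks C3|C2|C0 and 0x05 masks C2|C0.  After a
	 compare, C0, C2 and C3 correspond to the CF, PF and ZF bits that
	 fcomi would set.  */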
20429
20430      intcmp_mode = CCNOmode;
20431      switch (code)
20432	{
20433	case GT:
20434	case UNGT:
20435	  if (code == GT || !TARGET_IEEE_FP)
20436	    {
20437	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20438	      code = EQ;
20439	    }
20440	  else
20441	    {
20442	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20443	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20444	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
20445	      intcmp_mode = CCmode;
20446	      code = GEU;
20447	    }
20448	  break;
20449	case LT:
20450	case UNLT:
20451	  if (code == LT && TARGET_IEEE_FP)
20452	    {
20453	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20454	      emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
20455	      intcmp_mode = CCmode;
20456	      code = EQ;
20457	    }
20458	  else
20459	    {
20460	      emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
20461	      code = NE;
20462	    }
20463	  break;
20464	case GE:
20465	case UNGE:
20466	  if (code == GE || !TARGET_IEEE_FP)
20467	    {
20468	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
20469	      code = EQ;
20470	    }
20471	  else
20472	    {
20473	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20474	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
20475	      code = NE;
20476	    }
20477	  break;
20478	case LE:
20479	case UNLE:
20480	  if (code == LE && TARGET_IEEE_FP)
20481	    {
20482	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20483	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
20484	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20485	      intcmp_mode = CCmode;
20486	      code = LTU;
20487	    }
20488	  else
20489	    {
20490	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
20491	      code = NE;
20492	    }
20493	  break;
20494	case EQ:
20495	case UNEQ:
20496	  if (code == EQ && TARGET_IEEE_FP)
20497	    {
20498	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20499	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
20500	      intcmp_mode = CCmode;
20501	      code = EQ;
20502	    }
20503	  else
20504	    {
20505	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20506	      code = NE;
20507	    }
20508	  break;
20509	case NE:
20510	case LTGT:
20511	  if (code == NE && TARGET_IEEE_FP)
20512	    {
20513	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
20514	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
20515					     GEN_INT (0x40)));
20516	      code = NE;
20517	    }
20518	  else
20519	    {
20520	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
20521	      code = EQ;
20522	    }
20523	  break;
20524
20525	case UNORDERED:
20526	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20527	  code = NE;
20528	  break;
20529	case ORDERED:
20530	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
20531	  code = EQ;
20532	  break;
20533
20534	default:
20535	  gcc_unreachable ();
20536	}
20537	break;
20538
20539    default:
      gcc_unreachable ();
20541    }
20542
20543  /* Return the test that should be put into the flags user, i.e.
20544     the bcc, scc, or cmov instruction.  */
20545  return gen_rtx_fmt_ee (code, VOIDmode,
20546			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
20547			 const0_rtx);
20548}
20549
20550static rtx
20551ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
20552{
20553  rtx ret;
20554
20555  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
20556    ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
20557
20558  else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
20559    {
20560      gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
20561      ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20562    }
20563  else
20564    ret = ix86_expand_int_compare (code, op0, op1);
20565
20566  return ret;
20567}
20568
20569void
20570ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
20571{
20572  machine_mode mode = GET_MODE (op0);
20573  rtx tmp;
20574
20575  switch (mode)
20576    {
20577    case SFmode:
20578    case DFmode:
20579    case XFmode:
20580    case QImode:
20581    case HImode:
20582    case SImode:
20583      simple:
20584      tmp = ix86_expand_compare (code, op0, op1);
20585      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20586				  gen_rtx_LABEL_REF (VOIDmode, label),
20587				  pc_rtx);
20588      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20589      return;
20590
20591    case DImode:
20592      if (TARGET_64BIT)
	goto simple;
      /* FALLTHRU */
20594    case TImode:
20595      /* Expand DImode branch into multiple compare+branch.  */
20596      {
20597	rtx lo[2], hi[2];
20598	rtx_code_label *label2;
20599	enum rtx_code code1, code2, code3;
20600	machine_mode submode;
20601
20602	if (CONSTANT_P (op0) && !CONSTANT_P (op1))
20603	  {
20604	    std::swap (op0, op1);
20605	    code = swap_condition (code);
20606	  }
20607
20608	split_double_mode (mode, &op0, 1, lo+0, hi+0);
20609	split_double_mode (mode, &op1, 1, lo+1, hi+1);
20610
20611	submode = mode == DImode ? SImode : DImode;
20612
20613	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
20614	   avoid two branches.  This costs one extra insn, so disable when
20615	   optimizing for size.  */
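	/* Illustrative 32-bit sequence for a DImode equality test
	   (register choices are only an example):

	     xorl  hi1, hi0
	     xorl  lo1, lo0
	     orl   hi0, lo0
	     jz    label

	   with jnz instead of jz for the NE case.  */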
20616
20617	if ((code == EQ || code == NE)
20618	    && (!optimize_insn_for_size_p ()
20619	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
20620	  {
20621	    rtx xor0, xor1;
20622
20623	    xor1 = hi[0];
20624	    if (hi[1] != const0_rtx)
20625	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
20626				   NULL_RTX, 0, OPTAB_WIDEN);
20627
20628	    xor0 = lo[0];
20629	    if (lo[1] != const0_rtx)
20630	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
20631				   NULL_RTX, 0, OPTAB_WIDEN);
20632
20633	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
20634				NULL_RTX, 0, OPTAB_WIDEN);
20635
20636	    ix86_expand_branch (code, tmp, const0_rtx, label);
20637	    return;
20638	  }
20639
	/* Otherwise, if we are doing less-than or greater-or-equal-than,
	   op1 is a constant and its low word is zero, then we can just
	   examine the high word.  Similarly for a low word of -1 and
	   less-or-equal-than or greater-than.  */
20644
20645	if (CONST_INT_P (hi[1]))
20646	  switch (code)
20647	    {
20648	    case LT: case LTU: case GE: case GEU:
20649	      if (lo[1] == const0_rtx)
20650		{
20651		  ix86_expand_branch (code, hi[0], hi[1], label);
20652		  return;
20653		}
20654	      break;
20655	    case LE: case LEU: case GT: case GTU:
20656	      if (lo[1] == constm1_rtx)
20657		{
20658		  ix86_expand_branch (code, hi[0], hi[1], label);
20659		  return;
20660		}
20661	      break;
20662	    default:
20663	      break;
20664	    }
20665
20666	/* Otherwise, we need two or three jumps.  */
20667
20668	label2 = gen_label_rtx ();
20669
20670	code1 = code;
20671	code2 = swap_condition (code);
20672	code3 = unsigned_condition (code);
20673
20674	switch (code)
20675	  {
20676	  case LT: case GT: case LTU: case GTU:
20677	    break;
20678
20679	  case LE:   code1 = LT;  code2 = GT;  break;
20680	  case GE:   code1 = GT;  code2 = LT;  break;
20681	  case LEU:  code1 = LTU; code2 = GTU; break;
20682	  case GEU:  code1 = GTU; code2 = LTU; break;
20683
20684	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
20685	  case NE:   code2 = UNKNOWN; break;
20686
20687	  default:
20688	    gcc_unreachable ();
20689	  }
20690
20691	/*
20692	 * a < b =>
20693	 *    if (hi(a) < hi(b)) goto true;
20694	 *    if (hi(a) > hi(b)) goto false;
20695	 *    if (lo(a) < lo(b)) goto true;
20696	 *  false:
20697	 */
20698
20699	if (code1 != UNKNOWN)
20700	  ix86_expand_branch (code1, hi[0], hi[1], label);
20701	if (code2 != UNKNOWN)
20702	  ix86_expand_branch (code2, hi[0], hi[1], label2);
20703
20704	ix86_expand_branch (code3, lo[0], lo[1], label);
20705
20706	if (code2 != UNKNOWN)
20707	  emit_label (label2);
20708	return;
20709      }
20710
20711    default:
20712      gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
20713      goto simple;
20714    }
20715}
20716
20717/* Split branch based on floating point condition.  */
20718void
20719ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
20720		      rtx target1, rtx target2, rtx tmp)
20721{
20722  rtx condition;
20723  rtx i;
20724
20725  if (target2 != pc_rtx)
20726    {
20727      std::swap (target1, target2);
20728      code = reverse_condition_maybe_unordered (code);
20729    }
20730
20731  condition = ix86_expand_fp_compare (code, op1, op2,
20732				      tmp);
20733
20734  i = emit_jump_insn (gen_rtx_SET
20735		      (VOIDmode, pc_rtx,
20736		       gen_rtx_IF_THEN_ELSE (VOIDmode,
20737					     condition, target1, target2)));
20738  if (split_branch_probability >= 0)
20739    add_int_reg_note (i, REG_BR_PROB, split_branch_probability);
20740}
20741
20742void
20743ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20744{
20745  rtx ret;
20746
20747  gcc_assert (GET_MODE (dest) == QImode);
20748
20749  ret = ix86_expand_compare (code, op0, op1);
20750  PUT_MODE (ret, QImode);
20751  emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
20752}
20753
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set *POP to the comparison operation.  */
20756static bool
20757ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
20758{
20759  machine_mode mode =
20760    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
20761
  /* Do not handle double-mode compares that go through a special path.  */
20763  if (mode == (TARGET_64BIT ? TImode : DImode))
20764    return false;
20765
20766  if (SCALAR_FLOAT_MODE_P (mode))
20767    {
20768      rtx compare_op;
20769      rtx_insn *compare_seq;
20770
20771      gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
20772
      /* Shortcut:  the following common codes never translate
	 into carry flag compares.  */
20775      if (code == EQ || code == NE || code == UNEQ || code == LTGT
20776	  || code == ORDERED || code == UNORDERED)
20777	return false;
20778
20779      /* These comparisons require zero flag; swap operands so they won't.  */
20780      if ((code == GT || code == UNLE || code == LE || code == UNGT)
20781	  && !TARGET_IEEE_FP)
20782	{
20783	  std::swap (op0, op1);
20784	  code = swap_condition (code);
20785	}
20786
      /* Try to expand the comparison and verify that we end up with a
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a very common scenario.  */
20791      start_sequence ();
20792      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
20793      compare_seq = get_insns ();
20794      end_sequence ();
20795
20796      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
20797	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
20798        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
20799      else
20800	code = GET_CODE (compare_op);
20801
20802      if (code != LTU && code != GEU)
20803	return false;
20804
20805      emit_insn (compare_seq);
20806      *pop = compare_op;
20807      return true;
20808    }
20809
20810  if (!INTEGRAL_MODE_P (mode))
20811    return false;
20812
20813  switch (code)
20814    {
20815    case LTU:
20816    case GEU:
20817      break;
20818
20819    /* Convert a==0 into (unsigned)a<1.  */
20820    case EQ:
20821    case NE:
20822      if (op1 != const0_rtx)
20823	return false;
20824      op1 = const1_rtx;
20825      code = (code == EQ ? LTU : GEU);
20826      break;
20827
    /* Convert a>b into b<a or a>=b+1.  */
20829    case GTU:
20830    case LEU:
20831      if (CONST_INT_P (op1))
20832	{
20833	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
20834	  /* Bail out on overflow.  We still can swap operands but that
20835	     would force loading of the constant into register.  */
20836	  if (op1 == const0_rtx
20837	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
20838	    return false;
20839	  code = (code == GTU ? GEU : LTU);
20840	}
20841      else
20842	{
20843	  std::swap (op0, op1);
20844	  code = (code == GTU ? LTU : GEU);
20845	}
20846      break;
20847
20848    /* Convert a>=0 into (unsigned)a<0x80000000.  */
20849    case LT:
20850    case GE:
20851      if (mode == DImode || op1 != const0_rtx)
20852	return false;
20853      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20854      code = (code == LT ? GEU : LTU);
20855      break;
20856    case LE:
20857    case GT:
20858      if (mode == DImode || op1 != constm1_rtx)
20859	return false;
20860      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
20861      code = (code == LE ? GEU : LTU);
20862      break;
20863
20864    default:
20865      return false;
20866    }
  /* Swapping operands may cause a constant to appear as the first operand.  */
20868  if (!nonimmediate_operand (op0, VOIDmode))
20869    {
20870      if (!can_create_pseudo_p ())
20871	return false;
20872      op0 = force_reg (mode, op0);
20873    }
20874  *pop = ix86_expand_compare (code, op0, op1);
20875  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
20876  return true;
20877}
20878
20879bool
20880ix86_expand_int_movcc (rtx operands[])
20881{
20882  enum rtx_code code = GET_CODE (operands[1]), compare_code;
20883  rtx_insn *compare_seq;
20884  rtx compare_op;
20885  machine_mode mode = GET_MODE (operands[0]);
20886  bool sign_bit_compare_p = false;
20887  rtx op0 = XEXP (operands[1], 0);
20888  rtx op1 = XEXP (operands[1], 1);
20889
20890  if (GET_MODE (op0) == TImode
20891      || (GET_MODE (op0) == DImode
20892	  && !TARGET_64BIT))
20893    return false;
20894
20895  start_sequence ();
20896  compare_op = ix86_expand_compare (code, op0, op1);
20897  compare_seq = get_insns ();
20898  end_sequence ();
20899
20900  compare_code = GET_CODE (compare_op);
20901
20902  if ((op1 == const0_rtx && (code == GE || code == LT))
20903      || (op1 == constm1_rtx && (code == GT || code == LE)))
20904    sign_bit_compare_p = true;
20905
20906  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
20907     HImode insns, we'd be swallowed in word prefix ops.  */
20908
20909  if ((mode != HImode || TARGET_FAST_PREFIX)
20910      && (mode != (TARGET_64BIT ? TImode : DImode))
20911      && CONST_INT_P (operands[2])
20912      && CONST_INT_P (operands[3]))
20913    {
20914      rtx out = operands[0];
20915      HOST_WIDE_INT ct = INTVAL (operands[2]);
20916      HOST_WIDE_INT cf = INTVAL (operands[3]);
20917      HOST_WIDE_INT diff;
20918
20919      diff = ct - cf;
      /* Sign bit compares are better done using shifts than by using
	 sbb.  */
20922      if (sign_bit_compare_p
20923	  || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
20924	{
20925	  /* Detect overlap between destination and compare sources.  */
20926	  rtx tmp = out;
20927
20928          if (!sign_bit_compare_p)
20929	    {
20930	      rtx flags;
20931	      bool fpcmp = false;
20932
20933	      compare_code = GET_CODE (compare_op);
20934
20935	      flags = XEXP (compare_op, 0);
20936
20937	      if (GET_MODE (flags) == CCFPmode
20938		  || GET_MODE (flags) == CCFPUmode)
20939		{
20940		  fpcmp = true;
20941		  compare_code
20942		    = ix86_fp_compare_code_to_integer (compare_code);
20943		}
20944
20945	      /* To simplify rest of code, restrict to the GEU case.  */
20946	      if (compare_code == LTU)
20947		{
20948		  std::swap (ct, cf);
20949		  compare_code = reverse_condition (compare_code);
20950		  code = reverse_condition (code);
20951		}
20952	      else
20953		{
20954		  if (fpcmp)
20955		    PUT_CODE (compare_op,
20956			      reverse_condition_maybe_unordered
20957			        (GET_CODE (compare_op)));
20958		  else
20959		    PUT_CODE (compare_op,
20960			      reverse_condition (GET_CODE (compare_op)));
20961		}
20962	      diff = ct - cf;
20963
20964	      if (reg_overlap_mentioned_p (out, op0)
20965		  || reg_overlap_mentioned_p (out, op1))
20966		tmp = gen_reg_rtx (mode);
20967
20968	      if (mode == DImode)
20969		emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
20970	      else
20971		emit_insn (gen_x86_movsicc_0_m1	(gen_lowpart (SImode, tmp),
20972						 flags, compare_op));
20973	    }
20974	  else
20975	    {
20976	      if (code == GT || code == GE)
20977		code = reverse_condition (code);
20978	      else
20979		{
20980		  std::swap (ct, cf);
20981		  diff = ct - cf;
20982		}
20983	      tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
20984	    }
20985
20986	  if (diff == 1)
20987	    {
20988	      /*
20989	       * cmpl op0,op1
20990	       * sbbl dest,dest
20991	       * [addl dest, ct]
20992	       *
20993	       * Size 5 - 8.
20994	       */
20995	      if (ct)
20996		tmp = expand_simple_binop (mode, PLUS,
20997					   tmp, GEN_INT (ct),
20998					   copy_rtx (tmp), 1, OPTAB_DIRECT);
20999	    }
21000	  else if (cf == -1)
21001	    {
21002	      /*
21003	       * cmpl op0,op1
21004	       * sbbl dest,dest
21005	       * orl $ct, dest
21006	       *
21007	       * Size 8.
21008	       */
21009	      tmp = expand_simple_binop (mode, IOR,
21010					 tmp, GEN_INT (ct),
21011					 copy_rtx (tmp), 1, OPTAB_DIRECT);
21012	    }
21013	  else if (diff == -1 && ct)
21014	    {
21015	      /*
21016	       * cmpl op0,op1
21017	       * sbbl dest,dest
21018	       * notl dest
21019	       * [addl dest, cf]
21020	       *
21021	       * Size 8 - 11.
21022	       */
21023	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21024	      if (cf)
21025		tmp = expand_simple_binop (mode, PLUS,
21026					   copy_rtx (tmp), GEN_INT (cf),
21027					   copy_rtx (tmp), 1, OPTAB_DIRECT);
21028	    }
21029	  else
21030	    {
21031	      /*
21032	       * cmpl op0,op1
21033	       * sbbl dest,dest
21034	       * [notl dest]
21035	       * andl cf - ct, dest
21036	       * [addl dest, ct]
21037	       *
21038	       * Size 8 - 11.
21039	       */
21040
21041	      if (cf == 0)
21042		{
21043		  cf = ct;
21044		  ct = 0;
21045		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
21046		}
21047
21048	      tmp = expand_simple_binop (mode, AND,
21049					 copy_rtx (tmp),
21050					 gen_int_mode (cf - ct, mode),
21051					 copy_rtx (tmp), 1, OPTAB_DIRECT);
21052	      if (ct)
21053		tmp = expand_simple_binop (mode, PLUS,
21054					   copy_rtx (tmp), GEN_INT (ct),
21055					   copy_rtx (tmp), 1, OPTAB_DIRECT);
21056	    }
21057
21058	  if (!rtx_equal_p (tmp, out))
21059	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
21060
21061	  return true;
21062	}
21063
21064      if (diff < 0)
21065	{
21066	  machine_mode cmp_mode = GET_MODE (op0);
21067	  enum rtx_code new_code;
21068
21069	  if (SCALAR_FLOAT_MODE_P (cmp_mode))
21070	    {
21071	      gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21072
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
21077	      new_code = reverse_condition_maybe_unordered (code);
21078	    }
21079	  else
21080	    new_code = ix86_reverse_condition (code, cmp_mode);
21081	  if (new_code != UNKNOWN)
21082	    {
21083	      std::swap (ct, cf);
21084	      diff = -diff;
21085	      code = new_code;
21086	    }
21087	}
21088
21089      compare_code = UNKNOWN;
21090      if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
21091	  && CONST_INT_P (op1))
21092	{
21093	  if (op1 == const0_rtx
21094	      && (code == LT || code == GE))
21095	    compare_code = code;
21096	  else if (op1 == constm1_rtx)
21097	    {
21098	      if (code == LE)
21099		compare_code = LT;
21100	      else if (code == GT)
21101		compare_code = GE;
21102	    }
21103	}
21104
21105      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
21106      if (compare_code != UNKNOWN
21107	  && GET_MODE (op0) == GET_MODE (out)
21108	  && (cf == -1 || ct == -1))
21109	{
	  /* If the lea code below could be used, only optimize
	     if it results in a 2-insn sequence.  */
21112
21113	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
21114		 || diff == 3 || diff == 5 || diff == 9)
21115	      || (compare_code == LT && ct == -1)
21116	      || (compare_code == GE && cf == -1))
21117	    {
21118	      /*
21119	       * notl op1	(if necessary)
21120	       * sarl $31, op1
21121	       * orl cf, op1
21122	       */
21123	      if (ct != -1)
21124		{
21125		  cf = ct;
21126		  ct = -1;
21127		  code = reverse_condition (code);
21128		}
21129
21130	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21131
21132	      out = expand_simple_binop (mode, IOR,
21133					 out, GEN_INT (cf),
21134					 out, 1, OPTAB_DIRECT);
21135	      if (out != operands[0])
21136		emit_move_insn (operands[0], out);
21137
21138	      return true;
21139	    }
21140	}
21141
21142
21143      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
21144	   || diff == 3 || diff == 5 || diff == 9)
21145	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
21146	  && (mode != DImode
21147	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
21148	{
21149	  /*
21150	   * xorl dest,dest
21151	   * cmpl op1,op2
21152	   * setcc dest
21153	   * lea cf(dest*(ct-cf)),dest
21154	   *
21155	   * Size 14.
21156	   *
21157	   * This also catches the degenerate setcc-only case.
21158	   */
21159
21160	  rtx tmp;
21161	  int nops;
21162
21163	  out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21164
21165	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
21168	  if (diff == 1)
21169	    tmp = copy_rtx (out);
21170	  else
21171	    {
21172	      rtx out1;
21173	      out1 = copy_rtx (out);
21174	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
21175	      nops++;
21176	      if (diff & 1)
21177		{
21178		  tmp = gen_rtx_PLUS (mode, tmp, out1);
21179		  nops++;
21180		}
21181	    }
21182	  if (cf != 0)
21183	    {
21184	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
21185	      nops++;
21186	    }
21187	  if (!rtx_equal_p (tmp, out))
21188	    {
21189	      if (nops == 1)
21190		out = force_operand (tmp, copy_rtx (out));
21191	      else
21192		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
21193	    }
21194	  if (!rtx_equal_p (out, operands[0]))
21195	    emit_move_insn (operands[0], copy_rtx (out));
21196
21197	  return true;
21198	}
21199
21200      /*
21201       * General case:			Jumpful:
21202       *   xorl dest,dest		cmpl op1, op2
21203       *   cmpl op1, op2		movl ct, dest
21204       *   setcc dest			jcc 1f
21205       *   decl dest			movl cf, dest
21206       *   andl (cf-ct),dest		1:
21207       *   addl ct,dest
21208       *
21209       * Size 20.			Size 14.
21210       *
21211       * This is reasonably steep, but branch mispredict costs are
       * high on modern CPUs, so consider failing only if optimizing
21213       * for space.
21214       */
21215
21216      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21217	  && BRANCH_COST (optimize_insn_for_speed_p (),
21218		  	  false) >= 2)
21219	{
21220	  if (cf == 0)
21221	    {
21222	      machine_mode cmp_mode = GET_MODE (op0);
21223	      enum rtx_code new_code;
21224
21225	      if (SCALAR_FLOAT_MODE_P (cmp_mode))
21226		{
21227		  gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
21228
		  /* We may be reversing an unordered compare to a normal
		     compare, which is not valid in general (we may convert a
		     non-trapping condition to a trapping one); however, on
		     i386 we currently emit all comparisons unordered.  */
21233		  new_code = reverse_condition_maybe_unordered (code);
21234		}
21235	      else
21236		{
21237		  new_code = ix86_reverse_condition (code, cmp_mode);
21238		  if (compare_code != UNKNOWN && new_code != UNKNOWN)
21239		    compare_code = reverse_condition (compare_code);
21240		}
21241
21242	      if (new_code != UNKNOWN)
21243		{
21244		  cf = ct;
21245		  ct = 0;
21246		  code = new_code;
21247		}
21248	    }
21249
21250	  if (compare_code != UNKNOWN)
21251	    {
21252	      /* notl op1	(if needed)
21253		 sarl $31, op1
21254		 andl (cf-ct), op1
21255		 addl ct, op1
21256
21257		 For x < 0 (resp. x <= -1) there will be no notl,
21258		 so if possible swap the constants to get rid of the
21259		 complement.
21260		 True/false will be -1/0 while code below (store flag
21261		 followed by decrement) is 0/-1, so the constants need
21262		 to be exchanged once more.  */
21263
21264	      if (compare_code == GE || !cf)
21265		{
21266		  code = reverse_condition (code);
21267		  compare_code = LT;
21268		}
21269	      else
21270		std::swap (ct, cf);
21271
21272	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
21273	    }
21274	  else
21275	    {
21276	      out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
21277
21278	      out = expand_simple_binop (mode, PLUS, copy_rtx (out),
21279					 constm1_rtx,
21280					 copy_rtx (out), 1, OPTAB_DIRECT);
21281	    }
21282
21283	  out = expand_simple_binop (mode, AND, copy_rtx (out),
21284				     gen_int_mode (cf - ct, mode),
21285				     copy_rtx (out), 1, OPTAB_DIRECT);
21286	  if (ct)
21287	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
21288				       copy_rtx (out), 1, OPTAB_DIRECT);
21289	  if (!rtx_equal_p (out, operands[0]))
21290	    emit_move_insn (operands[0], copy_rtx (out));
21291
21292	  return true;
21293	}
21294    }
21295
21296  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
21297    {
21298      /* Try a few things more with specific constants and a variable.  */
21299
21300      optab op;
21301      rtx var, orig_out, out, tmp;
21302
21303      if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
21304	return false;
21305
21306      /* If one of the two operands is an interesting constant, load a
21307	 constant with the above and mask it in with a logical operation.  */
21308
21309      if (CONST_INT_P (operands[2]))
21310	{
21311	  var = operands[3];
21312	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
21313	    operands[3] = constm1_rtx, op = and_optab;
21314	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
21315	    operands[3] = const0_rtx, op = ior_optab;
21316	  else
21317	    return false;
21318	}
21319      else if (CONST_INT_P (operands[3]))
21320	{
21321	  var = operands[2];
21322	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
21323	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
21325	    operands[2] = const0_rtx, op = ior_optab;
21326	  else
21327	    return false;
21328	}
21329      else
21330        return false;
21331
21332      orig_out = operands[0];
21333      tmp = gen_reg_rtx (mode);
21334      operands[0] = tmp;
21335
21336      /* Recurse to get the constant loaded.  */
21337      if (ix86_expand_int_movcc (operands) == 0)
21338        return false;
21339
21340      /* Mask in the interesting variable.  */
21341      out = expand_binop (mode, op, var, tmp, orig_out, 0,
21342			  OPTAB_WIDEN);
21343      if (!rtx_equal_p (out, orig_out))
21344	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
21345
21346      return true;
21347    }
21348
21349  /*
21350   * For comparison with above,
21351   *
21352   * movl cf,dest
21353   * movl ct,tmp
21354   * cmpl op1,op2
21355   * cmovcc tmp,dest
21356   *
21357   * Size 15.
21358   */
21359
21360  if (! nonimmediate_operand (operands[2], mode))
21361    operands[2] = force_reg (mode, operands[2]);
21362  if (! nonimmediate_operand (operands[3], mode))
21363    operands[3] = force_reg (mode, operands[3]);
21364
21365  if (! register_operand (operands[2], VOIDmode)
21366      && (mode == QImode
21367          || ! register_operand (operands[3], VOIDmode)))
21368    operands[2] = force_reg (mode, operands[2]);
21369
21370  if (mode == QImode
21371      && ! register_operand (operands[3], VOIDmode))
21372    operands[3] = force_reg (mode, operands[3]);
21373
21374  emit_insn (compare_seq);
21375  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21376			  gen_rtx_IF_THEN_ELSE (mode,
21377						compare_op, operands[2],
21378						operands[3])));
21379  return true;
21380}
21381
21382/* Swap, force into registers, or otherwise massage the two operands
21383   to an sse comparison with a mask result.  Thus we differ a bit from
21384   ix86_prepare_fp_compare_args which expects to produce a flags result.
21385
21386   The DEST operand exists to help determine whether to commute commutative
21387   operators.  The POP0/POP1 operands are updated in place.  The new
21388   comparison code is returned, or UNKNOWN if not implementable.  */
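/* For example, before AVX there is no "compare greater" predicate for
   cmpps/cmppd, so a GT comparison is rewritten here as LT with the
   operands swapped and is later emitted as cmpltps/cmpltpd on the
   swapped operands.  */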
21389
21390static enum rtx_code
21391ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
21392				  rtx *pop0, rtx *pop1)
21393{
21394  switch (code)
21395    {
21396    case LTGT:
21397    case UNEQ:
21398      /* AVX supports all the needed comparisons.  */
21399      if (TARGET_AVX)
21400	break;
21401      /* We have no LTGT as an operator.  We could implement it with
21402	 NE & ORDERED, but this requires an extra temporary.  It's
21403	 not clear that it's worth it.  */
21404      return UNKNOWN;
21405
21406    case LT:
21407    case LE:
21408    case UNGT:
21409    case UNGE:
21410      /* These are supported directly.  */
21411      break;
21412
21413    case EQ:
21414    case NE:
21415    case UNORDERED:
21416    case ORDERED:
      /* AVX has 3-operand comparisons, so there is no need to swap anything.  */
21418      if (TARGET_AVX)
21419	break;
21420      /* For commutative operators, try to canonicalize the destination
21421	 operand to be first in the comparison - this helps reload to
21422	 avoid extra moves.  */
21423      if (!dest || !rtx_equal_p (dest, *pop1))
21424	break;
21425      /* FALLTHRU */
21426
21427    case GE:
21428    case GT:
21429    case UNLE:
21430    case UNLT:
21431      /* These are not supported directly before AVX, and furthermore
21432	 ix86_expand_sse_fp_minmax only optimizes LT/UNGE.  Swap the
21433	 comparison operands to transform into something that is
21434	 supported.  */
21435      std::swap (*pop0, *pop1);
21436      code = swap_condition (code);
21437      break;
21438
21439    default:
21440      gcc_unreachable ();
21441    }
21442
21443  return code;
21444}
21445
21446/* Detect conditional moves that exactly match min/max operational
21447   semantics.  Note that this is IEEE safe, as long as we don't
21448   interchange the operands.
21449
21450   Returns FALSE if this conditional move doesn't match a MIN/MAX,
21451   and TRUE if the operation is successful and instructions are emitted.  */
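/* For example, "a < b ? a : b" maps directly onto minss/minps: when the
   operands are unordered or equal those instructions return the second
   source operand, which matches the conditional move as written.  */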
21452
21453static bool
21454ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
21455			   rtx cmp_op1, rtx if_true, rtx if_false)
21456{
21457  machine_mode mode;
21458  bool is_min;
21459  rtx tmp;
21460
21461  if (code == LT)
21462    ;
21463  else if (code == UNGE)
21464    std::swap (if_true, if_false);
21465  else
21466    return false;
21467
21468  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
21469    is_min = true;
21470  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
21471    is_min = false;
21472  else
21473    return false;
21474
21475  mode = GET_MODE (dest);
21476
21477  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
21478     but MODE may be a vector mode and thus not appropriate.  */
21479  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
21480    {
21481      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
21482      rtvec v;
21483
21484      if_true = force_reg (mode, if_true);
21485      v = gen_rtvec (2, if_true, if_false);
21486      tmp = gen_rtx_UNSPEC (mode, v, u);
21487    }
21488  else
21489    {
21490      code = is_min ? SMIN : SMAX;
21491      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
21492    }
21493
21494  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
21495  return true;
21496}
21497
21498/* Expand an sse vector comparison.  Return the register with the result.  */
21499
21500static rtx
21501ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
21502		     rtx op_true, rtx op_false)
21503{
21504  machine_mode mode = GET_MODE (dest);
21505  machine_mode cmp_ops_mode = GET_MODE (cmp_op0);
21506
  /* In the general case the result of the comparison can differ from the
     operands' mode.  */
21508  machine_mode cmp_mode;
21509
21510  /* In AVX512F the result of comparison is an integer mask.  */
21511  bool maskcmp = false;
21512  rtx x;
21513
21514  if (GET_MODE_SIZE (cmp_ops_mode) == 64)
21515    {
21516      cmp_mode = mode_for_size (GET_MODE_NUNITS (cmp_ops_mode), MODE_INT, 0);
21517      gcc_assert (cmp_mode != BLKmode);
21518
21519      maskcmp = true;
21520    }
21521  else
21522    cmp_mode = cmp_ops_mode;
21523
21524
21525  cmp_op0 = force_reg (cmp_ops_mode, cmp_op0);
21526  if (!nonimmediate_operand (cmp_op1, cmp_ops_mode))
21527    cmp_op1 = force_reg (cmp_ops_mode, cmp_op1);
21528
21529  if (optimize
21530      || reg_overlap_mentioned_p (dest, op_true)
21531      || reg_overlap_mentioned_p (dest, op_false))
21532    dest = gen_reg_rtx (maskcmp ? cmp_mode : mode);
21533
21534  /* Compare patterns for int modes are unspec in AVX512F only.  */
21535  if (maskcmp && (code == GT || code == EQ))
21536    {
21537      rtx (*gen)(rtx, rtx, rtx);
21538
21539      switch (cmp_ops_mode)
21540	{
21541	case V64QImode:
21542	  gcc_assert (TARGET_AVX512BW);
21543	  gen = code == GT ? gen_avx512bw_gtv64qi3 : gen_avx512bw_eqv64qi3_1;
21544	  break;
21545	case V32HImode:
21546	  gcc_assert (TARGET_AVX512BW);
21547	  gen = code == GT ? gen_avx512bw_gtv32hi3 : gen_avx512bw_eqv32hi3_1;
21548	  break;
21549	case V16SImode:
21550	  gen = code == GT ? gen_avx512f_gtv16si3 : gen_avx512f_eqv16si3_1;
21551	  break;
21552	case V8DImode:
21553	  gen = code == GT ? gen_avx512f_gtv8di3 : gen_avx512f_eqv8di3_1;
21554	  break;
21555	default:
21556	  gen = NULL;
21557	}
21558
21559      if (gen)
21560	{
21561	  emit_insn (gen (dest, cmp_op0, cmp_op1));
21562	  return dest;
21563	}
21564    }
21565  x = gen_rtx_fmt_ee (code, cmp_mode, cmp_op0, cmp_op1);
21566
21567  if (cmp_mode != mode && !maskcmp)
21568    {
21569      x = force_reg (cmp_ops_mode, x);
21570      convert_move (dest, x, false);
21571    }
21572  else
21573    emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21574
21575  return dest;
21576}
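
/* Note that for 64-byte operands the comparison result uses a mask mode
   whose width equals the number of elements; e.g. comparing two V16SImode
   vectors under AVX512F yields an HImode mask with one bit per element.  */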
21577
21578/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
21579   operations.  This is used for both scalar and vector conditional moves.  */
21580
21581static void
21582ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
21583{
21584  machine_mode mode = GET_MODE (dest);
21585  machine_mode cmpmode = GET_MODE (cmp);
21586
21587  /* In AVX512F the result of comparison is an integer mask.  */
21588  bool maskcmp = (mode != cmpmode && TARGET_AVX512F);
21589
21590  rtx t2, t3, x;
21591
21592  if (vector_all_ones_operand (op_true, mode)
21593      && rtx_equal_p (op_false, CONST0_RTX (mode))
21594      && !maskcmp)
21595    {
21596      emit_insn (gen_rtx_SET (VOIDmode, dest, cmp));
21597    }
21598  else if (op_false == CONST0_RTX (mode)
21599      && !maskcmp)
21600    {
21601      op_true = force_reg (mode, op_true);
21602      x = gen_rtx_AND (mode, cmp, op_true);
21603      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21604    }
21605  else if (op_true == CONST0_RTX (mode)
21606      && !maskcmp)
21607    {
21608      op_false = force_reg (mode, op_false);
21609      x = gen_rtx_NOT (mode, cmp);
21610      x = gen_rtx_AND (mode, x, op_false);
21611      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21612    }
21613  else if (INTEGRAL_MODE_P (mode) && op_true == CONSTM1_RTX (mode)
21614      && !maskcmp)
21615    {
21616      op_false = force_reg (mode, op_false);
21617      x = gen_rtx_IOR (mode, cmp, op_false);
21618      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21619    }
21620  else if (TARGET_XOP
21621      && !maskcmp)
21622    {
21623      op_true = force_reg (mode, op_true);
21624
21625      if (!nonimmediate_operand (op_false, mode))
21626	op_false = force_reg (mode, op_false);
21627
21628      emit_insn (gen_rtx_SET (mode, dest,
21629			      gen_rtx_IF_THEN_ELSE (mode, cmp,
21630						    op_true,
21631						    op_false)));
21632    }
21633  else
21634    {
21635      rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
21636      rtx d = dest;
21637
21638      if (!nonimmediate_operand (op_true, mode))
21639	op_true = force_reg (mode, op_true);
21640
21641      op_false = force_reg (mode, op_false);
21642
21643      switch (mode)
21644	{
21645	case V4SFmode:
21646	  if (TARGET_SSE4_1)
21647	    gen = gen_sse4_1_blendvps;
21648	  break;
21649	case V2DFmode:
21650	  if (TARGET_SSE4_1)
21651	    gen = gen_sse4_1_blendvpd;
21652	  break;
21653	case V16QImode:
21654	case V8HImode:
21655	case V4SImode:
21656	case V2DImode:
21657	  if (TARGET_SSE4_1)
21658	    {
21659	      gen = gen_sse4_1_pblendvb;
21660	      if (mode != V16QImode)
21661		d = gen_reg_rtx (V16QImode);
21662	      op_false = gen_lowpart (V16QImode, op_false);
21663	      op_true = gen_lowpart (V16QImode, op_true);
21664	      cmp = gen_lowpart (V16QImode, cmp);
21665	    }
21666	  break;
21667	case V8SFmode:
21668	  if (TARGET_AVX)
21669	    gen = gen_avx_blendvps256;
21670	  break;
21671	case V4DFmode:
21672	  if (TARGET_AVX)
21673	    gen = gen_avx_blendvpd256;
21674	  break;
21675	case V32QImode:
21676	case V16HImode:
21677	case V8SImode:
21678	case V4DImode:
21679	  if (TARGET_AVX2)
21680	    {
21681	      gen = gen_avx2_pblendvb;
21682	      if (mode != V32QImode)
21683		d = gen_reg_rtx (V32QImode);
21684	      op_false = gen_lowpart (V32QImode, op_false);
21685	      op_true = gen_lowpart (V32QImode, op_true);
21686	      cmp = gen_lowpart (V32QImode, cmp);
21687	    }
21688	  break;
21689
21690	case V64QImode:
21691	  gen = gen_avx512bw_blendmv64qi;
21692	  break;
21693	case V32HImode:
21694	  gen = gen_avx512bw_blendmv32hi;
21695	  break;
21696	case V16SImode:
21697	  gen = gen_avx512f_blendmv16si;
21698	  break;
21699	case V8DImode:
21700	  gen = gen_avx512f_blendmv8di;
21701	  break;
21702	case V8DFmode:
21703	  gen = gen_avx512f_blendmv8df;
21704	  break;
21705	case V16SFmode:
21706	  gen = gen_avx512f_blendmv16sf;
21707	  break;
21708
21709	default:
21710	  break;
21711	}
21712
21713      if (gen != NULL)
21714	{
21715	  emit_insn (gen (d, op_false, op_true, cmp));
21716	  if (d != dest)
21717	    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
21718	}
21719      else
21720	{
21721	  op_true = force_reg (mode, op_true);
21722
21723	  t2 = gen_reg_rtx (mode);
21724	  if (optimize)
21725	    t3 = gen_reg_rtx (mode);
21726	  else
21727	    t3 = dest;
21728
21729	  x = gen_rtx_AND (mode, op_true, cmp);
21730	  emit_insn (gen_rtx_SET (VOIDmode, t2, x));
21731
21732	  x = gen_rtx_NOT (mode, cmp);
21733	  x = gen_rtx_AND (mode, x, op_false);
21734	  emit_insn (gen_rtx_SET (VOIDmode, t3, x));
21735
21736	  x = gen_rtx_IOR (mode, t3, t2);
21737	  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
21738	}
21739    }
21740}
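
/* The general fallback above computes the blend with plain logic ops,
   i.e. roughly
     dest = (cmp & op_true) | (~cmp & op_false);
   which is correct because CMP is an all-ones/all-zeros element mask.
   With SSE4.1/AVX/AVX512 the same selection is done by the blendv and
   vblendm patterns instead.  */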
21741
21742/* Expand a floating-point conditional move.  Return true if successful.  */
21743
21744bool
21745ix86_expand_fp_movcc (rtx operands[])
21746{
21747  machine_mode mode = GET_MODE (operands[0]);
21748  enum rtx_code code = GET_CODE (operands[1]);
21749  rtx tmp, compare_op;
21750  rtx op0 = XEXP (operands[1], 0);
21751  rtx op1 = XEXP (operands[1], 1);
21752
21753  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
21754    {
21755      machine_mode cmode;
21756
      /* Since we have no cmove for SSE registers, don't force bad register
	 allocation just to gain access to them.  Deny movcc when the
	 comparison mode doesn't match the move mode.  */
21760      cmode = GET_MODE (op0);
21761      if (cmode == VOIDmode)
21762	cmode = GET_MODE (op1);
21763      if (cmode != mode)
21764	return false;
21765
21766      code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
21767      if (code == UNKNOWN)
21768	return false;
21769
21770      if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
21771				     operands[2], operands[3]))
21772	return true;
21773
21774      tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
21775				 operands[2], operands[3]);
21776      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
21777      return true;
21778    }
21779
21780  if (GET_MODE (op0) == TImode
21781      || (GET_MODE (op0) == DImode
21782	  && !TARGET_64BIT))
21783    return false;
21784
21785  /* The floating point conditional move instructions don't directly
21786     support conditions resulting from a signed integer comparison.  */
21787
21788  compare_op = ix86_expand_compare (code, op0, op1);
21789  if (!fcmov_comparison_operator (compare_op, VOIDmode))
21790    {
21791      tmp = gen_reg_rtx (QImode);
21792      ix86_expand_setcc (tmp, code, op0, op1);
21793
21794      compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
21795    }
21796
21797  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
21798			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
21799						operands[2], operands[3])));
21800
21801  return true;
21802}
21803
21804/* Expand a floating-point vector conditional move; a vcond operation
21805   rather than a movcc operation.  */
21806
21807bool
21808ix86_expand_fp_vcond (rtx operands[])
21809{
21810  enum rtx_code code = GET_CODE (operands[3]);
21811  rtx cmp;
21812
21813  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
21814					   &operands[4], &operands[5]);
21815  if (code == UNKNOWN)
21816    {
21817      rtx temp;
21818      switch (GET_CODE (operands[3]))
21819	{
21820	case LTGT:
21821	  temp = ix86_expand_sse_cmp (operands[0], ORDERED, operands[4],
21822				      operands[5], operands[0], operands[0]);
21823	  cmp = ix86_expand_sse_cmp (operands[0], NE, operands[4],
21824				     operands[5], operands[1], operands[2]);
21825	  code = AND;
21826	  break;
21827	case UNEQ:
21828	  temp = ix86_expand_sse_cmp (operands[0], UNORDERED, operands[4],
21829				      operands[5], operands[0], operands[0]);
21830	  cmp = ix86_expand_sse_cmp (operands[0], EQ, operands[4],
21831				     operands[5], operands[1], operands[2]);
21832	  code = IOR;
21833	  break;
21834	default:
21835	  gcc_unreachable ();
21836	}
21837      cmp = expand_simple_binop (GET_MODE (cmp), code, temp, cmp, cmp, 1,
21838				 OPTAB_DIRECT);
21839      ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21840      return true;
21841    }
21842
21843  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
21844				 operands[5], operands[1], operands[2]))
21845    return true;
21846
21847  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
21848			     operands[1], operands[2]);
21849  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
21850  return true;
21851}
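
/* The UNKNOWN cases above rely on the identities
     LTGT (a, b) == ORDERED (a, b) & NE (a, b)
     UNEQ (a, b) == UNORDERED (a, b) | EQ (a, b)
   so each is built from two supported comparisons combined with a logical
   AND or IOR.  */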
21852
21853/* Expand a signed/unsigned integral vector conditional move.  */
21854
21855bool
21856ix86_expand_int_vcond (rtx operands[])
21857{
21858  machine_mode data_mode = GET_MODE (operands[0]);
21859  machine_mode mode = GET_MODE (operands[4]);
21860  enum rtx_code code = GET_CODE (operands[3]);
21861  bool negate = false;
21862  rtx x, cop0, cop1;
21863
21864  cop0 = operands[4];
21865  cop1 = operands[5];
21866
21867  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
21868     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  */
21869  if ((code == LT || code == GE)
21870      && data_mode == mode
21871      && cop1 == CONST0_RTX (mode)
21872      && operands[1 + (code == LT)] == CONST0_RTX (data_mode)
21873      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) > 1
21874      && GET_MODE_SIZE (GET_MODE_INNER (data_mode)) <= 8
21875      && (GET_MODE_SIZE (data_mode) == 16
21876	  || (TARGET_AVX2 && GET_MODE_SIZE (data_mode) == 32)))
21877    {
21878      rtx negop = operands[2 - (code == LT)];
21879      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (data_mode)) - 1;
21880      if (negop == CONST1_RTX (data_mode))
21881	{
21882	  rtx res = expand_simple_binop (mode, LSHIFTRT, cop0, GEN_INT (shift),
21883					 operands[0], 1, OPTAB_DIRECT);
21884	  if (res != operands[0])
21885	    emit_move_insn (operands[0], res);
21886	  return true;
21887	}
21888      else if (GET_MODE_INNER (data_mode) != DImode
21889	       && vector_all_ones_operand (negop, data_mode))
21890	{
21891	  rtx res = expand_simple_binop (mode, ASHIFTRT, cop0, GEN_INT (shift),
21892					 operands[0], 0, OPTAB_DIRECT);
21893	  if (res != operands[0])
21894	    emit_move_insn (operands[0], res);
21895	  return true;
21896	}
21897    }
21898
21899  if (!nonimmediate_operand (cop1, mode))
21900    cop1 = force_reg (mode, cop1);
21901  if (!general_operand (operands[1], data_mode))
21902    operands[1] = force_reg (data_mode, operands[1]);
21903  if (!general_operand (operands[2], data_mode))
21904    operands[2] = force_reg (data_mode, operands[2]);
21905
21906  /* XOP supports all of the comparisons on all 128-bit vector int types.  */
21907  if (TARGET_XOP
21908      && (mode == V16QImode || mode == V8HImode
21909	  || mode == V4SImode || mode == V2DImode))
21910    ;
21911  else
21912    {
21913      /* Canonicalize the comparison to EQ, GT, GTU.  */
21914      switch (code)
21915	{
21916	case EQ:
21917	case GT:
21918	case GTU:
21919	  break;
21920
21921	case NE:
21922	case LE:
21923	case LEU:
21924	  code = reverse_condition (code);
21925	  negate = true;
21926	  break;
21927
21928	case GE:
21929	case GEU:
21930	  code = reverse_condition (code);
21931	  negate = true;
21932	  /* FALLTHRU */
21933
21934	case LT:
21935	case LTU:
21936	  std::swap (cop0, cop1);
21937	  code = swap_condition (code);
21938	  break;
21939
21940	default:
21941	  gcc_unreachable ();
21942	}
21943
21944      /* Only SSE4.1/SSE4.2 supports V2DImode.  */
21945      if (mode == V2DImode)
21946	{
21947	  switch (code)
21948	    {
21949	    case EQ:
21950	      /* SSE4.1 supports EQ.  */
21951	      if (!TARGET_SSE4_1)
21952		return false;
21953	      break;
21954
21955	    case GT:
21956	    case GTU:
21957	      /* SSE4.2 supports GT/GTU.  */
21958	      if (!TARGET_SSE4_2)
21959		return false;
21960	      break;
21961
21962	    default:
21963	      gcc_unreachable ();
21964	    }
21965	}
21966
21967      /* Unsigned parallel compare is not supported by the hardware.
21968	 Play some tricks to turn this into a signed comparison
21969	 against 0.  */
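      /* For example, for 32-bit elements a GTU comparison a >u b is
	 rewritten as (a - 0x80000000) >s (b - 0x80000000); subtracting the
	 sign-bit mask flips the sign bit of every element, which turns the
	 unsigned ordering into the signed one.  For 8- and 16-bit elements
	 the saturating subtraction below is used instead, since
	 a >u b iff (a -us b) != 0.  */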
21970      if (code == GTU)
21971	{
21972	  cop0 = force_reg (mode, cop0);
21973
21974	  switch (mode)
21975	    {
21976	    case V16SImode:
21977	    case V8DImode:
21978	    case V8SImode:
21979	    case V4DImode:
21980	    case V4SImode:
21981	    case V2DImode:
21982		{
21983		  rtx t1, t2, mask;
21984		  rtx (*gen_sub3) (rtx, rtx, rtx);
21985
21986		  switch (mode)
21987		    {
21988		    case V16SImode: gen_sub3 = gen_subv16si3; break;
21989		    case V8DImode: gen_sub3 = gen_subv8di3; break;
21990		    case V8SImode: gen_sub3 = gen_subv8si3; break;
21991		    case V4DImode: gen_sub3 = gen_subv4di3; break;
21992		    case V4SImode: gen_sub3 = gen_subv4si3; break;
21993		    case V2DImode: gen_sub3 = gen_subv2di3; break;
21994		    default:
21995		      gcc_unreachable ();
21996		    }
21997		  /* Subtract (-(INT MAX) - 1) from both operands to make
21998		     them signed.  */
21999		  mask = ix86_build_signbit_mask (mode, true, false);
22000		  t1 = gen_reg_rtx (mode);
22001		  emit_insn (gen_sub3 (t1, cop0, mask));
22002
22003		  t2 = gen_reg_rtx (mode);
22004		  emit_insn (gen_sub3 (t2, cop1, mask));
22005
22006		  cop0 = t1;
22007		  cop1 = t2;
22008		  code = GT;
22009		}
22010	      break;
22011
22012	    case V64QImode:
22013	    case V32HImode:
22014	    case V32QImode:
22015	    case V16HImode:
22016	    case V16QImode:
22017	    case V8HImode:
22018	      /* Perform a parallel unsigned saturating subtraction.  */
22019	      x = gen_reg_rtx (mode);
22020	      emit_insn (gen_rtx_SET (VOIDmode, x,
22021				      gen_rtx_US_MINUS (mode, cop0, cop1)));
22022
22023	      cop0 = x;
22024	      cop1 = CONST0_RTX (mode);
22025	      code = EQ;
22026	      negate = !negate;
22027	      break;
22028
22029	    default:
22030	      gcc_unreachable ();
22031	    }
22032	}
22033    }
22034
22035  /* Allow the comparison to be done in one mode, but the movcc to
22036     happen in another mode.  */
22037  if (data_mode == mode)
22038    {
22039      x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
22040			       operands[1+negate], operands[2-negate]);
22041    }
22042  else
22043    {
22044      gcc_assert (GET_MODE_SIZE (data_mode) == GET_MODE_SIZE (mode));
22045      x = ix86_expand_sse_cmp (gen_reg_rtx (mode), code, cop0, cop1,
22046			       operands[1+negate], operands[2-negate]);
22047      if (GET_MODE (x) == mode)
22048	x = gen_lowpart (data_mode, x);
22049    }
22050
22051  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
22052			 operands[2-negate]);
22053  return true;
22054}
22055
22056/* AVX512F does support 64-byte integer vector operations,
22057   thus the longest vector we are faced with is V64QImode.  */
22058#define MAX_VECT_LEN	64
22059
22060struct expand_vec_perm_d
22061{
22062  rtx target, op0, op1;
22063  unsigned char perm[MAX_VECT_LEN];
22064  machine_mode vmode;
22065  unsigned char nelt;
22066  bool one_operand_p;
22067  bool testing_p;
22068};
22069
22070static bool
22071ix86_expand_vec_perm_vpermi2 (rtx target, rtx op0, rtx mask, rtx op1,
22072			      struct expand_vec_perm_d *d)
22073{
  /* ix86_expand_vec_perm_vpermi2 is called from both the const and the
     non-const expanders, so the arguments are either in D, or in OP0,
     OP1 etc.  */
22076  machine_mode mode = GET_MODE (d ? d->op0 : op0);
22077  machine_mode maskmode = mode;
22078  rtx (*gen) (rtx, rtx, rtx, rtx) = NULL;
22079
22080  switch (mode)
22081    {
22082    case V8HImode:
22083      if (TARGET_AVX512VL && TARGET_AVX512BW)
22084	gen = gen_avx512vl_vpermi2varv8hi3;
22085      break;
22086    case V16HImode:
22087      if (TARGET_AVX512VL && TARGET_AVX512BW)
22088	gen = gen_avx512vl_vpermi2varv16hi3;
22089      break;
22090    case V64QImode:
22091      if (TARGET_AVX512VBMI)
22092	gen = gen_avx512bw_vpermi2varv64qi3;
22093      break;
22094    case V32HImode:
22095      if (TARGET_AVX512BW)
22096	gen = gen_avx512bw_vpermi2varv32hi3;
22097      break;
22098    case V4SImode:
22099      if (TARGET_AVX512VL)
22100	gen = gen_avx512vl_vpermi2varv4si3;
22101      break;
22102    case V8SImode:
22103      if (TARGET_AVX512VL)
22104	gen = gen_avx512vl_vpermi2varv8si3;
22105      break;
22106    case V16SImode:
22107      if (TARGET_AVX512F)
22108	gen = gen_avx512f_vpermi2varv16si3;
22109      break;
22110    case V4SFmode:
22111      if (TARGET_AVX512VL)
22112	{
22113	  gen = gen_avx512vl_vpermi2varv4sf3;
22114	  maskmode = V4SImode;
22115	}
22116      break;
22117    case V8SFmode:
22118      if (TARGET_AVX512VL)
22119	{
22120	  gen = gen_avx512vl_vpermi2varv8sf3;
22121	  maskmode = V8SImode;
22122	}
22123      break;
22124    case V16SFmode:
22125      if (TARGET_AVX512F)
22126	{
22127	  gen = gen_avx512f_vpermi2varv16sf3;
22128	  maskmode = V16SImode;
22129	}
22130      break;
22131    case V2DImode:
22132      if (TARGET_AVX512VL)
22133	gen = gen_avx512vl_vpermi2varv2di3;
22134      break;
22135    case V4DImode:
22136      if (TARGET_AVX512VL)
22137	gen = gen_avx512vl_vpermi2varv4di3;
22138      break;
22139    case V8DImode:
22140      if (TARGET_AVX512F)
22141	gen = gen_avx512f_vpermi2varv8di3;
22142      break;
22143    case V2DFmode:
22144      if (TARGET_AVX512VL)
22145	{
22146	  gen = gen_avx512vl_vpermi2varv2df3;
22147	  maskmode = V2DImode;
22148	}
22149      break;
22150    case V4DFmode:
22151      if (TARGET_AVX512VL)
22152	{
22153	  gen = gen_avx512vl_vpermi2varv4df3;
22154	  maskmode = V4DImode;
22155	}
22156      break;
22157    case V8DFmode:
22158      if (TARGET_AVX512F)
22159	{
22160	  gen = gen_avx512f_vpermi2varv8df3;
22161	  maskmode = V8DImode;
22162	}
22163      break;
22164    default:
22165      break;
22166    }
22167
22168  if (gen == NULL)
22169    return false;
22170
  /* When called from the const expander, the operands live in D; build the
     index vector from D->perm.  Otherwise use TARGET, OP0, MASK and OP1 as
     passed in.  */
22173  if (d)
22174    {
22175      rtx vec[64];
22176      target = d->target;
22177      op0 = d->op0;
22178      op1 = d->op1;
22179      for (int i = 0; i < d->nelt; ++i)
22180	vec[i] = GEN_INT (d->perm[i]);
22181      mask = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (d->nelt, vec));
22182    }
22183
22184  emit_insn (gen (target, op0, force_reg (maskmode, mask), op1));
22185  return true;
22186}
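
/* For example, a variable V16SImode permutation whose indices range over
   0..31 maps onto a single vpermi2d, where (roughly) index values 0..15
   pick elements of one input and 16..31 pick elements of the other, with
   the index vector supplied in a register.  */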
22187
22188/* Expand a variable vector permutation.  */
22189
22190void
22191ix86_expand_vec_perm (rtx operands[])
22192{
22193  rtx target = operands[0];
22194  rtx op0 = operands[1];
22195  rtx op1 = operands[2];
22196  rtx mask = operands[3];
22197  rtx t1, t2, t3, t4, t5, t6, t7, t8, vt, vt2, vec[32];
22198  machine_mode mode = GET_MODE (op0);
22199  machine_mode maskmode = GET_MODE (mask);
22200  int w, e, i;
22201  bool one_operand_shuffle = rtx_equal_p (op0, op1);
22202
22203  /* Number of elements in the vector.  */
22204  w = GET_MODE_NUNITS (mode);
22205  e = GET_MODE_UNIT_SIZE (mode);
22206  gcc_assert (w <= 64);
22207
22208  if (ix86_expand_vec_perm_vpermi2 (target, op0, mask, op1, NULL))
22209    return;
22210
22211  if (TARGET_AVX2)
22212    {
22213      if (mode == V4DImode || mode == V4DFmode || mode == V16HImode)
22214	{
	  /* Unfortunately, the VPERMQ and VPERMPD instructions only support
	     a constant shuffle operand.  With a tiny bit of effort we can
	     use VPERMD instead.  A re-interpretation stall for V4DFmode is
	     unfortunate but there's no avoiding it.
	     Similarly for V16HImode we don't have instructions for variable
	     shuffling, while for V32QImode we can, after preparing suitable
	     masks, use vpshufb; vpshufb; vpermq; vpor.  */
22222
22223	  if (mode == V16HImode)
22224	    {
22225	      maskmode = mode = V32QImode;
22226	      w = 32;
22227	      e = 1;
22228	    }
22229	  else
22230	    {
22231	      maskmode = mode = V8SImode;
22232	      w = 8;
22233	      e = 4;
22234	    }
22235	  t1 = gen_reg_rtx (maskmode);
22236
22237	  /* Replicate the low bits of the V4DImode mask into V8SImode:
22238	       mask = { A B C D }
22239	       t1 = { A A B B C C D D }.  */
22240	  for (i = 0; i < w / 2; ++i)
22241	    vec[i*2 + 1] = vec[i*2] = GEN_INT (i * 2);
22242	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22243	  vt = force_reg (maskmode, vt);
22244	  mask = gen_lowpart (maskmode, mask);
22245	  if (maskmode == V8SImode)
22246	    emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
22247	  else
22248	    emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
22249
	  /* Multiply the shuffle indices by two.  */
22251	  t1 = expand_simple_binop (maskmode, PLUS, t1, t1, t1, 1,
22252				    OPTAB_DIRECT);
22253
	  /* Add one to the odd shuffle indices:
22255		t1 = { A*2, A*2+1, B*2, B*2+1, ... }.  */
22256	  for (i = 0; i < w / 2; ++i)
22257	    {
22258	      vec[i * 2] = const0_rtx;
22259	      vec[i * 2 + 1] = const1_rtx;
22260	    }
22261	  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22262	  vt = validize_mem (force_const_mem (maskmode, vt));
22263	  t1 = expand_simple_binop (maskmode, PLUS, t1, vt, t1, 1,
22264				    OPTAB_DIRECT);
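
	  /* As an illustration, a V4DImode selector { 1 3 0 2 } becomes the
	     V8SImode selector { 2 3 6 7 0 1 4 5 }: each 64-bit index I is
	     replaced by the pair of 32-bit indices 2*I and 2*I+1 that
	     address the same quadword.  */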
22265
22266	  /* Continue as if V8SImode (resp. V32QImode) was used initially.  */
22267	  operands[3] = mask = t1;
22268	  target = gen_reg_rtx (mode);
22269	  op0 = gen_lowpart (mode, op0);
22270	  op1 = gen_lowpart (mode, op1);
22271	}
22272
22273      switch (mode)
22274	{
22275	case V8SImode:
22276	  /* The VPERMD and VPERMPS instructions already properly ignore
22277	     the high bits of the shuffle elements.  No need for us to
22278	     perform an AND ourselves.  */
22279	  if (one_operand_shuffle)
22280	    {
22281	      emit_insn (gen_avx2_permvarv8si (target, op0, mask));
22282	      if (target != operands[0])
22283		emit_move_insn (operands[0],
22284				gen_lowpart (GET_MODE (operands[0]), target));
22285	    }
22286	  else
22287	    {
22288	      t1 = gen_reg_rtx (V8SImode);
22289	      t2 = gen_reg_rtx (V8SImode);
22290	      emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
22291	      emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
22292	      goto merge_two;
22293	    }
22294	  return;
22295
22296	case V8SFmode:
22297	  mask = gen_lowpart (V8SImode, mask);
22298	  if (one_operand_shuffle)
22299	    emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
22300	  else
22301	    {
22302	      t1 = gen_reg_rtx (V8SFmode);
22303	      t2 = gen_reg_rtx (V8SFmode);
22304	      emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
22305	      emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
22306	      goto merge_two;
22307	    }
22308	  return;
22309
22310        case V4SImode:
22311	  /* By combining the two 128-bit input vectors into one 256-bit
22312	     input vector, we can use VPERMD and VPERMPS for the full
22313	     two-operand shuffle.  */
22314	  t1 = gen_reg_rtx (V8SImode);
22315	  t2 = gen_reg_rtx (V8SImode);
22316	  emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
22317	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22318	  emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
22319	  emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
22320	  return;
22321
22322        case V4SFmode:
22323	  t1 = gen_reg_rtx (V8SFmode);
22324	  t2 = gen_reg_rtx (V8SImode);
22325	  mask = gen_lowpart (V4SImode, mask);
22326	  emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
22327	  emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
22328	  emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
22329	  emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
22330	  return;
22331
22332	case V32QImode:
22333	  t1 = gen_reg_rtx (V32QImode);
22334	  t2 = gen_reg_rtx (V32QImode);
22335	  t3 = gen_reg_rtx (V32QImode);
22336	  vt2 = GEN_INT (-128);
22337	  for (i = 0; i < 32; i++)
22338	    vec[i] = vt2;
22339	  vt = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22340	  vt = force_reg (V32QImode, vt);
22341	  for (i = 0; i < 32; i++)
22342	    vec[i] = i < 16 ? vt2 : const0_rtx;
22343	  vt2 = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, vec));
22344	  vt2 = force_reg (V32QImode, vt2);
22345	  /* From mask create two adjusted masks, which contain the same
22346	     bits as mask in the low 7 bits of each vector element.
22347	     The first mask will have the most significant bit clear
22348	     if it requests element from the same 128-bit lane
22349	     and MSB set if it requests element from the other 128-bit lane.
22350	     The second mask will have the opposite values of the MSB,
22351	     and additionally will have its 128-bit lanes swapped.
22352	     E.g. { 07 12 1e 09 ... | 17 19 05 1f ... } mask vector will have
22353	     t1   { 07 92 9e 09 ... | 17 19 85 1f ... } and
22354	     t3   { 97 99 05 9f ... | 87 12 1e 89 ... } where each ...
	     stands for the other 12 bytes.  */
22356	  /* The bit whether element is from the same lane or the other
22357	     lane is bit 4, so shift it up by 3 to the MSB position.  */
22358	  t5 = gen_reg_rtx (V4DImode);
22359	  emit_insn (gen_ashlv4di3 (t5, gen_lowpart (V4DImode, mask),
22360				    GEN_INT (3)));
22361	  /* Clear MSB bits from the mask just in case it had them set.  */
22362	  emit_insn (gen_avx2_andnotv32qi3 (t2, vt, mask));
22363	  /* After this t1 will have MSB set for elements from other lane.  */
22364	  emit_insn (gen_xorv32qi3 (t1, gen_lowpart (V32QImode, t5), vt2));
22365	  /* Clear bits other than MSB.  */
22366	  emit_insn (gen_andv32qi3 (t1, t1, vt));
22367	  /* Or in the lower bits from mask into t3.  */
22368	  emit_insn (gen_iorv32qi3 (t3, t1, t2));
22369	  /* And invert MSB bits in t1, so MSB is set for elements from the same
22370	     lane.  */
22371	  emit_insn (gen_xorv32qi3 (t1, t1, vt));
22372	  /* Swap 128-bit lanes in t3.  */
22373	  t6 = gen_reg_rtx (V4DImode);
22374	  emit_insn (gen_avx2_permv4di_1 (t6, gen_lowpart (V4DImode, t3),
22375					  const2_rtx, GEN_INT (3),
22376					  const0_rtx, const1_rtx));
22377	  /* And or in the lower bits from mask into t1.  */
22378	  emit_insn (gen_iorv32qi3 (t1, t1, t2));
22379	  if (one_operand_shuffle)
22380	    {
22381	      /* Each of these shuffles will put 0s in places where
22382		 element from the other 128-bit lane is needed, otherwise
22383		 will shuffle in the requested value.  */
22384	      emit_insn (gen_avx2_pshufbv32qi3 (t3, op0,
22385						gen_lowpart (V32QImode, t6)));
22386	      emit_insn (gen_avx2_pshufbv32qi3 (t1, op0, t1));
22387	      /* For t3 the 128-bit lanes are swapped again.  */
22388	      t7 = gen_reg_rtx (V4DImode);
22389	      emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t3),
22390					      const2_rtx, GEN_INT (3),
22391					      const0_rtx, const1_rtx));
22392	      /* And oring both together leads to the result.  */
22393	      emit_insn (gen_iorv32qi3 (target, t1,
22394					gen_lowpart (V32QImode, t7)));
22395	      if (target != operands[0])
22396		emit_move_insn (operands[0],
22397				gen_lowpart (GET_MODE (operands[0]), target));
22398	      return;
22399	    }
22400
22401	  t4 = gen_reg_rtx (V32QImode);
	  /* Similarly to the one_operand_shuffle code above, just repeated
	     twice, once for each operand; the merge_two: code below will
	     merge the two results together.  */
22405	  emit_insn (gen_avx2_pshufbv32qi3 (t4, op0,
22406					    gen_lowpart (V32QImode, t6)));
22407	  emit_insn (gen_avx2_pshufbv32qi3 (t3, op1,
22408					    gen_lowpart (V32QImode, t6)));
22409	  emit_insn (gen_avx2_pshufbv32qi3 (t2, op0, t1));
22410	  emit_insn (gen_avx2_pshufbv32qi3 (t1, op1, t1));
22411	  t7 = gen_reg_rtx (V4DImode);
22412	  emit_insn (gen_avx2_permv4di_1 (t7, gen_lowpart (V4DImode, t4),
22413					  const2_rtx, GEN_INT (3),
22414					  const0_rtx, const1_rtx));
22415	  t8 = gen_reg_rtx (V4DImode);
22416	  emit_insn (gen_avx2_permv4di_1 (t8, gen_lowpart (V4DImode, t3),
22417					  const2_rtx, GEN_INT (3),
22418					  const0_rtx, const1_rtx));
22419	  emit_insn (gen_iorv32qi3 (t4, t2, gen_lowpart (V32QImode, t7)));
22420	  emit_insn (gen_iorv32qi3 (t3, t1, gen_lowpart (V32QImode, t8)));
22421	  t1 = t4;
22422	  t2 = t3;
22423	  goto merge_two;
22424
22425	default:
22426	  gcc_assert (GET_MODE_SIZE (mode) <= 16);
22427	  break;
22428	}
22429    }
22430
22431  if (TARGET_XOP)
22432    {
22433      /* The XOP VPPERM insn supports three inputs.  By ignoring the
22434	 one_operand_shuffle special case, we avoid creating another
22435	 set of constant vectors in memory.  */
22436      one_operand_shuffle = false;
22437
22438      /* mask = mask & {2*w-1, ...} */
22439      vt = GEN_INT (2*w - 1);
22440    }
22441  else
22442    {
22443      /* mask = mask & {w-1, ...} */
22444      vt = GEN_INT (w - 1);
22445    }
22446
22447  for (i = 0; i < w; i++)
22448    vec[i] = vt;
22449  vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22450  mask = expand_simple_binop (maskmode, AND, mask, vt,
22451			      NULL_RTX, 0, OPTAB_DIRECT);
22452
22453  /* For non-QImode operations, convert the word permutation control
22454     into a byte permutation control.  */
22455  if (mode != V16QImode)
22456    {
22457      mask = expand_simple_binop (maskmode, ASHIFT, mask,
22458				  GEN_INT (exact_log2 (e)),
22459				  NULL_RTX, 0, OPTAB_DIRECT);
22460
22461      /* Convert mask to vector of chars.  */
22462      mask = force_reg (V16QImode, gen_lowpart (V16QImode, mask));
22463
22464      /* Replicate each of the input bytes into byte positions:
22465	 (v2di) --> {0,0,0,0,0,0,0,0, 8,8,8,8,8,8,8,8}
22466	 (v4si) --> {0,0,0,0, 4,4,4,4, 8,8,8,8, 12,12,12,12}
22467	 (v8hi) --> {0,0, 2,2, 4,4, 6,6, ...}.  */
22468      for (i = 0; i < 16; ++i)
22469	vec[i] = GEN_INT (i/e * e);
22470      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22471      vt = validize_mem (force_const_mem (V16QImode, vt));
22472      if (TARGET_XOP)
22473	emit_insn (gen_xop_pperm (mask, mask, mask, vt));
22474      else
22475	emit_insn (gen_ssse3_pshufbv16qi3 (mask, mask, vt));
22476
      /* Convert it into the byte positions by doing
	 mask = mask + {0,1,..,e-1, 0,1,..,e-1, ...}  */
22479      for (i = 0; i < 16; ++i)
22480	vec[i] = GEN_INT (i % e);
22481      vt = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, vec));
22482      vt = validize_mem (force_const_mem (V16QImode, vt));
22483      emit_insn (gen_addv16qi3 (mask, mask, vt));
22484    }
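
  /* Worked example: for a V4SImode shuffle (e == 4) a control element
     holding word index 2 expands to the byte indices { 8 9 10 11 } in the
     corresponding four bytes of the V16QImode control, which is what the
     pshufb-based sequences below expect.  */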
22485
22486  /* The actual shuffle operations all operate on V16QImode.  */
22487  op0 = gen_lowpart (V16QImode, op0);
22488  op1 = gen_lowpart (V16QImode, op1);
22489
22490  if (TARGET_XOP)
22491    {
22492      if (GET_MODE (target) != V16QImode)
22493	target = gen_reg_rtx (V16QImode);
22494      emit_insn (gen_xop_pperm (target, op0, op1, mask));
22495      if (target != operands[0])
22496	emit_move_insn (operands[0],
22497			gen_lowpart (GET_MODE (operands[0]), target));
22498    }
22499  else if (one_operand_shuffle)
22500    {
22501      if (GET_MODE (target) != V16QImode)
22502	target = gen_reg_rtx (V16QImode);
22503      emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, mask));
22504      if (target != operands[0])
22505	emit_move_insn (operands[0],
22506			gen_lowpart (GET_MODE (operands[0]), target));
22507    }
22508  else
22509    {
22510      rtx xops[6];
22511      bool ok;
22512
22513      /* Shuffle the two input vectors independently.  */
22514      t1 = gen_reg_rtx (V16QImode);
22515      t2 = gen_reg_rtx (V16QImode);
22516      emit_insn (gen_ssse3_pshufbv16qi3 (t1, op0, mask));
22517      emit_insn (gen_ssse3_pshufbv16qi3 (t2, op1, mask));
22518
22519 merge_two:
22520      /* Then merge them together.  The key is whether any given control
22521         element contained a bit set that indicates the second word.  */
22522      mask = operands[3];
22523      vt = GEN_INT (w);
22524      if (maskmode == V2DImode && !TARGET_SSE4_1)
22525	{
22526	  /* Without SSE4.1, we don't have V2DImode EQ.  Perform one
22527	     more shuffle to convert the V2DI input mask into a V4SI
	     input mask.  At that point the masking that expand_int_vcond
	     performs will work as desired.  */
22530	  rtx t3 = gen_reg_rtx (V4SImode);
22531	  emit_insn (gen_sse2_pshufd_1 (t3, gen_lowpart (V4SImode, mask),
22532				        const0_rtx, const0_rtx,
22533				        const2_rtx, const2_rtx));
22534	  mask = t3;
22535	  maskmode = V4SImode;
22536	  e = w = 4;
22537	}
22538
22539      for (i = 0; i < w; i++)
22540	vec[i] = vt;
22541      vt = gen_rtx_CONST_VECTOR (maskmode, gen_rtvec_v (w, vec));
22542      vt = force_reg (maskmode, vt);
22543      mask = expand_simple_binop (maskmode, AND, mask, vt,
22544				  NULL_RTX, 0, OPTAB_DIRECT);
22545
22546      if (GET_MODE (target) != mode)
22547	target = gen_reg_rtx (mode);
22548      xops[0] = target;
22549      xops[1] = gen_lowpart (mode, t2);
22550      xops[2] = gen_lowpart (mode, t1);
22551      xops[3] = gen_rtx_EQ (maskmode, mask, vt);
22552      xops[4] = mask;
22553      xops[5] = vt;
22554      ok = ix86_expand_int_vcond (xops);
22555      gcc_assert (ok);
22556      if (target != operands[0])
22557	emit_move_insn (operands[0],
22558			gen_lowpart (GET_MODE (operands[0]), target));
22559    }
22560}
22561
/* Unpack SRC into DEST, which has the next wider integer vector type.
   UNSIGNED_P is true if we should do zero extension, else sign extension.
   HIGH_P is true if we want the N/2 high elements, else the low elements.  */
22565
22566void
22567ix86_expand_sse_unpack (rtx dest, rtx src, bool unsigned_p, bool high_p)
22568{
22569  machine_mode imode = GET_MODE (src);
22570  rtx tmp;
22571
22572  if (TARGET_SSE4_1)
22573    {
22574      rtx (*unpack)(rtx, rtx);
22575      rtx (*extract)(rtx, rtx) = NULL;
22576      machine_mode halfmode = BLKmode;
22577
22578      switch (imode)
22579	{
22580	case V64QImode:
22581	  if (unsigned_p)
22582	    unpack = gen_avx512bw_zero_extendv32qiv32hi2;
22583	  else
22584	    unpack = gen_avx512bw_sign_extendv32qiv32hi2;
22585	  halfmode = V32QImode;
22586	  extract
22587	    = high_p ? gen_vec_extract_hi_v64qi : gen_vec_extract_lo_v64qi;
22588	  break;
22589	case V32QImode:
22590	  if (unsigned_p)
22591	    unpack = gen_avx2_zero_extendv16qiv16hi2;
22592	  else
22593	    unpack = gen_avx2_sign_extendv16qiv16hi2;
22594	  halfmode = V16QImode;
22595	  extract
22596	    = high_p ? gen_vec_extract_hi_v32qi : gen_vec_extract_lo_v32qi;
22597	  break;
22598	case V32HImode:
22599	  if (unsigned_p)
22600	    unpack = gen_avx512f_zero_extendv16hiv16si2;
22601	  else
22602	    unpack = gen_avx512f_sign_extendv16hiv16si2;
22603	  halfmode = V16HImode;
22604	  extract
22605	    = high_p ? gen_vec_extract_hi_v32hi : gen_vec_extract_lo_v32hi;
22606	  break;
22607	case V16HImode:
22608	  if (unsigned_p)
22609	    unpack = gen_avx2_zero_extendv8hiv8si2;
22610	  else
22611	    unpack = gen_avx2_sign_extendv8hiv8si2;
22612	  halfmode = V8HImode;
22613	  extract
22614	    = high_p ? gen_vec_extract_hi_v16hi : gen_vec_extract_lo_v16hi;
22615	  break;
22616	case V16SImode:
22617	  if (unsigned_p)
22618	    unpack = gen_avx512f_zero_extendv8siv8di2;
22619	  else
22620	    unpack = gen_avx512f_sign_extendv8siv8di2;
22621	  halfmode = V8SImode;
22622	  extract
22623	    = high_p ? gen_vec_extract_hi_v16si : gen_vec_extract_lo_v16si;
22624	  break;
22625	case V8SImode:
22626	  if (unsigned_p)
22627	    unpack = gen_avx2_zero_extendv4siv4di2;
22628	  else
22629	    unpack = gen_avx2_sign_extendv4siv4di2;
22630	  halfmode = V4SImode;
22631	  extract
22632	    = high_p ? gen_vec_extract_hi_v8si : gen_vec_extract_lo_v8si;
22633	  break;
22634	case V16QImode:
22635	  if (unsigned_p)
22636	    unpack = gen_sse4_1_zero_extendv8qiv8hi2;
22637	  else
22638	    unpack = gen_sse4_1_sign_extendv8qiv8hi2;
22639	  break;
22640	case V8HImode:
22641	  if (unsigned_p)
22642	    unpack = gen_sse4_1_zero_extendv4hiv4si2;
22643	  else
22644	    unpack = gen_sse4_1_sign_extendv4hiv4si2;
22645	  break;
22646	case V4SImode:
22647	  if (unsigned_p)
22648	    unpack = gen_sse4_1_zero_extendv2siv2di2;
22649	  else
22650	    unpack = gen_sse4_1_sign_extendv2siv2di2;
22651	  break;
22652	default:
22653	  gcc_unreachable ();
22654	}
22655
22656      if (GET_MODE_SIZE (imode) >= 32)
22657	{
22658	  tmp = gen_reg_rtx (halfmode);
22659	  emit_insn (extract (tmp, src));
22660	}
22661      else if (high_p)
22662	{
22663	  /* Shift higher 8 bytes to lower 8 bytes.  */
22664	  tmp = gen_reg_rtx (V1TImode);
22665	  emit_insn (gen_sse2_lshrv1ti3 (tmp, gen_lowpart (V1TImode, src),
22666					 GEN_INT (64)));
22667	  tmp = gen_lowpart (imode, tmp);
22668	}
22669      else
22670	tmp = src;
22671
22672      emit_insn (unpack (dest, tmp));
22673    }
22674  else
22675    {
22676      rtx (*unpack)(rtx, rtx, rtx);
22677
22678      switch (imode)
22679	{
22680	case V16QImode:
22681	  if (high_p)
22682	    unpack = gen_vec_interleave_highv16qi;
22683	  else
22684	    unpack = gen_vec_interleave_lowv16qi;
22685	  break;
22686	case V8HImode:
22687	  if (high_p)
22688	    unpack = gen_vec_interleave_highv8hi;
22689	  else
22690	    unpack = gen_vec_interleave_lowv8hi;
22691	  break;
22692	case V4SImode:
22693	  if (high_p)
22694	    unpack = gen_vec_interleave_highv4si;
22695	  else
22696	    unpack = gen_vec_interleave_lowv4si;
22697	  break;
22698	default:
22699	  gcc_unreachable ();
22700	}
22701
22702      if (unsigned_p)
22703	tmp = force_reg (imode, CONST0_RTX (imode));
22704      else
22705	tmp = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
22706				   src, pc_rtx, pc_rtx);
22707
22708      rtx tmp2 = gen_reg_rtx (imode);
22709      emit_insn (unpack (tmp2, src, tmp));
22710      emit_move_insn (dest, gen_lowpart (GET_MODE (dest), tmp2));
22711    }
22712}
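
/* In the pre-SSE4.1 path the sign extension falls out of the interleave:
   TMP is computed as (0 > SRC), i.e. an all-ones mask for negative
   elements, so interleaving SRC with TMP yields elements whose high half
   is the sign extension of the low half.  */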
22713
/* Expand conditional increment or decrement using adc/sbb instructions.
22715   The default case using setcc followed by the conditional move can be
22716   done by generic code.  */
22717bool
22718ix86_expand_int_addcc (rtx operands[])
22719{
22720  enum rtx_code code = GET_CODE (operands[1]);
22721  rtx flags;
22722  rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
22723  rtx compare_op;
22724  rtx val = const0_rtx;
22725  bool fpcmp = false;
22726  machine_mode mode;
22727  rtx op0 = XEXP (operands[1], 0);
22728  rtx op1 = XEXP (operands[1], 1);
22729
22730  if (operands[3] != const1_rtx
22731      && operands[3] != constm1_rtx)
22732    return false;
22733  if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
22734     return false;
22735  code = GET_CODE (compare_op);
22736
22737  flags = XEXP (compare_op, 0);
22738
22739  if (GET_MODE (flags) == CCFPmode
22740      || GET_MODE (flags) == CCFPUmode)
22741    {
22742      fpcmp = true;
22743      code = ix86_fp_compare_code_to_integer (code);
22744    }
22745
22746  if (code != LTU)
22747    {
22748      val = constm1_rtx;
22749      if (fpcmp)
22750	PUT_CODE (compare_op,
22751		  reverse_condition_maybe_unordered
22752		    (GET_CODE (compare_op)));
22753      else
22754	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
22755    }
22756
22757  mode = GET_MODE (operands[0]);
22758
22759  /* Construct either adc or sbb insn.  */
22760  if ((code == LTU) == (operands[3] == constm1_rtx))
22761    {
22762      switch (mode)
22763	{
22764	  case QImode:
22765	    insn = gen_subqi3_carry;
22766	    break;
22767	  case HImode:
22768	    insn = gen_subhi3_carry;
22769	    break;
22770	  case SImode:
22771	    insn = gen_subsi3_carry;
22772	    break;
22773	  case DImode:
22774	    insn = gen_subdi3_carry;
22775	    break;
22776	  default:
22777	    gcc_unreachable ();
22778	}
22779    }
22780  else
22781    {
22782      switch (mode)
22783	{
22784	  case QImode:
22785	    insn = gen_addqi3_carry;
22786	    break;
22787	  case HImode:
22788	    insn = gen_addhi3_carry;
22789	    break;
22790	  case SImode:
22791	    insn = gen_addsi3_carry;
22792	    break;
22793	  case DImode:
22794	    insn = gen_adddi3_carry;
22795	    break;
22796	  default:
22797	    gcc_unreachable ();
22798	}
22799    }
22800  emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
22801
22802  return true;
22803}
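
/* As a schematic example (register assignment illustrative only),
   a statement such as

     x += (a < b);

   with A and B unsigned is intended to become a compare followed by an
   add-with-carry, e.g.

     cmpl  %esi, %edi       # sets CF iff a < b
     adcl  $0, %eax         # x += CF

   rather than a setcc/add or conditional-move sequence.  */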
22804
22805
/* Split operands 0 and 1 into half-mode parts.  Similar to split_double_mode,
   but works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most four parts are generated.  */
22810
22811static int
22812ix86_split_to_parts (rtx operand, rtx *parts, machine_mode mode)
22813{
22814  int size;
22815
22816  if (!TARGET_64BIT)
22817    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
22818  else
22819    size = (GET_MODE_SIZE (mode) + 4) / 8;
22820
22821  gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
22822  gcc_assert (size >= 2 && size <= 4);
22823
22824  /* Optimize constant pool reference to immediates.  This is used by fp
     moves, which force all constants to memory to allow combining.  */
22826  if (MEM_P (operand) && MEM_READONLY_P (operand))
22827    {
22828      rtx tmp = maybe_get_pool_constant (operand);
22829      if (tmp)
22830	operand = tmp;
22831    }
22832
22833  if (MEM_P (operand) && !offsettable_memref_p (operand))
22834    {
      /* The only non-offsettable memories we handle are pushes.  */
22836      int ok = push_operand (operand, VOIDmode);
22837
22838      gcc_assert (ok);
22839
22840      operand = copy_rtx (operand);
22841      PUT_MODE (operand, word_mode);
22842      parts[0] = parts[1] = parts[2] = parts[3] = operand;
22843      return size;
22844    }
22845
22846  if (GET_CODE (operand) == CONST_VECTOR)
22847    {
22848      machine_mode imode = int_mode_for_mode (mode);
22849      /* Caution: if we looked through a constant pool memory above,
22850	 the operand may actually have a different mode now.  That's
22851	 ok, since we want to pun this all the way back to an integer.  */
22852      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
22853      gcc_assert (operand != NULL);
22854      mode = imode;
22855    }
22856
22857  if (!TARGET_64BIT)
22858    {
22859      if (mode == DImode)
22860	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22861      else
22862	{
22863	  int i;
22864
22865	  if (REG_P (operand))
22866	    {
22867	      gcc_assert (reload_completed);
22868	      for (i = 0; i < size; i++)
22869		parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
22870	    }
22871	  else if (offsettable_memref_p (operand))
22872	    {
22873	      operand = adjust_address (operand, SImode, 0);
22874	      parts[0] = operand;
22875	      for (i = 1; i < size; i++)
22876		parts[i] = adjust_address (operand, SImode, 4 * i);
22877	    }
22878	  else if (GET_CODE (operand) == CONST_DOUBLE)
22879	    {
22880	      REAL_VALUE_TYPE r;
22881	      long l[4];
22882
22883	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22884	      switch (mode)
22885		{
22886		case TFmode:
22887		  real_to_target (l, &r, mode);
22888		  parts[3] = gen_int_mode (l[3], SImode);
22889		  parts[2] = gen_int_mode (l[2], SImode);
22890		  break;
22891		case XFmode:
22892		  /* We can't use REAL_VALUE_TO_TARGET_LONG_DOUBLE since
22893		     long double may not be 80-bit.  */
22894		  real_to_target (l, &r, mode);
22895		  parts[2] = gen_int_mode (l[2], SImode);
22896		  break;
22897		case DFmode:
22898		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
22899		  break;
22900		default:
22901		  gcc_unreachable ();
22902		}
22903	      parts[1] = gen_int_mode (l[1], SImode);
22904	      parts[0] = gen_int_mode (l[0], SImode);
22905	    }
22906	  else
22907	    gcc_unreachable ();
22908	}
22909    }
22910  else
22911    {
22912      if (mode == TImode)
22913	split_double_mode (mode, &operand, 1, &parts[0], &parts[1]);
22914      if (mode == XFmode || mode == TFmode)
22915	{
22916	  machine_mode upper_mode = mode==XFmode ? SImode : DImode;
22917	  if (REG_P (operand))
22918	    {
22919	      gcc_assert (reload_completed);
22920	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
22921	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
22922	    }
22923	  else if (offsettable_memref_p (operand))
22924	    {
22925	      operand = adjust_address (operand, DImode, 0);
22926	      parts[0] = operand;
22927	      parts[1] = adjust_address (operand, upper_mode, 8);
22928	    }
22929	  else if (GET_CODE (operand) == CONST_DOUBLE)
22930	    {
22931	      REAL_VALUE_TYPE r;
22932	      long l[4];
22933
22934	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
22935	      real_to_target (l, &r, mode);
22936
22937	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
22938	      if (HOST_BITS_PER_WIDE_INT >= 64)
22939	        parts[0]
22940		  = gen_int_mode
22941		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
22942		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
22943		       DImode);
22944	      else
22945	        parts[0] = immed_double_const (l[0], l[1], DImode);
22946
22947	      if (upper_mode == SImode)
22948	        parts[1] = gen_int_mode (l[2], SImode);
22949	      else if (HOST_BITS_PER_WIDE_INT >= 64)
22950	        parts[1]
22951		  = gen_int_mode
22952		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
22953		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
22954		       DImode);
22955	      else
22956	        parts[1] = immed_double_const (l[2], l[3], DImode);
22957	    }
22958	  else
22959	    gcc_unreachable ();
22960	}
22961    }
22962
22963  return size;
22964}
22965
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   The value is first split into parts (see ix86_split_to_parts); the
   destination parts are placed in operands 2 and up and the source parts
   in operands 6 and up, in the correct order, and the moves are emitted.  */
22970
22971void
22972ix86_split_long_move (rtx operands[])
22973{
22974  rtx part[2][4];
22975  int nparts, i, j;
22976  int push = 0;
22977  int collisions = 0;
22978  machine_mode mode = GET_MODE (operands[0]);
22979  bool collisionparts[4];
22980
  /* The DFmode expanders may ask us to move a double.
     For a 64bit target this is a single move.  By hiding that fact
     here we simplify the i386.md splitters.  */
22984  if (TARGET_64BIT && GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
22985    {
22986      /* Optimize constant pool reference to immediates.  This is used by
22987	 fp moves, that force all constants to memory to allow combining.  */
22988
22989      if (MEM_P (operands[1])
22990	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
22991	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
22992	operands[1] = get_pool_constant (XEXP (operands[1], 0));
22993      if (push_operand (operands[0], VOIDmode))
22994	{
22995	  operands[0] = copy_rtx (operands[0]);
22996	  PUT_MODE (operands[0], word_mode);
22997	}
22998      else
22999        operands[0] = gen_lowpart (DImode, operands[0]);
23000      operands[1] = gen_lowpart (DImode, operands[1]);
23001      emit_move_insn (operands[0], operands[1]);
23002      return;
23003    }
23004
23005  /* The only non-offsettable memory we handle is push.  */
23006  if (push_operand (operands[0], VOIDmode))
23007    push = 1;
23008  else
23009    gcc_assert (!MEM_P (operands[0])
23010		|| offsettable_memref_p (operands[0]));
23011
23012  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
23013  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
23014
  /* When emitting a push, take care of source operands on the stack.  */
23016  if (push && MEM_P (operands[1])
23017      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
23018    {
23019      rtx src_base = XEXP (part[1][nparts - 1], 0);
23020
23021      /* Compensate for the stack decrement by 4.  */
23022      if (!TARGET_64BIT && nparts == 3
23023	  && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
23024	src_base = plus_constant (Pmode, src_base, 4);
23025
23026      /* src_base refers to the stack pointer and is
23027	 automatically decreased by emitted push.  */
23028      for (i = 0; i < nparts; i++)
23029	part[1][i] = change_address (part[1][i],
23030				     GET_MODE (part[1][i]), src_base);
23031    }
23032
  /* We need to do the copies in the right order in case an address register
23034     of the source overlaps the destination.  */
23035  if (REG_P (part[0][0]) && MEM_P (part[1][0]))
23036    {
23037      rtx tmp;
23038
23039      for (i = 0; i < nparts; i++)
23040	{
23041	  collisionparts[i]
23042	    = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
23043	  if (collisionparts[i])
23044	    collisions++;
23045	}
23046
23047      /* Collision in the middle part can be handled by reordering.  */
23048      if (collisions == 1 && nparts == 3 && collisionparts [1])
23049	{
23050	  std::swap (part[0][1], part[0][2]);
23051	  std::swap (part[1][1], part[1][2]);
23052	}
23053      else if (collisions == 1
23054	       && nparts == 4
23055	       && (collisionparts [1] || collisionparts [2]))
23056	{
23057	  if (collisionparts [1])
23058	    {
23059	      std::swap (part[0][1], part[0][2]);
23060	      std::swap (part[1][1], part[1][2]);
23061	    }
23062	  else
23063	    {
23064	      std::swap (part[0][2], part[0][3]);
23065	      std::swap (part[1][2], part[1][3]);
23066	    }
23067	}
23068
      /* If there are more collisions, we can't handle them by reordering.
23070	 Do an lea to the last part and use only one colliding move.  */
23071      else if (collisions > 1)
23072	{
23073	  rtx base, addr, tls_base = NULL_RTX;
23074
23075	  collisions = 1;
23076
23077	  base = part[0][nparts - 1];
23078
23079	  /* Handle the case when the last part isn't valid for lea.
23080	     Happens in 64-bit mode storing the 12-byte XFmode.  */
23081	  if (GET_MODE (base) != Pmode)
23082	    base = gen_rtx_REG (Pmode, REGNO (base));
23083
23084	  addr = XEXP (part[1][0], 0);
23085	  if (TARGET_TLS_DIRECT_SEG_REFS)
23086	    {
23087	      struct ix86_address parts;
23088	      int ok = ix86_decompose_address (addr, &parts);
23089	      gcc_assert (ok);
23090	      if (parts.seg == DEFAULT_TLS_SEG_REG)
23091		{
23092		  /* It is not valid to use %gs: or %fs: in
23093		     lea though, so we need to remove it from the
23094		     address used for lea and add it to each individual
		     memory load instead.  */
23096		  addr = copy_rtx (addr);
23097		  rtx *x = &addr;
23098		  while (GET_CODE (*x) == PLUS)
23099		    {
23100		      for (i = 0; i < 2; i++)
23101			{
23102			  rtx u = XEXP (*x, i);
23103			  if (GET_CODE (u) == ZERO_EXTEND)
23104			    u = XEXP (u, 0);
23105			  if (GET_CODE (u) == UNSPEC
23106			      && XINT (u, 1) == UNSPEC_TP)
23107			    {
23108			      tls_base = XEXP (*x, i);
23109			      *x = XEXP (*x, 1 - i);
23110			      break;
23111			    }
23112			}
23113		      if (tls_base)
23114			break;
23115		      x = &XEXP (*x, 0);
23116		    }
23117		  gcc_assert (tls_base);
23118		}
23119	    }
23120	  emit_insn (gen_rtx_SET (VOIDmode, base, addr));
23121	  if (tls_base)
23122	    base = gen_rtx_PLUS (GET_MODE (base), base, tls_base);
23123	  part[1][0] = replace_equiv_address (part[1][0], base);
23124	  for (i = 1; i < nparts; i++)
23125	    {
23126	      if (tls_base)
23127		base = copy_rtx (base);
23128	      tmp = plus_constant (Pmode, base, UNITS_PER_WORD * i);
23129	      part[1][i] = replace_equiv_address (part[1][i], tmp);
23130	    }
23131	}
23132    }
23133
23134  if (push)
23135    {
23136      if (!TARGET_64BIT)
23137	{
23138	  if (nparts == 3)
23139	    {
23140	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
23141                emit_insn (ix86_gen_add3 (stack_pointer_rtx,
23142					  stack_pointer_rtx, GEN_INT (-4)));
23143	      emit_move_insn (part[0][2], part[1][2]);
23144	    }
23145	  else if (nparts == 4)
23146	    {
23147	      emit_move_insn (part[0][3], part[1][3]);
23148	      emit_move_insn (part[0][2], part[1][2]);
23149	    }
23150	}
23151      else
23152	{
	  /* In 64bit mode we don't have a 32bit push available.  In case this
	     is a register, it is OK - we will just use the larger counterpart.
	     We also retype memory - this comes from an attempt to avoid a REX
	     prefix on moving the second half of a TFmode value.  */
23157	  if (GET_MODE (part[1][1]) == SImode)
23158	    {
23159	      switch (GET_CODE (part[1][1]))
23160		{
23161		case MEM:
23162		  part[1][1] = adjust_address (part[1][1], DImode, 0);
23163		  break;
23164
23165		case REG:
23166		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
23167		  break;
23168
23169		default:
23170		  gcc_unreachable ();
23171		}
23172
23173	      if (GET_MODE (part[1][0]) == SImode)
23174		part[1][0] = part[1][1];
23175	    }
23176	}
23177      emit_move_insn (part[0][1], part[1][1]);
23178      emit_move_insn (part[0][0], part[1][0]);
23179      return;
23180    }
23181
  /* Choose the correct order so that we do not overwrite the source
     before it is copied.  */
23183  if ((REG_P (part[0][0])
23184       && REG_P (part[1][1])
23185       && (REGNO (part[0][0]) == REGNO (part[1][1])
23186	   || (nparts == 3
23187	       && REGNO (part[0][0]) == REGNO (part[1][2]))
23188	   || (nparts == 4
23189	       && REGNO (part[0][0]) == REGNO (part[1][3]))))
23190      || (collisions > 0
23191	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
23192    {
23193      for (i = 0, j = nparts - 1; i < nparts; i++, j--)
23194	{
23195	  operands[2 + i] = part[0][j];
23196	  operands[6 + i] = part[1][j];
23197	}
23198    }
23199  else
23200    {
23201      for (i = 0; i < nparts; i++)
23202	{
23203	  operands[2 + i] = part[0][i];
23204	  operands[6 + i] = part[1][i];
23205	}
23206    }
23207
23208  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
23209  if (optimize_insn_for_size_p ())
23210    {
23211      for (j = 0; j < nparts - 1; j++)
23212	if (CONST_INT_P (operands[6 + j])
23213	    && operands[6 + j] != const0_rtx
23214	    && REG_P (operands[2 + j]))
23215	  for (i = j; i < nparts - 1; i++)
23216	    if (CONST_INT_P (operands[7 + i])
23217		&& INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
23218	      operands[7 + i] = operands[2 + j];
23219    }
23220
23221  for (i = 0; i < nparts; i++)
23222    emit_move_insn (operands[2 + i], operands[6 + i]);
23223
23224  return;
23225}
23226
23227/* Helper function of ix86_split_ashl used to generate an SImode/DImode
23228   left shift by a constant, either using a single shift or
23229   a sequence of add instructions.  */
23230
23231static void
23232ix86_expand_ashl_const (rtx operand, int count, machine_mode mode)
23233{
23234  rtx (*insn)(rtx, rtx, rtx);
23235
23236  if (count == 1
23237      || (count * ix86_cost->add <= ix86_cost->shift_const
23238	  && !optimize_insn_for_size_p ()))
23239    {
23240      insn = mode == DImode ? gen_addsi3 : gen_adddi3;
23241      while (count-- > 0)
23242	emit_insn (insn (operand, operand, operand));
23243    }
23244  else
23245    {
23246      insn = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23247      emit_insn (insn (operand, operand, GEN_INT (count)));
23248    }
23249}
23250
23251void
23252ix86_split_ashl (rtx *operands, rtx scratch, machine_mode mode)
23253{
23254  rtx (*gen_ashl3)(rtx, rtx, rtx);
23255  rtx (*gen_shld)(rtx, rtx, rtx);
23256  int half_width = GET_MODE_BITSIZE (mode) >> 1;
23257
23258  rtx low[2], high[2];
23259  int count;
23260
23261  if (CONST_INT_P (operands[2]))
23262    {
23263      split_double_mode (mode, operands, 2, low, high);
23264      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23265
23266      if (count >= half_width)
23267	{
23268	  emit_move_insn (high[0], low[1]);
23269	  emit_move_insn (low[0], const0_rtx);
23270
23271	  if (count > half_width)
23272	    ix86_expand_ashl_const (high[0], count - half_width, mode);
23273	}
23274      else
23275	{
23276	  gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23277
23278	  if (!rtx_equal_p (operands[0], operands[1]))
23279	    emit_move_insn (operands[0], operands[1]);
23280
23281	  emit_insn (gen_shld (high[0], low[0], GEN_INT (count)));
23282	  ix86_expand_ashl_const (low[0], count, mode);
23283	}
23284      return;
23285    }
23286
23287  split_double_mode (mode, operands, 1, low, high);
23288
23289  gen_ashl3 = mode == DImode ? gen_ashlsi3 : gen_ashldi3;
23290
23291  if (operands[1] == const1_rtx)
23292    {
      /* Assuming we've chosen QImode-capable registers, 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
23295      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
23296	{
23297	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
23298
23299	  ix86_expand_clear (low[0]);
23300	  ix86_expand_clear (high[0]);
23301	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (half_width)));
23302
23303	  d = gen_lowpart (QImode, low[0]);
23304	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23305	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
23306	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
23307
23308	  d = gen_lowpart (QImode, high[0]);
23309	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
23310	  s = gen_rtx_NE (QImode, flags, const0_rtx);
23311	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
23312	}
23313
23314      /* Otherwise, we can get the same results by manually performing
23315	 a bit extract operation on bit 5/6, and then performing the two
23316	 shifts.  The two methods of getting 0/1 into low/high are exactly
23317	 the same size.  Avoiding the shift in the bit extract case helps
23318	 pentium4 a bit; no one else seems to care much either way.  */
23319      else
23320	{
23321	  machine_mode half_mode;
23322	  rtx (*gen_lshr3)(rtx, rtx, rtx);
23323	  rtx (*gen_and3)(rtx, rtx, rtx);
23324	  rtx (*gen_xor3)(rtx, rtx, rtx);
23325	  HOST_WIDE_INT bits;
23326	  rtx x;
23327
23328	  if (mode == DImode)
23329	    {
23330	      half_mode = SImode;
23331	      gen_lshr3 = gen_lshrsi3;
23332	      gen_and3 = gen_andsi3;
23333	      gen_xor3 = gen_xorsi3;
23334	      bits = 5;
23335	    }
23336	  else
23337	    {
23338	      half_mode = DImode;
23339	      gen_lshr3 = gen_lshrdi3;
23340	      gen_and3 = gen_anddi3;
23341	      gen_xor3 = gen_xordi3;
23342	      bits = 6;
23343	    }
23344
23345	  if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
23346	    x = gen_rtx_ZERO_EXTEND (half_mode, operands[2]);
23347	  else
23348	    x = gen_lowpart (half_mode, operands[2]);
23349	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
23350
23351	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (bits)));
23352	  emit_insn (gen_and3 (high[0], high[0], const1_rtx));
23353	  emit_move_insn (low[0], high[0]);
23354	  emit_insn (gen_xor3 (low[0], low[0], const1_rtx));
23355	}
23356
23357      emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23358      emit_insn (gen_ashl3 (high[0], high[0], operands[2]));
23359      return;
23360    }
23361
23362  if (operands[1] == constm1_rtx)
23363    {
23364      /* For -1 << N, we can avoid the shld instruction, because we
23365	 know that we're shifting 0...31/63 ones into a -1.  */
23366      emit_move_insn (low[0], constm1_rtx);
23367      if (optimize_insn_for_size_p ())
23368	emit_move_insn (high[0], low[0]);
23369      else
23370	emit_move_insn (high[0], constm1_rtx);
23371    }
23372  else
23373    {
23374      gen_shld = mode == DImode ? gen_x86_shld : gen_x86_64_shld;
23375
23376      if (!rtx_equal_p (operands[0], operands[1]))
23377	emit_move_insn (operands[0], operands[1]);
23378
23379      split_double_mode (mode, operands, 1, low, high);
23380      emit_insn (gen_shld (high[0], low[0], operands[2]));
23381    }
23382
23383  emit_insn (gen_ashl3 (low[0], low[0], operands[2]));
23384
23385  if (TARGET_CMOVE && scratch)
23386    {
23387      rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23388	= mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23389
23390      ix86_expand_clear (scratch);
23391      emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2], scratch));
23392    }
23393  else
23394    {
23395      rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23396	= mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23397
23398      emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
23399    }
23400}
23401
23402void
23403ix86_split_ashr (rtx *operands, rtx scratch, machine_mode mode)
23404{
23405  rtx (*gen_ashr3)(rtx, rtx, rtx)
23406    = mode == DImode ? gen_ashrsi3 : gen_ashrdi3;
23407  rtx (*gen_shrd)(rtx, rtx, rtx);
23408  int half_width = GET_MODE_BITSIZE (mode) >> 1;
23409
23410  rtx low[2], high[2];
23411  int count;
23412
23413  if (CONST_INT_P (operands[2]))
23414    {
23415      split_double_mode (mode, operands, 2, low, high);
23416      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23417
23418      if (count == GET_MODE_BITSIZE (mode) - 1)
23419	{
23420	  emit_move_insn (high[0], high[1]);
23421	  emit_insn (gen_ashr3 (high[0], high[0],
23422				GEN_INT (half_width - 1)));
23423	  emit_move_insn (low[0], high[0]);
23424
23425	}
23426      else if (count >= half_width)
23427	{
23428	  emit_move_insn (low[0], high[1]);
23429	  emit_move_insn (high[0], low[0]);
23430	  emit_insn (gen_ashr3 (high[0], high[0],
23431				GEN_INT (half_width - 1)));
23432
23433	  if (count > half_width)
23434	    emit_insn (gen_ashr3 (low[0], low[0],
23435				  GEN_INT (count - half_width)));
23436	}
23437      else
23438	{
23439	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23440
23441	  if (!rtx_equal_p (operands[0], operands[1]))
23442	    emit_move_insn (operands[0], operands[1]);
23443
23444	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23445	  emit_insn (gen_ashr3 (high[0], high[0], GEN_INT (count)));
23446	}
23447    }
23448  else
23449    {
23450      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23451
      if (!rtx_equal_p (operands[0], operands[1]))
23453	emit_move_insn (operands[0], operands[1]);
23454
23455      split_double_mode (mode, operands, 1, low, high);
23456
23457      emit_insn (gen_shrd (low[0], high[0], operands[2]));
23458      emit_insn (gen_ashr3 (high[0], high[0], operands[2]));
23459
23460      if (TARGET_CMOVE && scratch)
23461	{
23462	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23463	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23464
23465	  emit_move_insn (scratch, high[0]);
23466	  emit_insn (gen_ashr3 (scratch, scratch,
23467				GEN_INT (half_width - 1)));
23468	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23469					  scratch));
23470	}
23471      else
23472	{
23473	  rtx (*gen_x86_shift_adj_3)(rtx, rtx, rtx)
23474	    = mode == DImode ? gen_x86_shiftsi_adj_3 : gen_x86_shiftdi_adj_3;
23475
23476	  emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
23477	}
23478    }
23479}
23480
23481void
23482ix86_split_lshr (rtx *operands, rtx scratch, machine_mode mode)
23483{
23484  rtx (*gen_lshr3)(rtx, rtx, rtx)
23485    = mode == DImode ? gen_lshrsi3 : gen_lshrdi3;
23486  rtx (*gen_shrd)(rtx, rtx, rtx);
23487  int half_width = GET_MODE_BITSIZE (mode) >> 1;
23488
23489  rtx low[2], high[2];
23490  int count;
23491
23492  if (CONST_INT_P (operands[2]))
23493    {
23494      split_double_mode (mode, operands, 2, low, high);
23495      count = INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode) - 1);
23496
23497      if (count >= half_width)
23498	{
23499	  emit_move_insn (low[0], high[1]);
23500	  ix86_expand_clear (high[0]);
23501
23502	  if (count > half_width)
23503	    emit_insn (gen_lshr3 (low[0], low[0],
23504				  GEN_INT (count - half_width)));
23505	}
23506      else
23507	{
23508	  gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23509
23510	  if (!rtx_equal_p (operands[0], operands[1]))
23511	    emit_move_insn (operands[0], operands[1]);
23512
23513	  emit_insn (gen_shrd (low[0], high[0], GEN_INT (count)));
23514	  emit_insn (gen_lshr3 (high[0], high[0], GEN_INT (count)));
23515	}
23516    }
23517  else
23518    {
23519      gen_shrd = mode == DImode ? gen_x86_shrd : gen_x86_64_shrd;
23520
23521      if (!rtx_equal_p (operands[0], operands[1]))
23522	emit_move_insn (operands[0], operands[1]);
23523
23524      split_double_mode (mode, operands, 1, low, high);
23525
23526      emit_insn (gen_shrd (low[0], high[0], operands[2]));
23527      emit_insn (gen_lshr3 (high[0], high[0], operands[2]));
23528
23529      if (TARGET_CMOVE && scratch)
23530	{
23531	  rtx (*gen_x86_shift_adj_1)(rtx, rtx, rtx, rtx)
23532	    = mode == DImode ? gen_x86_shiftsi_adj_1 : gen_x86_shiftdi_adj_1;
23533
23534	  ix86_expand_clear (scratch);
23535	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
23536					  scratch));
23537	}
23538      else
23539	{
23540	  rtx (*gen_x86_shift_adj_2)(rtx, rtx, rtx)
23541	    = mode == DImode ? gen_x86_shiftsi_adj_2 : gen_x86_shiftdi_adj_2;
23542
23543	  emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
23544	}
23545    }
23546}
23547
/* Predict the just-emitted jump instruction to be taken with
   probability PROB.  */
23549static void
23550predict_jump (int prob)
23551{
23552  rtx insn = get_last_insn ();
23553  gcc_assert (JUMP_P (insn));
23554  add_int_reg_note (insn, REG_BR_PROB, prob);
23555}
23556
/* Helper function for the string operations below.  Test whether VARIABLE
   has the bits of VALUE set; the emitted code jumps to the returned label
   when it does not, i.e. when (VARIABLE & VALUE) == 0.  */
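/* Roughly speaking (an illustrative sketch, not verbatim output), a call
   like ix86_expand_aligntest (ptr, 4, ...) emits code equivalent to

	testl	$4, %reg	# hypothetical register holding PTR
	jz	.Llabel		# taken when the tested bit is clear

   with a branch-probability note attached by predict_jump.  */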
23559static rtx_code_label *
23560ix86_expand_aligntest (rtx variable, int value, bool epilogue)
23561{
23562  rtx_code_label *label = gen_label_rtx ();
23563  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
23564  if (GET_MODE (variable) == DImode)
23565    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
23566  else
23567    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
23568  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
23569			   1, label);
23570  if (epilogue)
23571    predict_jump (REG_BR_PROB_BASE * 50 / 100);
23572  else
23573    predict_jump (REG_BR_PROB_BASE * 90 / 100);
23574  return label;
23575}
23576
/* Decrease COUNTREG by VALUE.  */
23578static void
23579ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
23580{
23581  rtx (*gen_add)(rtx, rtx, rtx)
23582    = GET_MODE (countreg) == DImode ? gen_adddi3 : gen_addsi3;
23583
23584  emit_insn (gen_add (countreg, countreg, GEN_INT (-value)));
23585}
23586
/* Zero-extend EXP, which may be SImode, into a Pmode register.  */
23588rtx
23589ix86_zero_extend_to_Pmode (rtx exp)
23590{
23591  return force_reg (Pmode, convert_to_mode (Pmode, exp, 1));
23592}
23593
23594/* Divide COUNTREG by SCALE.  */
23595static rtx
23596scale_counter (rtx countreg, int scale)
23597{
23598  rtx sc;
23599
23600  if (scale == 1)
23601    return countreg;
23602  if (CONST_INT_P (countreg))
23603    return GEN_INT (INTVAL (countreg) / scale);
23604  gcc_assert (REG_P (countreg));
23605
23606  sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
23607			    GEN_INT (exact_log2 (scale)),
23608			    NULL, 1, OPTAB_DIRECT);
23609  return sc;
23610}
23611
23612/* Return mode for the memcpy/memset loop counter.  Prefer SImode over
23613   DImode for constant loop counts.  */
23614
23615static machine_mode
23616counter_mode (rtx count_exp)
23617{
23618  if (GET_MODE (count_exp) != VOIDmode)
23619    return GET_MODE (count_exp);
23620  if (!CONST_INT_P (count_exp))
23621    return Pmode;
23622  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
23623    return DImode;
23624  return SImode;
23625}
23626
/* Copy the address to a Pmode register.  This is used for x32 to
   truncate a DImode TLS address to an SImode register.  */
23629
23630static rtx
23631ix86_copy_addr_to_reg (rtx addr)
23632{
23633  rtx reg;
23634  if (GET_MODE (addr) == Pmode || GET_MODE (addr) == VOIDmode)
23635    {
23636      reg = copy_addr_to_reg (addr);
23637      REG_POINTER (reg) = 1;
23638      return reg;
23639    }
23640  else
23641    {
23642      gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
23643      reg = copy_to_mode_reg (DImode, addr);
23644      REG_POINTER (reg) = 1;
23645      return gen_rtx_SUBREG (SImode, reg, 0);
23646    }
23647}
23648
/* When ISSETMEM is FALSE, output a simple loop to copy memory pointed to
   by SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times; the overall
   size is COUNT, specified in bytes.  When ISSETMEM is TRUE, output the
   equivalent loop that sets memory to VALUE (assumed to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
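/* In pseudocode, the emitted sequence is roughly the following sketch
   (branch-probability notes and the setmem variant omitted):

       size = count & -(chunk_size * unroll);
       iter = 0;
     top:
       copy UNROLL chunks of MODE from src + iter to dest + iter;
       iter += chunk_size * unroll;
       if (iter < size)
	 goto top;
       dest += iter;  src += iter;
     out:
  */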
23656
23657
23658static void
23659expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
23660			       rtx destptr, rtx srcptr, rtx value,
23661			       rtx count, machine_mode mode, int unroll,
23662			       int expected_size, bool issetmem)
23663{
23664  rtx_code_label *out_label, *top_label;
23665  rtx iter, tmp;
23666  machine_mode iter_mode = counter_mode (count);
23667  int piece_size_n = GET_MODE_SIZE (mode) * unroll;
23668  rtx piece_size = GEN_INT (piece_size_n);
23669  rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
23670  rtx size;
23671  int i;
23672
23673  top_label = gen_label_rtx ();
23674  out_label = gen_label_rtx ();
23675  iter = gen_reg_rtx (iter_mode);
23676
23677  size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
23678			      NULL, 1, OPTAB_DIRECT);
  /* The AND above and the compare below should combine into a single
     test insn.  */
23680  if (piece_size == const1_rtx)
23681    {
23682      emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
23683			       true, out_label);
23684      predict_jump (REG_BR_PROB_BASE * 10 / 100);
23685    }
23686  emit_move_insn (iter, const0_rtx);
23687
23688  emit_label (top_label);
23689
23690  tmp = convert_modes (Pmode, iter_mode, iter, true);
23691
  /* This assert could be relaxed - in this case we'll need to compute
     the smallest power of two containing PIECE_SIZE_N and pass it to
     offset_address.  */
23695  gcc_assert ((piece_size_n & (piece_size_n - 1)) == 0);
23696  destmem = offset_address (destmem, tmp, piece_size_n);
23697  destmem = adjust_address (destmem, mode, 0);
23698
23699  if (!issetmem)
23700    {
23701      srcmem = offset_address (srcmem, copy_rtx (tmp), piece_size_n);
23702      srcmem = adjust_address (srcmem, mode, 0);
23703
      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Also, using four temporaries is overkill in 32bit mode.  */
23707      if (!TARGET_64BIT && 0)
23708	{
23709	  for (i = 0; i < unroll; i++)
23710	    {
23711	      if (i)
23712		{
23713		  destmem =
23714		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23715		  srcmem =
23716		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23717		}
23718	      emit_move_insn (destmem, srcmem);
23719	    }
23720	}
23721      else
23722	{
23723	  rtx tmpreg[4];
23724	  gcc_assert (unroll <= 4);
23725	  for (i = 0; i < unroll; i++)
23726	    {
23727	      tmpreg[i] = gen_reg_rtx (mode);
23728	      if (i)
23729		{
23730		  srcmem =
23731		    adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
23732		}
23733	      emit_move_insn (tmpreg[i], srcmem);
23734	    }
23735	  for (i = 0; i < unroll; i++)
23736	    {
23737	      if (i)
23738		{
23739		  destmem =
23740		    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23741		}
23742	      emit_move_insn (destmem, tmpreg[i]);
23743	    }
23744	}
23745    }
23746  else
23747    for (i = 0; i < unroll; i++)
23748      {
23749	if (i)
23750	  destmem =
23751	    adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
23752	emit_move_insn (destmem, value);
23753      }
23754
23755  tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
23756			     true, OPTAB_LIB_WIDEN);
23757  if (tmp != iter)
23758    emit_move_insn (iter, tmp);
23759
23760  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
23761			   true, top_label);
23762  if (expected_size != -1)
23763    {
23764      expected_size /= GET_MODE_SIZE (mode) * unroll;
23765      if (expected_size == 0)
23766	predict_jump (0);
23767      else if (expected_size > REG_BR_PROB_BASE)
23768	predict_jump (REG_BR_PROB_BASE - 1);
23769      else
23770        predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
23771    }
23772  else
23773    predict_jump (REG_BR_PROB_BASE * 80 / 100);
23774  iter = ix86_zero_extend_to_Pmode (iter);
23775  tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
23776			     true, OPTAB_LIB_WIDEN);
23777  if (tmp != destptr)
23778    emit_move_insn (destptr, tmp);
23779  if (!issetmem)
23780    {
23781      tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
23782				 true, OPTAB_LIB_WIDEN);
23783      if (tmp != srcptr)
23784	emit_move_insn (srcptr, tmp);
23785    }
23786  emit_label (out_label);
23787}
23788
23789/* Output "rep; mov" or "rep; stos" instruction depending on ISSETMEM argument.
23790   When ISSETMEM is true, arguments SRCMEM and SRCPTR are ignored.
23791   When ISSETMEM is false, arguments VALUE and ORIG_VALUE are ignored.
   For the setmem case, VALUE is ORIG_VALUE promoted to a wider size.
23793   ORIG_VALUE is the original value passed to memset to fill the memory with.
23794   Other arguments have same meaning as for previous function.  */
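/* As an illustrative sketch (assuming SImode chunks and the byte count in
   %ecx), the memcpy flavour of this expansion boils down to

	shrl	$2, %ecx
	rep movsl

   while the memset flavour instead emits "rep stosl" with the promoted
   VALUE in %eax.  */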
23795
23796static void
23797expand_set_or_movmem_via_rep (rtx destmem, rtx srcmem,
23798			   rtx destptr, rtx srcptr, rtx value, rtx orig_value,
23799			   rtx count,
23800			   machine_mode mode, bool issetmem)
23801{
23802  rtx destexp;
23803  rtx srcexp;
23804  rtx countreg;
23805  HOST_WIDE_INT rounded_count;
23806
23807  /* If possible, it is shorter to use rep movs.
23808     TODO: Maybe it is better to move this logic to decide_alg.  */
23809  if (mode == QImode && CONST_INT_P (count) && !(INTVAL (count) & 3)
23810      && (!issetmem || orig_value == const0_rtx))
23811    mode = SImode;
23812
23813  if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
23814    destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
23815
23816  countreg = ix86_zero_extend_to_Pmode (scale_counter (count,
23817						       GET_MODE_SIZE (mode)));
23818  if (mode != QImode)
23819    {
23820      destexp = gen_rtx_ASHIFT (Pmode, countreg,
23821				GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23822      destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
23823    }
23824  else
23825    destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
23826  if ((!issetmem || orig_value == const0_rtx) && CONST_INT_P (count))
23827    {
23828      rounded_count = (INTVAL (count)
23829		       & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23830      destmem = shallow_copy_rtx (destmem);
23831      set_mem_size (destmem, rounded_count);
23832    }
23833  else if (MEM_SIZE_KNOWN_P (destmem))
23834    clear_mem_size (destmem);
23835
23836  if (issetmem)
23837    {
23838      value = force_reg (mode, gen_lowpart (mode, value));
23839      emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
23840    }
23841  else
23842    {
23843      if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
23844	srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
23845      if (mode != QImode)
23846	{
23847	  srcexp = gen_rtx_ASHIFT (Pmode, countreg,
23848				   GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
23849	  srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
23850	}
23851      else
23852	srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
23853      if (CONST_INT_P (count))
23854	{
23855	  rounded_count = (INTVAL (count)
23856			   & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
23857	  srcmem = shallow_copy_rtx (srcmem);
23858	  set_mem_size (srcmem, rounded_count);
23859	}
23860      else
23861	{
23862	  if (MEM_SIZE_KNOWN_P (srcmem))
23863	    clear_mem_size (srcmem);
23864	}
23865      emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
23866			      destexp, srcexp));
23867    }
23868}
23869
/* This function emits moves to copy SIZE_TO_MOVE bytes from SRCMEM to
   DESTMEM.
   SRCMEM is passed by pointer so it can be updated on return.
   The return value is the updated DESTMEM.  */
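/* As a sketch of the mode selection below: for SIZE_TO_MOVE == 8 on a
   64-bit target this typically emits a single DImode move through a
   temporary register; if DImode moves were not supported by mov_optab, the
   piece size would be halved and two SImode moves emitted instead.  */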
23874static rtx
23875emit_memmov (rtx destmem, rtx *srcmem, rtx destptr, rtx srcptr,
23876	     HOST_WIDE_INT size_to_move)
23877{
23878  rtx dst = destmem, src = *srcmem, adjust, tempreg;
23879  enum insn_code code;
23880  machine_mode move_mode;
23881  int piece_size, i;
23882
  /* Find the widest mode in which we could perform moves.
     Start with the biggest power of 2 not exceeding SIZE_TO_MOVE and halve
     it until a move of that size is supported.  */
23886  piece_size = 1 << floor_log2 (size_to_move);
23887  move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23888  code = optab_handler (mov_optab, move_mode);
23889  while (code == CODE_FOR_nothing && piece_size > 1)
23890    {
23891      piece_size >>= 1;
23892      move_mode = mode_for_size (piece_size * BITS_PER_UNIT, MODE_INT, 0);
23893      code = optab_handler (mov_optab, move_mode);
23894    }
23895
23896  /* Find the corresponding vector mode with the same size as MOVE_MODE.
23897     MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
23898  if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
23899    {
23900      int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
23901      move_mode = mode_for_vector (word_mode, nunits);
23902      code = optab_handler (mov_optab, move_mode);
23903      if (code == CODE_FOR_nothing)
23904	{
23905	  move_mode = word_mode;
23906	  piece_size = GET_MODE_SIZE (move_mode);
23907	  code = optab_handler (mov_optab, move_mode);
23908	}
23909    }
23910  gcc_assert (code != CODE_FOR_nothing);
23911
23912  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
23913  src = adjust_automodify_address_nv (src, move_mode, srcptr, 0);
23914
  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
23916  gcc_assert (size_to_move % piece_size == 0);
23917  adjust = GEN_INT (piece_size);
23918  for (i = 0; i < size_to_move; i += piece_size)
23919    {
23920      /* We move from memory to memory, so we'll need to do it via
23921	 a temporary register.  */
23922      tempreg = gen_reg_rtx (move_mode);
23923      emit_insn (GEN_FCN (code) (tempreg, src));
23924      emit_insn (GEN_FCN (code) (dst, tempreg));
23925
23926      emit_move_insn (destptr,
23927		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
23928      emit_move_insn (srcptr,
23929		      gen_rtx_PLUS (Pmode, copy_rtx (srcptr), adjust));
23930
23931      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
23932					  piece_size);
23933      src = adjust_automodify_address_nv (src, move_mode, srcptr,
23934					  piece_size);
23935    }
23936
  /* Update the SRC rtx and return the updated DST.  */
23938  *srcmem = src;
23939  return dst;
23940}
23941
/* Output code to copy at most COUNT & (MAX_SIZE - 1) bytes from SRCMEM
   to DESTMEM.  */
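/* For a constant COUNT the residual is decomposed by powers of two; e.g.
   with MAX_SIZE == 16 and COUNT % 16 == 11, the loop below emits an 8-byte,
   a 2-byte and a 1-byte move (11 = 8 + 2 + 1).  */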
23943static void
23944expand_movmem_epilogue (rtx destmem, rtx srcmem,
23945			rtx destptr, rtx srcptr, rtx count, int max_size)
23946{
23947  rtx src, dest;
23948  if (CONST_INT_P (count))
23949    {
23950      HOST_WIDE_INT countval = INTVAL (count);
23951      HOST_WIDE_INT epilogue_size = countval % max_size;
23952      int i;
23953
23954      /* For now MAX_SIZE should be a power of 2.  This assert could be
23955	 relaxed, but it'll require a bit more complicated epilogue
23956	 expanding.  */
23957      gcc_assert ((max_size & (max_size - 1)) == 0);
23958      for (i = max_size; i >= 1; i >>= 1)
23959	{
23960	  if (epilogue_size & i)
23961	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
23962	}
23963      return;
23964    }
23965  if (max_size > 8)
23966    {
23967      count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
23968				    count, 1, OPTAB_DIRECT);
23969      expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
23970				     count, QImode, 1, 4, false);
23971      return;
23972    }
23973
  /* When single string operations are available (TARGET_SINGLE_STRINGOP),
     we can cheaply advance the dest and src pointers.  Otherwise we save
     code size by maintaining an offset (zero is readily available from the
     preceding rep operation) and using x86 addressing modes.  */
23978  if (TARGET_SINGLE_STRINGOP)
23979    {
23980      if (max_size > 4)
23981	{
23982	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
23983	  src = change_address (srcmem, SImode, srcptr);
23984	  dest = change_address (destmem, SImode, destptr);
23985	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
23986	  emit_label (label);
23987	  LABEL_NUSES (label) = 1;
23988	}
23989      if (max_size > 2)
23990	{
23991	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
23992	  src = change_address (srcmem, HImode, srcptr);
23993	  dest = change_address (destmem, HImode, destptr);
23994	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
23995	  emit_label (label);
23996	  LABEL_NUSES (label) = 1;
23997	}
23998      if (max_size > 1)
23999	{
24000	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24001	  src = change_address (srcmem, QImode, srcptr);
24002	  dest = change_address (destmem, QImode, destptr);
24003	  emit_insn (gen_strmov (destptr, dest, srcptr, src));
24004	  emit_label (label);
24005	  LABEL_NUSES (label) = 1;
24006	}
24007    }
24008  else
24009    {
24010      rtx offset = force_reg (Pmode, const0_rtx);
24011      rtx tmp;
24012
24013      if (max_size > 4)
24014	{
24015	  rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24016	  src = change_address (srcmem, SImode, srcptr);
24017	  dest = change_address (destmem, SImode, destptr);
24018	  emit_move_insn (dest, src);
24019	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
24020				     true, OPTAB_LIB_WIDEN);
24021	  if (tmp != offset)
24022	    emit_move_insn (offset, tmp);
24023	  emit_label (label);
24024	  LABEL_NUSES (label) = 1;
24025	}
24026      if (max_size > 2)
24027	{
24028	  rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24029	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24030	  src = change_address (srcmem, HImode, tmp);
24031	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24032	  dest = change_address (destmem, HImode, tmp);
24033	  emit_move_insn (dest, src);
24034	  tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
24035				     true, OPTAB_LIB_WIDEN);
24036	  if (tmp != offset)
24037	    emit_move_insn (offset, tmp);
24038	  emit_label (label);
24039	  LABEL_NUSES (label) = 1;
24040	}
24041      if (max_size > 1)
24042	{
24043	  rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24044	  tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
24045	  src = change_address (srcmem, QImode, tmp);
24046	  tmp = gen_rtx_PLUS (Pmode, destptr, offset);
24047	  dest = change_address (destmem, QImode, tmp);
24048	  emit_move_insn (dest, src);
24049	  emit_label (label);
24050	  LABEL_NUSES (label) = 1;
24051	}
24052    }
24053}
24054
/* This function emits moves to fill SIZE_TO_MOVE bytes starting at DESTMEM
   with the value PROMOTED_VAL.
   The return value is the updated DESTMEM.  */
24059static rtx
24060emit_memset (rtx destmem, rtx destptr, rtx promoted_val,
24061	     HOST_WIDE_INT size_to_move)
24062{
24063  rtx dst = destmem, adjust;
24064  enum insn_code code;
24065  machine_mode move_mode;
24066  int piece_size, i;
24067
  /* Pick the mode in which to perform the stores: use the mode of
     PROMOTED_VAL when possible, falling back to a narrower integer mode
     when SIZE_TO_MOVE is smaller than that mode.  */
24071  move_mode = GET_MODE (promoted_val);
24072  if (move_mode == VOIDmode)
24073    move_mode = QImode;
24074  if (size_to_move < GET_MODE_SIZE (move_mode))
24075    {
24076      move_mode = mode_for_size (size_to_move * BITS_PER_UNIT, MODE_INT, 0);
24077      promoted_val = gen_lowpart (move_mode, promoted_val);
24078    }
24079  piece_size = GET_MODE_SIZE (move_mode);
24080  code = optab_handler (mov_optab, move_mode);
24081  gcc_assert (code != CODE_FOR_nothing && promoted_val != NULL_RTX);
24082
24083  dst = adjust_automodify_address_nv (dst, move_mode, destptr, 0);
24084
  /* Emit moves.  We'll need SIZE_TO_MOVE/PIECE_SIZE moves.  */
24086  gcc_assert (size_to_move % piece_size == 0);
24087  adjust = GEN_INT (piece_size);
24088  for (i = 0; i < size_to_move; i += piece_size)
24089    {
24090      if (piece_size <= GET_MODE_SIZE (word_mode))
24091	{
24092	  emit_insn (gen_strset (destptr, dst, promoted_val));
24093	  dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24094					      piece_size);
24095	  continue;
24096	}
24097
24098      emit_insn (GEN_FCN (code) (dst, promoted_val));
24099
24100      emit_move_insn (destptr,
24101		      gen_rtx_PLUS (Pmode, copy_rtx (destptr), adjust));
24102
24103      dst = adjust_automodify_address_nv (dst, move_mode, destptr,
24104					  piece_size);
24105    }
24106
24107  /* Update DST rtx.  */
24108  return dst;
24109}

/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at
   DESTMEM.  */
24111static void
24112expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
24113				 rtx count, int max_size)
24114{
24115  count =
24116    expand_simple_binop (counter_mode (count), AND, count,
24117			 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
24118  expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
24119				 gen_lowpart (QImode, value), count, QImode,
24120				 1, max_size / 2, true);
24121}
24122
/* Output code to set at most COUNT & (MAX_SIZE - 1) bytes starting at
   DESTMEM.  */
24124static void
24125expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx vec_value,
24126			rtx count, int max_size)
24127{
24128  rtx dest;
24129
24130  if (CONST_INT_P (count))
24131    {
24132      HOST_WIDE_INT countval = INTVAL (count);
24133      HOST_WIDE_INT epilogue_size = countval % max_size;
24134      int i;
24135
24136      /* For now MAX_SIZE should be a power of 2.  This assert could be
24137	 relaxed, but it'll require a bit more complicated epilogue
24138	 expanding.  */
24139      gcc_assert ((max_size & (max_size - 1)) == 0);
24140      for (i = max_size; i >= 1; i >>= 1)
24141	{
24142	  if (epilogue_size & i)
24143	    {
24144	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24145		destmem = emit_memset (destmem, destptr, vec_value, i);
24146	      else
24147		destmem = emit_memset (destmem, destptr, value, i);
24148	    }
24149	}
24150      return;
24151    }
24152  if (max_size > 32)
24153    {
24154      expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
24155      return;
24156    }
24157  if (max_size > 16)
24158    {
24159      rtx_code_label *label = ix86_expand_aligntest (count, 16, true);
24160      if (TARGET_64BIT)
24161	{
24162	  dest = change_address (destmem, DImode, destptr);
24163	  emit_insn (gen_strset (destptr, dest, value));
24164	  dest = adjust_automodify_address_nv (dest, DImode, destptr, 8);
24165	  emit_insn (gen_strset (destptr, dest, value));
24166	}
24167      else
24168	{
24169	  dest = change_address (destmem, SImode, destptr);
24170	  emit_insn (gen_strset (destptr, dest, value));
24171	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24172	  emit_insn (gen_strset (destptr, dest, value));
24173	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 8);
24174	  emit_insn (gen_strset (destptr, dest, value));
24175	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 12);
24176	  emit_insn (gen_strset (destptr, dest, value));
24177	}
24178      emit_label (label);
24179      LABEL_NUSES (label) = 1;
24180    }
24181  if (max_size > 8)
24182    {
24183      rtx_code_label *label = ix86_expand_aligntest (count, 8, true);
24184      if (TARGET_64BIT)
24185	{
24186	  dest = change_address (destmem, DImode, destptr);
24187	  emit_insn (gen_strset (destptr, dest, value));
24188	}
24189      else
24190	{
24191	  dest = change_address (destmem, SImode, destptr);
24192	  emit_insn (gen_strset (destptr, dest, value));
24193	  dest = adjust_automodify_address_nv (dest, SImode, destptr, 4);
24194	  emit_insn (gen_strset (destptr, dest, value));
24195	}
24196      emit_label (label);
24197      LABEL_NUSES (label) = 1;
24198    }
24199  if (max_size > 4)
24200    {
24201      rtx_code_label *label = ix86_expand_aligntest (count, 4, true);
24202      dest = change_address (destmem, SImode, destptr);
24203      emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
24204      emit_label (label);
24205      LABEL_NUSES (label) = 1;
24206    }
24207  if (max_size > 2)
24208    {
24209      rtx_code_label *label = ix86_expand_aligntest (count, 2, true);
24210      dest = change_address (destmem, HImode, destptr);
24211      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
24212      emit_label (label);
24213      LABEL_NUSES (label) = 1;
24214    }
24215  if (max_size > 1)
24216    {
24217      rtx_code_label *label = ix86_expand_aligntest (count, 1, true);
24218      dest = change_address (destmem, QImode, destptr);
24219      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
24220      emit_label (label);
24221      LABEL_NUSES (label) = 1;
24222    }
24223}
24224
/* Depending on ISSETMEM, copy enough from SRCMEM to DESTMEM, or store
   enough into DESTMEM, to align it to DESIRED_ALIGNMENT.  The original
   alignment is ALIGN.
   Depending on ISSETMEM, either the arguments SRCMEM/SRCPTR or
   VALUE/VEC_VALUE are ignored.
   The return value is the updated DESTMEM.  */
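/* Sketch of the emitted prologue for ALIGN == 1 and DESIRED_ALIGNMENT == 8
   (each test is an ix86_expand_aligntest, each body an emit_memmov or
   emit_memset of the given size):

       if (dest & 1) { copy/set 1 byte;  count -= 1; }
       if (dest & 2) { copy/set 2 bytes; count -= 2; }
       if (dest & 4) { copy/set 4 bytes; count -= 4; }
  */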
24230static rtx
24231expand_set_or_movmem_prologue (rtx destmem, rtx srcmem,
24232				  rtx destptr, rtx srcptr, rtx value,
24233				  rtx vec_value, rtx count, int align,
24234				  int desired_alignment, bool issetmem)
24235{
24236  int i;
24237  for (i = 1; i < desired_alignment; i <<= 1)
24238    {
24239      if (align <= i)
24240	{
24241	  rtx_code_label *label = ix86_expand_aligntest (destptr, i, false);
24242	  if (issetmem)
24243	    {
24244	      if (vec_value && i > GET_MODE_SIZE (GET_MODE (value)))
24245		destmem = emit_memset (destmem, destptr, vec_value, i);
24246	      else
24247		destmem = emit_memset (destmem, destptr, value, i);
24248	    }
24249	  else
24250	    destmem = emit_memmov (destmem, &srcmem, destptr, srcptr, i);
24251	  ix86_adjust_counter (count, i);
24252	  emit_label (label);
24253	  LABEL_NUSES (label) = 1;
24254	  set_mem_align (destmem, i * 2 * BITS_PER_UNIT);
24255	}
24256    }
24257  return destmem;
24258}
24259
/* Test if COUNT & SIZE is nonzero and if so, expand a movmem
   or setmem sequence that is valid for SIZE..2*SIZE-1 bytes
   and jumps to DONE_LABEL.  */
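/* The trick used below, sketched for a copy with SIZE == 4 and a count
   known to be in the range 4..7, is to issue two potentially overlapping
   moves instead of a loop:

       copy 4 bytes from src to dest
       copy 4 bytes from src + count - 4 to dest + count - 4
       goto done_label

   so every byte is covered without knowing COUNT exactly.  */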
24263static void
24264expand_small_movmem_or_setmem (rtx destmem, rtx srcmem,
24265			       rtx destptr, rtx srcptr,
24266			       rtx value, rtx vec_value,
24267			       rtx count, int size,
24268			       rtx done_label, bool issetmem)
24269{
24270  rtx_code_label *label = ix86_expand_aligntest (count, size, false);
24271  machine_mode mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 1);
24272  rtx modesize;
24273  int n;
24274
24275  /* If we do not have vector value to copy, we must reduce size.  */
24276  if (issetmem)
24277    {
24278      if (!vec_value)
24279	{
24280	  if (GET_MODE (value) == VOIDmode && size > 8)
24281	    mode = Pmode;
24282	  else if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (value)))
24283	    mode = GET_MODE (value);
24284	}
24285      else
24286	mode = GET_MODE (vec_value), value = vec_value;
24287    }
24288  else
24289    {
24290      /* Choose appropriate vector mode.  */
24291      if (size >= 32)
24292	mode = TARGET_AVX ? V32QImode : TARGET_SSE ? V16QImode : DImode;
24293      else if (size >= 16)
24294	mode = TARGET_SSE ? V16QImode : DImode;
24295      srcmem = change_address (srcmem, mode, srcptr);
24296    }
24297  destmem = change_address (destmem, mode, destptr);
24298  modesize = GEN_INT (GET_MODE_SIZE (mode));
24299  gcc_assert (GET_MODE_SIZE (mode) <= size);
24300  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24301    {
24302      if (issetmem)
24303	emit_move_insn (destmem, gen_lowpart (mode, value));
24304      else
24305	{
24306          emit_move_insn (destmem, srcmem);
24307          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24308	}
24309      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24310    }
24311
24312  destmem = offset_address (destmem, count, 1);
24313  destmem = offset_address (destmem, GEN_INT (-2 * size),
24314			    GET_MODE_SIZE (mode));
24315  if (!issetmem)
24316    {
24317      srcmem = offset_address (srcmem, count, 1);
24318      srcmem = offset_address (srcmem, GEN_INT (-2 * size),
24319			       GET_MODE_SIZE (mode));
24320    }
24321  for (n = 0; n * GET_MODE_SIZE (mode) < size; n++)
24322    {
24323      if (issetmem)
24324	emit_move_insn (destmem, gen_lowpart (mode, value));
24325      else
24326	{
24327	  emit_move_insn (destmem, srcmem);
24328	  srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24329	}
24330      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24331    }
24332  emit_jump_insn (gen_jump (done_label));
24333  emit_barrier ();
24334
24335  emit_label (label);
24336  LABEL_NUSES (label) = 1;
24337}
24338
/* Handle a small memcpy (up to SIZE, which is supposed to be a small power
   of 2) and get ready for the main copy loop by copying the initial
   DESIRED_ALIGN-ALIGN bytes and the last SIZE bytes, adjusting
   DESTPTR/SRCPTR/COUNT so that we can proceed with a loop copying SIZE
   bytes at once.  Do moves in MODE.
24343   DONE_LABEL is a label after the whole copying sequence. The label is created
24344   on demand if *DONE_LABEL is NULL.
24345   MIN_SIZE is minimal size of block copied.  This value gets adjusted for new
24346   bounds after the initial copies.
24347
   DESTMEM/SRCMEM are memory expressions pointing to the copied block,
   DESTPTR/SRCPTR are pointers to the block.  DYNAMIC_CHECK indicates
   whether we will dispatch to a library call for large blocks.
24351
24352   In pseudocode we do:
24353
24354   if (COUNT < SIZE)
24355     {
24356       Assume that SIZE is 4. Bigger sizes are handled analogously
24357       if (COUNT & 4)
24358	 {
24359	    copy 4 bytes from SRCPTR to DESTPTR
24360	    copy 4 bytes from SRCPTR + COUNT - 4 to DESTPTR + COUNT - 4
24361	    goto done_label
24362	 }
24363       if (!COUNT)
24364	 goto done_label;
24365       copy 1 byte from SRCPTR to DESTPTR
24366       if (COUNT & 2)
24367	 {
24368	    copy 2 bytes from SRCPTR to DESTPTR
24369	    copy 2 bytes from SRCPTR + COUNT - 2 to DESTPTR + COUNT - 2
24370	 }
24371     }
24372   else
24373     {
24374       copy at least DESIRED_ALIGN-ALIGN bytes from SRCPTR to DESTPTR
24375       copy SIZE bytes from SRCPTR + COUNT - SIZE to DESTPTR + COUNT -SIZE
24376
24377       OLD_DESPTR = DESTPTR;
24378       Align DESTPTR up to DESIRED_ALIGN
24379       SRCPTR += DESTPTR - OLD_DESTPTR
24380       COUNT -= DEST_PTR - OLD_DESTPTR
24381       if (DYNAMIC_CHECK)
24382	 Round COUNT down to multiple of SIZE
24383       << optional caller supplied zero size guard is here >>
24384       << optional caller supplied dynamic check is here >>
24385       << caller supplied main copy loop is here >>
24386     }
24387   done_label:
24388  */
24389static void
24390expand_set_or_movmem_prologue_epilogue_by_misaligned_moves (rtx destmem, rtx srcmem,
24391							    rtx *destptr, rtx *srcptr,
24392							    machine_mode mode,
24393							    rtx value, rtx vec_value,
24394							    rtx *count,
24395							    rtx_code_label **done_label,
24396							    int size,
24397							    int desired_align,
24398							    int align,
24399							    unsigned HOST_WIDE_INT *min_size,
24400							    bool dynamic_check,
24401							    bool issetmem)
24402{
24403  rtx_code_label *loop_label = NULL, *label;
24404  int n;
24405  rtx modesize;
24406  int prolog_size = 0;
24407  rtx mode_value;
24408
  /* Choose the proper value to copy.  */
24410  if (issetmem && VECTOR_MODE_P (mode))
24411    mode_value = vec_value;
24412  else
24413    mode_value = value;
24414  gcc_assert (GET_MODE_SIZE (mode) <= size);
24415
24416  /* See if block is big or small, handle small blocks.  */
24417  if (!CONST_INT_P (*count) && *min_size < (unsigned HOST_WIDE_INT)size)
24418    {
24419      int size2 = size;
24420      loop_label = gen_label_rtx ();
24421
24422      if (!*done_label)
24423	*done_label = gen_label_rtx ();
24424
24425      emit_cmp_and_jump_insns (*count, GEN_INT (size2), GE, 0, GET_MODE (*count),
24426			       1, loop_label);
24427      size2 >>= 1;
24428
24429      /* Handle sizes > 3.  */
24430      for (;size2 > 2; size2 >>= 1)
24431	expand_small_movmem_or_setmem (destmem, srcmem,
24432				       *destptr, *srcptr,
24433				       value, vec_value,
24434				       *count,
24435				       size2, *done_label, issetmem);
      /* Nothing to copy?  Jump to DONE_LABEL if so.  */
24437      emit_cmp_and_jump_insns (*count, const0_rtx, EQ, 0, GET_MODE (*count),
24438			       1, *done_label);
24439
24440      /* Do a byte copy.  */
24441      destmem = change_address (destmem, QImode, *destptr);
24442      if (issetmem)
24443	emit_move_insn (destmem, gen_lowpart (QImode, value));
24444      else
24445	{
24446          srcmem = change_address (srcmem, QImode, *srcptr);
24447          emit_move_insn (destmem, srcmem);
24448	}
24449
24450      /* Handle sizes 2 and 3.  */
24451      label = ix86_expand_aligntest (*count, 2, false);
24452      destmem = change_address (destmem, HImode, *destptr);
24453      destmem = offset_address (destmem, *count, 1);
24454      destmem = offset_address (destmem, GEN_INT (-2), 2);
24455      if (issetmem)
24456        emit_move_insn (destmem, gen_lowpart (HImode, value));
24457      else
24458	{
24459	  srcmem = change_address (srcmem, HImode, *srcptr);
24460	  srcmem = offset_address (srcmem, *count, 1);
24461	  srcmem = offset_address (srcmem, GEN_INT (-2), 2);
24462	  emit_move_insn (destmem, srcmem);
24463	}
24464
24465      emit_label (label);
24466      LABEL_NUSES (label) = 1;
24467      emit_jump_insn (gen_jump (*done_label));
24468      emit_barrier ();
24469    }
24470  else
24471    gcc_assert (*min_size >= (unsigned HOST_WIDE_INT)size
24472		|| UINTVAL (*count) >= (unsigned HOST_WIDE_INT)size);
24473
24474  /* Start memcpy for COUNT >= SIZE.  */
24475  if (loop_label)
24476    {
24477       emit_label (loop_label);
24478       LABEL_NUSES (loop_label) = 1;
24479    }
24480
24481  /* Copy first desired_align bytes.  */
24482  if (!issetmem)
24483    srcmem = change_address (srcmem, mode, *srcptr);
24484  destmem = change_address (destmem, mode, *destptr);
24485  modesize = GEN_INT (GET_MODE_SIZE (mode));
24486  for (n = 0; prolog_size < desired_align - align; n++)
24487    {
24488      if (issetmem)
24489        emit_move_insn (destmem, mode_value);
24490      else
24491	{
24492          emit_move_insn (destmem, srcmem);
24493          srcmem = offset_address (srcmem, modesize, GET_MODE_SIZE (mode));
24494	}
24495      destmem = offset_address (destmem, modesize, GET_MODE_SIZE (mode));
24496      prolog_size += GET_MODE_SIZE (mode);
24497    }
24498
24499
24500  /* Copy last SIZE bytes.  */
24501  destmem = offset_address (destmem, *count, 1);
24502  destmem = offset_address (destmem,
24503			    GEN_INT (-size - prolog_size),
24504			    1);
24505  if (issetmem)
24506    emit_move_insn (destmem, mode_value);
24507  else
24508    {
24509      srcmem = offset_address (srcmem, *count, 1);
24510      srcmem = offset_address (srcmem,
24511			       GEN_INT (-size - prolog_size),
24512			       1);
24513      emit_move_insn (destmem, srcmem);
24514    }
24515  for (n = 1; n * GET_MODE_SIZE (mode) < size; n++)
24516    {
24517      destmem = offset_address (destmem, modesize, 1);
24518      if (issetmem)
24519	emit_move_insn (destmem, mode_value);
24520      else
24521	{
24522          srcmem = offset_address (srcmem, modesize, 1);
24523          emit_move_insn (destmem, srcmem);
24524	}
24525    }
24526
24527  /* Align destination.  */
24528  if (desired_align > 1 && desired_align > align)
24529    {
24530      rtx saveddest = *destptr;
24531
24532      gcc_assert (desired_align <= size);
      /* Align destptr up and place it in a new register.  */
24534      *destptr = expand_simple_binop (GET_MODE (*destptr), PLUS, *destptr,
24535				      GEN_INT (prolog_size),
24536				      NULL_RTX, 1, OPTAB_DIRECT);
24537      if (REG_P (*destptr) && REG_P (saveddest) && REG_POINTER (saveddest))
24538	REG_POINTER (*destptr) = 1;
24539      *destptr = expand_simple_binop (GET_MODE (*destptr), AND, *destptr,
24540				      GEN_INT (-desired_align),
24541				      *destptr, 1, OPTAB_DIRECT);
24542      /* See how many bytes we skipped.  */
24543      saveddest = expand_simple_binop (GET_MODE (*destptr), MINUS, saveddest,
24544				       *destptr,
24545				       saveddest, 1, OPTAB_DIRECT);
24546      /* Adjust srcptr and count.  */
24547      if (!issetmem)
24548	*srcptr = expand_simple_binop (GET_MODE (*srcptr), MINUS, *srcptr,
24549				       saveddest, *srcptr, 1, OPTAB_DIRECT);
24550      *count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24551				    saveddest, *count, 1, OPTAB_DIRECT);
24552      /* We copied at most size + prolog_size.  */
24553      if (*min_size > (unsigned HOST_WIDE_INT)(size + prolog_size))
24554	*min_size = (*min_size - size) & ~(unsigned HOST_WIDE_INT)(size - 1);
24555      else
24556	*min_size = 0;
24557
      /* Our loops always round down the block size, but for dispatch to
         the library call we need the precise value.  */
24560      if (dynamic_check)
24561	*count = expand_simple_binop (GET_MODE (*count), AND, *count,
24562				      GEN_INT (-size), *count, 1, OPTAB_DIRECT);
24563    }
24564  else
24565    {
24566      gcc_assert (prolog_size == 0);
24567      /* Decrease count, so we won't end up copying last word twice.  */
24568      if (!CONST_INT_P (*count))
24569	*count = expand_simple_binop (GET_MODE (*count), PLUS, *count,
24570				      constm1_rtx, *count, 1, OPTAB_DIRECT);
24571      else
24572	*count = GEN_INT ((UINTVAL (*count) - 1) & ~(unsigned HOST_WIDE_INT)(size - 1));
24573      if (*min_size)
24574	*min_size = (*min_size - 1) & ~(unsigned HOST_WIDE_INT)(size - 1);
24575    }
24576}
24577
24578
24579/* This function is like the previous one, except here we know how many bytes
24580   need to be copied.  That allows us to update alignment not only of DST, which
24581   is returned, but also of SRC, which is passed as a pointer for that
24582   reason.  */
24583static rtx
24584expand_set_or_movmem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
24585					   rtx srcreg, rtx value, rtx vec_value,
24586					   int desired_align, int align_bytes,
24587					   bool issetmem)
24588{
24589  rtx src = NULL;
24590  rtx orig_dst = dst;
24591  rtx orig_src = NULL;
24592  int piece_size = 1;
24593  int copied_bytes = 0;
24594
24595  if (!issetmem)
24596    {
24597      gcc_assert (srcp != NULL);
24598      src = *srcp;
24599      orig_src = src;
24600    }
24601
24602  for (piece_size = 1;
24603       piece_size <= desired_align && copied_bytes < align_bytes;
24604       piece_size <<= 1)
24605    {
24606      if (align_bytes & piece_size)
24607	{
24608	  if (issetmem)
24609	    {
24610	      if (vec_value && piece_size > GET_MODE_SIZE (GET_MODE (value)))
24611		dst = emit_memset (dst, destreg, vec_value, piece_size);
24612	      else
24613		dst = emit_memset (dst, destreg, value, piece_size);
24614	    }
24615	  else
24616	    dst = emit_memmov (dst, &src, destreg, srcreg, piece_size);
24617	  copied_bytes += piece_size;
24618	}
24619    }
24620  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
24621    set_mem_align (dst, desired_align * BITS_PER_UNIT);
24622  if (MEM_SIZE_KNOWN_P (orig_dst))
24623    set_mem_size (dst, MEM_SIZE (orig_dst) - align_bytes);
24624
24625  if (!issetmem)
24626    {
24627      int src_align_bytes = get_mem_align_offset (src, desired_align
24628						       * BITS_PER_UNIT);
24629      if (src_align_bytes >= 0)
24630	src_align_bytes = desired_align - src_align_bytes;
24631      if (src_align_bytes >= 0)
24632	{
24633	  unsigned int src_align;
24634	  for (src_align = desired_align; src_align >= 2; src_align >>= 1)
24635	    {
24636	      if ((src_align_bytes & (src_align - 1))
24637		   == (align_bytes & (src_align - 1)))
24638		break;
24639	    }
24640	  if (src_align > (unsigned int) desired_align)
24641	    src_align = desired_align;
24642	  if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
24643	    set_mem_align (src, src_align * BITS_PER_UNIT);
24644	}
24645      if (MEM_SIZE_KNOWN_P (orig_src))
24646	set_mem_size (src, MEM_SIZE (orig_src) - align_bytes);
24647      *srcp = src;
24648    }
24649
24650  return dst;
24651}
24652
24653/* Return true if ALG can be used in current context.
24654   Assume we expand memset if MEMSET is true.  */
24655static bool
24656alg_usable_p (enum stringop_alg alg, bool memset)
24657{
24658  if (alg == no_stringop)
24659    return false;
24660  if (alg == vector_loop)
24661    return TARGET_SSE || TARGET_AVX;
24662  /* Algorithms using the rep prefix want at least edi and ecx;
24663     additionally, memset wants eax and memcpy wants esi.  Don't
24664     consider such algorithms if the user has appropriated those
24665     registers for their own purposes.	*/
24666  if (alg == rep_prefix_1_byte
24667      || alg == rep_prefix_4_byte
24668      || alg == rep_prefix_8_byte)
24669    return !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
24670             || (memset ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
24671  return true;
24672}
24673
24674/* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.  */
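/* The decision below is driven by the per-CPU stringop_algs cost tables.
   As a purely illustrative example (not an actual table from this file),
   an entry list along the lines of

       {24, loop}, {128, rep_prefix_4_byte}, {-1, libcall}

   would pick "loop" for expected sizes up to 24 bytes, a 4-byte rep prefix
   up to 128 bytes, and a library call beyond that, subject to the
   usability checks in alg_usable_p.  */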
24675static enum stringop_alg
24676decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size,
24677	    unsigned HOST_WIDE_INT min_size, unsigned HOST_WIDE_INT max_size,
24678	    bool memset, bool zero_memset, int *dynamic_check, bool *noalign,
24679	    bool recur)
24680{
24681  const struct stringop_algs *algs;
24682  bool optimize_for_speed;
24683  int max = 0;
24684  const struct processor_costs *cost;
24685  int i;
24686  bool any_alg_usable_p = false;
24687
24688  *noalign = false;
24689  *dynamic_check = -1;
24690
24691  /* Even if the string operation call is cold, we still might spend a lot
24692     of time processing large blocks.  */
24693  if (optimize_function_for_size_p (cfun)
24694      || (optimize_insn_for_size_p ()
24695 	  && (max_size < 256
24696              || (expected_size != -1 && expected_size < 256))))
24697    optimize_for_speed = false;
24698  else
24699    optimize_for_speed = true;
24700
24701  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
24702  if (memset)
24703    algs = &cost->memset[TARGET_64BIT != 0];
24704  else
24705    algs = &cost->memcpy[TARGET_64BIT != 0];
24706
  /* Find the maximal size covered by a non-libcall algorithm in the
     table.  */
24708  for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24709    {
24710      enum stringop_alg candidate = algs->size[i].alg;
24711      bool usable = alg_usable_p (candidate, memset);
24712      any_alg_usable_p |= usable;
24713
24714      if (candidate != libcall && candidate && usable)
24715	max = algs->size[i].max;
24716    }
24717
  /* If the expected size is not known but the maximal size is small enough
     that the inline version is a win, set the expected size to the middle
     of the range.  */
24721  if (((max > 1 && (unsigned HOST_WIDE_INT) max >= max_size) || max == -1)
24722      && expected_size == -1)
24723    expected_size = min_size / 2 + max_size / 2;
24724
24725  /* If user specified the algorithm, honor it if possible.  */
24726  if (ix86_stringop_alg != no_stringop
24727      && alg_usable_p (ix86_stringop_alg, memset))
24728    return ix86_stringop_alg;
24729  /* rep; movq or rep; movl is the smallest variant.  */
24730  else if (!optimize_for_speed)
24731    {
24732      *noalign = true;
24733      if (!count || (count & 3) || (memset && !zero_memset))
24734	return alg_usable_p (rep_prefix_1_byte, memset)
24735	       ? rep_prefix_1_byte : loop_1_byte;
24736      else
24737	return alg_usable_p (rep_prefix_4_byte, memset)
24738	       ? rep_prefix_4_byte : loop;
24739    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
24742  else if (expected_size != -1 && expected_size < 4)
24743    return loop_1_byte;
24744  else if (expected_size != -1)
24745    {
24746      enum stringop_alg alg = libcall;
24747      bool alg_noalign = false;
24748      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
24749	{
24750	  /* We get here if the algorithms that were not libcall-based
24751	     were rep-prefix based and we are unable to use rep prefixes
24752	     based on global register usage.  Break out of the loop and
24753	     use the heuristic below.  */
24754	  if (algs->size[i].max == 0)
24755	    break;
24756	  if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
24757	    {
24758	      enum stringop_alg candidate = algs->size[i].alg;
24759
24760	      if (candidate != libcall && alg_usable_p (candidate, memset))
24761		{
24762		  alg = candidate;
24763		  alg_noalign = algs->size[i].noalign;
24764		}
24765	      /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
24766		 last non-libcall inline algorithm.  */
24767	      if (TARGET_INLINE_ALL_STRINGOPS)
24768		{
24769		  /* When the current size is best to be copied by a libcall,
24770		     but we are still forced to inline, run the heuristic below
24771		     that will pick code for medium sized blocks.  */
24772		  if (alg != libcall)
24773		    {
24774		      *noalign = alg_noalign;
24775		      return alg;
24776		    }
24777		  else if (!any_alg_usable_p)
24778		    break;
24779		}
24780	      else if (alg_usable_p (candidate, memset))
24781		{
24782		  *noalign = algs->size[i].noalign;
24783		  return candidate;
24784		}
24785	    }
24786	}
24787    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand
     and use blocks of at most that size, guessing that the average size
     will be roughly half of the maximum.
24792
24793     If this turns out to be bad, we might simply specify the preferred
24794     choice in ix86_costs.  */
24795  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24796      && (algs->unknown_size == libcall
24797	  || !alg_usable_p (algs->unknown_size, memset)))
24798    {
24799      enum stringop_alg alg;
24800      HOST_WIDE_INT new_expected_size = (max > 0 ? max : 4096) / 2;
24801
24802      /* If there aren't any usable algorithms or if recursing already,
24803	 then recursing on smaller sizes or same size isn't going to
24804	 find anything.  Just return the simple byte-at-a-time copy loop.  */
24805      if (!any_alg_usable_p || recur)
24806	{
24807	  /* Pick something reasonable.  */
24808	  if (TARGET_INLINE_STRINGOPS_DYNAMICALLY && !recur)
24809	    *dynamic_check = 128;
24810	  return loop_1_byte;
24811	}
24812      alg = decide_alg (count, new_expected_size, min_size, max_size, memset,
24813			zero_memset, dynamic_check, noalign, true);
24814      gcc_assert (*dynamic_check == -1);
24815      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
24816	*dynamic_check = max;
24817      else
24818	gcc_assert (alg != libcall);
24819      return alg;
24820    }
24821  return (alg_usable_p (algs->unknown_size, memset)
24822	  ? algs->unknown_size : libcall);
24823}
24824
24825/* Decide on alignment.  We know that the operand is already aligned to ALIGN
24826   (ALIGN can be based on profile feedback and thus it is not 100% guaranteed).  */
24827static int
24828decide_alignment (int align,
24829		  enum stringop_alg alg,
24830		  int expected_size,
24831		  machine_mode move_mode)
24832{
24833  int desired_align = 0;
24834
24835  gcc_assert (alg != no_stringop);
24836
24837  if (alg == libcall)
24838    return 0;
24839  if (move_mode == VOIDmode)
24840    return 0;
24841
24842  desired_align = GET_MODE_SIZE (move_mode);
  /* PentiumPro has special logic triggering for 8 byte aligned blocks,
     copying the whole cache line at once.  */
24845  if (TARGET_PENTIUMPRO
24846      && (alg == rep_prefix_4_byte || alg == rep_prefix_1_byte))
24847    desired_align = 8;
24848
24849  if (optimize_size)
24850    desired_align = 1;
24851  if (desired_align < align)
24852    desired_align = align;
24853  if (expected_size != -1 && expected_size < 4)
24854    desired_align = align;
24855
24856  return desired_align;
24857}
24858
24859
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
24865static rtx
24866promote_duplicated_reg (machine_mode mode, rtx val)
24867{
24868  machine_mode valmode = GET_MODE (val);
24869  rtx tmp;
24870  int nops = mode == DImode ? 3 : 2;
24871
24872  gcc_assert (mode == SImode || mode == DImode || val == const0_rtx);
24873  if (val == const0_rtx)
24874    return copy_to_mode_reg (mode, CONST0_RTX (mode));
24875  if (CONST_INT_P (val))
24876    {
24877      HOST_WIDE_INT v = INTVAL (val) & 255;
24878
24879      v |= v << 8;
24880      v |= v << 16;
24881      if (mode == DImode)
24882        v |= (v << 16) << 16;
24883      return copy_to_mode_reg (mode, gen_int_mode (v, mode));
24884    }
24885
24886  if (valmode == VOIDmode)
24887    valmode = QImode;
24888  if (valmode != QImode)
24889    val = gen_lowpart (QImode, val);
24890  if (mode == QImode)
24891    return val;
24892  if (!TARGET_PARTIAL_REG_STALL)
24893    nops--;
24894  if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
24895      + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
24896      <= (ix86_cost->shift_const + ix86_cost->add) * nops
24897          + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
24898    {
24899      rtx reg = convert_modes (mode, QImode, val, true);
24900      tmp = promote_duplicated_reg (mode, const1_rtx);
24901      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
24902				  OPTAB_DIRECT);
24903    }
24904  else
24905    {
24906      rtx reg = convert_modes (mode, QImode, val, true);
24907
24908      if (!TARGET_PARTIAL_REG_STALL)
24909	if (mode == SImode)
24910	  emit_insn (gen_movsi_insv_1 (reg, reg));
24911	else
24912	  emit_insn (gen_movdi_insv_1 (reg, reg));
24913      else
24914	{
24915	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
24916				     NULL, 1, OPTAB_DIRECT);
24917	  reg =
24918	    expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24919	}
24920      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
24921			         NULL, 1, OPTAB_DIRECT);
24922      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24923      if (mode == SImode)
24924	return reg;
24925      tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
24926				 NULL, 1, OPTAB_DIRECT);
24927      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
24928      return reg;
24929    }
24930}
24931
/* Duplicate the value VAL using promote_duplicated_reg into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by the
   prologue raising the alignment from ALIGN to DESIRED_ALIGN.  */
24935static rtx
24936promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align,
24937				int align)
24938{
24939  rtx promoted_val;
24940
24941  if (TARGET_64BIT
24942      && (size_needed > 4 || (desired_align > align && desired_align > 4)))
24943    promoted_val = promote_duplicated_reg (DImode, val);
24944  else if (size_needed > 2 || (desired_align > align && desired_align > 2))
24945    promoted_val = promote_duplicated_reg (SImode, val);
24946  else if (size_needed > 1 || (desired_align > align && desired_align > 1))
24947    promoted_val = promote_duplicated_reg (HImode, val);
24948  else
24949    promoted_val = val;
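  /* Illustrative examples: a 64-bit main loop with size_needed == 8 gets a
     DImode-promoted value, while a 2-byte epilogue chunk (size_needed == 2,
     no extra alignment needed) only requires the HImode promotion.  */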
24950
24951  return promoted_val;
24952}
24953
/* Expand string move (memcpy) or store (memset) operation.  Use i386 string
24955   operations when profitable.  The code depends upon architecture, block size
24956   and alignment, but always has one of the following overall structures:
24957
24958   Aligned move sequence:
24959
24960     1) Prologue guard: Conditional that jumps up to epilogues for small
	blocks that can be handled by the epilogue alone.  This is faster
	but also needed for correctness, since the prologue assumes the block
	is larger than the desired alignment.
24964
24965	Optional dynamic check for size and libcall for large
24966	blocks is emitted here too, with -minline-stringops-dynamically.
24967
24968     2) Prologue: copy first few bytes in order to get destination
24969	aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
24970	than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
24971	copied.  We emit either a jump tree on power of two sized
24972	blocks, or a byte loop.
24973
24974     3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24975	with specified algorithm.
24976
24977     4) Epilogue: code copying tail of the block that is too small to be
24978	handled by main body (or up to size guarded by prologue guard).
24979
24980  Misaligned move sequence
24981
     1) Misaligned move prologue/epilogue containing:
24983        a) Prologue handling small memory blocks and jumping to done_label
24984	   (skipped if blocks are known to be large enough)
	b) Single possibly misaligned move copying the first
	   DESIRED_ALIGN-ALIGN bytes if alignment is needed
24987	   (skipped if alignment is not needed)
24988        c) Copy of last SIZE_NEEDED bytes by possibly misaligned moves
24989
24990     2) Zero size guard dispatching to done_label, if needed
24991
     3) Dispatch to library call, if needed,

     4) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
24995	with specified algorithm.  */
24996bool
24997ix86_expand_set_or_movmem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
24998			   rtx align_exp, rtx expected_align_exp,
24999			   rtx expected_size_exp, rtx min_size_exp,
25000			   rtx max_size_exp, rtx probable_max_size_exp,
25001			   bool issetmem)
25002{
25003  rtx destreg;
25004  rtx srcreg = NULL;
25005  rtx_code_label *label = NULL;
25006  rtx tmp;
25007  rtx_code_label *jump_around_label = NULL;
25008  HOST_WIDE_INT align = 1;
25009  unsigned HOST_WIDE_INT count = 0;
25010  HOST_WIDE_INT expected_size = -1;
25011  int size_needed = 0, epilogue_size_needed;
25012  int desired_align = 0, align_bytes = 0;
25013  enum stringop_alg alg;
25014  rtx promoted_val = NULL;
25015  rtx vec_promoted_val = NULL;
25016  bool force_loopy_epilogue = false;
25017  int dynamic_check;
25018  bool need_zero_guard = false;
25019  bool noalign;
25020  machine_mode move_mode = VOIDmode;
25021  int unroll_factor = 1;
25022  /* TODO: Once value ranges are available, fill in proper data.  */
25023  unsigned HOST_WIDE_INT min_size = 0;
25024  unsigned HOST_WIDE_INT max_size = -1;
25025  unsigned HOST_WIDE_INT probable_max_size = -1;
25026  bool misaligned_prologue_used = false;
25027
25028  if (CONST_INT_P (align_exp))
25029    align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
25031  if (CONST_INT_P (expected_align_exp)
25032      && INTVAL (expected_align_exp) > align)
25033    align = INTVAL (expected_align_exp);
25034  /* ALIGN is the minimum of destination and source alignment, but we care here
25035     just about destination alignment.  */
25036  else if (!issetmem
25037	   && MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
25038    align = MEM_ALIGN (dst) / BITS_PER_UNIT;
25039
25040  if (CONST_INT_P (count_exp))
25041    {
25042      min_size = max_size = probable_max_size = count = expected_size
25043	= INTVAL (count_exp);
25044      /* When COUNT is 0, there is nothing to do.  */
25045      if (!count)
25046	return true;
25047    }
25048  else
25049    {
25050      if (min_size_exp)
25051	min_size = INTVAL (min_size_exp);
25052      if (max_size_exp)
25053	max_size = INTVAL (max_size_exp);
25054      if (probable_max_size_exp)
25055	probable_max_size = INTVAL (probable_max_size_exp);
25056      if (CONST_INT_P (expected_size_exp))
25057	expected_size = INTVAL (expected_size_exp);
25058     }
25059
25060  /* Make sure we don't need to care about overflow later on.  */
25061  if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
25062    return false;
25063
25064  /* Step 0: Decide on preferred algorithm, desired alignment and
25065     size of chunks to be copied by main loop.  */
25066  alg = decide_alg (count, expected_size, min_size, probable_max_size,
25067		    issetmem,
25068		    issetmem && val_exp == const0_rtx,
25069		    &dynamic_check, &noalign, false);
25070  if (alg == libcall)
25071    return false;
25072  gcc_assert (alg != no_stringop);
25073
  /* For now the vector version of memset is generated only for memory zeroing,
     as creating the promoted vector value is very cheap in this case.  */
25076  if (issetmem && alg == vector_loop && val_exp != const0_rtx)
25077    alg = unrolled_loop;
25078
25079  if (!count)
25080    count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
25081  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
25082  if (!issetmem)
25083    srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
25084
25085  unroll_factor = 1;
25086  move_mode = word_mode;
25087  switch (alg)
25088    {
25089    case libcall:
25090    case no_stringop:
25091    case last_alg:
25092      gcc_unreachable ();
25093    case loop_1_byte:
25094      need_zero_guard = true;
25095      move_mode = QImode;
25096      break;
25097    case loop:
25098      need_zero_guard = true;
25099      break;
25100    case unrolled_loop:
25101      need_zero_guard = true;
25102      unroll_factor = (TARGET_64BIT ? 4 : 2);
25103      break;
25104    case vector_loop:
25105      need_zero_guard = true;
25106      unroll_factor = 4;
25107      /* Find the widest supported mode.  */
25108      move_mode = word_mode;
25109      while (optab_handler (mov_optab, GET_MODE_WIDER_MODE (move_mode))
25110	     != CODE_FOR_nothing)
25111	  move_mode = GET_MODE_WIDER_MODE (move_mode);
25112
25113      /* Find the corresponding vector mode with the same size as MOVE_MODE.
25114	 MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.).  */
25115      if (GET_MODE_SIZE (move_mode) > GET_MODE_SIZE (word_mode))
25116	{
25117	  int nunits = GET_MODE_SIZE (move_mode) / GET_MODE_SIZE (word_mode);
25118	  move_mode = mode_for_vector (word_mode, nunits);
25119	  if (optab_handler (mov_optab, move_mode) == CODE_FOR_nothing)
25120	    move_mode = word_mode;
25121	}
25122      gcc_assert (optab_handler (mov_optab, move_mode) != CODE_FOR_nothing);
25123      break;
25124    case rep_prefix_8_byte:
25125      move_mode = DImode;
25126      break;
25127    case rep_prefix_4_byte:
25128      move_mode = SImode;
25129      break;
25130    case rep_prefix_1_byte:
25131      move_mode = QImode;
25132      break;
25133    }
25134  size_needed = GET_MODE_SIZE (move_mode) * unroll_factor;
25135  epilogue_size_needed = size_needed;
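  /* E.g. (illustrative): unrolled_loop on a 64-bit target copies
     GET_MODE_SIZE (DImode) * 4 == 32 bytes per main loop iteration, while
     rep_prefix_1_byte works on 1 byte chunks.  */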
25136
  /* If we are going to emit any library calls conditionally, make sure any
     pending stack adjustment happens before the first conditional branch;
     otherwise it will be emitted on the library call path only and won't
     happen on the other branches.  */
25141  if (dynamic_check != -1)
25142    do_pending_stack_adjust ();
25143
25144  desired_align = decide_alignment (align, alg, expected_size, move_mode);
25145  if (!TARGET_ALIGN_STRINGOPS || noalign)
25146    align = desired_align;
25147
25148  /* Step 1: Prologue guard.  */
25149
25150  /* Alignment code needs count to be in register.  */
25151  if (CONST_INT_P (count_exp) && desired_align > align)
25152    {
25153      if (INTVAL (count_exp) > desired_align
25154	  && INTVAL (count_exp) > size_needed)
25155	{
25156	  align_bytes
25157	    = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
25158	  if (align_bytes <= 0)
25159	    align_bytes = 0;
25160	  else
25161	    align_bytes = desired_align - align_bytes;
25162	}
25163      if (align_bytes == 0)
25164	count_exp = force_reg (counter_mode (count_exp), count_exp);
25165    }
25166  gcc_assert (desired_align >= 1 && align >= 1);
25167
25168  /* Misaligned move sequences handle both prologue and epilogue at once.
     Default code generation results in smaller code for large alignments
     and also avoids redundant work when sizes are known precisely.  */
25171  misaligned_prologue_used
25172    = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
25173       && MAX (desired_align, epilogue_size_needed) <= 32
25174       && desired_align <= epilogue_size_needed
25175       && ((desired_align > align && !align_bytes)
25176	   || (!count && epilogue_size_needed > 1)));
25177
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).
     For now the misaligned move sequences do not have a fast path
     without broadcasting.  */
25183  if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
25184    {
25185      if (alg == vector_loop)
25186	{
25187	  gcc_assert (val_exp == const0_rtx);
25188	  vec_promoted_val = promote_duplicated_reg (move_mode, val_exp);
25189	  promoted_val = promote_duplicated_reg_to_size (val_exp,
25190							 GET_MODE_SIZE (word_mode),
25191							 desired_align, align);
25192	}
25193      else
25194	{
25195	  promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25196							 desired_align, align);
25197	}
25198    }
  /* Misaligned move sequences handle both prologues and epilogues at once.
     Default code generation results in smaller code for large alignments and
     also avoids redundant work when sizes are known precisely.  */
25202  if (misaligned_prologue_used)
25203    {
      /* The misaligned move prologue handles small blocks by itself.  */
25205      expand_set_or_movmem_prologue_epilogue_by_misaligned_moves
25206	   (dst, src, &destreg, &srcreg,
25207	    move_mode, promoted_val, vec_promoted_val,
25208	    &count_exp,
25209	    &jump_around_label,
25210            desired_align < align
25211	    ? MAX (desired_align, epilogue_size_needed) : epilogue_size_needed,
25212	    desired_align, align, &min_size, dynamic_check, issetmem);
25213      if (!issetmem)
25214        src = change_address (src, BLKmode, srcreg);
25215      dst = change_address (dst, BLKmode, destreg);
25216      set_mem_align (dst, desired_align * BITS_PER_UNIT);
25217      epilogue_size_needed = 0;
25218      if (need_zero_guard
25219	  && min_size < (unsigned HOST_WIDE_INT) size_needed)
25220	{
25221	  /* It is possible that we copied enough so the main loop will not
25222	     execute.  */
25223	  gcc_assert (size_needed > 1);
25224	  if (jump_around_label == NULL_RTX)
25225	    jump_around_label = gen_label_rtx ();
25226	  emit_cmp_and_jump_insns (count_exp,
25227				   GEN_INT (size_needed),
25228				   LTU, 0, counter_mode (count_exp), 1, jump_around_label);
25229	  if (expected_size == -1
25230	      || expected_size < (desired_align - align) / 2 + size_needed)
25231	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25232	  else
25233	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25234	}
25235    }
  /* Ensure that the alignment prologue won't copy past the end of the block.  */
25237  else if (size_needed > 1 || (desired_align > 1 && desired_align > align))
25238    {
25239      epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
25240      /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
25241	 Make sure it is power of 2.  */
25242      epilogue_size_needed = 1 << (floor_log2 (epilogue_size_needed) + 1);
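      /* Illustrative numbers: size_needed == 16 and desired_align - align == 12
	 give MAX (15, 12) == 15, which the rounding above turns into the next
	 power of two, 16.  */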
25243
      /* To improve performance of small blocks, we jump around the VAL
	 promoting code.  This means that if the promoted VAL is not constant,
	 we might not use it in the epilogue and have to use the byte
	 loop variant.  */
25248      if (issetmem && epilogue_size_needed > 2 && !promoted_val)
25249	force_loopy_epilogue = true;
25250      if ((count && count < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25251	  || max_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25252	{
	  /* If the main algorithm works on QImode, no epilogue is needed.
	     For small sizes just don't align anything.  */
25255	  if (size_needed == 1)
25256	    desired_align = align;
25257	  else
25258	    goto epilogue;
25259	}
25260      else if (!count
25261	       && min_size < (unsigned HOST_WIDE_INT) epilogue_size_needed)
25262	{
25263	  label = gen_label_rtx ();
25264	  emit_cmp_and_jump_insns (count_exp,
25265				   GEN_INT (epilogue_size_needed),
25266				   LTU, 0, counter_mode (count_exp), 1, label);
25267	  if (expected_size == -1 || expected_size < epilogue_size_needed)
25268	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25269	  else
25270	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25271	}
25272    }
25273
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
25276  if (dynamic_check != -1)
25277    {
25278      if (!issetmem && CONST_INT_P (count_exp))
25279	{
25280	  if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
25281	    {
25282	      emit_block_move_via_libcall (dst, src, count_exp, false);
25283	      count_exp = const0_rtx;
25284	      goto epilogue;
25285	    }
25286	}
25287      else
25288	{
25289	  rtx_code_label *hot_label = gen_label_rtx ();
25290	  if (jump_around_label == NULL_RTX)
25291	    jump_around_label = gen_label_rtx ();
25292	  emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
25293				   LEU, 0, counter_mode (count_exp),
25294				   1, hot_label);
25295	  predict_jump (REG_BR_PROB_BASE * 90 / 100);
25296	  if (issetmem)
25297	    set_storage_via_libcall (dst, count_exp, val_exp, false);
25298	  else
25299	    emit_block_move_via_libcall (dst, src, count_exp, false);
25300	  emit_jump (jump_around_label);
25301	  emit_label (hot_label);
25302	}
25303    }
25304
25305  /* Step 2: Alignment prologue.  */
  /* Do the expensive promotion once we have branched off the small blocks.  */
25307  if (issetmem && !promoted_val)
25308    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
25309						   desired_align, align);
25310
25311  if (desired_align > align && !misaligned_prologue_used)
25312    {
25313      if (align_bytes == 0)
25314	{
	  /* Except for the first move in the prologue, we no longer know
	     the constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
25319	  dst = change_address (dst, BLKmode, destreg);
25320	  if (!issetmem)
25321	    src = change_address (src, BLKmode, srcreg);
25322	  dst = expand_set_or_movmem_prologue (dst, src, destreg, srcreg,
25323					    promoted_val, vec_promoted_val,
25324					    count_exp, align, desired_align,
25325					    issetmem);
25326	  /* At most desired_align - align bytes are copied.  */
25327	  if (min_size < (unsigned)(desired_align - align))
25328	    min_size = 0;
25329	  else
25330	    min_size -= desired_align - align;
25331	}
25332      else
25333	{
25334	  /* If we know how many bytes need to be stored before dst is
25335	     sufficiently aligned, maintain aliasing info accurately.  */
25336	  dst = expand_set_or_movmem_constant_prologue (dst, &src, destreg,
25337							   srcreg,
25338							   promoted_val,
25339							   vec_promoted_val,
25340							   desired_align,
25341							   align_bytes,
25342							   issetmem);
25343
25344	  count_exp = plus_constant (counter_mode (count_exp),
25345				     count_exp, -align_bytes);
25346	  count -= align_bytes;
25347	  min_size -= align_bytes;
25348	  max_size -= align_bytes;
25349	}
25350      if (need_zero_guard
25351	  && min_size < (unsigned HOST_WIDE_INT) size_needed
25352	  && (count < (unsigned HOST_WIDE_INT) size_needed
25353	      || (align_bytes == 0
25354		  && count < ((unsigned HOST_WIDE_INT) size_needed
25355			      + desired_align - align))))
25356	{
25357	  /* It is possible that we copied enough so the main loop will not
25358	     execute.  */
25359	  gcc_assert (size_needed > 1);
25360	  if (label == NULL_RTX)
25361	    label = gen_label_rtx ();
25362	  emit_cmp_and_jump_insns (count_exp,
25363				   GEN_INT (size_needed),
25364				   LTU, 0, counter_mode (count_exp), 1, label);
25365	  if (expected_size == -1
25366	      || expected_size < (desired_align - align) / 2 + size_needed)
25367	    predict_jump (REG_BR_PROB_BASE * 20 / 100);
25368	  else
25369	    predict_jump (REG_BR_PROB_BASE * 60 / 100);
25370	}
25371    }
25372  if (label && size_needed == 1)
25373    {
25374      emit_label (label);
25375      LABEL_NUSES (label) = 1;
25376      label = NULL;
25377      epilogue_size_needed = 1;
25378      if (issetmem)
25379	promoted_val = val_exp;
25380    }
25381  else if (label == NULL_RTX && !misaligned_prologue_used)
25382    epilogue_size_needed = size_needed;
25383
25384  /* Step 3: Main loop.  */
25385
25386  switch (alg)
25387    {
25388    case libcall:
25389    case no_stringop:
25390    case last_alg:
25391      gcc_unreachable ();
25392    case loop_1_byte:
25393    case loop:
25394    case unrolled_loop:
25395      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, promoted_val,
25396				     count_exp, move_mode, unroll_factor,
25397				     expected_size, issetmem);
25398      break;
25399    case vector_loop:
25400      expand_set_or_movmem_via_loop (dst, src, destreg, srcreg,
25401				     vec_promoted_val, count_exp, move_mode,
25402				     unroll_factor, expected_size, issetmem);
25403      break;
25404    case rep_prefix_8_byte:
25405    case rep_prefix_4_byte:
25406    case rep_prefix_1_byte:
25407      expand_set_or_movmem_via_rep (dst, src, destreg, srcreg, promoted_val,
25408				       val_exp, count_exp, move_mode, issetmem);
25409      break;
25410    }
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
25412  if (CONST_INT_P (count_exp))
25413    {
25414      if (!issetmem)
25415	src = adjust_automodify_address_nv (src, BLKmode, srcreg,
25416					    (count / size_needed) * size_needed);
25417      dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
25418					  (count / size_needed) * size_needed);
25419    }
25420  else
25421    {
25422      if (!issetmem)
25423	src = change_address (src, BLKmode, srcreg);
25424      dst = change_address (dst, BLKmode, destreg);
25425    }
25426
25427  /* Step 4: Epilogue to copy the remaining bytes.  */
25428 epilogue:
25429  if (label)
25430    {
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
25435
25436      if (size_needed < epilogue_size_needed)
25437	{
25438	  tmp =
25439	    expand_simple_binop (counter_mode (count_exp), AND, count_exp,
25440				 GEN_INT (size_needed - 1), count_exp, 1,
25441				 OPTAB_DIRECT);
25442	  if (tmp != count_exp)
25443	    emit_move_insn (count_exp, tmp);
25444	}
25445      emit_label (label);
25446      LABEL_NUSES (label) = 1;
25447    }
25448
25449  if (count_exp != const0_rtx && epilogue_size_needed > 1)
25450    {
25451      if (force_loopy_epilogue)
25452	expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
25453					 epilogue_size_needed);
25454      else
25455	{
25456	  if (issetmem)
25457	    expand_setmem_epilogue (dst, destreg, promoted_val,
25458				    vec_promoted_val, count_exp,
25459				    epilogue_size_needed);
25460	  else
25461	    expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
25462				    epilogue_size_needed);
25463	}
25464    }
25465  if (jump_around_label)
25466    emit_label (jump_around_label);
25467  return true;
25468}
25469
25470
25471/* Expand the appropriate insns for doing strlen if not just doing
25472   repnz; scasb
25473
25474   out = result, initialized with the start address
25475   align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
	not aligned, otherwise undefined
25478
   This is just the body.  It needs the initializations mentioned above and
   some address computation at the end.  These things are done in i386.md.  */
25481
25482static void
25483ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
25484{
25485  int align;
25486  rtx tmp;
25487  rtx_code_label *align_2_label = NULL;
25488  rtx_code_label *align_3_label = NULL;
25489  rtx_code_label *align_4_label = gen_label_rtx ();
25490  rtx_code_label *end_0_label = gen_label_rtx ();
25491  rtx mem;
25492  rtx tmpreg = gen_reg_rtx (SImode);
25493  rtx scratch = gen_reg_rtx (SImode);
25494  rtx cmp;
25495
25496  align = 0;
25497  if (CONST_INT_P (align_rtx))
25498    align = INTVAL (align_rtx);
25499
25500  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
25501
25502  /* Is there a known alignment and is it less than 4?  */
25503  if (align < 4)
25504    {
25505      rtx scratch1 = gen_reg_rtx (Pmode);
25506      emit_move_insn (scratch1, out);
25507      /* Is there a known alignment and is it not 2? */
25508      if (align != 2)
25509	{
25510	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
25511	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
25512
25513	  /* Leave just the 3 lower bits.  */
25514	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
25515				    NULL_RTX, 0, OPTAB_WIDEN);
25516
25517	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25518				   Pmode, 1, align_4_label);
25519	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
25520				   Pmode, 1, align_2_label);
25521	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
25522				   Pmode, 1, align_3_label);
25523	}
25524      else
25525        {
	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
	     check whether it is aligned to 4 bytes.  */
25528
25529	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
25530				    NULL_RTX, 0, OPTAB_WIDEN);
25531
25532	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
25533				   Pmode, 1, align_4_label);
25534        }
25535
25536      mem = change_address (src, QImode, out);
25537
25538      /* Now compare the bytes.  */
25539
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
25541      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
25542			       QImode, 1, end_0_label);
25543
25544      /* Increment the address.  */
25545      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25546
25547      /* Not needed with an alignment of 2 */
25548      if (align != 2)
25549	{
25550	  emit_label (align_2_label);
25551
25552	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25553				   end_0_label);
25554
25555	  emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25556
25557	  emit_label (align_3_label);
25558	}
25559
25560      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
25561			       end_0_label);
25562
25563      emit_insn (ix86_gen_add3 (out, out, const1_rtx));
25564    }
25565
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; it only makes the program larger and does not help
     to speed it up.  */
25569  emit_label (align_4_label);
25570
25571  mem = change_address (src, SImode, out);
25572  emit_move_insn (scratch, mem);
25573  emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
25574
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
25577
25578  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
25579  emit_insn (gen_one_cmplsi2 (scratch, scratch));
25580  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
25581  emit_insn (gen_andsi3 (tmpreg, tmpreg,
25582			 gen_int_mode (0x80808080, SImode)));
25583  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
25584			   align_4_label);
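  /* The sequence above computes the classic has-zero-byte test
     (x - 0x01010101) & ~x & 0x80808080.  Worked example (illustrative):
     for x == 0x41410041 we get 0x403fff40 & 0xbebeffbe & 0x80808080
     == 0x00008000, which is nonzero because the second byte of x is zero;
     a word with no zero bytes yields 0 and the loop continues.  */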
25585
25586  if (TARGET_CMOVE)
25587    {
25588       rtx reg = gen_reg_rtx (SImode);
25589       rtx reg2 = gen_reg_rtx (Pmode);
25590       emit_move_insn (reg, tmpreg);
25591       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
25592
25593       /* If zero is not in the first two bytes, move two bytes forward.  */
25594       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25595       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25596       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25597       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
25598			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
25599						     reg,
25600						     tmpreg)));
25601       /* Emit lea manually to avoid clobbering of flags.  */
25602       emit_insn (gen_rtx_SET (SImode, reg2,
25603			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
25604
25605       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25606       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
25607       emit_insn (gen_rtx_SET (VOIDmode, out,
25608			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
25609						     reg2,
25610						     out)));
25611    }
25612  else
25613    {
25614       rtx_code_label *end_2_label = gen_label_rtx ();
25615       /* Is zero in the first two bytes? */
25616
25617       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
25618       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25619       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
25620       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25621                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
25622                            pc_rtx);
25623       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25624       JUMP_LABEL (tmp) = end_2_label;
25625
25626       /* Not in the first two.  Move two bytes forward.  */
25627       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
25628       emit_insn (ix86_gen_add3 (out, out, const2_rtx));
25629
25630       emit_label (end_2_label);
25631
25632    }
25633
25634  /* Avoid branch in fixing the byte.  */
25635  tmpreg = gen_lowpart (QImode, tmpreg);
25636  emit_insn (gen_addqi3_cconly_overflow (tmpreg, tmpreg));
25637  tmp = gen_rtx_REG (CCmode, FLAGS_REG);
25638  cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
25639  emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
25640
25641  emit_label (end_0_label);
25642}
25643
25644/* Expand strlen.  */
25645
25646bool
25647ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
25648{
25649  rtx addr, scratch1, scratch2, scratch3, scratch4;
25650
  /* The generic case of the strlen expander is long.  Avoid expanding
     it unless TARGET_INLINE_ALL_STRINGOPS.  */
25653
25654  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25655      && !TARGET_INLINE_ALL_STRINGOPS
25656      && !optimize_insn_for_size_p ()
25657      && (!CONST_INT_P (align) || INTVAL (align) < 4))
25658    return false;
25659
25660  addr = force_reg (Pmode, XEXP (src, 0));
25661  scratch1 = gen_reg_rtx (Pmode);
25662
25663  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
25664      && !optimize_insn_for_size_p ())
25665    {
      /* Well, it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside
         output_strlen_unroll().  But since &bar[strlen(bar)] is often used
         and this needs one fewer register for the lifetime of
         output_strlen_unroll(), this is better.  */
25673
25674      emit_move_insn (out, addr);
25675
25676      ix86_expand_strlensi_unroll_1 (out, src, align);
25677
25678      /* strlensi_unroll_1 returns the address of the zero at the end of
25679         the string, like memchr(), so compute the length by subtracting
25680         the start address.  */
25681      emit_insn (ix86_gen_sub3 (out, out, addr));
25682    }
25683  else
25684    {
25685      rtx unspec;
25686
25687      /* Can't use this if the user has appropriated eax, ecx, or edi.  */
25688      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
25689        return false;
25690
25691      scratch2 = gen_reg_rtx (Pmode);
25692      scratch3 = gen_reg_rtx (Pmode);
25693      scratch4 = force_reg (Pmode, constm1_rtx);
25694
25695      emit_move_insn (scratch3, addr);
25696      eoschar = force_reg (QImode, eoschar);
25697
25698      src = replace_equiv_address_nv (src, scratch3);
25699
25700      /* If .md starts supporting :P, this can be done in .md.  */
25701      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
25702						 scratch4), UNSPEC_SCAS);
25703      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
25704      emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
25705      emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
25706    }
25707  return true;
25708}
25709
/* For a given symbol (function) construct code to compute the address of its
   PLT entry in the large x86-64 PIC model.  */
25712static rtx
25713construct_plt_address (rtx symbol)
25714{
25715  rtx tmp, unspec;
25716
25717  gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
25718  gcc_assert (ix86_cmodel == CM_LARGE_PIC && !TARGET_PECOFF);
25719  gcc_assert (Pmode == DImode);
25720
25721  tmp = gen_reg_rtx (Pmode);
25722  unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
25723
25724  emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
25725  emit_insn (ix86_gen_add3 (tmp, tmp, pic_offset_table_rtx));
25726  return tmp;
25727}
25728
25729rtx
25730ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
25731		  rtx callarg2,
25732		  rtx pop, bool sibcall)
25733{
25734  rtx vec[3];
25735  rtx use = NULL, call;
25736  unsigned int vec_len = 0;
25737
25738  if (pop == const0_rtx)
25739    pop = NULL;
25740  gcc_assert (!TARGET_64BIT || !pop);
25741
25742  if (TARGET_MACHO && !TARGET_64BIT)
25743    {
25744#if TARGET_MACHO
25745      if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
25746	fnaddr = machopic_indirect_call_target (fnaddr);
25747#endif
25748    }
25749  else
25750    {
25751      /* Static functions and indirect calls don't need the pic register.  */
25752      if (flag_pic
25753	  && (!TARGET_64BIT
25754	      || (ix86_cmodel == CM_LARGE_PIC
25755		  && DEFAULT_ABI != MS_ABI))
25756	  && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25757	  && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
25758	{
25759	  use_reg (&use, gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM));
25760	  if (ix86_use_pseudo_pic_reg ())
25761	    emit_move_insn (gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM),
25762			    pic_offset_table_rtx);
25763	}
25764    }
25765
25766  /* Skip setting up RAX register for -mskip-rax-setup when there are no
25767     parameters passed in vector registers.  */
25768  if (TARGET_64BIT
25769      && (INTVAL (callarg2) > 0
25770	  || (INTVAL (callarg2) == 0
25771	      && (TARGET_SSE || !flag_skip_rax_setup))))
25772    {
25773      rtx al = gen_rtx_REG (QImode, AX_REG);
25774      emit_move_insn (al, callarg2);
25775      use_reg (&use, al);
25776    }
25777
25778  if (ix86_cmodel == CM_LARGE_PIC
25779      && !TARGET_PECOFF
25780      && MEM_P (fnaddr)
25781      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
25782      && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
25783    fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
25784  else if (sibcall
25785	   ? !sibcall_insn_operand (XEXP (fnaddr, 0), word_mode)
25786	   : !call_insn_operand (XEXP (fnaddr, 0), word_mode))
25787    {
25788      fnaddr = convert_to_mode (word_mode, XEXP (fnaddr, 0), 1);
25789      fnaddr = gen_rtx_MEM (QImode, copy_to_mode_reg (word_mode, fnaddr));
25790    }
25791
25792  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
25793
25794  if (retval)
25795    {
      /* We should add bounds as a destination register in case
	 a pointer with bounds may be returned.  */
25798      if (TARGET_MPX && SCALAR_INT_MODE_P (GET_MODE (retval)))
25799	{
25800	  rtx b0 = gen_rtx_REG (BND64mode, FIRST_BND_REG);
25801	  rtx b1 = gen_rtx_REG (BND64mode, FIRST_BND_REG + 1);
25802	  if (GET_CODE (retval) == PARALLEL)
25803	    {
25804	      b0 = gen_rtx_EXPR_LIST (VOIDmode, b0, const0_rtx);
25805	      b1 = gen_rtx_EXPR_LIST (VOIDmode, b1, const0_rtx);
25806	      rtx par = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, b0, b1));
25807	      retval = chkp_join_splitted_slot (retval, par);
25808	    }
25809	  else
25810	    {
25811	      retval = gen_rtx_PARALLEL (VOIDmode,
25812					 gen_rtvec (3, retval, b0, b1));
25813	      chkp_put_regs_to_expr_list (retval);
25814	    }
25815	}
25816
25817      call = gen_rtx_SET (VOIDmode, retval, call);
25818    }
25819  vec[vec_len++] = call;
25820
25821  if (pop)
25822    {
25823      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
25824      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
25825      vec[vec_len++] = pop;
25826    }
25827
25828  if (TARGET_64BIT_MS_ABI
25829      && (!callarg2 || INTVAL (callarg2) != -2))
25830    {
25831      int const cregs_size
25832	= ARRAY_SIZE (x86_64_ms_sysv_extra_clobbered_registers);
25833      int i;
25834
25835      for (i = 0; i < cregs_size; i++)
25836	{
25837	  int regno = x86_64_ms_sysv_extra_clobbered_registers[i];
25838	  machine_mode mode = SSE_REGNO_P (regno) ? TImode : DImode;
25839
25840	  clobber_reg (&use, gen_rtx_REG (mode, regno));
25841	}
25842    }
25843
25844  if (vec_len > 1)
25845    call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (vec_len, vec));
25846  call = emit_call_insn (call);
25847  if (use)
25848    CALL_INSN_FUNCTION_USAGE (call) = use;
25849
25850  return call;
25851}
25852
25853/* Output the assembly for a call instruction.  */
25854
25855const char *
25856ix86_output_call_insn (rtx_insn *insn, rtx call_op)
25857{
25858  bool direct_p = constant_call_address_operand (call_op, VOIDmode);
25859  bool seh_nop_p = false;
25860  const char *xasm;
25861
25862  if (SIBLING_CALL_P (insn))
25863    {
25864      if (direct_p)
25865	xasm = "%!jmp\t%P0";
25866      /* SEH epilogue detection requires the indirect branch case
25867	 to include REX.W.  */
25868      else if (TARGET_SEH)
25869	xasm = "%!rex.W jmp %A0";
25870      else
25871	xasm = "%!jmp\t%A0";
25872
25873      output_asm_insn (xasm, &call_op);
25874      return "";
25875    }
25876
25877  /* SEH unwinding can require an extra nop to be emitted in several
25878     circumstances.  Determine if we have one of those.  */
25879  if (TARGET_SEH)
25880    {
25881      rtx_insn *i;
25882
25883      for (i = NEXT_INSN (insn); i ; i = NEXT_INSN (i))
25884	{
25885	  /* If we get to another real insn, we don't need the nop.  */
25886	  if (INSN_P (i))
25887	    break;
25888
	  /* If we get to the epilogue note, prevent a catch region from
	     being adjacent to the standard epilogue sequence.  With
	     non-call exceptions, we'll have done this during epilogue
	     emission.  */
25892	  if (NOTE_P (i) && NOTE_KIND (i) == NOTE_INSN_EPILOGUE_BEG
25893	      && !flag_non_call_exceptions
25894	      && !can_throw_internal (insn))
25895	    {
25896	      seh_nop_p = true;
25897	      break;
25898	    }
25899	}
25900
25901      /* If we didn't find a real insn following the call, prevent the
25902	 unwinder from looking into the next function.  */
25903      if (i == NULL)
25904	seh_nop_p = true;
25905    }
25906
25907  if (direct_p)
25908    xasm = "%!call\t%P0";
25909  else
25910    xasm = "%!call\t%A0";
25911
25912  output_asm_insn (xasm, &call_op);
25913
25914  if (seh_nop_p)
25915    return "nop";
25916
25917  return "";
25918}
25919
25920/* Clear stack slot assignments remembered from previous functions.
25921   This is called from INIT_EXPANDERS once before RTL is emitted for each
25922   function.  */
25923
25924static struct machine_function *
25925ix86_init_machine_status (void)
25926{
25927  struct machine_function *f;
25928
25929  f = ggc_cleared_alloc<machine_function> ();
25930  f->use_fast_prologue_epilogue_nregs = -1;
25931  f->call_abi = ix86_abi;
25932
25933  return f;
25934}
25935
25936/* Return a MEM corresponding to a stack slot with mode MODE.
25937   Allocate a new slot if necessary.
25938
25939   The RTL for a function can have several slots available: N is
25940   which slot to use.  */
25941
25942rtx
25943assign_386_stack_local (machine_mode mode, enum ix86_stack_slot n)
25944{
25945  struct stack_local_entry *s;
25946
25947  gcc_assert (n < MAX_386_STACK_LOCALS);
25948
25949  for (s = ix86_stack_locals; s; s = s->next)
25950    if (s->mode == mode && s->n == n)
25951      return validize_mem (copy_rtx (s->rtl));
25952
25953  s = ggc_alloc<stack_local_entry> ();
25954  s->n = n;
25955  s->mode = mode;
25956  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
25957
25958  s->next = ix86_stack_locals;
25959  ix86_stack_locals = s;
25960  return validize_mem (copy_rtx (s->rtl));
25961}
25962
25963static void
25964ix86_instantiate_decls (void)
25965{
25966  struct stack_local_entry *s;
25967
25968  for (s = ix86_stack_locals; s; s = s->next)
25969    if (s->rtl != NULL_RTX)
25970      instantiate_decl_rtl (s->rtl);
25971}
25972
25973/* Check whether x86 address PARTS is a pc-relative address.  */
25974
25975static bool
25976rip_relative_addr_p (struct ix86_address *parts)
25977{
25978  rtx base, index, disp;
25979
25980  base = parts->base;
25981  index = parts->index;
25982  disp = parts->disp;
25983
25984  if (disp && !base && !index)
25985    {
25986      if (TARGET_64BIT)
25987	{
25988	  rtx symbol = disp;
25989
25990	  if (GET_CODE (disp) == CONST)
25991	    symbol = XEXP (disp, 0);
25992	  if (GET_CODE (symbol) == PLUS
25993	      && CONST_INT_P (XEXP (symbol, 1)))
25994	    symbol = XEXP (symbol, 0);
25995
25996	  if (GET_CODE (symbol) == LABEL_REF
25997	      || (GET_CODE (symbol) == SYMBOL_REF
25998		  && SYMBOL_REF_TLS_MODEL (symbol) == 0)
25999	      || (GET_CODE (symbol) == UNSPEC
26000		  && (XINT (symbol, 1) == UNSPEC_GOTPCREL
26001		      || XINT (symbol, 1) == UNSPEC_PCREL
26002		      || XINT (symbol, 1) == UNSPEC_GOTNTPOFF)))
26003	    return true;
26004	}
26005    }
26006  return false;
26007}
26008
26009/* Calculate the length of the memory address in the instruction encoding.
   Includes the addr32 prefix, but not the one-byte modrm, opcode,
   or other prefixes.  We never generate the addr32 prefix for the LEA insn.  */
26012
26013int
26014memory_address_length (rtx addr, bool lea)
26015{
26016  struct ix86_address parts;
26017  rtx base, index, disp;
26018  int len;
26019  int ok;
26020
26021  if (GET_CODE (addr) == PRE_DEC
26022      || GET_CODE (addr) == POST_INC
26023      || GET_CODE (addr) == PRE_MODIFY
26024      || GET_CODE (addr) == POST_MODIFY)
26025    return 0;
26026
26027  ok = ix86_decompose_address (addr, &parts);
26028  gcc_assert (ok);
26029
26030  len = (parts.seg == SEG_DEFAULT) ? 0 : 1;
26031
  /* If this is not an LEA instruction, add the length of the addr32 prefix.  */
26033  if (TARGET_64BIT && !lea
26034      && (SImode_address_operand (addr, VOIDmode)
26035	  || (parts.base && GET_MODE (parts.base) == SImode)
26036	  || (parts.index && GET_MODE (parts.index) == SImode)))
26037    len++;
26038
26039  base = parts.base;
26040  index = parts.index;
26041  disp = parts.disp;
26042
26043  if (base && GET_CODE (base) == SUBREG)
26044    base = SUBREG_REG (base);
26045  if (index && GET_CODE (index) == SUBREG)
26046    index = SUBREG_REG (index);
26047
26048  gcc_assert (base == NULL_RTX || REG_P (base));
26049  gcc_assert (index == NULL_RTX || REG_P (index));
26050
26051  /* Rule of thumb:
26052       - esp as the base always wants an index,
26053       - ebp as the base always wants a displacement,
26054       - r12 as the base always wants an index,
26055       - r13 as the base always wants a displacement.  */
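  /* A few illustrative cases of the extra bytes counted below: (%eax) needs
     nothing beyond the modrm byte, (%esp) needs a SIB byte, (%ebp) needs a
     one-byte displacement, and 4(%eax) costs one displacement byte while
     0x1000(%eax) costs four.  */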
26056
26057  /* Register Indirect.  */
26058  if (base && !index && !disp)
26059    {
26060      /* esp (for its index) and ebp (for its displacement) need
26061	 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
26062	 code.  */
26063      if (base == arg_pointer_rtx
26064	  || base == frame_pointer_rtx
26065	  || REGNO (base) == SP_REG
26066	  || REGNO (base) == BP_REG
26067	  || REGNO (base) == R12_REG
26068	  || REGNO (base) == R13_REG)
26069	len++;
26070    }
26071
  /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
     is not disp32, but disp32(%rip), so for disp32 a
     SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by an UNSPEC.  */
26077  else if (disp && !base && !index)
26078    {
26079      len += 4;
      if (TARGET_64BIT && !rip_relative_addr_p (&parts))
26081	len++;
26082    }
26083  else
26084    {
26085      /* Find the length of the displacement constant.  */
26086      if (disp)
26087	{
26088	  if (base && satisfies_constraint_K (disp))
26089	    len += 1;
26090	  else
26091	    len += 4;
26092	}
26093      /* ebp always wants a displacement.  Similarly r13.  */
26094      else if (base && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
26095	len++;
26096
26097      /* An index requires the two-byte modrm form....  */
26098      if (index
26099	  /* ...like esp (or r12), which always wants an index.  */
26100	  || base == arg_pointer_rtx
26101	  || base == frame_pointer_rtx
26102	  || (base && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
26103	len++;
26104    }
26105
26106  return len;
26107}
26108
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
26111int
26112ix86_attr_length_immediate_default (rtx_insn *insn, bool shortform)
26113{
26114  int len = 0;
26115  int i;
26116  extract_insn_cached (insn);
26117  for (i = recog_data.n_operands - 1; i >= 0; --i)
26118    if (CONSTANT_P (recog_data.operand[i]))
26119      {
26120        enum attr_mode mode = get_attr_mode (insn);
26121
26122	gcc_assert (!len);
26123	if (shortform && CONST_INT_P (recog_data.operand[i]))
26124	  {
26125	    HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
26126	    switch (mode)
26127	      {
26128	      case MODE_QI:
26129		len = 1;
26130		continue;
26131	      case MODE_HI:
26132		ival = trunc_int_for_mode (ival, HImode);
26133		break;
26134	      case MODE_SI:
26135		ival = trunc_int_for_mode (ival, SImode);
26136		break;
26137	      default:
26138		break;
26139	      }
26140	    if (IN_RANGE (ival, -128, 127))
26141	      {
26142		len = 1;
26143		continue;
26144	      }
26145	  }
26146	switch (mode)
26147	  {
26148	  case MODE_QI:
26149	    len = 1;
26150	    break;
26151	  case MODE_HI:
26152	    len = 2;
26153	    break;
26154	  case MODE_SI:
26155	    len = 4;
26156	    break;
	  /* Immediates for DImode instructions are encoded
	     as 32-bit sign-extended values.  */
26159	  case MODE_DI:
26160	    len = 4;
26161	    break;
26162	  default:
26163	    fatal_insn ("unknown insn mode", insn);
26164	}
26165      }
26166  return len;
26167}
26168
26169/* Compute default value for "length_address" attribute.  */
26170int
26171ix86_attr_length_address_default (rtx_insn *insn)
26172{
26173  int i;
26174
26175  if (get_attr_type (insn) == TYPE_LEA)
26176    {
26177      rtx set = PATTERN (insn), addr;
26178
26179      if (GET_CODE (set) == PARALLEL)
26180	set = XVECEXP (set, 0, 0);
26181
26182      gcc_assert (GET_CODE (set) == SET);
26183
26184      addr = SET_SRC (set);
26185
26186      return memory_address_length (addr, true);
26187    }
26188
26189  extract_insn_cached (insn);
26190  for (i = recog_data.n_operands - 1; i >= 0; --i)
26191    if (MEM_P (recog_data.operand[i]))
26192      {
26193        constrain_operands_cached (insn, reload_completed);
26194        if (which_alternative != -1)
26195	  {
26196	    const char *constraints = recog_data.constraints[i];
26197	    int alt = which_alternative;
26198
26199	    while (*constraints == '=' || *constraints == '+')
26200	      constraints++;
26201	    while (alt-- > 0)
26202	      while (*constraints++ != ',')
26203		;
26204	    /* Skip ignored operands.  */
26205	    if (*constraints == 'X')
26206	      continue;
26207	  }
26208	return memory_address_length (XEXP (recog_data.operand[i], 0), false);
26209      }
26210  return 0;
26211}
26212
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2 or 3 byte VEX prefix and 1 opcode byte.  */
26215
26216int
26217ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
26218			      bool has_vex_w)
26219{
26220  int i;
26221
  /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX.W bit
     requires the 3 byte VEX prefix.  */
26224  if (!has_0f_opcode || has_vex_w)
26225    return 3 + 1;
26226
  /* We can always use the 2 byte VEX prefix in 32-bit mode.  */
26228  if (!TARGET_64BIT)
26229    return 2 + 1;
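  /* Two illustrative cases: vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte
     prefix (2 + 1 below), while an insn with a DImode general register
     operand needs REX.W and therefore the 3-byte prefix (3 + 1).  */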
26230
26231  extract_insn_cached (insn);
26232
26233  for (i = recog_data.n_operands - 1; i >= 0; --i)
26234    if (REG_P (recog_data.operand[i]))
26235      {
26236	/* REX.W bit uses 3 byte VEX prefix.  */
26237	if (GET_MODE (recog_data.operand[i]) == DImode
26238	    && GENERAL_REG_P (recog_data.operand[i]))
26239	  return 3 + 1;
26240      }
26241    else
26242      {
26243	/* REX.X or REX.B bits use 3 byte VEX prefix.  */
26244	if (MEM_P (recog_data.operand[i])
26245	    && x86_extended_reg_mentioned_p (recog_data.operand[i]))
26246	  return 3 + 1;
26247      }
26248
26249  return 2 + 1;
26250}
26251
26252/* Return the maximum number of instructions a cpu can issue.  */
26253
26254static int
26255ix86_issue_rate (void)
26256{
26257  switch (ix86_tune)
26258    {
26259    case PROCESSOR_PENTIUM:
26260    case PROCESSOR_BONNELL:
26261    case PROCESSOR_SILVERMONT:
26262    case PROCESSOR_KNL:
26263    case PROCESSOR_INTEL:
26264    case PROCESSOR_K6:
26265    case PROCESSOR_BTVER2:
26266    case PROCESSOR_PENTIUM4:
26267    case PROCESSOR_NOCONA:
26268      return 2;
26269
26270    case PROCESSOR_PENTIUMPRO:
26271    case PROCESSOR_ATHLON:
26272    case PROCESSOR_K8:
26273    case PROCESSOR_AMDFAM10:
26274    case PROCESSOR_GENERIC:
26275    case PROCESSOR_BTVER1:
26276      return 3;
26277
26278    case PROCESSOR_BDVER1:
26279    case PROCESSOR_BDVER2:
26280    case PROCESSOR_BDVER3:
26281    case PROCESSOR_BDVER4:
26282    case PROCESSOR_CORE2:
26283    case PROCESSOR_NEHALEM:
26284    case PROCESSOR_SANDYBRIDGE:
26285    case PROCESSOR_HASWELL:
26286      return 4;
26287
26288    default:
26289      return 1;
26290    }
26291}
26292
/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads the flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */
26295
26296static bool
26297ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
26298{
26299  rtx set, set2;
26300
26301  /* Simplify the test for uninteresting insns.  */
26302  if (insn_type != TYPE_SETCC
26303      && insn_type != TYPE_ICMOV
26304      && insn_type != TYPE_FCMOV
26305      && insn_type != TYPE_IBR)
26306    return false;
26307
26308  if ((set = single_set (dep_insn)) != 0)
26309    {
26310      set = SET_DEST (set);
26311      set2 = NULL_RTX;
26312    }
26313  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
26314	   && XVECLEN (PATTERN (dep_insn), 0) == 2
26315	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
26316	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
26317    {
26318      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
26320    }
26321  else
26322    return false;
26323
26324  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
26325    return false;
26326
26327  /* This test is true if the dependent insn reads the flags but
26328     not any other potentially set register.  */
26329  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
26330    return false;
26331
26332  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
26333    return false;
26334
26335  return true;
26336}
26337
26338/* Return true iff USE_INSN has a memory address with operands set by
26339   SET_INSN.  */
26340
26341bool
26342ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
26343{
26344  int i;
26345  extract_insn_cached (use_insn);
26346  for (i = recog_data.n_operands - 1; i >= 0; --i)
26347    if (MEM_P (recog_data.operand[i]))
26348      {
26349	rtx addr = XEXP (recog_data.operand[i], 0);
26350	return modified_in_p (addr, set_insn) != 0;
26351      }
26352  return false;
26353}
26354
26355/* Helper function for exact_store_load_dependency.
26356   Return true if addr is found in insn.  */
26357static bool
26358exact_dependency_1 (rtx addr, rtx insn)
26359{
26360  enum rtx_code code;
26361  const char *format_ptr;
26362  int i, j;
26363
26364  code = GET_CODE (insn);
26365  switch (code)
26366    {
26367    case MEM:
26368      if (rtx_equal_p (addr, insn))
26369	return true;
26370      break;
26371    case REG:
26372    CASE_CONST_ANY:
26373    case SYMBOL_REF:
26374    case CODE_LABEL:
26375    case PC:
26376    case CC0:
26377    case EXPR_LIST:
26378      return false;
26379    default:
26380      break;
26381    }
26382
26383  format_ptr = GET_RTX_FORMAT (code);
26384  for (i = 0; i < GET_RTX_LENGTH (code); i++)
26385    {
26386      switch (*format_ptr++)
26387	{
26388	case 'e':
26389	  if (exact_dependency_1 (addr, XEXP (insn, i)))
26390	    return true;
26391	  break;
26392	case 'E':
26393	  for (j = 0; j < XVECLEN (insn, i); j++)
26394	    if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
26395	      return true;
	  break;
26397	}
26398    }
26399  return false;
26400}
26401
/* Return true if there exists an exact dependency between a store and a load,
   i.e. the same memory address is used in both.  */
26404static bool
26405exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
26406{
26407  rtx set1, set2;
26408
26409  set1 = single_set (store);
26410  if (!set1)
26411    return false;
26412  if (!MEM_P (SET_DEST (set1)))
26413    return false;
26414  set2 = single_set (load);
26415  if (!set2)
26416    return false;
26417  if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
26418    return true;
26419  return false;
26420}
26421
26422static int
26423ix86_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26424{
26425  enum attr_type insn_type, dep_insn_type;
26426  enum attr_memory memory;
26427  rtx set, set2;
26428  int dep_insn_code_number;
26429
26430  /* Anti and output dependencies have zero cost on all CPUs.  */
26431  if (REG_NOTE_KIND (link) != 0)
26432    return 0;
26433
26434  dep_insn_code_number = recog_memoized (dep_insn);
26435
26436  /* If we can't recognize the insns, we can't really do anything.  */
26437  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
26438    return cost;
26439
26440  insn_type = get_attr_type (insn);
26441  dep_insn_type = get_attr_type (dep_insn);
26442
26443  switch (ix86_tune)
26444    {
26445    case PROCESSOR_PENTIUM:
26446      /* Address Generation Interlock adds a cycle of latency.  */
26447      if (insn_type == TYPE_LEA)
26448	{
26449	  rtx addr = PATTERN (insn);
26450
26451	  if (GET_CODE (addr) == PARALLEL)
26452	    addr = XVECEXP (addr, 0, 0);
26453
26454	  gcc_assert (GET_CODE (addr) == SET);
26455
26456	  addr = SET_SRC (addr);
26457	  if (modified_in_p (addr, dep_insn))
26458	    cost += 1;
26459	}
26460      else if (ix86_agi_dependent (dep_insn, insn))
26461	cost += 1;
26462
26463      /* ??? Compares pair with jump/setcc.  */
26464      if (ix86_flags_dependent (insn, dep_insn, insn_type))
26465	cost = 0;
26466
      /* Floating point stores require the value to be ready one cycle
	 earlier.  */
26468      if (insn_type == TYPE_FMOV
26469	  && get_attr_memory (insn) == MEMORY_STORE
26470	  && !ix86_agi_dependent (dep_insn, insn))
26471	cost += 1;
26472      break;
26473
26474    case PROCESSOR_PENTIUMPRO:
26475      /* INT->FP conversion is expensive.  */
26476      if (get_attr_fp_int_src (dep_insn))
26477	cost += 5;
26478
      /* There is one extra cycle of latency between an FP op and a store.  */
26480      if (insn_type == TYPE_FMOV
26481	  && (set = single_set (dep_insn)) != NULL_RTX
26482	  && (set2 = single_set (insn)) != NULL_RTX
26483	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
26484	  && MEM_P (SET_DEST (set2)))
26485	cost += 1;
26486
26487      memory = get_attr_memory (insn);
26488
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when that instruction is not needed to compute the address.  */
26492      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26493	  && !ix86_agi_dependent (dep_insn, insn))
26494	{
	  /* Claim that moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
26497	  if (dep_insn_type == TYPE_IMOV
26498	      || dep_insn_type == TYPE_FMOV)
26499	    cost = 1;
26500	  else if (cost > 1)
26501	    cost--;
26502	}
26503      break;
26504
26505    case PROCESSOR_K6:
26506     /* The esp dependency is resolved before
26507	the instruction is really finished.  */
26508      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26509	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26510	return 1;
26511
26512      /* INT->FP conversion is expensive.  */
26513      if (get_attr_fp_int_src (dep_insn))
26514	cost += 5;
26515
26516      memory = get_attr_memory (insn);
26517
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when that instruction is not needed to compute the address.  */
26521      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26522	  && !ix86_agi_dependent (dep_insn, insn))
26523	{
	  /* Claim that moves take one cycle, as the core can issue one load
	     at a time and the next load can start a cycle later.  */
26526	  if (dep_insn_type == TYPE_IMOV
26527	      || dep_insn_type == TYPE_FMOV)
26528	    cost = 1;
26529	  else if (cost > 2)
26530	    cost -= 2;
26531	  else
26532	    cost = 1;
26533	}
26534      break;
26535
26536    case PROCESSOR_AMDFAM10:
26537    case PROCESSOR_BDVER1:
26538    case PROCESSOR_BDVER2:
26539    case PROCESSOR_BDVER3:
26540    case PROCESSOR_BDVER4:
26541    case PROCESSOR_BTVER1:
26542    case PROCESSOR_BTVER2:
26543    case PROCESSOR_GENERIC:
      /* The stack engine allows push and pop instructions to execute
	 in parallel.  */
26545      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26546	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26547	return 0;
26548      /* FALLTHRU */
26549
26550    case PROCESSOR_ATHLON:
26551    case PROCESSOR_K8:
26552      memory = get_attr_memory (insn);
26553
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when that instruction is not needed to compute the address.  */
26557      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26558	  && !ix86_agi_dependent (dep_insn, insn))
26559	{
26560	  enum attr_unit unit = get_attr_unit (insn);
26561	  int loadcost = 3;
26562
	  /* Because of the difference in length between the integer and
	     floating point unit pipeline preparation stages, memory operands
	     for floating point are cheaper.

	     ??? For Athlon the difference is most probably 2.  */
26568	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
26569	    loadcost = 3;
26570	  else
26571	    loadcost = TARGET_ATHLON ? 2 : 0;
26572
26573	  if (cost >= loadcost)
26574	    cost -= loadcost;
26575	  else
26576	    cost = 0;
26577	}
26578      break;
26579
26580    case PROCESSOR_CORE2:
26581    case PROCESSOR_NEHALEM:
26582    case PROCESSOR_SANDYBRIDGE:
26583    case PROCESSOR_HASWELL:
      /* The stack engine allows push and pop instructions to execute
	 in parallel.  */
26585      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
26586	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
26587	return 0;
26588
26589      memory = get_attr_memory (insn);
26590
      /* Model the ability of the reorder buffer to hide the latency of a
	 load by executing it in parallel with the previous instruction,
	 when that instruction is not needed to compute the address.  */
26594      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26595	  && !ix86_agi_dependent (dep_insn, insn))
26596	{
26597	  if (cost >= 4)
26598	    cost -= 4;
26599	  else
26600	    cost = 0;
26601	}
26602      break;
26603
26604    case PROCESSOR_SILVERMONT:
26605    case PROCESSOR_KNL:
26606    case PROCESSOR_INTEL:
26607      if (!reload_completed)
26608	return cost;
26609
26610      /* Increase cost of integer loads.  */
26611      memory = get_attr_memory (dep_insn);
26612      if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
26613	{
26614	  enum attr_unit unit = get_attr_unit (dep_insn);
26615	  if (unit == UNIT_INTEGER && cost == 1)
26616	    {
26617	      if (memory == MEMORY_LOAD)
26618		cost = 3;
26619	      else
26620		{
		  /* Increase the cost of load/store only for short integer
		     types, because of the store-forwarding issue.  */
26623		  rtx set = single_set (dep_insn);
26624		  if (set && (GET_MODE (SET_DEST (set)) == QImode
26625			      || GET_MODE (SET_DEST (set)) == HImode))
26626		    {
		      /* Increase the cost if an exact store/load dependence
			 exists and INSN is a load.  */
26629		      enum attr_memory insn_memory = get_attr_memory (insn);
26630		      if (insn_memory == MEMORY_LOAD
26631			  && exact_store_load_dependency (dep_insn, insn))
26632			cost = 3;
26633		    }
26634		}
26635	    }
26636	}
26637
26638    default:
26639      break;
26640    }
26641
26642  return cost;
26643}
26644
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
26648
26649static int
26650ia32_multipass_dfa_lookahead (void)
26651{
26652  switch (ix86_tune)
26653    {
26654    case PROCESSOR_PENTIUM:
26655      return 2;
26656
26657    case PROCESSOR_PENTIUMPRO:
26658    case PROCESSOR_K6:
26659      return 1;
26660
26661    case PROCESSOR_BDVER1:
26662    case PROCESSOR_BDVER2:
26663    case PROCESSOR_BDVER3:
26664    case PROCESSOR_BDVER4:
      /* We use lookahead value 4 for BD both before and after reload
	 schedules.  The plan is to use value 8 for -O3.  */
      return 4;
26668
26669    case PROCESSOR_CORE2:
26670    case PROCESSOR_NEHALEM:
26671    case PROCESSOR_SANDYBRIDGE:
26672    case PROCESSOR_HASWELL:
26673    case PROCESSOR_BONNELL:
26674    case PROCESSOR_SILVERMONT:
26675    case PROCESSOR_KNL:
26676    case PROCESSOR_INTEL:
      /* Generally, we want haifa-sched:max_issue() to look ahead as far
	 as the number of instructions that can be executed in one cycle,
	 i.e. issue_rate.  It is unclear why many CPU tunings do not do this.  */
26680      if (reload_completed)
26681        return ix86_issue_rate ();
26682      /* Don't use lookahead for pre-reload schedule to save compile time.  */
26683      return 0;
26684
26685    default:
26686      return 0;
26687    }
26688}
26689
26690/* Return true if target platform supports macro-fusion.  */
26691
26692static bool
26693ix86_macro_fusion_p ()
26694{
26695  return TARGET_FUSE_CMP_AND_BRANCH;
26696}
26697
/* Check whether the current microarchitecture supports macro fusion
   for the insn pair "CONDGEN + CONDJMP".  Refer to the
   "Intel Architectures Optimization Reference Manual".  */
26701
26702static bool
26703ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
26704{
26705  rtx src, dest;
26706  enum rtx_code ccode;
26707  rtx compare_set = NULL_RTX, test_if, cond;
26708  rtx alu_set = NULL_RTX, addr = NULL_RTX;
26709
26710  if (!any_condjump_p (condjmp))
26711    return false;
26712
26713  if (get_attr_type (condgen) != TYPE_TEST
26714      && get_attr_type (condgen) != TYPE_ICMP
26715      && get_attr_type (condgen) != TYPE_INCDEC
26716      && get_attr_type (condgen) != TYPE_ALU)
26717    return false;
26718
26719  compare_set = single_set (condgen);
26720  if (compare_set == NULL_RTX
26721      && !TARGET_FUSE_ALU_AND_BRANCH)
26722    return false;
26723
26724  if (compare_set == NULL_RTX)
26725    {
26726      int i;
26727      rtx pat = PATTERN (condgen);
26728      for (i = 0; i < XVECLEN (pat, 0); i++)
26729	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
26730	  {
26731	    rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
26732	    if (GET_CODE (set_src) == COMPARE)
26733	      compare_set = XVECEXP (pat, 0, i);
26734	    else
26735	      alu_set = XVECEXP (pat, 0, i);
26736	  }
26737    }
26738  if (compare_set == NULL_RTX)
26739    return false;
26740  src = SET_SRC (compare_set);
26741  if (GET_CODE (src) != COMPARE)
26742    return false;
26743
26744  /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
26745     supported.  */
26746  if ((MEM_P (XEXP (src, 0))
26747       && CONST_INT_P (XEXP (src, 1)))
26748      || (MEM_P (XEXP (src, 1))
26749	  && CONST_INT_P (XEXP (src, 0))))
26750    return false;
26751
26752  /* No fusion for RIP-relative address.  */
26753  if (MEM_P (XEXP (src, 0)))
26754    addr = XEXP (XEXP (src, 0), 0);
26755  else if (MEM_P (XEXP (src, 1)))
26756    addr = XEXP (XEXP (src, 1), 0);
26757
26758  if (addr) {
26759    ix86_address parts;
26760    int ok = ix86_decompose_address (addr, &parts);
26761    gcc_assert (ok);
26762
26763    if (rip_relative_addr_p (&parts))
26764      return false;
26765  }
26766
26767  test_if = SET_SRC (pc_set (condjmp));
26768  cond = XEXP (test_if, 0);
26769  ccode = GET_CODE (cond);
  /* Check whether the conditional jump uses the Sign or Overflow flags.  */
26771  if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
26772      && (ccode == GE
26773          || ccode == GT
26774	  || ccode == LE
26775	  || ccode == LT))
26776    return false;
26777
26778  /* Return true for TYPE_TEST and TYPE_ICMP.  */
26779  if (get_attr_type (condgen) == TYPE_TEST
26780      || get_attr_type (condgen) == TYPE_ICMP)
26781    return true;
26782
  /* What follows handles the case of macro-fusion for alu + jmp.  */
26784  if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
26785    return false;
26786
26787  /* No fusion for alu op with memory destination operand.  */
26788  dest = SET_DEST (alu_set);
26789  if (MEM_P (dest))
26790    return false;
26791
26792  /* Macro-fusion for inc/dec + unsigned conditional jump is not
26793     supported.  */
26794  if (get_attr_type (condgen) == TYPE_INCDEC
26795      && (ccode == GEU
26796	  || ccode == GTU
26797	  || ccode == LEU
26798	  || ccode == LTU))
26799    return false;
26800
26801  return true;
26802}
26803
/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
   execution.  It is applied if
   (1) an IMUL instruction is at the top of the list;
   (2) there is exactly one producer of an independent IMUL instruction in
       the ready list.
   Return the index of the IMUL producer if it was found and -1 otherwise.  */
26810static int
26811do_reorder_for_imul (rtx_insn **ready, int n_ready)
26812{
26813  rtx_insn *insn;
26814  rtx set, insn1, insn2;
26815  sd_iterator_def sd_it;
26816  dep_t dep;
26817  int index = -1;
26818  int i;
26819
26820  if (!TARGET_BONNELL)
26821    return index;
26822
  /* Check that an IMUL instruction is at the top of the ready list.  */
26824  insn = ready[n_ready - 1];
26825  set = single_set (insn);
26826  if (!set)
26827    return index;
26828  if (!(GET_CODE (SET_SRC (set)) == MULT
26829      && GET_MODE (SET_SRC (set)) == SImode))
26830    return index;
26831
26832  /* Search for producer of independent IMUL instruction.  */
26833  for (i = n_ready - 2; i >= 0; i--)
26834    {
26835      insn = ready[i];
26836      if (!NONDEBUG_INSN_P (insn))
26837	continue;
26838      /* Skip IMUL instruction.  */
26839      insn2 = PATTERN (insn);
26840      if (GET_CODE (insn2) == PARALLEL)
26841	insn2 = XVECEXP (insn2, 0, 0);
26842      if (GET_CODE (insn2) == SET
26843	  && GET_CODE (SET_SRC (insn2)) == MULT
26844	  && GET_MODE (SET_SRC (insn2)) == SImode)
26845	continue;
26846
26847      FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
26848	{
26849	  rtx con;
26850	  con = DEP_CON (dep);
26851	  if (!NONDEBUG_INSN_P (con))
26852	    continue;
26853	  insn1 = PATTERN (con);
26854	  if (GET_CODE (insn1) == PARALLEL)
26855	    insn1 = XVECEXP (insn1, 0, 0);
26856
26857	  if (GET_CODE (insn1) == SET
26858	      && GET_CODE (SET_SRC (insn1)) == MULT
26859	      && GET_MODE (SET_SRC (insn1)) == SImode)
26860	    {
26861	      sd_iterator_def sd_it1;
26862	      dep_t dep1;
	      /* Check that the IMUL has no producer other than INSN.  */
26864	      index = i;
26865	      FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
26866		{
26867		  rtx pro;
26868		  pro = DEP_PRO (dep1);
26869		  if (!NONDEBUG_INSN_P (pro))
26870		    continue;
26871		  if (pro != insn)
26872		    index = -1;
26873		}
26874	      if (index >= 0)
26875		break;
26876	    }
26877	}
26878      if (index >= 0)
26879	break;
26880    }
26881  return index;
26882}
26883
/* Try to find the best candidate at the top of the ready list if two insns
   have the same priority - the candidate is best if its producers were
   scheduled earlier.  Applied for Silvermont and Intel tunings only.
   Return true if the top 2 insns must be interchanged.  */
26888static bool
26889swap_top_of_ready_list (rtx_insn **ready, int n_ready)
26890{
26891  rtx_insn *top = ready[n_ready - 1];
26892  rtx_insn *next = ready[n_ready - 2];
26893  rtx set;
26894  sd_iterator_def sd_it;
26895  dep_t dep;
26896  int clock1 = -1;
26897  int clock2 = -1;
26898  #define INSN_TICK(INSN) (HID (INSN)->tick)
26899
26900  if (!TARGET_SILVERMONT && !TARGET_INTEL)
26901    return false;
26902
26903  if (!NONDEBUG_INSN_P (top))
26904    return false;
26905  if (!NONJUMP_INSN_P (top))
26906    return false;
26907  if (!NONDEBUG_INSN_P (next))
26908    return false;
26909  if (!NONJUMP_INSN_P (next))
26910    return false;
26911  set = single_set (top);
26912  if (!set)
26913    return false;
26914  set = single_set (next);
26915  if (!set)
26916    return false;
26917
26918  if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
26919    {
26920      if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
26921	return false;
      /* Determine the winner more precisely.  */
26923      FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
26924	{
26925	  rtx pro;
26926	  pro = DEP_PRO (dep);
26927	  if (!NONDEBUG_INSN_P (pro))
26928	    continue;
26929	  if (INSN_TICK (pro) > clock1)
26930	    clock1 = INSN_TICK (pro);
26931	}
26932      FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
26933	{
26934	  rtx pro;
26935	  pro = DEP_PRO (dep);
26936	  if (!NONDEBUG_INSN_P (pro))
26937	    continue;
26938	  if (INSN_TICK (pro) > clock2)
26939	    clock2 = INSN_TICK (pro);
26940	}
26941
26942      if (clock1 == clock2)
26943	{
	  /* Determine the winner - a load must win.  */
26945	  enum attr_memory memory1, memory2;
26946	  memory1 = get_attr_memory (top);
26947	  memory2 = get_attr_memory (next);
26948	  if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
26949	    return true;
26950	}
26951	return (bool) (clock2 < clock1);
26952    }
26953  return false;
26954  #undef INSN_TICK
26955}
26956
/* Perform possible reordering of the ready list for Atom/Silvermont only.
   Return the issue rate.  */
26959static int
26960ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
26961		    int *pn_ready, int clock_var)
26962{
26963  int issue_rate = -1;
26964  int n_ready = *pn_ready;
26965  int i;
26966  rtx_insn *insn;
26967  int index = -1;
26968
26969  /* Set up issue rate.  */
26970  issue_rate = ix86_issue_rate ();
26971
  /* Do reordering for BONNELL/SILVERMONT/INTEL only.  */
26973  if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
26974    return issue_rate;
26975
26976  /* Nothing to do if ready list contains only 1 instruction.  */
26977  if (n_ready <= 1)
26978    return issue_rate;
26979
  /* Do reordering for the post-reload scheduler only.  */
26981  if (!reload_completed)
26982    return issue_rate;
26983
26984  if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
26985    {
26986      if (sched_verbose > 1)
26987	fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
26988		 INSN_UID (ready[index]));
26989
26990      /* Put IMUL producer (ready[index]) at the top of ready list.  */
26991      insn = ready[index];
26992      for (i = index; i < n_ready - 1; i++)
26993	ready[i] = ready[i + 1];
26994      ready[n_ready - 1] = insn;
26995      return issue_rate;
26996    }
26997
26998  /* Skip selective scheduling since HID is not populated in it.  */
26999  if (clock_var != 0
27000      && !sel_sched_p ()
27001      && swap_top_of_ready_list (ready, n_ready))
27002    {
27003      if (sched_verbose > 1)
27004	fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
27005		 INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
27006      /* Swap 2 top elements of ready list.  */
27007      insn = ready[n_ready - 1];
27008      ready[n_ready - 1] = ready[n_ready - 2];
27009      ready[n_ready - 2] = insn;
27010    }
27011  return issue_rate;
27012}
27013
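/* Forward declaration; the definition appears later in this file.  */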
27014static bool
27015ix86_class_likely_spilled_p (reg_class_t);
27016
/* Return true if the lhs of INSN is a HW function argument register and
   set IS_SPILLED to true if it is a likely spilled HW register.  */
27019static bool
27020insn_is_function_arg (rtx insn, bool* is_spilled)
27021{
27022  rtx dst;
27023
27024  if (!NONDEBUG_INSN_P (insn))
27025    return false;
  /* Call instructions are not movable; ignore them.  */
27027  if (CALL_P (insn))
27028    return false;
27029  insn = PATTERN (insn);
27030  if (GET_CODE (insn) == PARALLEL)
27031    insn = XVECEXP (insn, 0, 0);
27032  if (GET_CODE (insn) != SET)
27033    return false;
27034  dst = SET_DEST (insn);
27035  if (REG_P (dst) && HARD_REGISTER_P (dst)
27036      && ix86_function_arg_regno_p (REGNO (dst)))
27037    {
27038      /* Is it likely spilled HW register?  */
27039      if (!TEST_HARD_REG_BIT (fixed_reg_set, REGNO (dst))
27040	  && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (dst))))
27041	*is_spilled = true;
27042      return true;
27043    }
27044  return false;
27045}
27046
/* Add output dependencies for a chain of adjacent function arguments, but
   only if there is a move to a likely spilled HW register.  Return the first
   argument if at least one dependence was added or NULL otherwise.  */
27050static rtx_insn *
27051add_parameter_dependencies (rtx_insn *call, rtx_insn *head)
27052{
27053  rtx_insn *insn;
27054  rtx_insn *last = call;
27055  rtx_insn *first_arg = NULL;
27056  bool is_spilled = false;
27057
27058  head = PREV_INSN (head);
27059
  /* Find the argument-passing instruction nearest to the call.  */
27061  while (true)
27062    {
27063      last = PREV_INSN (last);
27064      if (last == head)
27065	return NULL;
27066      if (!NONDEBUG_INSN_P (last))
27067	continue;
27068      if (insn_is_function_arg (last, &is_spilled))
27069	break;
27070      return NULL;
27071    }
27072
27073  first_arg = last;
27074  while (true)
27075    {
27076      insn = PREV_INSN (last);
27077      if (!INSN_P (insn))
27078	break;
27079      if (insn == head)
27080	break;
27081      if (!NONDEBUG_INSN_P (insn))
27082	{
27083	  last = insn;
27084	  continue;
27085	}
27086      if (insn_is_function_arg (insn, &is_spilled))
27087	{
	  /* Add an output dependence between two function arguments if the
	     chain of output arguments contains likely spilled HW registers.  */
27090	  if (is_spilled)
27091	    add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27092	  first_arg = last = insn;
27093	}
27094      else
27095	break;
27096    }
27097  if (!is_spilled)
27098    return NULL;
27099  return first_arg;
27100}
27101
/* Add an output or anti dependency from INSN to FIRST_ARG to restrict
   its code motion.  */
27104static void
27105avoid_func_arg_motion (rtx_insn *first_arg, rtx_insn *insn)
27106{
27107  rtx set;
27108  rtx tmp;
27109
27110  /* Add anti dependencies for bounds stores.  */
27111  if (INSN_P (insn)
27112      && GET_CODE (PATTERN (insn)) == PARALLEL
27113      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
27114      && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_BNDSTX)
27115    {
27116      add_dependence (first_arg, insn, REG_DEP_ANTI);
27117      return;
27118    }
27119
27120  set = single_set (insn);
27121  if (!set)
27122    return;
27123  tmp = SET_DEST (set);
27124  if (REG_P (tmp))
27125    {
27126      /* Add output dependency to the first function argument.  */
27127      add_dependence (first_arg, insn, REG_DEP_OUTPUT);
27128      return;
27129    }
27130  /* Add anti dependency.  */
27131  add_dependence (first_arg, insn, REG_DEP_ANTI);
27132}
27133
/* Avoid cross-block motion of a function argument by adding a dependency
   from the first non-jump instruction in BB.  */
27136static void
27137add_dependee_for_func_arg (rtx_insn *arg, basic_block bb)
27138{
27139  rtx_insn *insn = BB_END (bb);
27140
27141  while (insn)
27142    {
27143      if (NONDEBUG_INSN_P (insn) && NONJUMP_INSN_P (insn))
27144	{
27145	  rtx set = single_set (insn);
27146	  if (set)
27147	    {
27148	      avoid_func_arg_motion (arg, insn);
27149	      return;
27150	    }
27151	}
27152      if (insn == BB_HEAD (bb))
27153	return;
27154      insn = PREV_INSN (insn);
27155    }
27156}
27157
27158/* Hook for pre-reload schedule - avoid motion of function arguments
27159   passed in likely spilled HW registers.  */
27160static void
27161ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
27162{
27163  rtx_insn *insn;
27164  rtx_insn *first_arg = NULL;
27165  if (reload_completed)
27166    return;
27167  while (head != tail && DEBUG_INSN_P (head))
27168    head = NEXT_INSN (head);
27169  for (insn = tail; insn != head; insn = PREV_INSN (insn))
27170    if (INSN_P (insn) && CALL_P (insn))
27171      {
27172	first_arg = add_parameter_dependencies (insn, head);
27173	if (first_arg)
27174	  {
	    /* Add a dependee for the first argument to predecessors, but
	       only if the region contains more than one block.  */
	    basic_block bb = BLOCK_FOR_INSN (insn);
27178	    int rgn = CONTAINING_RGN (bb->index);
27179	    int nr_blks = RGN_NR_BLOCKS (rgn);
27180	    /* Skip trivial regions and region head blocks that can have
27181	       predecessors outside of region.  */
27182	    if (nr_blks > 1 && BLOCK_TO_BB (bb->index) != 0)
27183	      {
27184		edge e;
27185		edge_iterator ei;
27186
27187		/* Regions are SCCs with the exception of selective
27188		   scheduling with pipelining of outer blocks enabled.
27189		   So also check that immediate predecessors of a non-head
27190		   block are in the same region.  */
27191		FOR_EACH_EDGE (e, ei, bb->preds)
27192		  {
		    /* Avoid creating loop-carried dependencies by using the
		       topological ordering of the region.  */
27195		    if (rgn == CONTAINING_RGN (e->src->index)
27196			&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
27197		      add_dependee_for_func_arg (first_arg, e->src);
27198		  }
27199	      }
27200	    insn = first_arg;
27201	    if (insn == head)
27202	      break;
27203	  }
27204      }
27205    else if (first_arg)
27206      avoid_func_arg_motion (first_arg, insn);
27207}
27208
/* Hook for the pre-reload scheduler - set the priority of moves from likely
   spilled HW registers to the maximum, to schedule them as soon as possible.
   These are moves from function argument registers at the top of the function
   entry and moves from function return value registers after a call.  */
27213static int
27214ix86_adjust_priority (rtx_insn *insn, int priority)
27215{
27216  rtx set;
27217
27218  if (reload_completed)
27219    return priority;
27220
27221  if (!NONDEBUG_INSN_P (insn))
27222    return priority;
27223
27224  set = single_set (insn);
27225  if (set)
27226    {
27227      rtx tmp = SET_SRC (set);
27228      if (REG_P (tmp)
27229          && HARD_REGISTER_P (tmp)
27230          && !TEST_HARD_REG_BIT (fixed_reg_set, REGNO (tmp))
27231          && ix86_class_likely_spilled_p (REGNO_REG_CLASS (REGNO (tmp))))
27232	return current_sched_info->sched_max_insns_priority;
27233    }
27234
27235  return priority;
27236}
27237
/* Model the decoder of Core 2/i7.
   The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
   track the instruction fetch block boundaries and make sure that long
   (9+ bytes) instructions are assigned to D0.  */
27242
27243/* Maximum length of an insn that can be handled by
27244   a secondary decoder unit.  '8' for Core 2/i7.  */
27245static int core2i7_secondary_decoder_max_insn_size;
27246
27247/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
27248   '16' for Core 2/i7.  */
27249static int core2i7_ifetch_block_size;
27250
27251/* Maximum number of instructions decoder can handle per cycle.
27252   '6' for Core 2/i7.  */
27253static int core2i7_ifetch_block_max_insns;
27254
27255typedef struct ix86_first_cycle_multipass_data_ *
27256  ix86_first_cycle_multipass_data_t;
27257typedef const struct ix86_first_cycle_multipass_data_ *
27258  const_ix86_first_cycle_multipass_data_t;
27259
27260/* A variable to store target state across calls to max_issue within
27261   one cycle.  */
27262static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
27263  *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
27264
27265/* Initialize DATA.  */
27266static void
27267core2i7_first_cycle_multipass_init (void *_data)
27268{
27269  ix86_first_cycle_multipass_data_t data
27270    = (ix86_first_cycle_multipass_data_t) _data;
27271
27272  data->ifetch_block_len = 0;
27273  data->ifetch_block_n_insns = 0;
27274  data->ready_try_change = NULL;
27275  data->ready_try_change_size = 0;
27276}
27277
27278/* Advancing the cycle; reset ifetch block counts.  */
27279static void
27280core2i7_dfa_post_advance_cycle (void)
27281{
27282  ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
27283
27284  gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27285
27286  data->ifetch_block_len = 0;
27287  data->ifetch_block_n_insns = 0;
27288}
27289
27290static int min_insn_size (rtx_insn *);
27291
/* Filter out insns from ready_try that the core will not be able to issue
   on the current cycle due to decoder restrictions.  */
27294static void
27295core2i7_first_cycle_multipass_filter_ready_try
27296(const_ix86_first_cycle_multipass_data_t data,
27297 signed char *ready_try, int n_ready, bool first_cycle_insn_p)
27298{
27299  while (n_ready--)
27300    {
27301      rtx_insn *insn;
27302      int insn_size;
27303
27304      if (ready_try[n_ready])
27305	continue;
27306
27307      insn = get_ready_element (n_ready);
27308      insn_size = min_insn_size (insn);
27309
      if (/* If this is too long an insn for a secondary decoder ...  */
27311	  (!first_cycle_insn_p
27312	   && insn_size > core2i7_secondary_decoder_max_insn_size)
27313	  /* ... or it would not fit into the ifetch block ...  */
27314	  || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
27315	  /* ... or the decoder is full already ...  */
27316	  || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
27317	/* ... mask the insn out.  */
27318	{
27319	  ready_try[n_ready] = 1;
27320
27321	  if (data->ready_try_change)
27322	    bitmap_set_bit (data->ready_try_change, n_ready);
27323	}
27324    }
27325}
27326
27327/* Prepare for a new round of multipass lookahead scheduling.  */
27328static void
27329core2i7_first_cycle_multipass_begin (void *_data,
27330				     signed char *ready_try, int n_ready,
27331				     bool first_cycle_insn_p)
27332{
27333  ix86_first_cycle_multipass_data_t data
27334    = (ix86_first_cycle_multipass_data_t) _data;
27335  const_ix86_first_cycle_multipass_data_t prev_data
27336    = ix86_first_cycle_multipass_data;
27337
27338  /* Restore the state from the end of the previous round.  */
27339  data->ifetch_block_len = prev_data->ifetch_block_len;
27340  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
27341
27342  /* Filter instructions that cannot be issued on current cycle due to
27343     decoder restrictions.  */
27344  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27345						  first_cycle_insn_p);
27346}
27347
27348/* INSN is being issued in current solution.  Account for its impact on
27349   the decoder model.  */
27350static void
27351core2i7_first_cycle_multipass_issue (void *_data,
27352				     signed char *ready_try, int n_ready,
27353				     rtx_insn *insn, const void *_prev_data)
27354{
27355  ix86_first_cycle_multipass_data_t data
27356    = (ix86_first_cycle_multipass_data_t) _data;
27357  const_ix86_first_cycle_multipass_data_t prev_data
27358    = (const_ix86_first_cycle_multipass_data_t) _prev_data;
27359
27360  int insn_size = min_insn_size (insn);
27361
27362  data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
27363  data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
27364  gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
27365	      && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
27366
27367  /* Allocate or resize the bitmap for storing INSN's effect on ready_try.  */
27368  if (!data->ready_try_change)
27369    {
27370      data->ready_try_change = sbitmap_alloc (n_ready);
27371      data->ready_try_change_size = n_ready;
27372    }
27373  else if (data->ready_try_change_size < n_ready)
27374    {
27375      data->ready_try_change = sbitmap_resize (data->ready_try_change,
27376					       n_ready, 0);
27377      data->ready_try_change_size = n_ready;
27378    }
27379  bitmap_clear (data->ready_try_change);
27380
  /* Filter out insns from ready_try that the core will not be able to issue
     on the current cycle due to decoder restrictions.  */
27383  core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
27384						  false);
27385}
27386
27387/* Revert the effect on ready_try.  */
27388static void
27389core2i7_first_cycle_multipass_backtrack (const void *_data,
27390					 signed char *ready_try,
27391					 int n_ready ATTRIBUTE_UNUSED)
27392{
27393  const_ix86_first_cycle_multipass_data_t data
27394    = (const_ix86_first_cycle_multipass_data_t) _data;
27395  unsigned int i = 0;
27396  sbitmap_iterator sbi;
27397
27398  gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
27399  EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
27400    {
27401      ready_try[i] = 0;
27402    }
27403}
27404
27405/* Save the result of multipass lookahead scheduling for the next round.  */
27406static void
27407core2i7_first_cycle_multipass_end (const void *_data)
27408{
27409  const_ix86_first_cycle_multipass_data_t data
27410    = (const_ix86_first_cycle_multipass_data_t) _data;
27411  ix86_first_cycle_multipass_data_t next_data
27412    = ix86_first_cycle_multipass_data;
27413
27414  if (data != NULL)
27415    {
27416      next_data->ifetch_block_len = data->ifetch_block_len;
27417      next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
27418    }
27419}
27420
27421/* Deallocate target data.  */
27422static void
27423core2i7_first_cycle_multipass_fini (void *_data)
27424{
27425  ix86_first_cycle_multipass_data_t data
27426    = (ix86_first_cycle_multipass_data_t) _data;
27427
27428  if (data->ready_try_change)
27429    {
27430      sbitmap_free (data->ready_try_change);
27431      data->ready_try_change = NULL;
27432      data->ready_try_change_size = 0;
27433    }
27434}
27435
27436/* Prepare for scheduling pass.  */
27437static void
27438ix86_sched_init_global (FILE *, int, int)
27439{
27440  /* Install scheduling hooks for current CPU.  Some of these hooks are used
27441     in time-critical parts of the scheduler, so we only set them up when
27442     they are actually used.  */
27443  switch (ix86_tune)
27444    {
27445    case PROCESSOR_CORE2:
27446    case PROCESSOR_NEHALEM:
27447    case PROCESSOR_SANDYBRIDGE:
27448    case PROCESSOR_HASWELL:
27449      /* Do not perform multipass scheduling for pre-reload schedule
27450         to save compile time.  */
27451      if (reload_completed)
27452	{
27453	  targetm.sched.dfa_post_advance_cycle
27454	    = core2i7_dfa_post_advance_cycle;
27455	  targetm.sched.first_cycle_multipass_init
27456	    = core2i7_first_cycle_multipass_init;
27457	  targetm.sched.first_cycle_multipass_begin
27458	    = core2i7_first_cycle_multipass_begin;
27459	  targetm.sched.first_cycle_multipass_issue
27460	    = core2i7_first_cycle_multipass_issue;
27461	  targetm.sched.first_cycle_multipass_backtrack
27462	    = core2i7_first_cycle_multipass_backtrack;
27463	  targetm.sched.first_cycle_multipass_end
27464	    = core2i7_first_cycle_multipass_end;
27465	  targetm.sched.first_cycle_multipass_fini
27466	    = core2i7_first_cycle_multipass_fini;
27467
27468	  /* Set decoder parameters.  */
27469	  core2i7_secondary_decoder_max_insn_size = 8;
27470	  core2i7_ifetch_block_size = 16;
27471	  core2i7_ifetch_block_max_insns = 6;
27472	  break;
27473	}
27474      /* ... Fall through ...  */
27475    default:
27476      targetm.sched.dfa_post_advance_cycle = NULL;
27477      targetm.sched.first_cycle_multipass_init = NULL;
27478      targetm.sched.first_cycle_multipass_begin = NULL;
27479      targetm.sched.first_cycle_multipass_issue = NULL;
27480      targetm.sched.first_cycle_multipass_backtrack = NULL;
27481      targetm.sched.first_cycle_multipass_end = NULL;
27482      targetm.sched.first_cycle_multipass_fini = NULL;
27483      break;
27484    }
27485}
27486
27487
27488/* Compute the alignment given to a constant that is being placed in memory.
27489   EXP is the constant and ALIGN is the alignment that the object would
27490   ordinarily have.
27491   The value of this function is used instead of that alignment to align
27492   the object.  */
27493
27494int
27495ix86_constant_alignment (tree exp, int align)
27496{
27497  if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
27498      || TREE_CODE (exp) == INTEGER_CST)
27499    {
27500      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
27501	return 64;
27502      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
27503	return 128;
27504    }
27505  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
27506	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
27507    return BITS_PER_WORD;
27508
27509  return align;
27510}
27511
27512/* Compute the alignment for a static variable.
27513   TYPE is the data type, and ALIGN is the alignment that
27514   the object would ordinarily have.  The value of this function is used
27515   instead of that alignment to align the object.  */
27516
27517int
27518ix86_data_alignment (tree type, int align, bool opt)
27519{
27520  /* GCC 4.8 and earlier used to incorrectly assume this alignment even
27521     for symbols from other compilation units or symbols that don't need
27522     to bind locally.  In order to preserve some ABI compatibility with
27523     those compilers, ensure we don't decrease alignment from what we
27524     used to assume.  */
27525
27526  int max_align_compat = MIN (256, MAX_OFILE_ALIGNMENT);
27527
  /* A data structure equal to or greater than the size of a cache line
     (64 bytes in the Pentium 4 and other recent Intel processors, including
     processors based on the Intel Core microarchitecture) should be aligned
     so that its base address is a multiple of the cache line size.  */
27532
27533  int max_align
27534    = MIN ((unsigned) ix86_tune_cost->prefetch_block * 8, MAX_OFILE_ALIGNMENT);
27535
27536  if (max_align < BITS_PER_WORD)
27537    max_align = BITS_PER_WORD;
27538
27539  switch (ix86_align_data_type)
27540    {
27541    case ix86_align_data_type_abi: opt = false; break;
27542    case ix86_align_data_type_compat: max_align = BITS_PER_WORD; break;
27543    case ix86_align_data_type_cacheline: break;
27544    }
27545
27546  if (opt
27547      && AGGREGATE_TYPE_P (type)
27548      && TYPE_SIZE (type)
27549      && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
27550    {
27551      if (wi::geu_p (TYPE_SIZE (type), max_align_compat)
27552	  && align < max_align_compat)
27553	align = max_align_compat;
      if (wi::geu_p (TYPE_SIZE (type), max_align)
	  && align < max_align)
	align = max_align;
27557    }
27558
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
27561  if (TARGET_64BIT)
27562    {
27563      if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
27564	  && TYPE_SIZE (type)
27565	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27566	  && wi::geu_p (TYPE_SIZE (type), 128)
27567	  && align < 128)
27568	return 128;
27569    }
27570
27571  if (!opt)
27572    return align;
27573
27574  if (TREE_CODE (type) == ARRAY_TYPE)
27575    {
27576      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27577	return 64;
27578      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27579	return 128;
27580    }
27581  else if (TREE_CODE (type) == COMPLEX_TYPE)
27582    {
27584      if (TYPE_MODE (type) == DCmode && align < 64)
27585	return 64;
27586      if ((TYPE_MODE (type) == XCmode
27587	   || TYPE_MODE (type) == TCmode) && align < 128)
27588	return 128;
27589    }
27590  else if ((TREE_CODE (type) == RECORD_TYPE
27591	    || TREE_CODE (type) == UNION_TYPE
27592	    || TREE_CODE (type) == QUAL_UNION_TYPE)
27593	   && TYPE_FIELDS (type))
27594    {
27595      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27596	return 64;
27597      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27598	return 128;
27599    }
27600  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27601	   || TREE_CODE (type) == INTEGER_TYPE)
27602    {
27603      if (TYPE_MODE (type) == DFmode && align < 64)
27604	return 64;
27605      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27606	return 128;
27607    }
27608
27609  return align;
27610}
27611
27612/* Compute the alignment for a local variable or a stack slot.  EXP is
27613   the data type or decl itself, MODE is the widest mode available and
27614   ALIGN is the alignment that the object would ordinarily have.  The
27615   value of this macro is used instead of that alignment to align the
27616   object.  */
27617
27618unsigned int
27619ix86_local_alignment (tree exp, machine_mode mode,
27620		      unsigned int align)
27621{
27622  tree type, decl;
27623
27624  if (exp && DECL_P (exp))
27625    {
27626      type = TREE_TYPE (exp);
27627      decl = exp;
27628    }
27629  else
27630    {
27631      type = exp;
27632      decl = NULL;
27633    }
27634
27635  /* Don't do dynamic stack realignment for long long objects with
27636     -mpreferred-stack-boundary=2.  */
27637  if (!TARGET_64BIT
27638      && align == 64
27639      && ix86_preferred_stack_boundary < 64
27640      && (mode == DImode || (type && TYPE_MODE (type) == DImode))
27641      && (!type || !TYPE_USER_ALIGN (type))
27642      && (!decl || !DECL_USER_ALIGN (decl)))
27643    align = 32;
27644
27645  /* If TYPE is NULL, we are allocating a stack slot for caller-save
27646     register in MODE.  We will return the largest alignment of XF
27647     and DF.  */
27648  if (!type)
27649    {
27650      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
27651	align = GET_MODE_ALIGNMENT (DFmode);
27652      return align;
27653    }
27654
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  The exact wording is:
27657
27658     An array uses the same alignment as its elements, except that a local or
27659     global array variable of length at least 16 bytes or
27660     a C99 variable-length array variable always has alignment of at least 16 bytes.
27661
     This was added to allow use of aligned SSE instructions on arrays.  This
     rule is meant for static storage (where the compiler cannot do the
     analysis by itself).  We follow it for automatic variables only when
     convenient.  We fully control everything in the function being compiled,
     and functions from other units cannot rely on the alignment.

     Exclude the va_list type.  It is the common case of a local array where
     we cannot benefit from the alignment.

     TODO: Probably one should optimize for size only when the variable does
     not escape.  */
27672  if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
27673      && TARGET_SSE)
27674    {
27675      if (AGGREGATE_TYPE_P (type)
27676	  && (va_list_type_node == NULL_TREE
27677	      || (TYPE_MAIN_VARIANT (type)
27678		  != TYPE_MAIN_VARIANT (va_list_type_node)))
27679	  && TYPE_SIZE (type)
27680	  && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
27681	  && wi::geu_p (TYPE_SIZE (type), 16)
27682	  && align < 128)
27683	return 128;
27684    }
27685  if (TREE_CODE (type) == ARRAY_TYPE)
27686    {
27687      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
27688	return 64;
27689      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
27690	return 128;
27691    }
27692  else if (TREE_CODE (type) == COMPLEX_TYPE)
27693    {
27694      if (TYPE_MODE (type) == DCmode && align < 64)
27695	return 64;
27696      if ((TYPE_MODE (type) == XCmode
27697	   || TYPE_MODE (type) == TCmode) && align < 128)
27698	return 128;
27699    }
27700  else if ((TREE_CODE (type) == RECORD_TYPE
27701	    || TREE_CODE (type) == UNION_TYPE
27702	    || TREE_CODE (type) == QUAL_UNION_TYPE)
27703	   && TYPE_FIELDS (type))
27704    {
27705      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
27706	return 64;
27707      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
27708	return 128;
27709    }
27710  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
27711	   || TREE_CODE (type) == INTEGER_TYPE)
27712    {
27714      if (TYPE_MODE (type) == DFmode && align < 64)
27715	return 64;
27716      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
27717	return 128;
27718    }
27719  return align;
27720}
27721
27722/* Compute the minimum required alignment for dynamic stack realignment
27723   purposes for a local variable, parameter or a stack slot.  EXP is
27724   the data type or decl itself, MODE is its mode and ALIGN is the
27725   alignment that the object would ordinarily have.  */
27726
27727unsigned int
27728ix86_minimum_alignment (tree exp, machine_mode mode,
27729			unsigned int align)
27730{
27731  tree type, decl;
27732
27733  if (exp && DECL_P (exp))
27734    {
27735      type = TREE_TYPE (exp);
27736      decl = exp;
27737    }
27738  else
27739    {
27740      type = exp;
27741      decl = NULL;
27742    }
27743
27744  if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
27745    return align;
27746
27747  /* Don't do dynamic stack realignment for long long objects with
27748     -mpreferred-stack-boundary=2.  */
27749  if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
27750      && (!type || !TYPE_USER_ALIGN (type))
27751      && (!decl || !DECL_USER_ALIGN (decl)))
27752    return 32;
27753
27754  return align;
27755}
27756
27757/* Find a location for the static chain incoming to a nested function.
27758   This is a register, unless all free registers are used by arguments.  */
27759
27760static rtx
27761ix86_static_chain (const_tree fndecl_or_type, bool incoming_p)
27762{
27763  unsigned regno;
27764
27765  /* While this function won't be called by the middle-end when a static
27766     chain isn't needed, it's also used throughout the backend so it's
27767     easiest to keep this check centralized.  */
27768  if (DECL_P (fndecl_or_type) && !DECL_STATIC_CHAIN (fndecl_or_type))
27769    return NULL;
27770
27771  if (TARGET_64BIT)
27772    {
27773      /* We always use R10 in 64-bit mode.  */
27774      regno = R10_REG;
27775    }
27776  else
27777    {
27778      const_tree fntype, fndecl;
27779      unsigned int ccvt;
27780
27781      /* By default in 32-bit mode we use ECX to pass the static chain.  */
27782      regno = CX_REG;
27783
27784      if (TREE_CODE (fndecl_or_type) == FUNCTION_DECL)
27785	{
27786          fntype = TREE_TYPE (fndecl_or_type);
27787	  fndecl = fndecl_or_type;
27788	}
27789      else
27790	{
27791	  fntype = fndecl_or_type;
27792	  fndecl = NULL;
27793	}
27794
27795      ccvt = ix86_get_callcvt (fntype);
27796      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
27797	{
27798	  /* Fastcall functions use ecx/edx for arguments, which leaves
27799	     us with EAX for the static chain.
27800	     Thiscall functions use ecx for arguments, which also
27801	     leaves us with EAX for the static chain.  */
27802	  regno = AX_REG;
27803	}
27804      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
27805	{
	  /* Thiscall functions use ecx for arguments, which leaves
	     us with EAX and EDX for the static chain.
	     For ABI compatibility we use EAX.  */
27809	  regno = AX_REG;
27810	}
27811      else if (ix86_function_regparm (fntype, fndecl) == 3)
27812	{
27813	  /* For regparm 3, we have no free call-clobbered registers in
27814	     which to store the static chain.  In order to implement this,
27815	     we have the trampoline push the static chain to the stack.
27816	     However, we can't push a value below the return address when
27817	     we call the nested function directly, so we have to use an
27818	     alternate entry point.  For this we use ESI, and have the
27819	     alternate entry point push ESI, so that things appear the
27820	     same once we're executing the nested function.  */
27821	  if (incoming_p)
27822	    {
27823	      if (fndecl == current_function_decl)
27824		ix86_static_chain_on_stack = true;
27825	      return gen_frame_mem (SImode,
27826				    plus_constant (Pmode,
27827						   arg_pointer_rtx, -8));
27828	    }
27829	  regno = SI_REG;
27830	}
27831    }
27832
27833  return gen_rtx_REG (Pmode, regno);
27834}
27835
27836/* Emit RTL insns to initialize the variable parts of a trampoline.
27837   FNDECL is the decl of the target address; M_TRAMP is a MEM for
27838   the trampoline, and CHAIN_VALUE is an RTX for the static chain
27839   to be passed to the target function.  */
27840
27841static void
27842ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
27843{
27844  rtx mem, fnaddr;
27845  int opcode;
27846  int offset = 0;
27847
27848  fnaddr = XEXP (DECL_RTL (fndecl), 0);
27849
27850  if (TARGET_64BIT)
27851    {
27852      int size;
27853
      /* Load the function address into r11.  Try to load the address using
	 the shorter movl instead of movabs.  We may want to support movq for
	 kernel mode, but the kernel does not use trampolines at the moment.
	 FNADDR is a 32-bit address and may not be in DImode when
	 ptr_mode == SImode.  Always use movl in this case.  */
27860      if (ptr_mode == SImode
27861	  || x86_64_zext_immediate_operand (fnaddr, VOIDmode))
27862	{
27863	  fnaddr = copy_addr_to_reg (fnaddr);
27864
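	  /* Bytes 0x41 0xbb (the HImode constant 0xbb41, stored little-endian):
	     REX.B prefix + movl $imm32, %r11d.  */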
27865	  mem = adjust_address (m_tramp, HImode, offset);
27866	  emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
27867
27868	  mem = adjust_address (m_tramp, SImode, offset + 2);
27869	  emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
27870	  offset += 6;
27871	}
27872      else
27873	{
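	  /* Bytes 0x49 0xbb: REX.W+B prefix + movabs $imm64, %r11.  */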
27874	  mem = adjust_address (m_tramp, HImode, offset);
27875	  emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
27876
27877	  mem = adjust_address (m_tramp, DImode, offset + 2);
27878	  emit_move_insn (mem, fnaddr);
27879	  offset += 10;
27880	}
27881
      /* Load the static chain into r10 using movabs (bytes 0x49 0xba + imm64).
	 Use the shorter movl (0x41 0xba + imm32) instead of movabs when
	 ptr_mode == SImode.  */
27884      if (ptr_mode == SImode)
27885	{
27886	  opcode = 0xba41;
27887	  size = 6;
27888	}
27889      else
27890	{
27891	  opcode = 0xba49;
27892	  size = 10;
27893	}
27894
27895      mem = adjust_address (m_tramp, HImode, offset);
27896      emit_move_insn (mem, gen_int_mode (opcode, HImode));
27897
27898      mem = adjust_address (m_tramp, ptr_mode, offset + 2);
27899      emit_move_insn (mem, chain_value);
27900      offset += size;
27901
27902      /* Jump to r11; the last (unused) byte is a nop, only there to
27903	 pad the write out to a single 32-bit store.  */
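      /* Byte sequence 0x49 0xff 0xe3 0x90: rex.WB jmp *%r11, then a nop.  */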
27904      mem = adjust_address (m_tramp, SImode, offset);
27905      emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
27906      offset += 4;
27907    }
27908  else
27909    {
27910      rtx disp, chain;
27911
27912      /* Depending on the static chain location, either load a register
27913	 with a constant, or push the constant to the stack.  All of the
27914	 instructions are the same size.  */
27915      chain = ix86_static_chain (fndecl, true);
27916      if (REG_P (chain))
27917	{
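	  /* 0xb8 and 0xb9 are the opcodes for movl $imm32 into %eax and
	     %ecx respectively.  */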
27918	  switch (REGNO (chain))
27919	    {
27920	    case AX_REG:
27921	      opcode = 0xb8; break;
27922	    case CX_REG:
27923	      opcode = 0xb9; break;
27924	    default:
27925	      gcc_unreachable ();
27926	    }
27927	}
      else
	/* The static chain is pushed onto the stack: pushl $imm32 (0x68).  */
	opcode = 0x68;
27930
27931      mem = adjust_address (m_tramp, QImode, offset);
27932      emit_move_insn (mem, gen_int_mode (opcode, QImode));
27933
27934      mem = adjust_address (m_tramp, SImode, offset + 1);
27935      emit_move_insn (mem, chain_value);
27936      offset += 5;
27937
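      /* 0xe9 is the opcode for jmp rel32; the displacement is computed and
	 stored below.  */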
27938      mem = adjust_address (m_tramp, QImode, offset);
27939      emit_move_insn (mem, gen_int_mode (0xe9, QImode));
27940
27941      mem = adjust_address (m_tramp, SImode, offset + 1);
27942
27943      /* Compute offset from the end of the jmp to the target function.
27944	 In the case in which the trampoline stores the static chain on
27945	 the stack, we need to skip the first insn which pushes the
27946	 (call-saved) register static chain; this push is 1 byte.  */
27947      offset += 5;
27948      disp = expand_binop (SImode, sub_optab, fnaddr,
27949			   plus_constant (Pmode, XEXP (m_tramp, 0),
27950					  offset - (MEM_P (chain) ? 1 : 0)),
27951			   NULL_RTX, 1, OPTAB_DIRECT);
27952      emit_move_insn (mem, disp);
27953    }
27954
27955  gcc_assert (offset <= TRAMPOLINE_SIZE);
27956
27957#ifdef HAVE_ENABLE_EXECUTE_STACK
27958#ifdef CHECK_EXECUTE_STACK_ENABLED
27959  if (CHECK_EXECUTE_STACK_ENABLED)
27960#endif
27961  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
27962		     LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
27963#endif
27964}
27965
27966/* The following file contains several enumerations and data structures
27967   built from the definitions in i386-builtin-types.def.  */
27968
27969#include "i386-builtin-types.inc"
27970
27971/* Table for the ix86 builtin non-function types.  */
27972static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
27973
27974/* Retrieve an element from the above table, building some of
27975   the types lazily.  */
27976
27977static tree
27978ix86_get_builtin_type (enum ix86_builtin_type tcode)
27979{
27980  unsigned int index;
27981  tree type, itype;
27982
27983  gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
27984
27985  type = ix86_builtin_type_tab[(int) tcode];
27986  if (type != NULL)
27987    return type;
27988
27989  gcc_assert (tcode > IX86_BT_LAST_PRIM);
27990  if (tcode <= IX86_BT_LAST_VECT)
27991    {
27992      machine_mode mode;
27993
27994      index = tcode - IX86_BT_LAST_PRIM - 1;
27995      itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
27996      mode = ix86_builtin_type_vect_mode[index];
27997
27998      type = build_vector_type_for_mode (itype, mode);
27999    }
28000  else
28001    {
28002      int quals;
28003
28004      index = tcode - IX86_BT_LAST_VECT - 1;
28005      if (tcode <= IX86_BT_LAST_PTR)
28006	quals = TYPE_UNQUALIFIED;
28007      else
28008	quals = TYPE_QUAL_CONST;
28009
28010      itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
28011      if (quals != TYPE_UNQUALIFIED)
28012	itype = build_qualified_type (itype, quals);
28013
28014      type = build_pointer_type (itype);
28015    }
28016
28017  ix86_builtin_type_tab[(int) tcode] = type;
28018  return type;
28019}
28020
28021/* Table for the ix86 builtin function types.  */
28022static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
28023
28024/* Retrieve an element from the above table, building some of
28025   the types lazily.  */
28026
28027static tree
28028ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
28029{
28030  tree type;
28031
28032  gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
28033
28034  type = ix86_builtin_func_type_tab[(int) tcode];
28035  if (type != NULL)
28036    return type;
28037
28038  if (tcode <= IX86_BT_LAST_FUNC)
28039    {
28040      unsigned start = ix86_builtin_func_start[(int) tcode];
28041      unsigned after = ix86_builtin_func_start[(int) tcode + 1];
28042      tree rtype, atype, args = void_list_node;
28043      unsigned i;
28044
28045      rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
28046      for (i = after - 1; i > start; --i)
28047	{
28048	  atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
28049	  args = tree_cons (NULL, atype, args);
28050	}
28051
28052      type = build_function_type (rtype, args);
28053    }
28054  else
28055    {
28056      unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
28057      enum ix86_builtin_func_type icode;
28058
28059      icode = ix86_builtin_func_alias_base[index];
28060      type = ix86_get_builtin_func_type (icode);
28061    }
28062
28063  ix86_builtin_func_type_tab[(int) tcode] = type;
28064  return type;
28065}
28066
28067
28068/* Codes for all the SSE/MMX builtins.  */
28069enum ix86_builtins
28070{
28071  IX86_BUILTIN_ADDPS,
28072  IX86_BUILTIN_ADDSS,
28073  IX86_BUILTIN_DIVPS,
28074  IX86_BUILTIN_DIVSS,
28075  IX86_BUILTIN_MULPS,
28076  IX86_BUILTIN_MULSS,
28077  IX86_BUILTIN_SUBPS,
28078  IX86_BUILTIN_SUBSS,
28079
28080  IX86_BUILTIN_CMPEQPS,
28081  IX86_BUILTIN_CMPLTPS,
28082  IX86_BUILTIN_CMPLEPS,
28083  IX86_BUILTIN_CMPGTPS,
28084  IX86_BUILTIN_CMPGEPS,
28085  IX86_BUILTIN_CMPNEQPS,
28086  IX86_BUILTIN_CMPNLTPS,
28087  IX86_BUILTIN_CMPNLEPS,
28088  IX86_BUILTIN_CMPNGTPS,
28089  IX86_BUILTIN_CMPNGEPS,
28090  IX86_BUILTIN_CMPORDPS,
28091  IX86_BUILTIN_CMPUNORDPS,
28092  IX86_BUILTIN_CMPEQSS,
28093  IX86_BUILTIN_CMPLTSS,
28094  IX86_BUILTIN_CMPLESS,
28095  IX86_BUILTIN_CMPNEQSS,
28096  IX86_BUILTIN_CMPNLTSS,
28097  IX86_BUILTIN_CMPNLESS,
28098  IX86_BUILTIN_CMPORDSS,
28099  IX86_BUILTIN_CMPUNORDSS,
28100
28101  IX86_BUILTIN_COMIEQSS,
28102  IX86_BUILTIN_COMILTSS,
28103  IX86_BUILTIN_COMILESS,
28104  IX86_BUILTIN_COMIGTSS,
28105  IX86_BUILTIN_COMIGESS,
28106  IX86_BUILTIN_COMINEQSS,
28107  IX86_BUILTIN_UCOMIEQSS,
28108  IX86_BUILTIN_UCOMILTSS,
28109  IX86_BUILTIN_UCOMILESS,
28110  IX86_BUILTIN_UCOMIGTSS,
28111  IX86_BUILTIN_UCOMIGESS,
28112  IX86_BUILTIN_UCOMINEQSS,
28113
28114  IX86_BUILTIN_CVTPI2PS,
28115  IX86_BUILTIN_CVTPS2PI,
28116  IX86_BUILTIN_CVTSI2SS,
28117  IX86_BUILTIN_CVTSI642SS,
28118  IX86_BUILTIN_CVTSS2SI,
28119  IX86_BUILTIN_CVTSS2SI64,
28120  IX86_BUILTIN_CVTTPS2PI,
28121  IX86_BUILTIN_CVTTSS2SI,
28122  IX86_BUILTIN_CVTTSS2SI64,
28123
28124  IX86_BUILTIN_MAXPS,
28125  IX86_BUILTIN_MAXSS,
28126  IX86_BUILTIN_MINPS,
28127  IX86_BUILTIN_MINSS,
28128
28129  IX86_BUILTIN_LOADUPS,
28130  IX86_BUILTIN_STOREUPS,
28131  IX86_BUILTIN_MOVSS,
28132
28133  IX86_BUILTIN_MOVHLPS,
28134  IX86_BUILTIN_MOVLHPS,
28135  IX86_BUILTIN_LOADHPS,
28136  IX86_BUILTIN_LOADLPS,
28137  IX86_BUILTIN_STOREHPS,
28138  IX86_BUILTIN_STORELPS,
28139
28140  IX86_BUILTIN_MASKMOVQ,
28141  IX86_BUILTIN_MOVMSKPS,
28142  IX86_BUILTIN_PMOVMSKB,
28143
28144  IX86_BUILTIN_MOVNTPS,
28145  IX86_BUILTIN_MOVNTQ,
28146
28147  IX86_BUILTIN_LOADDQU,
28148  IX86_BUILTIN_STOREDQU,
28149
28150  IX86_BUILTIN_PACKSSWB,
28151  IX86_BUILTIN_PACKSSDW,
28152  IX86_BUILTIN_PACKUSWB,
28153
28154  IX86_BUILTIN_PADDB,
28155  IX86_BUILTIN_PADDW,
28156  IX86_BUILTIN_PADDD,
28157  IX86_BUILTIN_PADDQ,
28158  IX86_BUILTIN_PADDSB,
28159  IX86_BUILTIN_PADDSW,
28160  IX86_BUILTIN_PADDUSB,
28161  IX86_BUILTIN_PADDUSW,
28162  IX86_BUILTIN_PSUBB,
28163  IX86_BUILTIN_PSUBW,
28164  IX86_BUILTIN_PSUBD,
28165  IX86_BUILTIN_PSUBQ,
28166  IX86_BUILTIN_PSUBSB,
28167  IX86_BUILTIN_PSUBSW,
28168  IX86_BUILTIN_PSUBUSB,
28169  IX86_BUILTIN_PSUBUSW,
28170
28171  IX86_BUILTIN_PAND,
28172  IX86_BUILTIN_PANDN,
28173  IX86_BUILTIN_POR,
28174  IX86_BUILTIN_PXOR,
28175
28176  IX86_BUILTIN_PAVGB,
28177  IX86_BUILTIN_PAVGW,
28178
28179  IX86_BUILTIN_PCMPEQB,
28180  IX86_BUILTIN_PCMPEQW,
28181  IX86_BUILTIN_PCMPEQD,
28182  IX86_BUILTIN_PCMPGTB,
28183  IX86_BUILTIN_PCMPGTW,
28184  IX86_BUILTIN_PCMPGTD,
28185
28186  IX86_BUILTIN_PMADDWD,
28187
28188  IX86_BUILTIN_PMAXSW,
28189  IX86_BUILTIN_PMAXUB,
28190  IX86_BUILTIN_PMINSW,
28191  IX86_BUILTIN_PMINUB,
28192
28193  IX86_BUILTIN_PMULHUW,
28194  IX86_BUILTIN_PMULHW,
28195  IX86_BUILTIN_PMULLW,
28196
28197  IX86_BUILTIN_PSADBW,
28198  IX86_BUILTIN_PSHUFW,
28199
28200  IX86_BUILTIN_PSLLW,
28201  IX86_BUILTIN_PSLLD,
28202  IX86_BUILTIN_PSLLQ,
28203  IX86_BUILTIN_PSRAW,
28204  IX86_BUILTIN_PSRAD,
28205  IX86_BUILTIN_PSRLW,
28206  IX86_BUILTIN_PSRLD,
28207  IX86_BUILTIN_PSRLQ,
28208  IX86_BUILTIN_PSLLWI,
28209  IX86_BUILTIN_PSLLDI,
28210  IX86_BUILTIN_PSLLQI,
28211  IX86_BUILTIN_PSRAWI,
28212  IX86_BUILTIN_PSRADI,
28213  IX86_BUILTIN_PSRLWI,
28214  IX86_BUILTIN_PSRLDI,
28215  IX86_BUILTIN_PSRLQI,
28216
28217  IX86_BUILTIN_PUNPCKHBW,
28218  IX86_BUILTIN_PUNPCKHWD,
28219  IX86_BUILTIN_PUNPCKHDQ,
28220  IX86_BUILTIN_PUNPCKLBW,
28221  IX86_BUILTIN_PUNPCKLWD,
28222  IX86_BUILTIN_PUNPCKLDQ,
28223
28224  IX86_BUILTIN_SHUFPS,
28225
28226  IX86_BUILTIN_RCPPS,
28227  IX86_BUILTIN_RCPSS,
28228  IX86_BUILTIN_RSQRTPS,
28229  IX86_BUILTIN_RSQRTPS_NR,
28230  IX86_BUILTIN_RSQRTSS,
28231  IX86_BUILTIN_RSQRTF,
28232  IX86_BUILTIN_SQRTPS,
28233  IX86_BUILTIN_SQRTPS_NR,
28234  IX86_BUILTIN_SQRTSS,
28235
28236  IX86_BUILTIN_UNPCKHPS,
28237  IX86_BUILTIN_UNPCKLPS,
28238
28239  IX86_BUILTIN_ANDPS,
28240  IX86_BUILTIN_ANDNPS,
28241  IX86_BUILTIN_ORPS,
28242  IX86_BUILTIN_XORPS,
28243
28244  IX86_BUILTIN_EMMS,
28245  IX86_BUILTIN_LDMXCSR,
28246  IX86_BUILTIN_STMXCSR,
28247  IX86_BUILTIN_SFENCE,
28248
28249  IX86_BUILTIN_FXSAVE,
28250  IX86_BUILTIN_FXRSTOR,
28251  IX86_BUILTIN_FXSAVE64,
28252  IX86_BUILTIN_FXRSTOR64,
28253
28254  IX86_BUILTIN_XSAVE,
28255  IX86_BUILTIN_XRSTOR,
28256  IX86_BUILTIN_XSAVE64,
28257  IX86_BUILTIN_XRSTOR64,
28258
28259  IX86_BUILTIN_XSAVEOPT,
28260  IX86_BUILTIN_XSAVEOPT64,
28261
28262  IX86_BUILTIN_XSAVEC,
28263  IX86_BUILTIN_XSAVEC64,
28264
28265  IX86_BUILTIN_XSAVES,
28266  IX86_BUILTIN_XRSTORS,
28267  IX86_BUILTIN_XSAVES64,
28268  IX86_BUILTIN_XRSTORS64,
28269
28270  /* 3DNow! Original.  */
28271  IX86_BUILTIN_FEMMS,
28272  IX86_BUILTIN_PAVGUSB,
28273  IX86_BUILTIN_PF2ID,
28274  IX86_BUILTIN_PFACC,
28275  IX86_BUILTIN_PFADD,
28276  IX86_BUILTIN_PFCMPEQ,
28277  IX86_BUILTIN_PFCMPGE,
28278  IX86_BUILTIN_PFCMPGT,
28279  IX86_BUILTIN_PFMAX,
28280  IX86_BUILTIN_PFMIN,
28281  IX86_BUILTIN_PFMUL,
28282  IX86_BUILTIN_PFRCP,
28283  IX86_BUILTIN_PFRCPIT1,
28284  IX86_BUILTIN_PFRCPIT2,
28285  IX86_BUILTIN_PFRSQIT1,
28286  IX86_BUILTIN_PFRSQRT,
28287  IX86_BUILTIN_PFSUB,
28288  IX86_BUILTIN_PFSUBR,
28289  IX86_BUILTIN_PI2FD,
28290  IX86_BUILTIN_PMULHRW,
28291
28292  /* 3DNow! Athlon Extensions.  */
28293  IX86_BUILTIN_PF2IW,
28294  IX86_BUILTIN_PFNACC,
28295  IX86_BUILTIN_PFPNACC,
28296  IX86_BUILTIN_PI2FW,
28297  IX86_BUILTIN_PSWAPDSI,
28298  IX86_BUILTIN_PSWAPDSF,
28299
28300  /* SSE2.  */
28301  IX86_BUILTIN_ADDPD,
28302  IX86_BUILTIN_ADDSD,
28303  IX86_BUILTIN_DIVPD,
28304  IX86_BUILTIN_DIVSD,
28305  IX86_BUILTIN_MULPD,
28306  IX86_BUILTIN_MULSD,
28307  IX86_BUILTIN_SUBPD,
28308  IX86_BUILTIN_SUBSD,
28309
28310  IX86_BUILTIN_CMPEQPD,
28311  IX86_BUILTIN_CMPLTPD,
28312  IX86_BUILTIN_CMPLEPD,
28313  IX86_BUILTIN_CMPGTPD,
28314  IX86_BUILTIN_CMPGEPD,
28315  IX86_BUILTIN_CMPNEQPD,
28316  IX86_BUILTIN_CMPNLTPD,
28317  IX86_BUILTIN_CMPNLEPD,
28318  IX86_BUILTIN_CMPNGTPD,
28319  IX86_BUILTIN_CMPNGEPD,
28320  IX86_BUILTIN_CMPORDPD,
28321  IX86_BUILTIN_CMPUNORDPD,
28322  IX86_BUILTIN_CMPEQSD,
28323  IX86_BUILTIN_CMPLTSD,
28324  IX86_BUILTIN_CMPLESD,
28325  IX86_BUILTIN_CMPNEQSD,
28326  IX86_BUILTIN_CMPNLTSD,
28327  IX86_BUILTIN_CMPNLESD,
28328  IX86_BUILTIN_CMPORDSD,
28329  IX86_BUILTIN_CMPUNORDSD,
28330
28331  IX86_BUILTIN_COMIEQSD,
28332  IX86_BUILTIN_COMILTSD,
28333  IX86_BUILTIN_COMILESD,
28334  IX86_BUILTIN_COMIGTSD,
28335  IX86_BUILTIN_COMIGESD,
28336  IX86_BUILTIN_COMINEQSD,
28337  IX86_BUILTIN_UCOMIEQSD,
28338  IX86_BUILTIN_UCOMILTSD,
28339  IX86_BUILTIN_UCOMILESD,
28340  IX86_BUILTIN_UCOMIGTSD,
28341  IX86_BUILTIN_UCOMIGESD,
28342  IX86_BUILTIN_UCOMINEQSD,
28343
28344  IX86_BUILTIN_MAXPD,
28345  IX86_BUILTIN_MAXSD,
28346  IX86_BUILTIN_MINPD,
28347  IX86_BUILTIN_MINSD,
28348
28349  IX86_BUILTIN_ANDPD,
28350  IX86_BUILTIN_ANDNPD,
28351  IX86_BUILTIN_ORPD,
28352  IX86_BUILTIN_XORPD,
28353
28354  IX86_BUILTIN_SQRTPD,
28355  IX86_BUILTIN_SQRTSD,
28356
28357  IX86_BUILTIN_UNPCKHPD,
28358  IX86_BUILTIN_UNPCKLPD,
28359
28360  IX86_BUILTIN_SHUFPD,
28361
28362  IX86_BUILTIN_LOADUPD,
28363  IX86_BUILTIN_STOREUPD,
28364  IX86_BUILTIN_MOVSD,
28365
28366  IX86_BUILTIN_LOADHPD,
28367  IX86_BUILTIN_LOADLPD,
28368
28369  IX86_BUILTIN_CVTDQ2PD,
28370  IX86_BUILTIN_CVTDQ2PS,
28371
28372  IX86_BUILTIN_CVTPD2DQ,
28373  IX86_BUILTIN_CVTPD2PI,
28374  IX86_BUILTIN_CVTPD2PS,
28375  IX86_BUILTIN_CVTTPD2DQ,
28376  IX86_BUILTIN_CVTTPD2PI,
28377
28378  IX86_BUILTIN_CVTPI2PD,
28379  IX86_BUILTIN_CVTSI2SD,
28380  IX86_BUILTIN_CVTSI642SD,
28381
28382  IX86_BUILTIN_CVTSD2SI,
28383  IX86_BUILTIN_CVTSD2SI64,
28384  IX86_BUILTIN_CVTSD2SS,
28385  IX86_BUILTIN_CVTSS2SD,
28386  IX86_BUILTIN_CVTTSD2SI,
28387  IX86_BUILTIN_CVTTSD2SI64,
28388
28389  IX86_BUILTIN_CVTPS2DQ,
28390  IX86_BUILTIN_CVTPS2PD,
28391  IX86_BUILTIN_CVTTPS2DQ,
28392
28393  IX86_BUILTIN_MOVNTI,
28394  IX86_BUILTIN_MOVNTI64,
28395  IX86_BUILTIN_MOVNTPD,
28396  IX86_BUILTIN_MOVNTDQ,
28397
28398  IX86_BUILTIN_MOVQ128,
28399
28400  /* SSE2 MMX.  */
28401  IX86_BUILTIN_MASKMOVDQU,
28402  IX86_BUILTIN_MOVMSKPD,
28403  IX86_BUILTIN_PMOVMSKB128,
28404
28405  IX86_BUILTIN_PACKSSWB128,
28406  IX86_BUILTIN_PACKSSDW128,
28407  IX86_BUILTIN_PACKUSWB128,
28408
28409  IX86_BUILTIN_PADDB128,
28410  IX86_BUILTIN_PADDW128,
28411  IX86_BUILTIN_PADDD128,
28412  IX86_BUILTIN_PADDQ128,
28413  IX86_BUILTIN_PADDSB128,
28414  IX86_BUILTIN_PADDSW128,
28415  IX86_BUILTIN_PADDUSB128,
28416  IX86_BUILTIN_PADDUSW128,
28417  IX86_BUILTIN_PSUBB128,
28418  IX86_BUILTIN_PSUBW128,
28419  IX86_BUILTIN_PSUBD128,
28420  IX86_BUILTIN_PSUBQ128,
28421  IX86_BUILTIN_PSUBSB128,
28422  IX86_BUILTIN_PSUBSW128,
28423  IX86_BUILTIN_PSUBUSB128,
28424  IX86_BUILTIN_PSUBUSW128,
28425
28426  IX86_BUILTIN_PAND128,
28427  IX86_BUILTIN_PANDN128,
28428  IX86_BUILTIN_POR128,
28429  IX86_BUILTIN_PXOR128,
28430
28431  IX86_BUILTIN_PAVGB128,
28432  IX86_BUILTIN_PAVGW128,
28433
28434  IX86_BUILTIN_PCMPEQB128,
28435  IX86_BUILTIN_PCMPEQW128,
28436  IX86_BUILTIN_PCMPEQD128,
28437  IX86_BUILTIN_PCMPGTB128,
28438  IX86_BUILTIN_PCMPGTW128,
28439  IX86_BUILTIN_PCMPGTD128,
28440
28441  IX86_BUILTIN_PMADDWD128,
28442
28443  IX86_BUILTIN_PMAXSW128,
28444  IX86_BUILTIN_PMAXUB128,
28445  IX86_BUILTIN_PMINSW128,
28446  IX86_BUILTIN_PMINUB128,
28447
28448  IX86_BUILTIN_PMULUDQ,
28449  IX86_BUILTIN_PMULUDQ128,
28450  IX86_BUILTIN_PMULHUW128,
28451  IX86_BUILTIN_PMULHW128,
28452  IX86_BUILTIN_PMULLW128,
28453
28454  IX86_BUILTIN_PSADBW128,
28455  IX86_BUILTIN_PSHUFHW,
28456  IX86_BUILTIN_PSHUFLW,
28457  IX86_BUILTIN_PSHUFD,
28458
28459  IX86_BUILTIN_PSLLDQI128,
28460  IX86_BUILTIN_PSLLWI128,
28461  IX86_BUILTIN_PSLLDI128,
28462  IX86_BUILTIN_PSLLQI128,
28463  IX86_BUILTIN_PSRAWI128,
28464  IX86_BUILTIN_PSRADI128,
28465  IX86_BUILTIN_PSRLDQI128,
28466  IX86_BUILTIN_PSRLWI128,
28467  IX86_BUILTIN_PSRLDI128,
28468  IX86_BUILTIN_PSRLQI128,
28469
28470  IX86_BUILTIN_PSLLDQ128,
28471  IX86_BUILTIN_PSLLW128,
28472  IX86_BUILTIN_PSLLD128,
28473  IX86_BUILTIN_PSLLQ128,
28474  IX86_BUILTIN_PSRAW128,
28475  IX86_BUILTIN_PSRAD128,
28476  IX86_BUILTIN_PSRLW128,
28477  IX86_BUILTIN_PSRLD128,
28478  IX86_BUILTIN_PSRLQ128,
28479
28480  IX86_BUILTIN_PUNPCKHBW128,
28481  IX86_BUILTIN_PUNPCKHWD128,
28482  IX86_BUILTIN_PUNPCKHDQ128,
28483  IX86_BUILTIN_PUNPCKHQDQ128,
28484  IX86_BUILTIN_PUNPCKLBW128,
28485  IX86_BUILTIN_PUNPCKLWD128,
28486  IX86_BUILTIN_PUNPCKLDQ128,
28487  IX86_BUILTIN_PUNPCKLQDQ128,
28488
28489  IX86_BUILTIN_CLFLUSH,
28490  IX86_BUILTIN_MFENCE,
28491  IX86_BUILTIN_LFENCE,
28492  IX86_BUILTIN_PAUSE,
28493
28494  IX86_BUILTIN_FNSTENV,
28495  IX86_BUILTIN_FLDENV,
28496  IX86_BUILTIN_FNSTSW,
28497  IX86_BUILTIN_FNCLEX,
28498
28499  IX86_BUILTIN_BSRSI,
28500  IX86_BUILTIN_BSRDI,
28501  IX86_BUILTIN_RDPMC,
28502  IX86_BUILTIN_RDTSC,
28503  IX86_BUILTIN_RDTSCP,
28504  IX86_BUILTIN_ROLQI,
28505  IX86_BUILTIN_ROLHI,
28506  IX86_BUILTIN_RORQI,
28507  IX86_BUILTIN_RORHI,
28508
28509  /* SSE3.  */
28510  IX86_BUILTIN_ADDSUBPS,
28511  IX86_BUILTIN_HADDPS,
28512  IX86_BUILTIN_HSUBPS,
28513  IX86_BUILTIN_MOVSHDUP,
28514  IX86_BUILTIN_MOVSLDUP,
28515  IX86_BUILTIN_ADDSUBPD,
28516  IX86_BUILTIN_HADDPD,
28517  IX86_BUILTIN_HSUBPD,
28518  IX86_BUILTIN_LDDQU,
28519
28520  IX86_BUILTIN_MONITOR,
28521  IX86_BUILTIN_MWAIT,
28522
28523  /* SSSE3.  */
28524  IX86_BUILTIN_PHADDW,
28525  IX86_BUILTIN_PHADDD,
28526  IX86_BUILTIN_PHADDSW,
28527  IX86_BUILTIN_PHSUBW,
28528  IX86_BUILTIN_PHSUBD,
28529  IX86_BUILTIN_PHSUBSW,
28530  IX86_BUILTIN_PMADDUBSW,
28531  IX86_BUILTIN_PMULHRSW,
28532  IX86_BUILTIN_PSHUFB,
28533  IX86_BUILTIN_PSIGNB,
28534  IX86_BUILTIN_PSIGNW,
28535  IX86_BUILTIN_PSIGND,
28536  IX86_BUILTIN_PALIGNR,
28537  IX86_BUILTIN_PABSB,
28538  IX86_BUILTIN_PABSW,
28539  IX86_BUILTIN_PABSD,
28540
28541  IX86_BUILTIN_PHADDW128,
28542  IX86_BUILTIN_PHADDD128,
28543  IX86_BUILTIN_PHADDSW128,
28544  IX86_BUILTIN_PHSUBW128,
28545  IX86_BUILTIN_PHSUBD128,
28546  IX86_BUILTIN_PHSUBSW128,
28547  IX86_BUILTIN_PMADDUBSW128,
28548  IX86_BUILTIN_PMULHRSW128,
28549  IX86_BUILTIN_PSHUFB128,
28550  IX86_BUILTIN_PSIGNB128,
28551  IX86_BUILTIN_PSIGNW128,
28552  IX86_BUILTIN_PSIGND128,
28553  IX86_BUILTIN_PALIGNR128,
28554  IX86_BUILTIN_PABSB128,
28555  IX86_BUILTIN_PABSW128,
28556  IX86_BUILTIN_PABSD128,
28557
28558  /* AMDFAM10 - SSE4A New Instructions.  */
28559  IX86_BUILTIN_MOVNTSD,
28560  IX86_BUILTIN_MOVNTSS,
28561  IX86_BUILTIN_EXTRQI,
28562  IX86_BUILTIN_EXTRQ,
28563  IX86_BUILTIN_INSERTQI,
28564  IX86_BUILTIN_INSERTQ,
28565
28566  /* SSE4.1.  */
28567  IX86_BUILTIN_BLENDPD,
28568  IX86_BUILTIN_BLENDPS,
28569  IX86_BUILTIN_BLENDVPD,
28570  IX86_BUILTIN_BLENDVPS,
28571  IX86_BUILTIN_PBLENDVB128,
28572  IX86_BUILTIN_PBLENDW128,
28573
28574  IX86_BUILTIN_DPPD,
28575  IX86_BUILTIN_DPPS,
28576
28577  IX86_BUILTIN_INSERTPS128,
28578
28579  IX86_BUILTIN_MOVNTDQA,
28580  IX86_BUILTIN_MPSADBW128,
28581  IX86_BUILTIN_PACKUSDW128,
28582  IX86_BUILTIN_PCMPEQQ,
28583  IX86_BUILTIN_PHMINPOSUW128,
28584
28585  IX86_BUILTIN_PMAXSB128,
28586  IX86_BUILTIN_PMAXSD128,
28587  IX86_BUILTIN_PMAXUD128,
28588  IX86_BUILTIN_PMAXUW128,
28589
28590  IX86_BUILTIN_PMINSB128,
28591  IX86_BUILTIN_PMINSD128,
28592  IX86_BUILTIN_PMINUD128,
28593  IX86_BUILTIN_PMINUW128,
28594
28595  IX86_BUILTIN_PMOVSXBW128,
28596  IX86_BUILTIN_PMOVSXBD128,
28597  IX86_BUILTIN_PMOVSXBQ128,
28598  IX86_BUILTIN_PMOVSXWD128,
28599  IX86_BUILTIN_PMOVSXWQ128,
28600  IX86_BUILTIN_PMOVSXDQ128,
28601
28602  IX86_BUILTIN_PMOVZXBW128,
28603  IX86_BUILTIN_PMOVZXBD128,
28604  IX86_BUILTIN_PMOVZXBQ128,
28605  IX86_BUILTIN_PMOVZXWD128,
28606  IX86_BUILTIN_PMOVZXWQ128,
28607  IX86_BUILTIN_PMOVZXDQ128,
28608
28609  IX86_BUILTIN_PMULDQ128,
28610  IX86_BUILTIN_PMULLD128,
28611
28612  IX86_BUILTIN_ROUNDSD,
28613  IX86_BUILTIN_ROUNDSS,
28614
28615  IX86_BUILTIN_ROUNDPD,
28616  IX86_BUILTIN_ROUNDPS,
28617
28618  IX86_BUILTIN_FLOORPD,
28619  IX86_BUILTIN_CEILPD,
28620  IX86_BUILTIN_TRUNCPD,
28621  IX86_BUILTIN_RINTPD,
28622  IX86_BUILTIN_ROUNDPD_AZ,
28623
28624  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX,
28625  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX,
28626  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX,
28627
28628  IX86_BUILTIN_FLOORPS,
28629  IX86_BUILTIN_CEILPS,
28630  IX86_BUILTIN_TRUNCPS,
28631  IX86_BUILTIN_RINTPS,
28632  IX86_BUILTIN_ROUNDPS_AZ,
28633
28634  IX86_BUILTIN_FLOORPS_SFIX,
28635  IX86_BUILTIN_CEILPS_SFIX,
28636  IX86_BUILTIN_ROUNDPS_AZ_SFIX,
28637
28638  IX86_BUILTIN_PTESTZ,
28639  IX86_BUILTIN_PTESTC,
28640  IX86_BUILTIN_PTESTNZC,
28641
28642  IX86_BUILTIN_VEC_INIT_V2SI,
28643  IX86_BUILTIN_VEC_INIT_V4HI,
28644  IX86_BUILTIN_VEC_INIT_V8QI,
28645  IX86_BUILTIN_VEC_EXT_V2DF,
28646  IX86_BUILTIN_VEC_EXT_V2DI,
28647  IX86_BUILTIN_VEC_EXT_V4SF,
28648  IX86_BUILTIN_VEC_EXT_V4SI,
28649  IX86_BUILTIN_VEC_EXT_V8HI,
28650  IX86_BUILTIN_VEC_EXT_V2SI,
28651  IX86_BUILTIN_VEC_EXT_V4HI,
28652  IX86_BUILTIN_VEC_EXT_V16QI,
28653  IX86_BUILTIN_VEC_SET_V2DI,
28654  IX86_BUILTIN_VEC_SET_V4SF,
28655  IX86_BUILTIN_VEC_SET_V4SI,
28656  IX86_BUILTIN_VEC_SET_V8HI,
28657  IX86_BUILTIN_VEC_SET_V4HI,
28658  IX86_BUILTIN_VEC_SET_V16QI,
28659
28660  IX86_BUILTIN_VEC_PACK_SFIX,
28661  IX86_BUILTIN_VEC_PACK_SFIX256,
28662
28663  /* SSE4.2.  */
28664  IX86_BUILTIN_CRC32QI,
28665  IX86_BUILTIN_CRC32HI,
28666  IX86_BUILTIN_CRC32SI,
28667  IX86_BUILTIN_CRC32DI,
28668
28669  IX86_BUILTIN_PCMPESTRI128,
28670  IX86_BUILTIN_PCMPESTRM128,
28671  IX86_BUILTIN_PCMPESTRA128,
28672  IX86_BUILTIN_PCMPESTRC128,
28673  IX86_BUILTIN_PCMPESTRO128,
28674  IX86_BUILTIN_PCMPESTRS128,
28675  IX86_BUILTIN_PCMPESTRZ128,
28676  IX86_BUILTIN_PCMPISTRI128,
28677  IX86_BUILTIN_PCMPISTRM128,
28678  IX86_BUILTIN_PCMPISTRA128,
28679  IX86_BUILTIN_PCMPISTRC128,
28680  IX86_BUILTIN_PCMPISTRO128,
28681  IX86_BUILTIN_PCMPISTRS128,
28682  IX86_BUILTIN_PCMPISTRZ128,
28683
28684  IX86_BUILTIN_PCMPGTQ,
28685
28686  /* AES instructions.  */
28687  IX86_BUILTIN_AESENC128,
28688  IX86_BUILTIN_AESENCLAST128,
28689  IX86_BUILTIN_AESDEC128,
28690  IX86_BUILTIN_AESDECLAST128,
28691  IX86_BUILTIN_AESIMC128,
28692  IX86_BUILTIN_AESKEYGENASSIST128,
28693
28694  /* PCLMUL instruction.  */
28695  IX86_BUILTIN_PCLMULQDQ128,
28696
28697  /* AVX.  */
28698  IX86_BUILTIN_ADDPD256,
28699  IX86_BUILTIN_ADDPS256,
28700  IX86_BUILTIN_ADDSUBPD256,
28701  IX86_BUILTIN_ADDSUBPS256,
28702  IX86_BUILTIN_ANDPD256,
28703  IX86_BUILTIN_ANDPS256,
28704  IX86_BUILTIN_ANDNPD256,
28705  IX86_BUILTIN_ANDNPS256,
28706  IX86_BUILTIN_BLENDPD256,
28707  IX86_BUILTIN_BLENDPS256,
28708  IX86_BUILTIN_BLENDVPD256,
28709  IX86_BUILTIN_BLENDVPS256,
28710  IX86_BUILTIN_DIVPD256,
28711  IX86_BUILTIN_DIVPS256,
28712  IX86_BUILTIN_DPPS256,
28713  IX86_BUILTIN_HADDPD256,
28714  IX86_BUILTIN_HADDPS256,
28715  IX86_BUILTIN_HSUBPD256,
28716  IX86_BUILTIN_HSUBPS256,
28717  IX86_BUILTIN_MAXPD256,
28718  IX86_BUILTIN_MAXPS256,
28719  IX86_BUILTIN_MINPD256,
28720  IX86_BUILTIN_MINPS256,
28721  IX86_BUILTIN_MULPD256,
28722  IX86_BUILTIN_MULPS256,
28723  IX86_BUILTIN_ORPD256,
28724  IX86_BUILTIN_ORPS256,
28725  IX86_BUILTIN_SHUFPD256,
28726  IX86_BUILTIN_SHUFPS256,
28727  IX86_BUILTIN_SUBPD256,
28728  IX86_BUILTIN_SUBPS256,
28729  IX86_BUILTIN_XORPD256,
28730  IX86_BUILTIN_XORPS256,
28731  IX86_BUILTIN_CMPSD,
28732  IX86_BUILTIN_CMPSS,
28733  IX86_BUILTIN_CMPPD,
28734  IX86_BUILTIN_CMPPS,
28735  IX86_BUILTIN_CMPPD256,
28736  IX86_BUILTIN_CMPPS256,
28737  IX86_BUILTIN_CVTDQ2PD256,
28738  IX86_BUILTIN_CVTDQ2PS256,
28739  IX86_BUILTIN_CVTPD2PS256,
28740  IX86_BUILTIN_CVTPS2DQ256,
28741  IX86_BUILTIN_CVTPS2PD256,
28742  IX86_BUILTIN_CVTTPD2DQ256,
28743  IX86_BUILTIN_CVTPD2DQ256,
28744  IX86_BUILTIN_CVTTPS2DQ256,
28745  IX86_BUILTIN_EXTRACTF128PD256,
28746  IX86_BUILTIN_EXTRACTF128PS256,
28747  IX86_BUILTIN_EXTRACTF128SI256,
28748  IX86_BUILTIN_VZEROALL,
28749  IX86_BUILTIN_VZEROUPPER,
28750  IX86_BUILTIN_VPERMILVARPD,
28751  IX86_BUILTIN_VPERMILVARPS,
28752  IX86_BUILTIN_VPERMILVARPD256,
28753  IX86_BUILTIN_VPERMILVARPS256,
28754  IX86_BUILTIN_VPERMILPD,
28755  IX86_BUILTIN_VPERMILPS,
28756  IX86_BUILTIN_VPERMILPD256,
28757  IX86_BUILTIN_VPERMILPS256,
28758  IX86_BUILTIN_VPERMIL2PD,
28759  IX86_BUILTIN_VPERMIL2PS,
28760  IX86_BUILTIN_VPERMIL2PD256,
28761  IX86_BUILTIN_VPERMIL2PS256,
28762  IX86_BUILTIN_VPERM2F128PD256,
28763  IX86_BUILTIN_VPERM2F128PS256,
28764  IX86_BUILTIN_VPERM2F128SI256,
28765  IX86_BUILTIN_VBROADCASTSS,
28766  IX86_BUILTIN_VBROADCASTSD256,
28767  IX86_BUILTIN_VBROADCASTSS256,
28768  IX86_BUILTIN_VBROADCASTPD256,
28769  IX86_BUILTIN_VBROADCASTPS256,
28770  IX86_BUILTIN_VINSERTF128PD256,
28771  IX86_BUILTIN_VINSERTF128PS256,
28772  IX86_BUILTIN_VINSERTF128SI256,
28773  IX86_BUILTIN_LOADUPD256,
28774  IX86_BUILTIN_LOADUPS256,
28775  IX86_BUILTIN_STOREUPD256,
28776  IX86_BUILTIN_STOREUPS256,
28777  IX86_BUILTIN_LDDQU256,
28778  IX86_BUILTIN_MOVNTDQ256,
28779  IX86_BUILTIN_MOVNTPD256,
28780  IX86_BUILTIN_MOVNTPS256,
28781  IX86_BUILTIN_LOADDQU256,
28782  IX86_BUILTIN_STOREDQU256,
28783  IX86_BUILTIN_MASKLOADPD,
28784  IX86_BUILTIN_MASKLOADPS,
28785  IX86_BUILTIN_MASKSTOREPD,
28786  IX86_BUILTIN_MASKSTOREPS,
28787  IX86_BUILTIN_MASKLOADPD256,
28788  IX86_BUILTIN_MASKLOADPS256,
28789  IX86_BUILTIN_MASKSTOREPD256,
28790  IX86_BUILTIN_MASKSTOREPS256,
28791  IX86_BUILTIN_MOVSHDUP256,
28792  IX86_BUILTIN_MOVSLDUP256,
28793  IX86_BUILTIN_MOVDDUP256,
28794
28795  IX86_BUILTIN_SQRTPD256,
28796  IX86_BUILTIN_SQRTPS256,
28797  IX86_BUILTIN_SQRTPS_NR256,
28798  IX86_BUILTIN_RSQRTPS256,
28799  IX86_BUILTIN_RSQRTPS_NR256,
28800
28801  IX86_BUILTIN_RCPPS256,
28802
28803  IX86_BUILTIN_ROUNDPD256,
28804  IX86_BUILTIN_ROUNDPS256,
28805
28806  IX86_BUILTIN_FLOORPD256,
28807  IX86_BUILTIN_CEILPD256,
28808  IX86_BUILTIN_TRUNCPD256,
28809  IX86_BUILTIN_RINTPD256,
28810  IX86_BUILTIN_ROUNDPD_AZ256,
28811
28812  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256,
28813  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256,
28814  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256,
28815
28816  IX86_BUILTIN_FLOORPS256,
28817  IX86_BUILTIN_CEILPS256,
28818  IX86_BUILTIN_TRUNCPS256,
28819  IX86_BUILTIN_RINTPS256,
28820  IX86_BUILTIN_ROUNDPS_AZ256,
28821
28822  IX86_BUILTIN_FLOORPS_SFIX256,
28823  IX86_BUILTIN_CEILPS_SFIX256,
28824  IX86_BUILTIN_ROUNDPS_AZ_SFIX256,
28825
28826  IX86_BUILTIN_UNPCKHPD256,
28827  IX86_BUILTIN_UNPCKLPD256,
28828  IX86_BUILTIN_UNPCKHPS256,
28829  IX86_BUILTIN_UNPCKLPS256,
28830
28831  IX86_BUILTIN_SI256_SI,
28832  IX86_BUILTIN_PS256_PS,
28833  IX86_BUILTIN_PD256_PD,
28834  IX86_BUILTIN_SI_SI256,
28835  IX86_BUILTIN_PS_PS256,
28836  IX86_BUILTIN_PD_PD256,
28837
28838  IX86_BUILTIN_VTESTZPD,
28839  IX86_BUILTIN_VTESTCPD,
28840  IX86_BUILTIN_VTESTNZCPD,
28841  IX86_BUILTIN_VTESTZPS,
28842  IX86_BUILTIN_VTESTCPS,
28843  IX86_BUILTIN_VTESTNZCPS,
28844  IX86_BUILTIN_VTESTZPD256,
28845  IX86_BUILTIN_VTESTCPD256,
28846  IX86_BUILTIN_VTESTNZCPD256,
28847  IX86_BUILTIN_VTESTZPS256,
28848  IX86_BUILTIN_VTESTCPS256,
28849  IX86_BUILTIN_VTESTNZCPS256,
28850  IX86_BUILTIN_PTESTZ256,
28851  IX86_BUILTIN_PTESTC256,
28852  IX86_BUILTIN_PTESTNZC256,
28853
28854  IX86_BUILTIN_MOVMSKPD256,
28855  IX86_BUILTIN_MOVMSKPS256,
28856
28857  /* AVX2.  */
28858  IX86_BUILTIN_MPSADBW256,
28859  IX86_BUILTIN_PABSB256,
28860  IX86_BUILTIN_PABSW256,
28861  IX86_BUILTIN_PABSD256,
28862  IX86_BUILTIN_PACKSSDW256,
28863  IX86_BUILTIN_PACKSSWB256,
28864  IX86_BUILTIN_PACKUSDW256,
28865  IX86_BUILTIN_PACKUSWB256,
28866  IX86_BUILTIN_PADDB256,
28867  IX86_BUILTIN_PADDW256,
28868  IX86_BUILTIN_PADDD256,
28869  IX86_BUILTIN_PADDQ256,
28870  IX86_BUILTIN_PADDSB256,
28871  IX86_BUILTIN_PADDSW256,
28872  IX86_BUILTIN_PADDUSB256,
28873  IX86_BUILTIN_PADDUSW256,
28874  IX86_BUILTIN_PALIGNR256,
28875  IX86_BUILTIN_AND256I,
28876  IX86_BUILTIN_ANDNOT256I,
28877  IX86_BUILTIN_PAVGB256,
28878  IX86_BUILTIN_PAVGW256,
28879  IX86_BUILTIN_PBLENDVB256,
28880  IX86_BUILTIN_PBLENDVW256,
28881  IX86_BUILTIN_PCMPEQB256,
28882  IX86_BUILTIN_PCMPEQW256,
28883  IX86_BUILTIN_PCMPEQD256,
28884  IX86_BUILTIN_PCMPEQQ256,
28885  IX86_BUILTIN_PCMPGTB256,
28886  IX86_BUILTIN_PCMPGTW256,
28887  IX86_BUILTIN_PCMPGTD256,
28888  IX86_BUILTIN_PCMPGTQ256,
28889  IX86_BUILTIN_PHADDW256,
28890  IX86_BUILTIN_PHADDD256,
28891  IX86_BUILTIN_PHADDSW256,
28892  IX86_BUILTIN_PHSUBW256,
28893  IX86_BUILTIN_PHSUBD256,
28894  IX86_BUILTIN_PHSUBSW256,
28895  IX86_BUILTIN_PMADDUBSW256,
28896  IX86_BUILTIN_PMADDWD256,
28897  IX86_BUILTIN_PMAXSB256,
28898  IX86_BUILTIN_PMAXSW256,
28899  IX86_BUILTIN_PMAXSD256,
28900  IX86_BUILTIN_PMAXUB256,
28901  IX86_BUILTIN_PMAXUW256,
28902  IX86_BUILTIN_PMAXUD256,
28903  IX86_BUILTIN_PMINSB256,
28904  IX86_BUILTIN_PMINSW256,
28905  IX86_BUILTIN_PMINSD256,
28906  IX86_BUILTIN_PMINUB256,
28907  IX86_BUILTIN_PMINUW256,
28908  IX86_BUILTIN_PMINUD256,
28909  IX86_BUILTIN_PMOVMSKB256,
28910  IX86_BUILTIN_PMOVSXBW256,
28911  IX86_BUILTIN_PMOVSXBD256,
28912  IX86_BUILTIN_PMOVSXBQ256,
28913  IX86_BUILTIN_PMOVSXWD256,
28914  IX86_BUILTIN_PMOVSXWQ256,
28915  IX86_BUILTIN_PMOVSXDQ256,
28916  IX86_BUILTIN_PMOVZXBW256,
28917  IX86_BUILTIN_PMOVZXBD256,
28918  IX86_BUILTIN_PMOVZXBQ256,
28919  IX86_BUILTIN_PMOVZXWD256,
28920  IX86_BUILTIN_PMOVZXWQ256,
28921  IX86_BUILTIN_PMOVZXDQ256,
28922  IX86_BUILTIN_PMULDQ256,
28923  IX86_BUILTIN_PMULHRSW256,
28924  IX86_BUILTIN_PMULHUW256,
28925  IX86_BUILTIN_PMULHW256,
28926  IX86_BUILTIN_PMULLW256,
28927  IX86_BUILTIN_PMULLD256,
28928  IX86_BUILTIN_PMULUDQ256,
28929  IX86_BUILTIN_POR256,
28930  IX86_BUILTIN_PSADBW256,
28931  IX86_BUILTIN_PSHUFB256,
28932  IX86_BUILTIN_PSHUFD256,
28933  IX86_BUILTIN_PSHUFHW256,
28934  IX86_BUILTIN_PSHUFLW256,
28935  IX86_BUILTIN_PSIGNB256,
28936  IX86_BUILTIN_PSIGNW256,
28937  IX86_BUILTIN_PSIGND256,
28938  IX86_BUILTIN_PSLLDQI256,
28939  IX86_BUILTIN_PSLLWI256,
28940  IX86_BUILTIN_PSLLW256,
28941  IX86_BUILTIN_PSLLDI256,
28942  IX86_BUILTIN_PSLLD256,
28943  IX86_BUILTIN_PSLLQI256,
28944  IX86_BUILTIN_PSLLQ256,
28945  IX86_BUILTIN_PSRAWI256,
28946  IX86_BUILTIN_PSRAW256,
28947  IX86_BUILTIN_PSRADI256,
28948  IX86_BUILTIN_PSRAD256,
28949  IX86_BUILTIN_PSRLDQI256,
28950  IX86_BUILTIN_PSRLWI256,
28951  IX86_BUILTIN_PSRLW256,
28952  IX86_BUILTIN_PSRLDI256,
28953  IX86_BUILTIN_PSRLD256,
28954  IX86_BUILTIN_PSRLQI256,
28955  IX86_BUILTIN_PSRLQ256,
28956  IX86_BUILTIN_PSUBB256,
28957  IX86_BUILTIN_PSUBW256,
28958  IX86_BUILTIN_PSUBD256,
28959  IX86_BUILTIN_PSUBQ256,
28960  IX86_BUILTIN_PSUBSB256,
28961  IX86_BUILTIN_PSUBSW256,
28962  IX86_BUILTIN_PSUBUSB256,
28963  IX86_BUILTIN_PSUBUSW256,
28964  IX86_BUILTIN_PUNPCKHBW256,
28965  IX86_BUILTIN_PUNPCKHWD256,
28966  IX86_BUILTIN_PUNPCKHDQ256,
28967  IX86_BUILTIN_PUNPCKHQDQ256,
28968  IX86_BUILTIN_PUNPCKLBW256,
28969  IX86_BUILTIN_PUNPCKLWD256,
28970  IX86_BUILTIN_PUNPCKLDQ256,
28971  IX86_BUILTIN_PUNPCKLQDQ256,
28972  IX86_BUILTIN_PXOR256,
28973  IX86_BUILTIN_MOVNTDQA256,
28974  IX86_BUILTIN_VBROADCASTSS_PS,
28975  IX86_BUILTIN_VBROADCASTSS_PS256,
28976  IX86_BUILTIN_VBROADCASTSD_PD256,
28977  IX86_BUILTIN_VBROADCASTSI256,
28978  IX86_BUILTIN_PBLENDD256,
28979  IX86_BUILTIN_PBLENDD128,
28980  IX86_BUILTIN_PBROADCASTB256,
28981  IX86_BUILTIN_PBROADCASTW256,
28982  IX86_BUILTIN_PBROADCASTD256,
28983  IX86_BUILTIN_PBROADCASTQ256,
28984  IX86_BUILTIN_PBROADCASTB128,
28985  IX86_BUILTIN_PBROADCASTW128,
28986  IX86_BUILTIN_PBROADCASTD128,
28987  IX86_BUILTIN_PBROADCASTQ128,
28988  IX86_BUILTIN_VPERMVARSI256,
28989  IX86_BUILTIN_VPERMDF256,
28990  IX86_BUILTIN_VPERMVARSF256,
28991  IX86_BUILTIN_VPERMDI256,
28992  IX86_BUILTIN_VPERMTI256,
28993  IX86_BUILTIN_VEXTRACT128I256,
28994  IX86_BUILTIN_VINSERT128I256,
28995  IX86_BUILTIN_MASKLOADD,
28996  IX86_BUILTIN_MASKLOADQ,
28997  IX86_BUILTIN_MASKLOADD256,
28998  IX86_BUILTIN_MASKLOADQ256,
28999  IX86_BUILTIN_MASKSTORED,
29000  IX86_BUILTIN_MASKSTOREQ,
29001  IX86_BUILTIN_MASKSTORED256,
29002  IX86_BUILTIN_MASKSTOREQ256,
29003  IX86_BUILTIN_PSLLVV4DI,
29004  IX86_BUILTIN_PSLLVV2DI,
29005  IX86_BUILTIN_PSLLVV8SI,
29006  IX86_BUILTIN_PSLLVV4SI,
29007  IX86_BUILTIN_PSRAVV8SI,
29008  IX86_BUILTIN_PSRAVV4SI,
29009  IX86_BUILTIN_PSRLVV4DI,
29010  IX86_BUILTIN_PSRLVV2DI,
29011  IX86_BUILTIN_PSRLVV8SI,
29012  IX86_BUILTIN_PSRLVV4SI,
29013
29014  IX86_BUILTIN_GATHERSIV2DF,
29015  IX86_BUILTIN_GATHERSIV4DF,
29016  IX86_BUILTIN_GATHERDIV2DF,
29017  IX86_BUILTIN_GATHERDIV4DF,
29018  IX86_BUILTIN_GATHERSIV4SF,
29019  IX86_BUILTIN_GATHERSIV8SF,
29020  IX86_BUILTIN_GATHERDIV4SF,
29021  IX86_BUILTIN_GATHERDIV8SF,
29022  IX86_BUILTIN_GATHERSIV2DI,
29023  IX86_BUILTIN_GATHERSIV4DI,
29024  IX86_BUILTIN_GATHERDIV2DI,
29025  IX86_BUILTIN_GATHERDIV4DI,
29026  IX86_BUILTIN_GATHERSIV4SI,
29027  IX86_BUILTIN_GATHERSIV8SI,
29028  IX86_BUILTIN_GATHERDIV4SI,
29029  IX86_BUILTIN_GATHERDIV8SI,
29030
29031  /* AVX512F.  */
29032  IX86_BUILTIN_SI512_SI256,
29033  IX86_BUILTIN_PD512_PD256,
29034  IX86_BUILTIN_PS512_PS256,
29035  IX86_BUILTIN_SI512_SI,
29036  IX86_BUILTIN_PD512_PD,
29037  IX86_BUILTIN_PS512_PS,
29038  IX86_BUILTIN_ADDPD512,
29039  IX86_BUILTIN_ADDPS512,
29040  IX86_BUILTIN_ADDSD_ROUND,
29041  IX86_BUILTIN_ADDSS_ROUND,
29042  IX86_BUILTIN_ALIGND512,
29043  IX86_BUILTIN_ALIGNQ512,
29044  IX86_BUILTIN_BLENDMD512,
29045  IX86_BUILTIN_BLENDMPD512,
29046  IX86_BUILTIN_BLENDMPS512,
29047  IX86_BUILTIN_BLENDMQ512,
29048  IX86_BUILTIN_BROADCASTF32X4_512,
29049  IX86_BUILTIN_BROADCASTF64X4_512,
29050  IX86_BUILTIN_BROADCASTI32X4_512,
29051  IX86_BUILTIN_BROADCASTI64X4_512,
29052  IX86_BUILTIN_BROADCASTSD512,
29053  IX86_BUILTIN_BROADCASTSS512,
29054  IX86_BUILTIN_CMPD512,
29055  IX86_BUILTIN_CMPPD512,
29056  IX86_BUILTIN_CMPPS512,
29057  IX86_BUILTIN_CMPQ512,
29058  IX86_BUILTIN_CMPSD_MASK,
29059  IX86_BUILTIN_CMPSS_MASK,
29060  IX86_BUILTIN_COMIDF,
29061  IX86_BUILTIN_COMISF,
29062  IX86_BUILTIN_COMPRESSPD512,
29063  IX86_BUILTIN_COMPRESSPDSTORE512,
29064  IX86_BUILTIN_COMPRESSPS512,
29065  IX86_BUILTIN_COMPRESSPSSTORE512,
29066  IX86_BUILTIN_CVTDQ2PD512,
29067  IX86_BUILTIN_CVTDQ2PS512,
29068  IX86_BUILTIN_CVTPD2DQ512,
29069  IX86_BUILTIN_CVTPD2PS512,
29070  IX86_BUILTIN_CVTPD2UDQ512,
29071  IX86_BUILTIN_CVTPH2PS512,
29072  IX86_BUILTIN_CVTPS2DQ512,
29073  IX86_BUILTIN_CVTPS2PD512,
29074  IX86_BUILTIN_CVTPS2PH512,
29075  IX86_BUILTIN_CVTPS2UDQ512,
29076  IX86_BUILTIN_CVTSD2SS_ROUND,
29077  IX86_BUILTIN_CVTSI2SD64,
29078  IX86_BUILTIN_CVTSI2SS32,
29079  IX86_BUILTIN_CVTSI2SS64,
29080  IX86_BUILTIN_CVTSS2SD_ROUND,
29081  IX86_BUILTIN_CVTTPD2DQ512,
29082  IX86_BUILTIN_CVTTPD2UDQ512,
29083  IX86_BUILTIN_CVTTPS2DQ512,
29084  IX86_BUILTIN_CVTTPS2UDQ512,
29085  IX86_BUILTIN_CVTUDQ2PD512,
29086  IX86_BUILTIN_CVTUDQ2PS512,
29087  IX86_BUILTIN_CVTUSI2SD32,
29088  IX86_BUILTIN_CVTUSI2SD64,
29089  IX86_BUILTIN_CVTUSI2SS32,
29090  IX86_BUILTIN_CVTUSI2SS64,
29091  IX86_BUILTIN_DIVPD512,
29092  IX86_BUILTIN_DIVPS512,
29093  IX86_BUILTIN_DIVSD_ROUND,
29094  IX86_BUILTIN_DIVSS_ROUND,
29095  IX86_BUILTIN_EXPANDPD512,
29096  IX86_BUILTIN_EXPANDPD512Z,
29097  IX86_BUILTIN_EXPANDPDLOAD512,
29098  IX86_BUILTIN_EXPANDPDLOAD512Z,
29099  IX86_BUILTIN_EXPANDPS512,
29100  IX86_BUILTIN_EXPANDPS512Z,
29101  IX86_BUILTIN_EXPANDPSLOAD512,
29102  IX86_BUILTIN_EXPANDPSLOAD512Z,
29103  IX86_BUILTIN_EXTRACTF32X4,
29104  IX86_BUILTIN_EXTRACTF64X4,
29105  IX86_BUILTIN_EXTRACTI32X4,
29106  IX86_BUILTIN_EXTRACTI64X4,
29107  IX86_BUILTIN_FIXUPIMMPD512_MASK,
29108  IX86_BUILTIN_FIXUPIMMPD512_MASKZ,
29109  IX86_BUILTIN_FIXUPIMMPS512_MASK,
29110  IX86_BUILTIN_FIXUPIMMPS512_MASKZ,
29111  IX86_BUILTIN_FIXUPIMMSD128_MASK,
29112  IX86_BUILTIN_FIXUPIMMSD128_MASKZ,
29113  IX86_BUILTIN_FIXUPIMMSS128_MASK,
29114  IX86_BUILTIN_FIXUPIMMSS128_MASKZ,
29115  IX86_BUILTIN_GETEXPPD512,
29116  IX86_BUILTIN_GETEXPPS512,
29117  IX86_BUILTIN_GETEXPSD128,
29118  IX86_BUILTIN_GETEXPSS128,
29119  IX86_BUILTIN_GETMANTPD512,
29120  IX86_BUILTIN_GETMANTPS512,
29121  IX86_BUILTIN_GETMANTSD128,
29122  IX86_BUILTIN_GETMANTSS128,
29123  IX86_BUILTIN_INSERTF32X4,
29124  IX86_BUILTIN_INSERTF64X4,
29125  IX86_BUILTIN_INSERTI32X4,
29126  IX86_BUILTIN_INSERTI64X4,
29127  IX86_BUILTIN_LOADAPD512,
29128  IX86_BUILTIN_LOADAPS512,
29129  IX86_BUILTIN_LOADDQUDI512,
29130  IX86_BUILTIN_LOADDQUSI512,
29131  IX86_BUILTIN_LOADUPD512,
29132  IX86_BUILTIN_LOADUPS512,
29133  IX86_BUILTIN_MAXPD512,
29134  IX86_BUILTIN_MAXPS512,
29135  IX86_BUILTIN_MAXSD_ROUND,
29136  IX86_BUILTIN_MAXSS_ROUND,
29137  IX86_BUILTIN_MINPD512,
29138  IX86_BUILTIN_MINPS512,
29139  IX86_BUILTIN_MINSD_ROUND,
29140  IX86_BUILTIN_MINSS_ROUND,
29141  IX86_BUILTIN_MOVAPD512,
29142  IX86_BUILTIN_MOVAPS512,
29143  IX86_BUILTIN_MOVDDUP512,
29144  IX86_BUILTIN_MOVDQA32LOAD512,
29145  IX86_BUILTIN_MOVDQA32STORE512,
29146  IX86_BUILTIN_MOVDQA32_512,
29147  IX86_BUILTIN_MOVDQA64LOAD512,
29148  IX86_BUILTIN_MOVDQA64STORE512,
29149  IX86_BUILTIN_MOVDQA64_512,
29150  IX86_BUILTIN_MOVNTDQ512,
29151  IX86_BUILTIN_MOVNTDQA512,
29152  IX86_BUILTIN_MOVNTPD512,
29153  IX86_BUILTIN_MOVNTPS512,
29154  IX86_BUILTIN_MOVSHDUP512,
29155  IX86_BUILTIN_MOVSLDUP512,
29156  IX86_BUILTIN_MULPD512,
29157  IX86_BUILTIN_MULPS512,
29158  IX86_BUILTIN_MULSD_ROUND,
29159  IX86_BUILTIN_MULSS_ROUND,
29160  IX86_BUILTIN_PABSD512,
29161  IX86_BUILTIN_PABSQ512,
29162  IX86_BUILTIN_PADDD512,
29163  IX86_BUILTIN_PADDQ512,
29164  IX86_BUILTIN_PANDD512,
29165  IX86_BUILTIN_PANDND512,
29166  IX86_BUILTIN_PANDNQ512,
29167  IX86_BUILTIN_PANDQ512,
29168  IX86_BUILTIN_PBROADCASTD512,
29169  IX86_BUILTIN_PBROADCASTD512_GPR,
29170  IX86_BUILTIN_PBROADCASTMB512,
29171  IX86_BUILTIN_PBROADCASTMW512,
29172  IX86_BUILTIN_PBROADCASTQ512,
29173  IX86_BUILTIN_PBROADCASTQ512_GPR,
29174  IX86_BUILTIN_PCMPEQD512_MASK,
29175  IX86_BUILTIN_PCMPEQQ512_MASK,
29176  IX86_BUILTIN_PCMPGTD512_MASK,
29177  IX86_BUILTIN_PCMPGTQ512_MASK,
29178  IX86_BUILTIN_PCOMPRESSD512,
29179  IX86_BUILTIN_PCOMPRESSDSTORE512,
29180  IX86_BUILTIN_PCOMPRESSQ512,
29181  IX86_BUILTIN_PCOMPRESSQSTORE512,
29182  IX86_BUILTIN_PEXPANDD512,
29183  IX86_BUILTIN_PEXPANDD512Z,
29184  IX86_BUILTIN_PEXPANDDLOAD512,
29185  IX86_BUILTIN_PEXPANDDLOAD512Z,
29186  IX86_BUILTIN_PEXPANDQ512,
29187  IX86_BUILTIN_PEXPANDQ512Z,
29188  IX86_BUILTIN_PEXPANDQLOAD512,
29189  IX86_BUILTIN_PEXPANDQLOAD512Z,
29190  IX86_BUILTIN_PMAXSD512,
29191  IX86_BUILTIN_PMAXSQ512,
29192  IX86_BUILTIN_PMAXUD512,
29193  IX86_BUILTIN_PMAXUQ512,
29194  IX86_BUILTIN_PMINSD512,
29195  IX86_BUILTIN_PMINSQ512,
29196  IX86_BUILTIN_PMINUD512,
29197  IX86_BUILTIN_PMINUQ512,
29198  IX86_BUILTIN_PMOVDB512,
29199  IX86_BUILTIN_PMOVDB512_MEM,
29200  IX86_BUILTIN_PMOVDW512,
29201  IX86_BUILTIN_PMOVDW512_MEM,
29202  IX86_BUILTIN_PMOVQB512,
29203  IX86_BUILTIN_PMOVQB512_MEM,
29204  IX86_BUILTIN_PMOVQD512,
29205  IX86_BUILTIN_PMOVQD512_MEM,
29206  IX86_BUILTIN_PMOVQW512,
29207  IX86_BUILTIN_PMOVQW512_MEM,
29208  IX86_BUILTIN_PMOVSDB512,
29209  IX86_BUILTIN_PMOVSDB512_MEM,
29210  IX86_BUILTIN_PMOVSDW512,
29211  IX86_BUILTIN_PMOVSDW512_MEM,
29212  IX86_BUILTIN_PMOVSQB512,
29213  IX86_BUILTIN_PMOVSQB512_MEM,
29214  IX86_BUILTIN_PMOVSQD512,
29215  IX86_BUILTIN_PMOVSQD512_MEM,
29216  IX86_BUILTIN_PMOVSQW512,
29217  IX86_BUILTIN_PMOVSQW512_MEM,
29218  IX86_BUILTIN_PMOVSXBD512,
29219  IX86_BUILTIN_PMOVSXBQ512,
29220  IX86_BUILTIN_PMOVSXDQ512,
29221  IX86_BUILTIN_PMOVSXWD512,
29222  IX86_BUILTIN_PMOVSXWQ512,
29223  IX86_BUILTIN_PMOVUSDB512,
29224  IX86_BUILTIN_PMOVUSDB512_MEM,
29225  IX86_BUILTIN_PMOVUSDW512,
29226  IX86_BUILTIN_PMOVUSDW512_MEM,
29227  IX86_BUILTIN_PMOVUSQB512,
29228  IX86_BUILTIN_PMOVUSQB512_MEM,
29229  IX86_BUILTIN_PMOVUSQD512,
29230  IX86_BUILTIN_PMOVUSQD512_MEM,
29231  IX86_BUILTIN_PMOVUSQW512,
29232  IX86_BUILTIN_PMOVUSQW512_MEM,
29233  IX86_BUILTIN_PMOVZXBD512,
29234  IX86_BUILTIN_PMOVZXBQ512,
29235  IX86_BUILTIN_PMOVZXDQ512,
29236  IX86_BUILTIN_PMOVZXWD512,
29237  IX86_BUILTIN_PMOVZXWQ512,
29238  IX86_BUILTIN_PMULDQ512,
29239  IX86_BUILTIN_PMULLD512,
29240  IX86_BUILTIN_PMULUDQ512,
29241  IX86_BUILTIN_PORD512,
29242  IX86_BUILTIN_PORQ512,
29243  IX86_BUILTIN_PROLD512,
29244  IX86_BUILTIN_PROLQ512,
29245  IX86_BUILTIN_PROLVD512,
29246  IX86_BUILTIN_PROLVQ512,
29247  IX86_BUILTIN_PRORD512,
29248  IX86_BUILTIN_PRORQ512,
29249  IX86_BUILTIN_PRORVD512,
29250  IX86_BUILTIN_PRORVQ512,
29251  IX86_BUILTIN_PSHUFD512,
29252  IX86_BUILTIN_PSLLD512,
29253  IX86_BUILTIN_PSLLDI512,
29254  IX86_BUILTIN_PSLLQ512,
29255  IX86_BUILTIN_PSLLQI512,
29256  IX86_BUILTIN_PSLLVV16SI,
29257  IX86_BUILTIN_PSLLVV8DI,
29258  IX86_BUILTIN_PSRAD512,
29259  IX86_BUILTIN_PSRADI512,
29260  IX86_BUILTIN_PSRAQ512,
29261  IX86_BUILTIN_PSRAQI512,
29262  IX86_BUILTIN_PSRAVV16SI,
29263  IX86_BUILTIN_PSRAVV8DI,
29264  IX86_BUILTIN_PSRLD512,
29265  IX86_BUILTIN_PSRLDI512,
29266  IX86_BUILTIN_PSRLQ512,
29267  IX86_BUILTIN_PSRLQI512,
29268  IX86_BUILTIN_PSRLVV16SI,
29269  IX86_BUILTIN_PSRLVV8DI,
29270  IX86_BUILTIN_PSUBD512,
29271  IX86_BUILTIN_PSUBQ512,
29272  IX86_BUILTIN_PTESTMD512,
29273  IX86_BUILTIN_PTESTMQ512,
29274  IX86_BUILTIN_PTESTNMD512,
29275  IX86_BUILTIN_PTESTNMQ512,
29276  IX86_BUILTIN_PUNPCKHDQ512,
29277  IX86_BUILTIN_PUNPCKHQDQ512,
29278  IX86_BUILTIN_PUNPCKLDQ512,
29279  IX86_BUILTIN_PUNPCKLQDQ512,
29280  IX86_BUILTIN_PXORD512,
29281  IX86_BUILTIN_PXORQ512,
29282  IX86_BUILTIN_RCP14PD512,
29283  IX86_BUILTIN_RCP14PS512,
29284  IX86_BUILTIN_RCP14SD,
29285  IX86_BUILTIN_RCP14SS,
29286  IX86_BUILTIN_RNDSCALEPD,
29287  IX86_BUILTIN_RNDSCALEPS,
29288  IX86_BUILTIN_RNDSCALESD,
29289  IX86_BUILTIN_RNDSCALESS,
29290  IX86_BUILTIN_RSQRT14PD512,
29291  IX86_BUILTIN_RSQRT14PS512,
29292  IX86_BUILTIN_RSQRT14SD,
29293  IX86_BUILTIN_RSQRT14SS,
29294  IX86_BUILTIN_SCALEFPD512,
29295  IX86_BUILTIN_SCALEFPS512,
29296  IX86_BUILTIN_SCALEFSD,
29297  IX86_BUILTIN_SCALEFSS,
29298  IX86_BUILTIN_SHUFPD512,
29299  IX86_BUILTIN_SHUFPS512,
29300  IX86_BUILTIN_SHUF_F32x4,
29301  IX86_BUILTIN_SHUF_F64x2,
29302  IX86_BUILTIN_SHUF_I32x4,
29303  IX86_BUILTIN_SHUF_I64x2,
29304  IX86_BUILTIN_SQRTPD512,
29305  IX86_BUILTIN_SQRTPD512_MASK,
29306  IX86_BUILTIN_SQRTPS512_MASK,
29307  IX86_BUILTIN_SQRTPS_NR512,
29308  IX86_BUILTIN_SQRTSD_ROUND,
29309  IX86_BUILTIN_SQRTSS_ROUND,
29310  IX86_BUILTIN_STOREAPD512,
29311  IX86_BUILTIN_STOREAPS512,
29312  IX86_BUILTIN_STOREDQUDI512,
29313  IX86_BUILTIN_STOREDQUSI512,
29314  IX86_BUILTIN_STOREUPD512,
29315  IX86_BUILTIN_STOREUPS512,
29316  IX86_BUILTIN_SUBPD512,
29317  IX86_BUILTIN_SUBPS512,
29318  IX86_BUILTIN_SUBSD_ROUND,
29319  IX86_BUILTIN_SUBSS_ROUND,
29320  IX86_BUILTIN_UCMPD512,
29321  IX86_BUILTIN_UCMPQ512,
29322  IX86_BUILTIN_UNPCKHPD512,
29323  IX86_BUILTIN_UNPCKHPS512,
29324  IX86_BUILTIN_UNPCKLPD512,
29325  IX86_BUILTIN_UNPCKLPS512,
29326  IX86_BUILTIN_VCVTSD2SI32,
29327  IX86_BUILTIN_VCVTSD2SI64,
29328  IX86_BUILTIN_VCVTSD2USI32,
29329  IX86_BUILTIN_VCVTSD2USI64,
29330  IX86_BUILTIN_VCVTSS2SI32,
29331  IX86_BUILTIN_VCVTSS2SI64,
29332  IX86_BUILTIN_VCVTSS2USI32,
29333  IX86_BUILTIN_VCVTSS2USI64,
29334  IX86_BUILTIN_VCVTTSD2SI32,
29335  IX86_BUILTIN_VCVTTSD2SI64,
29336  IX86_BUILTIN_VCVTTSD2USI32,
29337  IX86_BUILTIN_VCVTTSD2USI64,
29338  IX86_BUILTIN_VCVTTSS2SI32,
29339  IX86_BUILTIN_VCVTTSS2SI64,
29340  IX86_BUILTIN_VCVTTSS2USI32,
29341  IX86_BUILTIN_VCVTTSS2USI64,
29342  IX86_BUILTIN_VFMADDPD512_MASK,
29343  IX86_BUILTIN_VFMADDPD512_MASK3,
29344  IX86_BUILTIN_VFMADDPD512_MASKZ,
29345  IX86_BUILTIN_VFMADDPS512_MASK,
29346  IX86_BUILTIN_VFMADDPS512_MASK3,
29347  IX86_BUILTIN_VFMADDPS512_MASKZ,
29348  IX86_BUILTIN_VFMADDSD3_ROUND,
29349  IX86_BUILTIN_VFMADDSS3_ROUND,
29350  IX86_BUILTIN_VFMADDSUBPD512_MASK,
29351  IX86_BUILTIN_VFMADDSUBPD512_MASK3,
29352  IX86_BUILTIN_VFMADDSUBPD512_MASKZ,
29353  IX86_BUILTIN_VFMADDSUBPS512_MASK,
29354  IX86_BUILTIN_VFMADDSUBPS512_MASK3,
29355  IX86_BUILTIN_VFMADDSUBPS512_MASKZ,
29356  IX86_BUILTIN_VFMSUBADDPD512_MASK3,
29357  IX86_BUILTIN_VFMSUBADDPS512_MASK3,
29358  IX86_BUILTIN_VFMSUBPD512_MASK3,
29359  IX86_BUILTIN_VFMSUBPS512_MASK3,
29360  IX86_BUILTIN_VFMSUBSD3_MASK3,
29361  IX86_BUILTIN_VFMSUBSS3_MASK3,
29362  IX86_BUILTIN_VFNMADDPD512_MASK,
29363  IX86_BUILTIN_VFNMADDPS512_MASK,
29364  IX86_BUILTIN_VFNMSUBPD512_MASK,
29365  IX86_BUILTIN_VFNMSUBPD512_MASK3,
29366  IX86_BUILTIN_VFNMSUBPS512_MASK,
29367  IX86_BUILTIN_VFNMSUBPS512_MASK3,
29368  IX86_BUILTIN_VPCLZCNTD512,
29369  IX86_BUILTIN_VPCLZCNTQ512,
29370  IX86_BUILTIN_VPCONFLICTD512,
29371  IX86_BUILTIN_VPCONFLICTQ512,
29372  IX86_BUILTIN_VPERMDF512,
29373  IX86_BUILTIN_VPERMDI512,
29374  IX86_BUILTIN_VPERMI2VARD512,
29375  IX86_BUILTIN_VPERMI2VARPD512,
29376  IX86_BUILTIN_VPERMI2VARPS512,
29377  IX86_BUILTIN_VPERMI2VARQ512,
29378  IX86_BUILTIN_VPERMILPD512,
29379  IX86_BUILTIN_VPERMILPS512,
29380  IX86_BUILTIN_VPERMILVARPD512,
29381  IX86_BUILTIN_VPERMILVARPS512,
29382  IX86_BUILTIN_VPERMT2VARD512,
29383  IX86_BUILTIN_VPERMT2VARD512_MASKZ,
29384  IX86_BUILTIN_VPERMT2VARPD512,
29385  IX86_BUILTIN_VPERMT2VARPD512_MASKZ,
29386  IX86_BUILTIN_VPERMT2VARPS512,
29387  IX86_BUILTIN_VPERMT2VARPS512_MASKZ,
29388  IX86_BUILTIN_VPERMT2VARQ512,
29389  IX86_BUILTIN_VPERMT2VARQ512_MASKZ,
29390  IX86_BUILTIN_VPERMVARDF512,
29391  IX86_BUILTIN_VPERMVARDI512,
29392  IX86_BUILTIN_VPERMVARSF512,
29393  IX86_BUILTIN_VPERMVARSI512,
29394  IX86_BUILTIN_VTERNLOGD512_MASK,
29395  IX86_BUILTIN_VTERNLOGD512_MASKZ,
29396  IX86_BUILTIN_VTERNLOGQ512_MASK,
29397  IX86_BUILTIN_VTERNLOGQ512_MASKZ,
29398
29399  /* Mask arithmetic operations.  */
29400  IX86_BUILTIN_KAND16,
29401  IX86_BUILTIN_KANDN16,
29402  IX86_BUILTIN_KNOT16,
29403  IX86_BUILTIN_KOR16,
29404  IX86_BUILTIN_KORTESTC16,
29405  IX86_BUILTIN_KORTESTZ16,
29406  IX86_BUILTIN_KUNPCKBW,
29407  IX86_BUILTIN_KXNOR16,
29408  IX86_BUILTIN_KXOR16,
29409  IX86_BUILTIN_KMOV16,
29410
29411  /* AVX512VL.  */
29412  IX86_BUILTIN_PMOVUSQD256_MEM,
29413  IX86_BUILTIN_PMOVUSQD128_MEM,
29414  IX86_BUILTIN_PMOVSQD256_MEM,
29415  IX86_BUILTIN_PMOVSQD128_MEM,
29416  IX86_BUILTIN_PMOVQD256_MEM,
29417  IX86_BUILTIN_PMOVQD128_MEM,
29418  IX86_BUILTIN_PMOVUSQW256_MEM,
29419  IX86_BUILTIN_PMOVUSQW128_MEM,
29420  IX86_BUILTIN_PMOVSQW256_MEM,
29421  IX86_BUILTIN_PMOVSQW128_MEM,
29422  IX86_BUILTIN_PMOVQW256_MEM,
29423  IX86_BUILTIN_PMOVQW128_MEM,
29424  IX86_BUILTIN_PMOVUSQB256_MEM,
29425  IX86_BUILTIN_PMOVUSQB128_MEM,
29426  IX86_BUILTIN_PMOVSQB256_MEM,
29427  IX86_BUILTIN_PMOVSQB128_MEM,
29428  IX86_BUILTIN_PMOVQB256_MEM,
29429  IX86_BUILTIN_PMOVQB128_MEM,
29430  IX86_BUILTIN_PMOVUSDW256_MEM,
29431  IX86_BUILTIN_PMOVUSDW128_MEM,
29432  IX86_BUILTIN_PMOVSDW256_MEM,
29433  IX86_BUILTIN_PMOVSDW128_MEM,
29434  IX86_BUILTIN_PMOVDW256_MEM,
29435  IX86_BUILTIN_PMOVDW128_MEM,
29436  IX86_BUILTIN_PMOVUSDB256_MEM,
29437  IX86_BUILTIN_PMOVUSDB128_MEM,
29438  IX86_BUILTIN_PMOVSDB256_MEM,
29439  IX86_BUILTIN_PMOVSDB128_MEM,
29440  IX86_BUILTIN_PMOVDB256_MEM,
29441  IX86_BUILTIN_PMOVDB128_MEM,
29442  IX86_BUILTIN_MOVDQA64LOAD256_MASK,
29443  IX86_BUILTIN_MOVDQA64LOAD128_MASK,
29444  IX86_BUILTIN_MOVDQA32LOAD256_MASK,
29445  IX86_BUILTIN_MOVDQA32LOAD128_MASK,
29446  IX86_BUILTIN_MOVDQA64STORE256_MASK,
29447  IX86_BUILTIN_MOVDQA64STORE128_MASK,
29448  IX86_BUILTIN_MOVDQA32STORE256_MASK,
29449  IX86_BUILTIN_MOVDQA32STORE128_MASK,
29450  IX86_BUILTIN_LOADAPD256_MASK,
29451  IX86_BUILTIN_LOADAPD128_MASK,
29452  IX86_BUILTIN_LOADAPS256_MASK,
29453  IX86_BUILTIN_LOADAPS128_MASK,
29454  IX86_BUILTIN_STOREAPD256_MASK,
29455  IX86_BUILTIN_STOREAPD128_MASK,
29456  IX86_BUILTIN_STOREAPS256_MASK,
29457  IX86_BUILTIN_STOREAPS128_MASK,
29458  IX86_BUILTIN_LOADUPD256_MASK,
29459  IX86_BUILTIN_LOADUPD128_MASK,
29460  IX86_BUILTIN_LOADUPS256_MASK,
29461  IX86_BUILTIN_LOADUPS128_MASK,
29462  IX86_BUILTIN_STOREUPD256_MASK,
29463  IX86_BUILTIN_STOREUPD128_MASK,
29464  IX86_BUILTIN_STOREUPS256_MASK,
29465  IX86_BUILTIN_STOREUPS128_MASK,
29466  IX86_BUILTIN_LOADDQUDI256_MASK,
29467  IX86_BUILTIN_LOADDQUDI128_MASK,
29468  IX86_BUILTIN_LOADDQUSI256_MASK,
29469  IX86_BUILTIN_LOADDQUSI128_MASK,
29470  IX86_BUILTIN_LOADDQUHI256_MASK,
29471  IX86_BUILTIN_LOADDQUHI128_MASK,
29472  IX86_BUILTIN_LOADDQUQI256_MASK,
29473  IX86_BUILTIN_LOADDQUQI128_MASK,
29474  IX86_BUILTIN_STOREDQUDI256_MASK,
29475  IX86_BUILTIN_STOREDQUDI128_MASK,
29476  IX86_BUILTIN_STOREDQUSI256_MASK,
29477  IX86_BUILTIN_STOREDQUSI128_MASK,
29478  IX86_BUILTIN_STOREDQUHI256_MASK,
29479  IX86_BUILTIN_STOREDQUHI128_MASK,
29480  IX86_BUILTIN_STOREDQUQI256_MASK,
29481  IX86_BUILTIN_STOREDQUQI128_MASK,
29482  IX86_BUILTIN_COMPRESSPDSTORE256,
29483  IX86_BUILTIN_COMPRESSPDSTORE128,
29484  IX86_BUILTIN_COMPRESSPSSTORE256,
29485  IX86_BUILTIN_COMPRESSPSSTORE128,
29486  IX86_BUILTIN_PCOMPRESSQSTORE256,
29487  IX86_BUILTIN_PCOMPRESSQSTORE128,
29488  IX86_BUILTIN_PCOMPRESSDSTORE256,
29489  IX86_BUILTIN_PCOMPRESSDSTORE128,
29490  IX86_BUILTIN_EXPANDPDLOAD256,
29491  IX86_BUILTIN_EXPANDPDLOAD128,
29492  IX86_BUILTIN_EXPANDPSLOAD256,
29493  IX86_BUILTIN_EXPANDPSLOAD128,
29494  IX86_BUILTIN_PEXPANDQLOAD256,
29495  IX86_BUILTIN_PEXPANDQLOAD128,
29496  IX86_BUILTIN_PEXPANDDLOAD256,
29497  IX86_BUILTIN_PEXPANDDLOAD128,
29498  IX86_BUILTIN_EXPANDPDLOAD256Z,
29499  IX86_BUILTIN_EXPANDPDLOAD128Z,
29500  IX86_BUILTIN_EXPANDPSLOAD256Z,
29501  IX86_BUILTIN_EXPANDPSLOAD128Z,
29502  IX86_BUILTIN_PEXPANDQLOAD256Z,
29503  IX86_BUILTIN_PEXPANDQLOAD128Z,
29504  IX86_BUILTIN_PEXPANDDLOAD256Z,
29505  IX86_BUILTIN_PEXPANDDLOAD128Z,
29506  IX86_BUILTIN_PALIGNR256_MASK,
29507  IX86_BUILTIN_PALIGNR128_MASK,
29508  IX86_BUILTIN_MOVDQA64_256_MASK,
29509  IX86_BUILTIN_MOVDQA64_128_MASK,
29510  IX86_BUILTIN_MOVDQA32_256_MASK,
29511  IX86_BUILTIN_MOVDQA32_128_MASK,
29512  IX86_BUILTIN_MOVAPD256_MASK,
29513  IX86_BUILTIN_MOVAPD128_MASK,
29514  IX86_BUILTIN_MOVAPS256_MASK,
29515  IX86_BUILTIN_MOVAPS128_MASK,
29516  IX86_BUILTIN_MOVDQUHI256_MASK,
29517  IX86_BUILTIN_MOVDQUHI128_MASK,
29518  IX86_BUILTIN_MOVDQUQI256_MASK,
29519  IX86_BUILTIN_MOVDQUQI128_MASK,
29520  IX86_BUILTIN_MINPS128_MASK,
29521  IX86_BUILTIN_MAXPS128_MASK,
29522  IX86_BUILTIN_MINPD128_MASK,
29523  IX86_BUILTIN_MAXPD128_MASK,
29524  IX86_BUILTIN_MAXPD256_MASK,
29525  IX86_BUILTIN_MAXPS256_MASK,
29526  IX86_BUILTIN_MINPD256_MASK,
29527  IX86_BUILTIN_MINPS256_MASK,
29528  IX86_BUILTIN_MULPS128_MASK,
29529  IX86_BUILTIN_DIVPS128_MASK,
29530  IX86_BUILTIN_MULPD128_MASK,
29531  IX86_BUILTIN_DIVPD128_MASK,
29532  IX86_BUILTIN_DIVPD256_MASK,
29533  IX86_BUILTIN_DIVPS256_MASK,
29534  IX86_BUILTIN_MULPD256_MASK,
29535  IX86_BUILTIN_MULPS256_MASK,
29536  IX86_BUILTIN_ADDPD128_MASK,
29537  IX86_BUILTIN_ADDPD256_MASK,
29538  IX86_BUILTIN_ADDPS128_MASK,
29539  IX86_BUILTIN_ADDPS256_MASK,
29540  IX86_BUILTIN_SUBPD128_MASK,
29541  IX86_BUILTIN_SUBPD256_MASK,
29542  IX86_BUILTIN_SUBPS128_MASK,
29543  IX86_BUILTIN_SUBPS256_MASK,
29544  IX86_BUILTIN_XORPD256_MASK,
29545  IX86_BUILTIN_XORPD128_MASK,
29546  IX86_BUILTIN_XORPS256_MASK,
29547  IX86_BUILTIN_XORPS128_MASK,
29548  IX86_BUILTIN_ORPD256_MASK,
29549  IX86_BUILTIN_ORPD128_MASK,
29550  IX86_BUILTIN_ORPS256_MASK,
29551  IX86_BUILTIN_ORPS128_MASK,
29552  IX86_BUILTIN_BROADCASTF32x2_256,
29553  IX86_BUILTIN_BROADCASTI32x2_256,
29554  IX86_BUILTIN_BROADCASTI32x2_128,
29555  IX86_BUILTIN_BROADCASTF64X2_256,
29556  IX86_BUILTIN_BROADCASTI64X2_256,
29557  IX86_BUILTIN_BROADCASTF32X4_256,
29558  IX86_BUILTIN_BROADCASTI32X4_256,
29559  IX86_BUILTIN_EXTRACTF32X4_256,
29560  IX86_BUILTIN_EXTRACTI32X4_256,
29561  IX86_BUILTIN_DBPSADBW256,
29562  IX86_BUILTIN_DBPSADBW128,
29563  IX86_BUILTIN_CVTTPD2QQ256,
29564  IX86_BUILTIN_CVTTPD2QQ128,
29565  IX86_BUILTIN_CVTTPD2UQQ256,
29566  IX86_BUILTIN_CVTTPD2UQQ128,
29567  IX86_BUILTIN_CVTPD2QQ256,
29568  IX86_BUILTIN_CVTPD2QQ128,
29569  IX86_BUILTIN_CVTPD2UQQ256,
29570  IX86_BUILTIN_CVTPD2UQQ128,
29571  IX86_BUILTIN_CVTPD2UDQ256_MASK,
29572  IX86_BUILTIN_CVTPD2UDQ128_MASK,
29573  IX86_BUILTIN_CVTTPS2QQ256,
29574  IX86_BUILTIN_CVTTPS2QQ128,
29575  IX86_BUILTIN_CVTTPS2UQQ256,
29576  IX86_BUILTIN_CVTTPS2UQQ128,
29577  IX86_BUILTIN_CVTTPS2DQ256_MASK,
29578  IX86_BUILTIN_CVTTPS2DQ128_MASK,
29579  IX86_BUILTIN_CVTTPS2UDQ256,
29580  IX86_BUILTIN_CVTTPS2UDQ128,
29581  IX86_BUILTIN_CVTTPD2DQ256_MASK,
29582  IX86_BUILTIN_CVTTPD2DQ128_MASK,
29583  IX86_BUILTIN_CVTTPD2UDQ256_MASK,
29584  IX86_BUILTIN_CVTTPD2UDQ128_MASK,
29585  IX86_BUILTIN_CVTPD2DQ256_MASK,
29586  IX86_BUILTIN_CVTPD2DQ128_MASK,
29587  IX86_BUILTIN_CVTDQ2PD256_MASK,
29588  IX86_BUILTIN_CVTDQ2PD128_MASK,
29589  IX86_BUILTIN_CVTUDQ2PD256_MASK,
29590  IX86_BUILTIN_CVTUDQ2PD128_MASK,
29591  IX86_BUILTIN_CVTDQ2PS256_MASK,
29592  IX86_BUILTIN_CVTDQ2PS128_MASK,
29593  IX86_BUILTIN_CVTUDQ2PS256_MASK,
29594  IX86_BUILTIN_CVTUDQ2PS128_MASK,
29595  IX86_BUILTIN_CVTPS2PD256_MASK,
29596  IX86_BUILTIN_CVTPS2PD128_MASK,
29597  IX86_BUILTIN_PBROADCASTB256_MASK,
29598  IX86_BUILTIN_PBROADCASTB256_GPR_MASK,
29599  IX86_BUILTIN_PBROADCASTB128_MASK,
29600  IX86_BUILTIN_PBROADCASTB128_GPR_MASK,
29601  IX86_BUILTIN_PBROADCASTW256_MASK,
29602  IX86_BUILTIN_PBROADCASTW256_GPR_MASK,
29603  IX86_BUILTIN_PBROADCASTW128_MASK,
29604  IX86_BUILTIN_PBROADCASTW128_GPR_MASK,
29605  IX86_BUILTIN_PBROADCASTD256_MASK,
29606  IX86_BUILTIN_PBROADCASTD256_GPR_MASK,
29607  IX86_BUILTIN_PBROADCASTD128_MASK,
29608  IX86_BUILTIN_PBROADCASTD128_GPR_MASK,
29609  IX86_BUILTIN_PBROADCASTQ256_MASK,
29610  IX86_BUILTIN_PBROADCASTQ256_GPR_MASK,
29611  IX86_BUILTIN_PBROADCASTQ128_MASK,
29612  IX86_BUILTIN_PBROADCASTQ128_GPR_MASK,
29613  IX86_BUILTIN_BROADCASTSS256,
29614  IX86_BUILTIN_BROADCASTSS128,
29615  IX86_BUILTIN_BROADCASTSD256,
29616  IX86_BUILTIN_EXTRACTF64X2_256,
29617  IX86_BUILTIN_EXTRACTI64X2_256,
29618  IX86_BUILTIN_INSERTF32X4_256,
29619  IX86_BUILTIN_INSERTI32X4_256,
29620  IX86_BUILTIN_PMOVSXBW256_MASK,
29621  IX86_BUILTIN_PMOVSXBW128_MASK,
29622  IX86_BUILTIN_PMOVSXBD256_MASK,
29623  IX86_BUILTIN_PMOVSXBD128_MASK,
29624  IX86_BUILTIN_PMOVSXBQ256_MASK,
29625  IX86_BUILTIN_PMOVSXBQ128_MASK,
29626  IX86_BUILTIN_PMOVSXWD256_MASK,
29627  IX86_BUILTIN_PMOVSXWD128_MASK,
29628  IX86_BUILTIN_PMOVSXWQ256_MASK,
29629  IX86_BUILTIN_PMOVSXWQ128_MASK,
29630  IX86_BUILTIN_PMOVSXDQ256_MASK,
29631  IX86_BUILTIN_PMOVSXDQ128_MASK,
29632  IX86_BUILTIN_PMOVZXBW256_MASK,
29633  IX86_BUILTIN_PMOVZXBW128_MASK,
29634  IX86_BUILTIN_PMOVZXBD256_MASK,
29635  IX86_BUILTIN_PMOVZXBD128_MASK,
29636  IX86_BUILTIN_PMOVZXBQ256_MASK,
29637  IX86_BUILTIN_PMOVZXBQ128_MASK,
29638  IX86_BUILTIN_PMOVZXWD256_MASK,
29639  IX86_BUILTIN_PMOVZXWD128_MASK,
29640  IX86_BUILTIN_PMOVZXWQ256_MASK,
29641  IX86_BUILTIN_PMOVZXWQ128_MASK,
29642  IX86_BUILTIN_PMOVZXDQ256_MASK,
29643  IX86_BUILTIN_PMOVZXDQ128_MASK,
29644  IX86_BUILTIN_REDUCEPD256_MASK,
29645  IX86_BUILTIN_REDUCEPD128_MASK,
29646  IX86_BUILTIN_REDUCEPS256_MASK,
29647  IX86_BUILTIN_REDUCEPS128_MASK,
29648  IX86_BUILTIN_REDUCESD_MASK,
29649  IX86_BUILTIN_REDUCESS_MASK,
29650  IX86_BUILTIN_VPERMVARHI256_MASK,
29651  IX86_BUILTIN_VPERMVARHI128_MASK,
29652  IX86_BUILTIN_VPERMT2VARHI256,
29653  IX86_BUILTIN_VPERMT2VARHI256_MASKZ,
29654  IX86_BUILTIN_VPERMT2VARHI128,
29655  IX86_BUILTIN_VPERMT2VARHI128_MASKZ,
29656  IX86_BUILTIN_VPERMI2VARHI256,
29657  IX86_BUILTIN_VPERMI2VARHI128,
29658  IX86_BUILTIN_RCP14PD256,
29659  IX86_BUILTIN_RCP14PD128,
29660  IX86_BUILTIN_RCP14PS256,
29661  IX86_BUILTIN_RCP14PS128,
29662  IX86_BUILTIN_RSQRT14PD256_MASK,
29663  IX86_BUILTIN_RSQRT14PD128_MASK,
29664  IX86_BUILTIN_RSQRT14PS256_MASK,
29665  IX86_BUILTIN_RSQRT14PS128_MASK,
29666  IX86_BUILTIN_SQRTPD256_MASK,
29667  IX86_BUILTIN_SQRTPD128_MASK,
29668  IX86_BUILTIN_SQRTPS256_MASK,
29669  IX86_BUILTIN_SQRTPS128_MASK,
29670  IX86_BUILTIN_PADDB128_MASK,
29671  IX86_BUILTIN_PADDW128_MASK,
29672  IX86_BUILTIN_PADDD128_MASK,
29673  IX86_BUILTIN_PADDQ128_MASK,
29674  IX86_BUILTIN_PSUBB128_MASK,
29675  IX86_BUILTIN_PSUBW128_MASK,
29676  IX86_BUILTIN_PSUBD128_MASK,
29677  IX86_BUILTIN_PSUBQ128_MASK,
29678  IX86_BUILTIN_PADDSB128_MASK,
29679  IX86_BUILTIN_PADDSW128_MASK,
29680  IX86_BUILTIN_PSUBSB128_MASK,
29681  IX86_BUILTIN_PSUBSW128_MASK,
29682  IX86_BUILTIN_PADDUSB128_MASK,
29683  IX86_BUILTIN_PADDUSW128_MASK,
29684  IX86_BUILTIN_PSUBUSB128_MASK,
29685  IX86_BUILTIN_PSUBUSW128_MASK,
29686  IX86_BUILTIN_PADDB256_MASK,
29687  IX86_BUILTIN_PADDW256_MASK,
29688  IX86_BUILTIN_PADDD256_MASK,
29689  IX86_BUILTIN_PADDQ256_MASK,
29690  IX86_BUILTIN_PADDSB256_MASK,
29691  IX86_BUILTIN_PADDSW256_MASK,
29692  IX86_BUILTIN_PADDUSB256_MASK,
29693  IX86_BUILTIN_PADDUSW256_MASK,
29694  IX86_BUILTIN_PSUBB256_MASK,
29695  IX86_BUILTIN_PSUBW256_MASK,
29696  IX86_BUILTIN_PSUBD256_MASK,
29697  IX86_BUILTIN_PSUBQ256_MASK,
29698  IX86_BUILTIN_PSUBSB256_MASK,
29699  IX86_BUILTIN_PSUBSW256_MASK,
29700  IX86_BUILTIN_PSUBUSB256_MASK,
29701  IX86_BUILTIN_PSUBUSW256_MASK,
29702  IX86_BUILTIN_SHUF_F64x2_256,
29703  IX86_BUILTIN_SHUF_I64x2_256,
29704  IX86_BUILTIN_SHUF_I32x4_256,
29705  IX86_BUILTIN_SHUF_F32x4_256,
29706  IX86_BUILTIN_PMOVWB128,
29707  IX86_BUILTIN_PMOVWB256,
29708  IX86_BUILTIN_PMOVSWB128,
29709  IX86_BUILTIN_PMOVSWB256,
29710  IX86_BUILTIN_PMOVUSWB128,
29711  IX86_BUILTIN_PMOVUSWB256,
29712  IX86_BUILTIN_PMOVDB128,
29713  IX86_BUILTIN_PMOVDB256,
29714  IX86_BUILTIN_PMOVSDB128,
29715  IX86_BUILTIN_PMOVSDB256,
29716  IX86_BUILTIN_PMOVUSDB128,
29717  IX86_BUILTIN_PMOVUSDB256,
29718  IX86_BUILTIN_PMOVDW128,
29719  IX86_BUILTIN_PMOVDW256,
29720  IX86_BUILTIN_PMOVSDW128,
29721  IX86_BUILTIN_PMOVSDW256,
29722  IX86_BUILTIN_PMOVUSDW128,
29723  IX86_BUILTIN_PMOVUSDW256,
29724  IX86_BUILTIN_PMOVQB128,
29725  IX86_BUILTIN_PMOVQB256,
29726  IX86_BUILTIN_PMOVSQB128,
29727  IX86_BUILTIN_PMOVSQB256,
29728  IX86_BUILTIN_PMOVUSQB128,
29729  IX86_BUILTIN_PMOVUSQB256,
29730  IX86_BUILTIN_PMOVQW128,
29731  IX86_BUILTIN_PMOVQW256,
29732  IX86_BUILTIN_PMOVSQW128,
29733  IX86_BUILTIN_PMOVSQW256,
29734  IX86_BUILTIN_PMOVUSQW128,
29735  IX86_BUILTIN_PMOVUSQW256,
29736  IX86_BUILTIN_PMOVQD128,
29737  IX86_BUILTIN_PMOVQD256,
29738  IX86_BUILTIN_PMOVSQD128,
29739  IX86_BUILTIN_PMOVSQD256,
29740  IX86_BUILTIN_PMOVUSQD128,
29741  IX86_BUILTIN_PMOVUSQD256,
29742  IX86_BUILTIN_RANGEPD256,
29743  IX86_BUILTIN_RANGEPD128,
29744  IX86_BUILTIN_RANGEPS256,
29745  IX86_BUILTIN_RANGEPS128,
29746  IX86_BUILTIN_GETEXPPS256,
29747  IX86_BUILTIN_GETEXPPD256,
29748  IX86_BUILTIN_GETEXPPS128,
29749  IX86_BUILTIN_GETEXPPD128,
29750  IX86_BUILTIN_FIXUPIMMPD256_MASK,
29751  IX86_BUILTIN_FIXUPIMMPD256_MASKZ,
29752  IX86_BUILTIN_FIXUPIMMPS256_MASK,
29753  IX86_BUILTIN_FIXUPIMMPS256_MASKZ,
29754  IX86_BUILTIN_FIXUPIMMPD128_MASK,
29755  IX86_BUILTIN_FIXUPIMMPD128_MASKZ,
29756  IX86_BUILTIN_FIXUPIMMPS128_MASK,
29757  IX86_BUILTIN_FIXUPIMMPS128_MASKZ,
29758  IX86_BUILTIN_PABSQ256,
29759  IX86_BUILTIN_PABSQ128,
29760  IX86_BUILTIN_PABSD256_MASK,
29761  IX86_BUILTIN_PABSD128_MASK,
29762  IX86_BUILTIN_PMULHRSW256_MASK,
29763  IX86_BUILTIN_PMULHRSW128_MASK,
29764  IX86_BUILTIN_PMULHUW128_MASK,
29765  IX86_BUILTIN_PMULHUW256_MASK,
29766  IX86_BUILTIN_PMULHW256_MASK,
29767  IX86_BUILTIN_PMULHW128_MASK,
29768  IX86_BUILTIN_PMULLW256_MASK,
29769  IX86_BUILTIN_PMULLW128_MASK,
29770  IX86_BUILTIN_PMULLQ256,
29771  IX86_BUILTIN_PMULLQ128,
29772  IX86_BUILTIN_ANDPD256_MASK,
29773  IX86_BUILTIN_ANDPD128_MASK,
29774  IX86_BUILTIN_ANDPS256_MASK,
29775  IX86_BUILTIN_ANDPS128_MASK,
29776  IX86_BUILTIN_ANDNPD256_MASK,
29777  IX86_BUILTIN_ANDNPD128_MASK,
29778  IX86_BUILTIN_ANDNPS256_MASK,
29779  IX86_BUILTIN_ANDNPS128_MASK,
29780  IX86_BUILTIN_PSLLWI128_MASK,
29781  IX86_BUILTIN_PSLLDI128_MASK,
29782  IX86_BUILTIN_PSLLQI128_MASK,
29783  IX86_BUILTIN_PSLLW128_MASK,
29784  IX86_BUILTIN_PSLLD128_MASK,
29785  IX86_BUILTIN_PSLLQ128_MASK,
29786  IX86_BUILTIN_PSLLWI256_MASK,
29787  IX86_BUILTIN_PSLLW256_MASK,
29788  IX86_BUILTIN_PSLLDI256_MASK,
29789  IX86_BUILTIN_PSLLD256_MASK,
29790  IX86_BUILTIN_PSLLQI256_MASK,
29791  IX86_BUILTIN_PSLLQ256_MASK,
29792  IX86_BUILTIN_PSRADI128_MASK,
29793  IX86_BUILTIN_PSRAD128_MASK,
29794  IX86_BUILTIN_PSRADI256_MASK,
29795  IX86_BUILTIN_PSRAD256_MASK,
29796  IX86_BUILTIN_PSRAQI128_MASK,
29797  IX86_BUILTIN_PSRAQ128_MASK,
29798  IX86_BUILTIN_PSRAQI256_MASK,
29799  IX86_BUILTIN_PSRAQ256_MASK,
29800  IX86_BUILTIN_PANDD256,
29801  IX86_BUILTIN_PANDD128,
29802  IX86_BUILTIN_PSRLDI128_MASK,
29803  IX86_BUILTIN_PSRLD128_MASK,
29804  IX86_BUILTIN_PSRLDI256_MASK,
29805  IX86_BUILTIN_PSRLD256_MASK,
29806  IX86_BUILTIN_PSRLQI128_MASK,
29807  IX86_BUILTIN_PSRLQ128_MASK,
29808  IX86_BUILTIN_PSRLQI256_MASK,
29809  IX86_BUILTIN_PSRLQ256_MASK,
29810  IX86_BUILTIN_PANDQ256,
29811  IX86_BUILTIN_PANDQ128,
29812  IX86_BUILTIN_PANDND256,
29813  IX86_BUILTIN_PANDND128,
29814  IX86_BUILTIN_PANDNQ256,
29815  IX86_BUILTIN_PANDNQ128,
29816  IX86_BUILTIN_PORD256,
29817  IX86_BUILTIN_PORD128,
29818  IX86_BUILTIN_PORQ256,
29819  IX86_BUILTIN_PORQ128,
29820  IX86_BUILTIN_PXORD256,
29821  IX86_BUILTIN_PXORD128,
29822  IX86_BUILTIN_PXORQ256,
29823  IX86_BUILTIN_PXORQ128,
29824  IX86_BUILTIN_PACKSSWB256_MASK,
29825  IX86_BUILTIN_PACKSSWB128_MASK,
29826  IX86_BUILTIN_PACKUSWB256_MASK,
29827  IX86_BUILTIN_PACKUSWB128_MASK,
29828  IX86_BUILTIN_RNDSCALEPS256,
29829  IX86_BUILTIN_RNDSCALEPD256,
29830  IX86_BUILTIN_RNDSCALEPS128,
29831  IX86_BUILTIN_RNDSCALEPD128,
29832  IX86_BUILTIN_VTERNLOGQ256_MASK,
29833  IX86_BUILTIN_VTERNLOGQ256_MASKZ,
29834  IX86_BUILTIN_VTERNLOGD256_MASK,
29835  IX86_BUILTIN_VTERNLOGD256_MASKZ,
29836  IX86_BUILTIN_VTERNLOGQ128_MASK,
29837  IX86_BUILTIN_VTERNLOGQ128_MASKZ,
29838  IX86_BUILTIN_VTERNLOGD128_MASK,
29839  IX86_BUILTIN_VTERNLOGD128_MASKZ,
29840  IX86_BUILTIN_SCALEFPD256,
29841  IX86_BUILTIN_SCALEFPS256,
29842  IX86_BUILTIN_SCALEFPD128,
29843  IX86_BUILTIN_SCALEFPS128,
29844  IX86_BUILTIN_VFMADDPD256_MASK,
29845  IX86_BUILTIN_VFMADDPD256_MASK3,
29846  IX86_BUILTIN_VFMADDPD256_MASKZ,
29847  IX86_BUILTIN_VFMADDPD128_MASK,
29848  IX86_BUILTIN_VFMADDPD128_MASK3,
29849  IX86_BUILTIN_VFMADDPD128_MASKZ,
29850  IX86_BUILTIN_VFMADDPS256_MASK,
29851  IX86_BUILTIN_VFMADDPS256_MASK3,
29852  IX86_BUILTIN_VFMADDPS256_MASKZ,
29853  IX86_BUILTIN_VFMADDPS128_MASK,
29854  IX86_BUILTIN_VFMADDPS128_MASK3,
29855  IX86_BUILTIN_VFMADDPS128_MASKZ,
29856  IX86_BUILTIN_VFMSUBPD256_MASK3,
29857  IX86_BUILTIN_VFMSUBPD128_MASK3,
29858  IX86_BUILTIN_VFMSUBPS256_MASK3,
29859  IX86_BUILTIN_VFMSUBPS128_MASK3,
29860  IX86_BUILTIN_VFNMADDPD256_MASK,
29861  IX86_BUILTIN_VFNMADDPD128_MASK,
29862  IX86_BUILTIN_VFNMADDPS256_MASK,
29863  IX86_BUILTIN_VFNMADDPS128_MASK,
29864  IX86_BUILTIN_VFNMSUBPD256_MASK,
29865  IX86_BUILTIN_VFNMSUBPD256_MASK3,
29866  IX86_BUILTIN_VFNMSUBPD128_MASK,
29867  IX86_BUILTIN_VFNMSUBPD128_MASK3,
29868  IX86_BUILTIN_VFNMSUBPS256_MASK,
29869  IX86_BUILTIN_VFNMSUBPS256_MASK3,
29870  IX86_BUILTIN_VFNMSUBPS128_MASK,
29871  IX86_BUILTIN_VFNMSUBPS128_MASK3,
29872  IX86_BUILTIN_VFMADDSUBPD256_MASK,
29873  IX86_BUILTIN_VFMADDSUBPD256_MASK3,
29874  IX86_BUILTIN_VFMADDSUBPD256_MASKZ,
29875  IX86_BUILTIN_VFMADDSUBPD128_MASK,
29876  IX86_BUILTIN_VFMADDSUBPD128_MASK3,
29877  IX86_BUILTIN_VFMADDSUBPD128_MASKZ,
29878  IX86_BUILTIN_VFMADDSUBPS256_MASK,
29879  IX86_BUILTIN_VFMADDSUBPS256_MASK3,
29880  IX86_BUILTIN_VFMADDSUBPS256_MASKZ,
29881  IX86_BUILTIN_VFMADDSUBPS128_MASK,
29882  IX86_BUILTIN_VFMADDSUBPS128_MASK3,
29883  IX86_BUILTIN_VFMADDSUBPS128_MASKZ,
29884  IX86_BUILTIN_VFMSUBADDPD256_MASK3,
29885  IX86_BUILTIN_VFMSUBADDPD128_MASK3,
29886  IX86_BUILTIN_VFMSUBADDPS256_MASK3,
29887  IX86_BUILTIN_VFMSUBADDPS128_MASK3,
29888  IX86_BUILTIN_INSERTF64X2_256,
29889  IX86_BUILTIN_INSERTI64X2_256,
29890  IX86_BUILTIN_PSRAVV16HI,
29891  IX86_BUILTIN_PSRAVV8HI,
29892  IX86_BUILTIN_PMADDUBSW256_MASK,
29893  IX86_BUILTIN_PMADDUBSW128_MASK,
29894  IX86_BUILTIN_PMADDWD256_MASK,
29895  IX86_BUILTIN_PMADDWD128_MASK,
29896  IX86_BUILTIN_PSRLVV16HI,
29897  IX86_BUILTIN_PSRLVV8HI,
29898  IX86_BUILTIN_CVTPS2DQ256_MASK,
29899  IX86_BUILTIN_CVTPS2DQ128_MASK,
29900  IX86_BUILTIN_CVTPS2UDQ256,
29901  IX86_BUILTIN_CVTPS2UDQ128,
29902  IX86_BUILTIN_CVTPS2QQ256,
29903  IX86_BUILTIN_CVTPS2QQ128,
29904  IX86_BUILTIN_CVTPS2UQQ256,
29905  IX86_BUILTIN_CVTPS2UQQ128,
29906  IX86_BUILTIN_GETMANTPS256,
29907  IX86_BUILTIN_GETMANTPS128,
29908  IX86_BUILTIN_GETMANTPD256,
29909  IX86_BUILTIN_GETMANTPD128,
29910  IX86_BUILTIN_MOVDDUP256_MASK,
29911  IX86_BUILTIN_MOVDDUP128_MASK,
29912  IX86_BUILTIN_MOVSHDUP256_MASK,
29913  IX86_BUILTIN_MOVSHDUP128_MASK,
29914  IX86_BUILTIN_MOVSLDUP256_MASK,
29915  IX86_BUILTIN_MOVSLDUP128_MASK,
29916  IX86_BUILTIN_CVTQQ2PS256,
29917  IX86_BUILTIN_CVTQQ2PS128,
29918  IX86_BUILTIN_CVTUQQ2PS256,
29919  IX86_BUILTIN_CVTUQQ2PS128,
29920  IX86_BUILTIN_CVTQQ2PD256,
29921  IX86_BUILTIN_CVTQQ2PD128,
29922  IX86_BUILTIN_CVTUQQ2PD256,
29923  IX86_BUILTIN_CVTUQQ2PD128,
29924  IX86_BUILTIN_VPERMT2VARQ256,
29925  IX86_BUILTIN_VPERMT2VARQ256_MASKZ,
29926  IX86_BUILTIN_VPERMT2VARD256,
29927  IX86_BUILTIN_VPERMT2VARD256_MASKZ,
29928  IX86_BUILTIN_VPERMI2VARQ256,
29929  IX86_BUILTIN_VPERMI2VARD256,
29930  IX86_BUILTIN_VPERMT2VARPD256,
29931  IX86_BUILTIN_VPERMT2VARPD256_MASKZ,
29932  IX86_BUILTIN_VPERMT2VARPS256,
29933  IX86_BUILTIN_VPERMT2VARPS256_MASKZ,
29934  IX86_BUILTIN_VPERMI2VARPD256,
29935  IX86_BUILTIN_VPERMI2VARPS256,
29936  IX86_BUILTIN_VPERMT2VARQ128,
29937  IX86_BUILTIN_VPERMT2VARQ128_MASKZ,
29938  IX86_BUILTIN_VPERMT2VARD128,
29939  IX86_BUILTIN_VPERMT2VARD128_MASKZ,
29940  IX86_BUILTIN_VPERMI2VARQ128,
29941  IX86_BUILTIN_VPERMI2VARD128,
29942  IX86_BUILTIN_VPERMT2VARPD128,
29943  IX86_BUILTIN_VPERMT2VARPD128_MASKZ,
29944  IX86_BUILTIN_VPERMT2VARPS128,
29945  IX86_BUILTIN_VPERMT2VARPS128_MASKZ,
29946  IX86_BUILTIN_VPERMI2VARPD128,
29947  IX86_BUILTIN_VPERMI2VARPS128,
29948  IX86_BUILTIN_PSHUFB256_MASK,
29949  IX86_BUILTIN_PSHUFB128_MASK,
29950  IX86_BUILTIN_PSHUFHW256_MASK,
29951  IX86_BUILTIN_PSHUFHW128_MASK,
29952  IX86_BUILTIN_PSHUFLW256_MASK,
29953  IX86_BUILTIN_PSHUFLW128_MASK,
29954  IX86_BUILTIN_PSHUFD256_MASK,
29955  IX86_BUILTIN_PSHUFD128_MASK,
29956  IX86_BUILTIN_SHUFPD256_MASK,
29957  IX86_BUILTIN_SHUFPD128_MASK,
29958  IX86_BUILTIN_SHUFPS256_MASK,
29959  IX86_BUILTIN_SHUFPS128_MASK,
29960  IX86_BUILTIN_PROLVQ256,
29961  IX86_BUILTIN_PROLVQ128,
29962  IX86_BUILTIN_PROLQ256,
29963  IX86_BUILTIN_PROLQ128,
29964  IX86_BUILTIN_PRORVQ256,
29965  IX86_BUILTIN_PRORVQ128,
29966  IX86_BUILTIN_PRORQ256,
29967  IX86_BUILTIN_PRORQ128,
29968  IX86_BUILTIN_PSRAVQ128,
29969  IX86_BUILTIN_PSRAVQ256,
29970  IX86_BUILTIN_PSLLVV4DI_MASK,
29971  IX86_BUILTIN_PSLLVV2DI_MASK,
29972  IX86_BUILTIN_PSLLVV8SI_MASK,
29973  IX86_BUILTIN_PSLLVV4SI_MASK,
29974  IX86_BUILTIN_PSRAVV8SI_MASK,
29975  IX86_BUILTIN_PSRAVV4SI_MASK,
29976  IX86_BUILTIN_PSRLVV4DI_MASK,
29977  IX86_BUILTIN_PSRLVV2DI_MASK,
29978  IX86_BUILTIN_PSRLVV8SI_MASK,
29979  IX86_BUILTIN_PSRLVV4SI_MASK,
29980  IX86_BUILTIN_PSRAWI256_MASK,
29981  IX86_BUILTIN_PSRAW256_MASK,
29982  IX86_BUILTIN_PSRAWI128_MASK,
29983  IX86_BUILTIN_PSRAW128_MASK,
29984  IX86_BUILTIN_PSRLWI256_MASK,
29985  IX86_BUILTIN_PSRLW256_MASK,
29986  IX86_BUILTIN_PSRLWI128_MASK,
29987  IX86_BUILTIN_PSRLW128_MASK,
29988  IX86_BUILTIN_PRORVD256,
29989  IX86_BUILTIN_PROLVD256,
29990  IX86_BUILTIN_PRORD256,
29991  IX86_BUILTIN_PROLD256,
29992  IX86_BUILTIN_PRORVD128,
29993  IX86_BUILTIN_PROLVD128,
29994  IX86_BUILTIN_PRORD128,
29995  IX86_BUILTIN_PROLD128,
29996  IX86_BUILTIN_FPCLASSPD256,
29997  IX86_BUILTIN_FPCLASSPD128,
29998  IX86_BUILTIN_FPCLASSSD,
29999  IX86_BUILTIN_FPCLASSPS256,
30000  IX86_BUILTIN_FPCLASSPS128,
30001  IX86_BUILTIN_FPCLASSSS,
30002  IX86_BUILTIN_CVTB2MASK128,
30003  IX86_BUILTIN_CVTB2MASK256,
30004  IX86_BUILTIN_CVTW2MASK128,
30005  IX86_BUILTIN_CVTW2MASK256,
30006  IX86_BUILTIN_CVTD2MASK128,
30007  IX86_BUILTIN_CVTD2MASK256,
30008  IX86_BUILTIN_CVTQ2MASK128,
30009  IX86_BUILTIN_CVTQ2MASK256,
30010  IX86_BUILTIN_CVTMASK2B128,
30011  IX86_BUILTIN_CVTMASK2B256,
30012  IX86_BUILTIN_CVTMASK2W128,
30013  IX86_BUILTIN_CVTMASK2W256,
30014  IX86_BUILTIN_CVTMASK2D128,
30015  IX86_BUILTIN_CVTMASK2D256,
30016  IX86_BUILTIN_CVTMASK2Q128,
30017  IX86_BUILTIN_CVTMASK2Q256,
30018  IX86_BUILTIN_PCMPEQB128_MASK,
30019  IX86_BUILTIN_PCMPEQB256_MASK,
30020  IX86_BUILTIN_PCMPEQW128_MASK,
30021  IX86_BUILTIN_PCMPEQW256_MASK,
30022  IX86_BUILTIN_PCMPEQD128_MASK,
30023  IX86_BUILTIN_PCMPEQD256_MASK,
30024  IX86_BUILTIN_PCMPEQQ128_MASK,
30025  IX86_BUILTIN_PCMPEQQ256_MASK,
30026  IX86_BUILTIN_PCMPGTB128_MASK,
30027  IX86_BUILTIN_PCMPGTB256_MASK,
30028  IX86_BUILTIN_PCMPGTW128_MASK,
30029  IX86_BUILTIN_PCMPGTW256_MASK,
30030  IX86_BUILTIN_PCMPGTD128_MASK,
30031  IX86_BUILTIN_PCMPGTD256_MASK,
30032  IX86_BUILTIN_PCMPGTQ128_MASK,
30033  IX86_BUILTIN_PCMPGTQ256_MASK,
30034  IX86_BUILTIN_PTESTMB128,
30035  IX86_BUILTIN_PTESTMB256,
30036  IX86_BUILTIN_PTESTMW128,
30037  IX86_BUILTIN_PTESTMW256,
30038  IX86_BUILTIN_PTESTMD128,
30039  IX86_BUILTIN_PTESTMD256,
30040  IX86_BUILTIN_PTESTMQ128,
30041  IX86_BUILTIN_PTESTMQ256,
30042  IX86_BUILTIN_PTESTNMB128,
30043  IX86_BUILTIN_PTESTNMB256,
30044  IX86_BUILTIN_PTESTNMW128,
30045  IX86_BUILTIN_PTESTNMW256,
30046  IX86_BUILTIN_PTESTNMD128,
30047  IX86_BUILTIN_PTESTNMD256,
30048  IX86_BUILTIN_PTESTNMQ128,
30049  IX86_BUILTIN_PTESTNMQ256,
30050  IX86_BUILTIN_PBROADCASTMB128,
30051  IX86_BUILTIN_PBROADCASTMB256,
30052  IX86_BUILTIN_PBROADCASTMW128,
30053  IX86_BUILTIN_PBROADCASTMW256,
30054  IX86_BUILTIN_COMPRESSPD256,
30055  IX86_BUILTIN_COMPRESSPD128,
30056  IX86_BUILTIN_COMPRESSPS256,
30057  IX86_BUILTIN_COMPRESSPS128,
30058  IX86_BUILTIN_PCOMPRESSQ256,
30059  IX86_BUILTIN_PCOMPRESSQ128,
30060  IX86_BUILTIN_PCOMPRESSD256,
30061  IX86_BUILTIN_PCOMPRESSD128,
30062  IX86_BUILTIN_EXPANDPD256,
30063  IX86_BUILTIN_EXPANDPD128,
30064  IX86_BUILTIN_EXPANDPS256,
30065  IX86_BUILTIN_EXPANDPS128,
30066  IX86_BUILTIN_PEXPANDQ256,
30067  IX86_BUILTIN_PEXPANDQ128,
30068  IX86_BUILTIN_PEXPANDD256,
30069  IX86_BUILTIN_PEXPANDD128,
30070  IX86_BUILTIN_EXPANDPD256Z,
30071  IX86_BUILTIN_EXPANDPD128Z,
30072  IX86_BUILTIN_EXPANDPS256Z,
30073  IX86_BUILTIN_EXPANDPS128Z,
30074  IX86_BUILTIN_PEXPANDQ256Z,
30075  IX86_BUILTIN_PEXPANDQ128Z,
30076  IX86_BUILTIN_PEXPANDD256Z,
30077  IX86_BUILTIN_PEXPANDD128Z,
30078  IX86_BUILTIN_PMAXSD256_MASK,
30079  IX86_BUILTIN_PMINSD256_MASK,
30080  IX86_BUILTIN_PMAXUD256_MASK,
30081  IX86_BUILTIN_PMINUD256_MASK,
30082  IX86_BUILTIN_PMAXSD128_MASK,
30083  IX86_BUILTIN_PMINSD128_MASK,
30084  IX86_BUILTIN_PMAXUD128_MASK,
30085  IX86_BUILTIN_PMINUD128_MASK,
30086  IX86_BUILTIN_PMAXSQ256_MASK,
30087  IX86_BUILTIN_PMINSQ256_MASK,
30088  IX86_BUILTIN_PMAXUQ256_MASK,
30089  IX86_BUILTIN_PMINUQ256_MASK,
30090  IX86_BUILTIN_PMAXSQ128_MASK,
30091  IX86_BUILTIN_PMINSQ128_MASK,
30092  IX86_BUILTIN_PMAXUQ128_MASK,
30093  IX86_BUILTIN_PMINUQ128_MASK,
30094  IX86_BUILTIN_PMINSB256_MASK,
30095  IX86_BUILTIN_PMINUB256_MASK,
30096  IX86_BUILTIN_PMAXSB256_MASK,
30097  IX86_BUILTIN_PMAXUB256_MASK,
30098  IX86_BUILTIN_PMINSB128_MASK,
30099  IX86_BUILTIN_PMINUB128_MASK,
30100  IX86_BUILTIN_PMAXSB128_MASK,
30101  IX86_BUILTIN_PMAXUB128_MASK,
30102  IX86_BUILTIN_PMINSW256_MASK,
30103  IX86_BUILTIN_PMINUW256_MASK,
30104  IX86_BUILTIN_PMAXSW256_MASK,
30105  IX86_BUILTIN_PMAXUW256_MASK,
30106  IX86_BUILTIN_PMINSW128_MASK,
30107  IX86_BUILTIN_PMINUW128_MASK,
30108  IX86_BUILTIN_PMAXSW128_MASK,
30109  IX86_BUILTIN_PMAXUW128_MASK,
30110  IX86_BUILTIN_VPCONFLICTQ256,
30111  IX86_BUILTIN_VPCONFLICTD256,
30112  IX86_BUILTIN_VPCLZCNTQ256,
30113  IX86_BUILTIN_VPCLZCNTD256,
30114  IX86_BUILTIN_UNPCKHPD256_MASK,
30115  IX86_BUILTIN_UNPCKHPD128_MASK,
30116  IX86_BUILTIN_UNPCKHPS256_MASK,
30117  IX86_BUILTIN_UNPCKHPS128_MASK,
30118  IX86_BUILTIN_UNPCKLPD256_MASK,
30119  IX86_BUILTIN_UNPCKLPD128_MASK,
30120  IX86_BUILTIN_UNPCKLPS256_MASK,
30121  IX86_BUILTIN_VPCONFLICTQ128,
30122  IX86_BUILTIN_VPCONFLICTD128,
30123  IX86_BUILTIN_VPCLZCNTQ128,
30124  IX86_BUILTIN_VPCLZCNTD128,
30125  IX86_BUILTIN_UNPCKLPS128_MASK,
30126  IX86_BUILTIN_ALIGND256,
30127  IX86_BUILTIN_ALIGNQ256,
30128  IX86_BUILTIN_ALIGND128,
30129  IX86_BUILTIN_ALIGNQ128,
30130  IX86_BUILTIN_CVTPS2PH256_MASK,
30131  IX86_BUILTIN_CVTPS2PH_MASK,
30132  IX86_BUILTIN_CVTPH2PS_MASK,
30133  IX86_BUILTIN_CVTPH2PS256_MASK,
30134  IX86_BUILTIN_PUNPCKHDQ128_MASK,
30135  IX86_BUILTIN_PUNPCKHDQ256_MASK,
30136  IX86_BUILTIN_PUNPCKHQDQ128_MASK,
30137  IX86_BUILTIN_PUNPCKHQDQ256_MASK,
30138  IX86_BUILTIN_PUNPCKLDQ128_MASK,
30139  IX86_BUILTIN_PUNPCKLDQ256_MASK,
30140  IX86_BUILTIN_PUNPCKLQDQ128_MASK,
30141  IX86_BUILTIN_PUNPCKLQDQ256_MASK,
30142  IX86_BUILTIN_PUNPCKHBW128_MASK,
30143  IX86_BUILTIN_PUNPCKHBW256_MASK,
30144  IX86_BUILTIN_PUNPCKHWD128_MASK,
30145  IX86_BUILTIN_PUNPCKHWD256_MASK,
30146  IX86_BUILTIN_PUNPCKLBW128_MASK,
30147  IX86_BUILTIN_PUNPCKLBW256_MASK,
30148  IX86_BUILTIN_PUNPCKLWD128_MASK,
30149  IX86_BUILTIN_PUNPCKLWD256_MASK,
30150  IX86_BUILTIN_PSLLVV16HI,
30151  IX86_BUILTIN_PSLLVV8HI,
30152  IX86_BUILTIN_PACKSSDW256_MASK,
30153  IX86_BUILTIN_PACKSSDW128_MASK,
30154  IX86_BUILTIN_PACKUSDW256_MASK,
30155  IX86_BUILTIN_PACKUSDW128_MASK,
30156  IX86_BUILTIN_PAVGB256_MASK,
30157  IX86_BUILTIN_PAVGW256_MASK,
30158  IX86_BUILTIN_PAVGB128_MASK,
30159  IX86_BUILTIN_PAVGW128_MASK,
30160  IX86_BUILTIN_VPERMVARSF256_MASK,
30161  IX86_BUILTIN_VPERMVARDF256_MASK,
30162  IX86_BUILTIN_VPERMDF256_MASK,
30163  IX86_BUILTIN_PABSB256_MASK,
30164  IX86_BUILTIN_PABSB128_MASK,
30165  IX86_BUILTIN_PABSW256_MASK,
30166  IX86_BUILTIN_PABSW128_MASK,
30167  IX86_BUILTIN_VPERMILVARPD_MASK,
30168  IX86_BUILTIN_VPERMILVARPS_MASK,
30169  IX86_BUILTIN_VPERMILVARPD256_MASK,
30170  IX86_BUILTIN_VPERMILVARPS256_MASK,
30171  IX86_BUILTIN_VPERMILPD_MASK,
30172  IX86_BUILTIN_VPERMILPS_MASK,
30173  IX86_BUILTIN_VPERMILPD256_MASK,
30174  IX86_BUILTIN_VPERMILPS256_MASK,
30175  IX86_BUILTIN_BLENDMQ256,
30176  IX86_BUILTIN_BLENDMD256,
30177  IX86_BUILTIN_BLENDMPD256,
30178  IX86_BUILTIN_BLENDMPS256,
30179  IX86_BUILTIN_BLENDMQ128,
30180  IX86_BUILTIN_BLENDMD128,
30181  IX86_BUILTIN_BLENDMPD128,
30182  IX86_BUILTIN_BLENDMPS128,
30183  IX86_BUILTIN_BLENDMW256,
30184  IX86_BUILTIN_BLENDMB256,
30185  IX86_BUILTIN_BLENDMW128,
30186  IX86_BUILTIN_BLENDMB128,
30187  IX86_BUILTIN_PMULLD256_MASK,
30188  IX86_BUILTIN_PMULLD128_MASK,
30189  IX86_BUILTIN_PMULUDQ256_MASK,
30190  IX86_BUILTIN_PMULDQ256_MASK,
30191  IX86_BUILTIN_PMULDQ128_MASK,
30192  IX86_BUILTIN_PMULUDQ128_MASK,
30193  IX86_BUILTIN_CVTPD2PS256_MASK,
30194  IX86_BUILTIN_CVTPD2PS_MASK,
30195  IX86_BUILTIN_VPERMVARSI256_MASK,
30196  IX86_BUILTIN_VPERMVARDI256_MASK,
30197  IX86_BUILTIN_VPERMDI256_MASK,
30198  IX86_BUILTIN_CMPQ256,
30199  IX86_BUILTIN_CMPD256,
30200  IX86_BUILTIN_UCMPQ256,
30201  IX86_BUILTIN_UCMPD256,
30202  IX86_BUILTIN_CMPB256,
30203  IX86_BUILTIN_CMPW256,
30204  IX86_BUILTIN_UCMPB256,
30205  IX86_BUILTIN_UCMPW256,
30206  IX86_BUILTIN_CMPPD256_MASK,
30207  IX86_BUILTIN_CMPPS256_MASK,
30208  IX86_BUILTIN_CMPQ128,
30209  IX86_BUILTIN_CMPD128,
30210  IX86_BUILTIN_UCMPQ128,
30211  IX86_BUILTIN_UCMPD128,
30212  IX86_BUILTIN_CMPB128,
30213  IX86_BUILTIN_CMPW128,
30214  IX86_BUILTIN_UCMPB128,
30215  IX86_BUILTIN_UCMPW128,
30216  IX86_BUILTIN_CMPPD128_MASK,
30217  IX86_BUILTIN_CMPPS128_MASK,
30218
30219  IX86_BUILTIN_GATHER3SIV8SF,
30220  IX86_BUILTIN_GATHER3SIV4SF,
30221  IX86_BUILTIN_GATHER3SIV4DF,
30222  IX86_BUILTIN_GATHER3SIV2DF,
30223  IX86_BUILTIN_GATHER3DIV8SF,
30224  IX86_BUILTIN_GATHER3DIV4SF,
30225  IX86_BUILTIN_GATHER3DIV4DF,
30226  IX86_BUILTIN_GATHER3DIV2DF,
30227  IX86_BUILTIN_GATHER3SIV8SI,
30228  IX86_BUILTIN_GATHER3SIV4SI,
30229  IX86_BUILTIN_GATHER3SIV4DI,
30230  IX86_BUILTIN_GATHER3SIV2DI,
30231  IX86_BUILTIN_GATHER3DIV8SI,
30232  IX86_BUILTIN_GATHER3DIV4SI,
30233  IX86_BUILTIN_GATHER3DIV4DI,
30234  IX86_BUILTIN_GATHER3DIV2DI,
30235  IX86_BUILTIN_SCATTERSIV8SF,
30236  IX86_BUILTIN_SCATTERSIV4SF,
30237  IX86_BUILTIN_SCATTERSIV4DF,
30238  IX86_BUILTIN_SCATTERSIV2DF,
30239  IX86_BUILTIN_SCATTERDIV8SF,
30240  IX86_BUILTIN_SCATTERDIV4SF,
30241  IX86_BUILTIN_SCATTERDIV4DF,
30242  IX86_BUILTIN_SCATTERDIV2DF,
30243  IX86_BUILTIN_SCATTERSIV8SI,
30244  IX86_BUILTIN_SCATTERSIV4SI,
30245  IX86_BUILTIN_SCATTERSIV4DI,
30246  IX86_BUILTIN_SCATTERSIV2DI,
30247  IX86_BUILTIN_SCATTERDIV8SI,
30248  IX86_BUILTIN_SCATTERDIV4SI,
30249  IX86_BUILTIN_SCATTERDIV4DI,
30250  IX86_BUILTIN_SCATTERDIV2DI,
30251
30252  /* AVX512DQ.  */
30253  IX86_BUILTIN_RANGESD128,
30254  IX86_BUILTIN_RANGESS128,
30255  IX86_BUILTIN_KUNPCKWD,
30256  IX86_BUILTIN_KUNPCKDQ,
30257  IX86_BUILTIN_BROADCASTF32x2_512,
30258  IX86_BUILTIN_BROADCASTI32x2_512,
30259  IX86_BUILTIN_BROADCASTF64X2_512,
30260  IX86_BUILTIN_BROADCASTI64X2_512,
30261  IX86_BUILTIN_BROADCASTF32X8_512,
30262  IX86_BUILTIN_BROADCASTI32X8_512,
30263  IX86_BUILTIN_EXTRACTF64X2_512,
30264  IX86_BUILTIN_EXTRACTF32X8,
30265  IX86_BUILTIN_EXTRACTI64X2_512,
30266  IX86_BUILTIN_EXTRACTI32X8,
30267  IX86_BUILTIN_REDUCEPD512_MASK,
30268  IX86_BUILTIN_REDUCEPS512_MASK,
30269  IX86_BUILTIN_PMULLQ512,
30270  IX86_BUILTIN_XORPD512,
30271  IX86_BUILTIN_XORPS512,
30272  IX86_BUILTIN_ORPD512,
30273  IX86_BUILTIN_ORPS512,
30274  IX86_BUILTIN_ANDPD512,
30275  IX86_BUILTIN_ANDPS512,
30276  IX86_BUILTIN_ANDNPD512,
30277  IX86_BUILTIN_ANDNPS512,
30278  IX86_BUILTIN_INSERTF32X8,
30279  IX86_BUILTIN_INSERTI32X8,
30280  IX86_BUILTIN_INSERTF64X2_512,
30281  IX86_BUILTIN_INSERTI64X2_512,
30282  IX86_BUILTIN_FPCLASSPD512,
30283  IX86_BUILTIN_FPCLASSPS512,
30284  IX86_BUILTIN_CVTD2MASK512,
30285  IX86_BUILTIN_CVTQ2MASK512,
30286  IX86_BUILTIN_CVTMASK2D512,
30287  IX86_BUILTIN_CVTMASK2Q512,
30288  IX86_BUILTIN_CVTPD2QQ512,
30289  IX86_BUILTIN_CVTPS2QQ512,
30290  IX86_BUILTIN_CVTPD2UQQ512,
30291  IX86_BUILTIN_CVTPS2UQQ512,
30292  IX86_BUILTIN_CVTQQ2PS512,
30293  IX86_BUILTIN_CVTUQQ2PS512,
30294  IX86_BUILTIN_CVTQQ2PD512,
30295  IX86_BUILTIN_CVTUQQ2PD512,
30296  IX86_BUILTIN_CVTTPS2QQ512,
30297  IX86_BUILTIN_CVTTPS2UQQ512,
30298  IX86_BUILTIN_CVTTPD2QQ512,
30299  IX86_BUILTIN_CVTTPD2UQQ512,
30300  IX86_BUILTIN_RANGEPS512,
30301  IX86_BUILTIN_RANGEPD512,
30302
30303  /* AVX512BW.  */
30304  IX86_BUILTIN_PACKUSDW512,
30305  IX86_BUILTIN_PACKSSDW512,
30306  IX86_BUILTIN_LOADDQUHI512_MASK,
30307  IX86_BUILTIN_LOADDQUQI512_MASK,
30308  IX86_BUILTIN_PSLLDQ512,
30309  IX86_BUILTIN_PSRLDQ512,
30310  IX86_BUILTIN_STOREDQUHI512_MASK,
30311  IX86_BUILTIN_STOREDQUQI512_MASK,
30312  IX86_BUILTIN_PALIGNR512,
30313  IX86_BUILTIN_PALIGNR512_MASK,
30314  IX86_BUILTIN_MOVDQUHI512_MASK,
30315  IX86_BUILTIN_MOVDQUQI512_MASK,
30316  IX86_BUILTIN_PSADBW512,
30317  IX86_BUILTIN_DBPSADBW512,
30318  IX86_BUILTIN_PBROADCASTB512,
30319  IX86_BUILTIN_PBROADCASTB512_GPR,
30320  IX86_BUILTIN_PBROADCASTW512,
30321  IX86_BUILTIN_PBROADCASTW512_GPR,
30322  IX86_BUILTIN_PMOVSXBW512_MASK,
30323  IX86_BUILTIN_PMOVZXBW512_MASK,
30324  IX86_BUILTIN_VPERMVARHI512_MASK,
30325  IX86_BUILTIN_VPERMT2VARHI512,
30326  IX86_BUILTIN_VPERMT2VARHI512_MASKZ,
30327  IX86_BUILTIN_VPERMI2VARHI512,
30328  IX86_BUILTIN_PAVGB512,
30329  IX86_BUILTIN_PAVGW512,
30330  IX86_BUILTIN_PADDB512,
30331  IX86_BUILTIN_PSUBB512,
30332  IX86_BUILTIN_PSUBSB512,
30333  IX86_BUILTIN_PADDSB512,
30334  IX86_BUILTIN_PSUBUSB512,
30335  IX86_BUILTIN_PADDUSB512,
30336  IX86_BUILTIN_PSUBW512,
30337  IX86_BUILTIN_PADDW512,
30338  IX86_BUILTIN_PSUBSW512,
30339  IX86_BUILTIN_PADDSW512,
30340  IX86_BUILTIN_PSUBUSW512,
30341  IX86_BUILTIN_PADDUSW512,
30342  IX86_BUILTIN_PMAXUW512,
30343  IX86_BUILTIN_PMAXSW512,
30344  IX86_BUILTIN_PMINUW512,
30345  IX86_BUILTIN_PMINSW512,
30346  IX86_BUILTIN_PMAXUB512,
30347  IX86_BUILTIN_PMAXSB512,
30348  IX86_BUILTIN_PMINUB512,
30349  IX86_BUILTIN_PMINSB512,
30350  IX86_BUILTIN_PMOVWB512,
30351  IX86_BUILTIN_PMOVSWB512,
30352  IX86_BUILTIN_PMOVUSWB512,
30353  IX86_BUILTIN_PMULHRSW512_MASK,
30354  IX86_BUILTIN_PMULHUW512_MASK,
30355  IX86_BUILTIN_PMULHW512_MASK,
30356  IX86_BUILTIN_PMULLW512_MASK,
30357  IX86_BUILTIN_PSLLWI512_MASK,
30358  IX86_BUILTIN_PSLLW512_MASK,
30359  IX86_BUILTIN_PACKSSWB512,
30360  IX86_BUILTIN_PACKUSWB512,
30361  IX86_BUILTIN_PSRAVV32HI,
30362  IX86_BUILTIN_PMADDUBSW512_MASK,
30363  IX86_BUILTIN_PMADDWD512_MASK,
30364  IX86_BUILTIN_PSRLVV32HI,
30365  IX86_BUILTIN_PUNPCKHBW512,
30366  IX86_BUILTIN_PUNPCKHWD512,
30367  IX86_BUILTIN_PUNPCKLBW512,
30368  IX86_BUILTIN_PUNPCKLWD512,
30369  IX86_BUILTIN_PSHUFB512,
30370  IX86_BUILTIN_PSHUFHW512,
30371  IX86_BUILTIN_PSHUFLW512,
30372  IX86_BUILTIN_PSRAWI512,
30373  IX86_BUILTIN_PSRAW512,
30374  IX86_BUILTIN_PSRLWI512,
30375  IX86_BUILTIN_PSRLW512,
30376  IX86_BUILTIN_CVTB2MASK512,
30377  IX86_BUILTIN_CVTW2MASK512,
30378  IX86_BUILTIN_CVTMASK2B512,
30379  IX86_BUILTIN_CVTMASK2W512,
30380  IX86_BUILTIN_PCMPEQB512_MASK,
30381  IX86_BUILTIN_PCMPEQW512_MASK,
30382  IX86_BUILTIN_PCMPGTB512_MASK,
30383  IX86_BUILTIN_PCMPGTW512_MASK,
30384  IX86_BUILTIN_PTESTMB512,
30385  IX86_BUILTIN_PTESTMW512,
30386  IX86_BUILTIN_PTESTNMB512,
30387  IX86_BUILTIN_PTESTNMW512,
30388  IX86_BUILTIN_PSLLVV32HI,
30389  IX86_BUILTIN_PABSB512,
30390  IX86_BUILTIN_PABSW512,
30391  IX86_BUILTIN_BLENDMW512,
30392  IX86_BUILTIN_BLENDMB512,
30393  IX86_BUILTIN_CMPB512,
30394  IX86_BUILTIN_CMPW512,
30395  IX86_BUILTIN_UCMPB512,
30396  IX86_BUILTIN_UCMPW512,
30397
30398  /* Alternate 4 and 8 element gather/scatter for the vectorizer
30399     where all operands are 32-byte or 64-byte wide respectively.  */
30400  IX86_BUILTIN_GATHERALTSIV4DF,
30401  IX86_BUILTIN_GATHERALTDIV8SF,
30402  IX86_BUILTIN_GATHERALTSIV4DI,
30403  IX86_BUILTIN_GATHERALTDIV8SI,
30404  IX86_BUILTIN_GATHER3ALTDIV16SF,
30405  IX86_BUILTIN_GATHER3ALTDIV16SI,
30406  IX86_BUILTIN_GATHER3ALTSIV4DF,
30407  IX86_BUILTIN_GATHER3ALTDIV8SF,
30408  IX86_BUILTIN_GATHER3ALTSIV4DI,
30409  IX86_BUILTIN_GATHER3ALTDIV8SI,
30410  IX86_BUILTIN_GATHER3ALTSIV8DF,
30411  IX86_BUILTIN_GATHER3ALTSIV8DI,
30412  IX86_BUILTIN_GATHER3DIV16SF,
30413  IX86_BUILTIN_GATHER3DIV16SI,
30414  IX86_BUILTIN_GATHER3DIV8DF,
30415  IX86_BUILTIN_GATHER3DIV8DI,
30416  IX86_BUILTIN_GATHER3SIV16SF,
30417  IX86_BUILTIN_GATHER3SIV16SI,
30418  IX86_BUILTIN_GATHER3SIV8DF,
30419  IX86_BUILTIN_GATHER3SIV8DI,
30420  IX86_BUILTIN_SCATTERDIV16SF,
30421  IX86_BUILTIN_SCATTERDIV16SI,
30422  IX86_BUILTIN_SCATTERDIV8DF,
30423  IX86_BUILTIN_SCATTERDIV8DI,
30424  IX86_BUILTIN_SCATTERSIV16SF,
30425  IX86_BUILTIN_SCATTERSIV16SI,
30426  IX86_BUILTIN_SCATTERSIV8DF,
30427  IX86_BUILTIN_SCATTERSIV8DI,
30428
30429  /* AVX512PF */
30430  IX86_BUILTIN_GATHERPFQPD,
30431  IX86_BUILTIN_GATHERPFDPS,
30432  IX86_BUILTIN_GATHERPFDPD,
30433  IX86_BUILTIN_GATHERPFQPS,
30434  IX86_BUILTIN_SCATTERPFDPD,
30435  IX86_BUILTIN_SCATTERPFDPS,
30436  IX86_BUILTIN_SCATTERPFQPD,
30437  IX86_BUILTIN_SCATTERPFQPS,
30438
30439  /* AVX-512ER */
30440  IX86_BUILTIN_EXP2PD_MASK,
30441  IX86_BUILTIN_EXP2PS_MASK,
30442  IX86_BUILTIN_EXP2PS,
30443  IX86_BUILTIN_RCP28PD,
30444  IX86_BUILTIN_RCP28PS,
30445  IX86_BUILTIN_RCP28SD,
30446  IX86_BUILTIN_RCP28SS,
30447  IX86_BUILTIN_RSQRT28PD,
30448  IX86_BUILTIN_RSQRT28PS,
30449  IX86_BUILTIN_RSQRT28SD,
30450  IX86_BUILTIN_RSQRT28SS,
30451
30452  /* AVX-512IFMA */
30453  IX86_BUILTIN_VPMADD52LUQ512,
30454  IX86_BUILTIN_VPMADD52HUQ512,
30455  IX86_BUILTIN_VPMADD52LUQ256,
30456  IX86_BUILTIN_VPMADD52HUQ256,
30457  IX86_BUILTIN_VPMADD52LUQ128,
30458  IX86_BUILTIN_VPMADD52HUQ128,
30459  IX86_BUILTIN_VPMADD52LUQ512_MASKZ,
30460  IX86_BUILTIN_VPMADD52HUQ512_MASKZ,
30461  IX86_BUILTIN_VPMADD52LUQ256_MASKZ,
30462  IX86_BUILTIN_VPMADD52HUQ256_MASKZ,
30463  IX86_BUILTIN_VPMADD52LUQ128_MASKZ,
30464  IX86_BUILTIN_VPMADD52HUQ128_MASKZ,
30465
30466  /* AVX-512VBMI */
30467  IX86_BUILTIN_VPMULTISHIFTQB512,
30468  IX86_BUILTIN_VPMULTISHIFTQB256,
30469  IX86_BUILTIN_VPMULTISHIFTQB128,
30470  IX86_BUILTIN_VPERMVARQI512_MASK,
30471  IX86_BUILTIN_VPERMT2VARQI512,
30472  IX86_BUILTIN_VPERMT2VARQI512_MASKZ,
30473  IX86_BUILTIN_VPERMI2VARQI512,
30474  IX86_BUILTIN_VPERMVARQI256_MASK,
30475  IX86_BUILTIN_VPERMVARQI128_MASK,
30476  IX86_BUILTIN_VPERMT2VARQI256,
30477  IX86_BUILTIN_VPERMT2VARQI256_MASKZ,
30478  IX86_BUILTIN_VPERMT2VARQI128,
30479  IX86_BUILTIN_VPERMT2VARQI128_MASKZ,
30480  IX86_BUILTIN_VPERMI2VARQI256,
30481  IX86_BUILTIN_VPERMI2VARQI128,
30482
30483  /* SHA builtins.  */
30484  IX86_BUILTIN_SHA1MSG1,
30485  IX86_BUILTIN_SHA1MSG2,
30486  IX86_BUILTIN_SHA1NEXTE,
30487  IX86_BUILTIN_SHA1RNDS4,
30488  IX86_BUILTIN_SHA256MSG1,
30489  IX86_BUILTIN_SHA256MSG2,
30490  IX86_BUILTIN_SHA256RNDS2,
30491
30492  /* CLWB instructions.  */
30493  IX86_BUILTIN_CLWB,
30494
30495  /* PCOMMIT instructions.  */
30496  IX86_BUILTIN_PCOMMIT,
30497
30498  /* CLFLUSHOPT instructions.  */
30499  IX86_BUILTIN_CLFLUSHOPT,
30500
30501  /* TFmode support builtins.  */
30502  IX86_BUILTIN_INFQ,
30503  IX86_BUILTIN_HUGE_VALQ,
30504  IX86_BUILTIN_FABSQ,
30505  IX86_BUILTIN_COPYSIGNQ,
30506
30507  /* Vectorizer support builtins.  */
30508  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
30509  IX86_BUILTIN_CPYSGNPS,
30510  IX86_BUILTIN_CPYSGNPD,
30511  IX86_BUILTIN_CPYSGNPS256,
30512  IX86_BUILTIN_CPYSGNPS512,
30513  IX86_BUILTIN_CPYSGNPD256,
30514  IX86_BUILTIN_CPYSGNPD512,
30515  IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
30516  IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
30517
30518
30519  /* FMA4 instructions.  */
30520  IX86_BUILTIN_VFMADDSS,
30521  IX86_BUILTIN_VFMADDSD,
30522  IX86_BUILTIN_VFMADDPS,
30523  IX86_BUILTIN_VFMADDPD,
30524  IX86_BUILTIN_VFMADDPS256,
30525  IX86_BUILTIN_VFMADDPD256,
30526  IX86_BUILTIN_VFMADDSUBPS,
30527  IX86_BUILTIN_VFMADDSUBPD,
30528  IX86_BUILTIN_VFMADDSUBPS256,
30529  IX86_BUILTIN_VFMADDSUBPD256,
30530
30531  /* FMA3 instructions.  */
30532  IX86_BUILTIN_VFMADDSS3,
30533  IX86_BUILTIN_VFMADDSD3,
30534
30535  /* XOP instructions.  */
30536  IX86_BUILTIN_VPCMOV,
30537  IX86_BUILTIN_VPCMOV_V2DI,
30538  IX86_BUILTIN_VPCMOV_V4SI,
30539  IX86_BUILTIN_VPCMOV_V8HI,
30540  IX86_BUILTIN_VPCMOV_V16QI,
30541  IX86_BUILTIN_VPCMOV_V4SF,
30542  IX86_BUILTIN_VPCMOV_V2DF,
30543  IX86_BUILTIN_VPCMOV256,
30544  IX86_BUILTIN_VPCMOV_V4DI256,
30545  IX86_BUILTIN_VPCMOV_V8SI256,
30546  IX86_BUILTIN_VPCMOV_V16HI256,
30547  IX86_BUILTIN_VPCMOV_V32QI256,
30548  IX86_BUILTIN_VPCMOV_V8SF256,
30549  IX86_BUILTIN_VPCMOV_V4DF256,
30550
30551  IX86_BUILTIN_VPPERM,
30552
30553  IX86_BUILTIN_VPMACSSWW,
30554  IX86_BUILTIN_VPMACSWW,
30555  IX86_BUILTIN_VPMACSSWD,
30556  IX86_BUILTIN_VPMACSWD,
30557  IX86_BUILTIN_VPMACSSDD,
30558  IX86_BUILTIN_VPMACSDD,
30559  IX86_BUILTIN_VPMACSSDQL,
30560  IX86_BUILTIN_VPMACSSDQH,
30561  IX86_BUILTIN_VPMACSDQL,
30562  IX86_BUILTIN_VPMACSDQH,
30563  IX86_BUILTIN_VPMADCSSWD,
30564  IX86_BUILTIN_VPMADCSWD,
30565
30566  IX86_BUILTIN_VPHADDBW,
30567  IX86_BUILTIN_VPHADDBD,
30568  IX86_BUILTIN_VPHADDBQ,
30569  IX86_BUILTIN_VPHADDWD,
30570  IX86_BUILTIN_VPHADDWQ,
30571  IX86_BUILTIN_VPHADDDQ,
30572  IX86_BUILTIN_VPHADDUBW,
30573  IX86_BUILTIN_VPHADDUBD,
30574  IX86_BUILTIN_VPHADDUBQ,
30575  IX86_BUILTIN_VPHADDUWD,
30576  IX86_BUILTIN_VPHADDUWQ,
30577  IX86_BUILTIN_VPHADDUDQ,
30578  IX86_BUILTIN_VPHSUBBW,
30579  IX86_BUILTIN_VPHSUBWD,
30580  IX86_BUILTIN_VPHSUBDQ,
30581
30582  IX86_BUILTIN_VPROTB,
30583  IX86_BUILTIN_VPROTW,
30584  IX86_BUILTIN_VPROTD,
30585  IX86_BUILTIN_VPROTQ,
30586  IX86_BUILTIN_VPROTB_IMM,
30587  IX86_BUILTIN_VPROTW_IMM,
30588  IX86_BUILTIN_VPROTD_IMM,
30589  IX86_BUILTIN_VPROTQ_IMM,
30590
30591  IX86_BUILTIN_VPSHLB,
30592  IX86_BUILTIN_VPSHLW,
30593  IX86_BUILTIN_VPSHLD,
30594  IX86_BUILTIN_VPSHLQ,
30595  IX86_BUILTIN_VPSHAB,
30596  IX86_BUILTIN_VPSHAW,
30597  IX86_BUILTIN_VPSHAD,
30598  IX86_BUILTIN_VPSHAQ,
30599
30600  IX86_BUILTIN_VFRCZSS,
30601  IX86_BUILTIN_VFRCZSD,
30602  IX86_BUILTIN_VFRCZPS,
30603  IX86_BUILTIN_VFRCZPD,
30604  IX86_BUILTIN_VFRCZPS256,
30605  IX86_BUILTIN_VFRCZPD256,
30606
30607  IX86_BUILTIN_VPCOMEQUB,
30608  IX86_BUILTIN_VPCOMNEUB,
30609  IX86_BUILTIN_VPCOMLTUB,
30610  IX86_BUILTIN_VPCOMLEUB,
30611  IX86_BUILTIN_VPCOMGTUB,
30612  IX86_BUILTIN_VPCOMGEUB,
30613  IX86_BUILTIN_VPCOMFALSEUB,
30614  IX86_BUILTIN_VPCOMTRUEUB,
30615
30616  IX86_BUILTIN_VPCOMEQUW,
30617  IX86_BUILTIN_VPCOMNEUW,
30618  IX86_BUILTIN_VPCOMLTUW,
30619  IX86_BUILTIN_VPCOMLEUW,
30620  IX86_BUILTIN_VPCOMGTUW,
30621  IX86_BUILTIN_VPCOMGEUW,
30622  IX86_BUILTIN_VPCOMFALSEUW,
30623  IX86_BUILTIN_VPCOMTRUEUW,
30624
30625  IX86_BUILTIN_VPCOMEQUD,
30626  IX86_BUILTIN_VPCOMNEUD,
30627  IX86_BUILTIN_VPCOMLTUD,
30628  IX86_BUILTIN_VPCOMLEUD,
30629  IX86_BUILTIN_VPCOMGTUD,
30630  IX86_BUILTIN_VPCOMGEUD,
30631  IX86_BUILTIN_VPCOMFALSEUD,
30632  IX86_BUILTIN_VPCOMTRUEUD,
30633
30634  IX86_BUILTIN_VPCOMEQUQ,
30635  IX86_BUILTIN_VPCOMNEUQ,
30636  IX86_BUILTIN_VPCOMLTUQ,
30637  IX86_BUILTIN_VPCOMLEUQ,
30638  IX86_BUILTIN_VPCOMGTUQ,
30639  IX86_BUILTIN_VPCOMGEUQ,
30640  IX86_BUILTIN_VPCOMFALSEUQ,
30641  IX86_BUILTIN_VPCOMTRUEUQ,
30642
30643  IX86_BUILTIN_VPCOMEQB,
30644  IX86_BUILTIN_VPCOMNEB,
30645  IX86_BUILTIN_VPCOMLTB,
30646  IX86_BUILTIN_VPCOMLEB,
30647  IX86_BUILTIN_VPCOMGTB,
30648  IX86_BUILTIN_VPCOMGEB,
30649  IX86_BUILTIN_VPCOMFALSEB,
30650  IX86_BUILTIN_VPCOMTRUEB,
30651
30652  IX86_BUILTIN_VPCOMEQW,
30653  IX86_BUILTIN_VPCOMNEW,
30654  IX86_BUILTIN_VPCOMLTW,
30655  IX86_BUILTIN_VPCOMLEW,
30656  IX86_BUILTIN_VPCOMGTW,
30657  IX86_BUILTIN_VPCOMGEW,
30658  IX86_BUILTIN_VPCOMFALSEW,
30659  IX86_BUILTIN_VPCOMTRUEW,
30660
30661  IX86_BUILTIN_VPCOMEQD,
30662  IX86_BUILTIN_VPCOMNED,
30663  IX86_BUILTIN_VPCOMLTD,
30664  IX86_BUILTIN_VPCOMLED,
30665  IX86_BUILTIN_VPCOMGTD,
30666  IX86_BUILTIN_VPCOMGED,
30667  IX86_BUILTIN_VPCOMFALSED,
30668  IX86_BUILTIN_VPCOMTRUED,
30669
30670  IX86_BUILTIN_VPCOMEQQ,
30671  IX86_BUILTIN_VPCOMNEQ,
30672  IX86_BUILTIN_VPCOMLTQ,
30673  IX86_BUILTIN_VPCOMLEQ,
30674  IX86_BUILTIN_VPCOMGTQ,
30675  IX86_BUILTIN_VPCOMGEQ,
30676  IX86_BUILTIN_VPCOMFALSEQ,
30677  IX86_BUILTIN_VPCOMTRUEQ,
30678
30679  /* LWP instructions.  */
30680  IX86_BUILTIN_LLWPCB,
30681  IX86_BUILTIN_SLWPCB,
30682  IX86_BUILTIN_LWPVAL32,
30683  IX86_BUILTIN_LWPVAL64,
30684  IX86_BUILTIN_LWPINS32,
30685  IX86_BUILTIN_LWPINS64,
30686
30687  IX86_BUILTIN_CLZS,
30688
30689  /* RTM */
30690  IX86_BUILTIN_XBEGIN,
30691  IX86_BUILTIN_XEND,
30692  IX86_BUILTIN_XABORT,
30693  IX86_BUILTIN_XTEST,
30694
30695  /* MPX */
30696  IX86_BUILTIN_BNDMK,
30697  IX86_BUILTIN_BNDSTX,
30698  IX86_BUILTIN_BNDLDX,
30699  IX86_BUILTIN_BNDCL,
30700  IX86_BUILTIN_BNDCU,
30701  IX86_BUILTIN_BNDRET,
30702  IX86_BUILTIN_BNDNARROW,
30703  IX86_BUILTIN_BNDINT,
30704  IX86_BUILTIN_SIZEOF,
30705  IX86_BUILTIN_BNDLOWER,
30706  IX86_BUILTIN_BNDUPPER,
30707
30708  /* BMI instructions.  */
30709  IX86_BUILTIN_BEXTR32,
30710  IX86_BUILTIN_BEXTR64,
30711  IX86_BUILTIN_CTZS,
30712
30713  /* TBM instructions.  */
30714  IX86_BUILTIN_BEXTRI32,
30715  IX86_BUILTIN_BEXTRI64,
30716
30717  /* BMI2 instructions. */
30718  IX86_BUILTIN_BZHI32,
30719  IX86_BUILTIN_BZHI64,
30720  IX86_BUILTIN_PDEP32,
30721  IX86_BUILTIN_PDEP64,
30722  IX86_BUILTIN_PEXT32,
30723  IX86_BUILTIN_PEXT64,
30724
30725  /* ADX instructions.  */
30726  IX86_BUILTIN_ADDCARRYX32,
30727  IX86_BUILTIN_ADDCARRYX64,
30728
30729  /* SBB instructions.  */
30730  IX86_BUILTIN_SBB32,
30731  IX86_BUILTIN_SBB64,
30732
30733  /* FSGSBASE instructions.  */
30734  IX86_BUILTIN_RDFSBASE32,
30735  IX86_BUILTIN_RDFSBASE64,
30736  IX86_BUILTIN_RDGSBASE32,
30737  IX86_BUILTIN_RDGSBASE64,
30738  IX86_BUILTIN_WRFSBASE32,
30739  IX86_BUILTIN_WRFSBASE64,
30740  IX86_BUILTIN_WRGSBASE32,
30741  IX86_BUILTIN_WRGSBASE64,
30742
30743  /* RDRND instructions.  */
30744  IX86_BUILTIN_RDRAND16_STEP,
30745  IX86_BUILTIN_RDRAND32_STEP,
30746  IX86_BUILTIN_RDRAND64_STEP,
30747
30748  /* RDSEED instructions.  */
30749  IX86_BUILTIN_RDSEED16_STEP,
30750  IX86_BUILTIN_RDSEED32_STEP,
30751  IX86_BUILTIN_RDSEED64_STEP,
30752
30753  /* F16C instructions.  */
30754  IX86_BUILTIN_CVTPH2PS,
30755  IX86_BUILTIN_CVTPH2PS256,
30756  IX86_BUILTIN_CVTPS2PH,
30757  IX86_BUILTIN_CVTPS2PH256,
30758
30759  /* MONITORX and MWAITX instructions.  */
30760  IX86_BUILTIN_MONITORX,
30761  IX86_BUILTIN_MWAITX,
30762
30763  /* CFString built-in for darwin */
30764  IX86_BUILTIN_CFSTRING,
30765
30766  /* Builtins to get CPU type and supported features. */
30767  IX86_BUILTIN_CPU_INIT,
30768  IX86_BUILTIN_CPU_IS,
30769  IX86_BUILTIN_CPU_SUPPORTS,
30770
30771  /* Read/write FLAGS register built-ins.  */
30772  IX86_BUILTIN_READ_FLAGS,
30773  IX86_BUILTIN_WRITE_FLAGS,
30774
30775  IX86_BUILTIN_MAX
30776};
30777
30778/* Table for the ix86 builtin decls.  */
30779static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
30780
30781/* Table of all the builtin functions that are possible with different ISAs,
30782   but are waiting to be built until a function is declared to use that
30783   ISA.  */
30784struct builtin_isa {
30785  const char *name;		/* function name */
30786  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
30787  HOST_WIDE_INT isa;		/* isa_flags this builtin is defined for */
30788  bool const_p;			/* true if the declaration is constant */
30789  bool leaf_p;			/* true if the declaration has leaf attribute */
30790  bool nothrow_p;		/* true if the declaration has nothrow attribute */
30791  bool set_and_not_built_p;	/* true if deferred and not yet built */
30792};
30793
30794static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
30795
30796/* ISA bits that can still cause a deferred builtin to be defined.  */
30797static HOST_WIDE_INT deferred_isa_values = 0;
30798
30799/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
30800   of isa_flags to use in the ix86_builtins_isa array.  Store the function
30801   decl in the ix86_builtins array.  Return the function decl, or NULL_TREE
30802   if the builtin was not added.
30803
30804   If the front end has a special hook for builtin functions, delay adding
30805   builtin functions that aren't in the current ISA until the ISA is changed
30806   with function specific optimization.  Doing so can save about 300K for the
30807   default compiler.  When the builtin is expanded, check at that time whether
30808   it is valid.
30809
30810   If the front end doesn't have a special hook, record all builtins, even
30811   those that aren't in the current ISA, in case the user uses function
30812   specific options for a different ISA; that way we don't get scope errors
30813   if a builtin is added in the middle of a function scope.  */
30814
30815static inline tree
30816def_builtin (HOST_WIDE_INT mask, const char *name,
30817	     enum ix86_builtin_func_type tcode,
30818	     enum ix86_builtins code)
30819{
30820  tree decl = NULL_TREE;
30821
30822  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
30823    {
30824      ix86_builtins_isa[(int) code].isa = mask;
30825
30826      /* OPTION_MASK_ISA_AVX512VL has a special meaning.  Unlike the generic
30827	 case, where any set bit means the built-in is enabled, this bit must be
30828	 *and-ed* with another one. E.g.: OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL
30829	 means that *both* cpuid bits must be set for the built-in to be available.
30830	 Handle this here.  */
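      /* For example (illustrative): with -mavx512vl in effect, a mask of
	 (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL) is reduced to
	 OPTION_MASK_ISA_AVX512DQ below, so availability of the built-in then
	 depends on the AVX512DQ bit alone.  */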
30831      if (mask & ix86_isa_flags & OPTION_MASK_ISA_AVX512VL)
30832	  mask &= ~OPTION_MASK_ISA_AVX512VL;
30833
30834      mask &= ~OPTION_MASK_ISA_64BIT;
30835      if (mask == 0
30836	  || (mask & ix86_isa_flags) != 0
30837	  || (lang_hooks.builtin_function
30838	      == lang_hooks.builtin_function_ext_scope))
30839
30840	{
30841	  tree type = ix86_get_builtin_func_type (tcode);
30842	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
30843				       NULL, NULL_TREE);
30844	  ix86_builtins[(int) code] = decl;
30845	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
30846	}
30847      else
30848	{
30849	  /* Only a MASK recorded here with set_and_not_built_p == true can
30850	     later cause a deferred builtin to be included.  */
30851	  deferred_isa_values |= mask;
30852	  ix86_builtins[(int) code] = NULL_TREE;
30853	  ix86_builtins_isa[(int) code].tcode = tcode;
30854	  ix86_builtins_isa[(int) code].name = name;
30855	  ix86_builtins_isa[(int) code].leaf_p = false;
30856	  ix86_builtins_isa[(int) code].nothrow_p = false;
30857	  ix86_builtins_isa[(int) code].const_p = false;
30858	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
30859	}
30860    }
30861
30862  return decl;
30863}
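
/* Usage sketch: the table-driven builtin initialisation later in this file
   effectively makes calls such as

     def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms",
		  VOID_FTYPE_VOID, IX86_BUILTIN_EMMS);

   If MMX is already enabled (or the front end has no extern-scope builtin
   hook), the decl is created immediately; otherwise only the mask, name and
   type are recorded and the decl is deferred until the ISA is enabled via
   function specific options.  */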
30864
30865/* Like def_builtin, but also marks the function decl "const".  */
30866
30867static inline tree
30868def_builtin_const (HOST_WIDE_INT mask, const char *name,
30869		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
30870{
30871  tree decl = def_builtin (mask, name, tcode, code);
30872  if (decl)
30873    TREE_READONLY (decl) = 1;
30874  else
30875    ix86_builtins_isa[(int) code].const_p = true;
30876
30877  return decl;
30878}
30879
30880/* Add any new builtin functions for a given ISA that may not have been
30881   declared.  This saves a bit of space compared to adding all of the
30882   declarations to the tree, even if we didn't use them.  */
30883
30884static void
30885ix86_add_new_builtins (HOST_WIDE_INT isa)
30886{
30887  if ((isa & deferred_isa_values) == 0)
30888    return;
30889
30890  /* The bits in ISA are handled now, so remove them from the deferred values.  */
30891  deferred_isa_values &= ~isa;
30892
30893  int i;
30894  tree saved_current_target_pragma = current_target_pragma;
30895  current_target_pragma = NULL_TREE;
30896
30897  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
30898    {
30899      if ((ix86_builtins_isa[i].isa & isa) != 0
30900	  && ix86_builtins_isa[i].set_and_not_built_p)
30901	{
30902	  tree decl, type;
30903
30904	  /* Don't define the builtin again.  */
30905	  ix86_builtins_isa[i].set_and_not_built_p = false;
30906
30907	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
30908	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
30909						 type, i, BUILT_IN_MD, NULL,
30910						 NULL_TREE);
30911
30912	  ix86_builtins[i] = decl;
30913	  if (ix86_builtins_isa[i].const_p)
30914	    TREE_READONLY (decl) = 1;
30915	  if (ix86_builtins_isa[i].leaf_p)
30916	    DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
30917						      NULL_TREE);
30918	  if (ix86_builtins_isa[i].nothrow_p)
30919	    TREE_NOTHROW (decl) = 1;
30920	}
30921    }
30922
30923  current_target_pragma = saved_current_target_pragma;
30924}
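
/* Deferral round-trip sketch (illustrative): when def_builtin is called with
   e.g. OPTION_MASK_ISA_AVX2 while AVX2 is disabled and the front end supports
   extern-scope builtins, the decl is not built; the mask is added to
   deferred_isa_values and the name/type are stashed in ix86_builtins_isa.
   Once AVX2 is later enabled for some function, a call along the lines of

     ix86_add_new_builtins (ix86_isa_flags);

   builds the previously deferred decls via add_builtin_function_ext_scope.  */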
30925
30926/* Bits for builtin_description.flag.  */
30927
30928/* Set when we don't support the comparison natively, and should
30929   swap the comparison operands in order to support it.  */
30930#define BUILTIN_DESC_SWAP_OPERANDS	1
30931
30932struct builtin_description
30933{
30934  const HOST_WIDE_INT mask;
30935  const enum insn_code icode;
30936  const char *const name;
30937  const enum ix86_builtins code;
30938  const enum rtx_code comparison;
30939  const int flag;
30940};
30941
30942static const struct builtin_description bdesc_comi[] =
30943{
30944  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
30945  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
30946  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
30947  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
30948  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
30949  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
30950  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
30951  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
30952  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
30953  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
30954  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
30955  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
30956  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
30957  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
30958  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
30959  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
30960  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
30961  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
30962  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
30963  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
30964  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
30965  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
30966  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
30967  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
30968};
30969
30970static const struct builtin_description bdesc_pcmpestr[] =
30971{
30972  /* SSE4.2 */
30973  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
30974  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
30975  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
30976  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
30977  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
30978  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
30979  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
30980};
30981
30982static const struct builtin_description bdesc_pcmpistr[] =
30983{
30984  /* SSE4.2 */
30985  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
30986  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
30987  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
30988  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
30989  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
30990  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
30991  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
30992};
30993
30994/* Special builtins with variable number of arguments.  */
30995static const struct builtin_description bdesc_special_args[] =
30996{
30997  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
30998  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
30999  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_pause, "__builtin_ia32_pause", IX86_BUILTIN_PAUSE, UNKNOWN, (int) VOID_FTYPE_VOID },
31000
31001  /* 80387 (for use internally for atomic compound assignment).  */
31002  { 0, CODE_FOR_fnstenv, "__builtin_ia32_fnstenv", IX86_BUILTIN_FNSTENV, UNKNOWN, (int) VOID_FTYPE_PVOID },
31003  { 0, CODE_FOR_fldenv, "__builtin_ia32_fldenv", IX86_BUILTIN_FLDENV, UNKNOWN, (int) VOID_FTYPE_PCVOID },
31004  { 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", IX86_BUILTIN_FNSTSW, UNKNOWN, (int) USHORT_FTYPE_VOID },
31005  { 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, UNKNOWN, (int) VOID_FTYPE_VOID },
31006
31007  /* MMX */
31008  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31009
31010  /* 3DNow! */
31011  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
31012
31013  /* FXSR, XSAVE, XSAVEOPT, XSAVEC and XSAVES.  */
31014  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxsave", IX86_BUILTIN_FXSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID },
31015  { OPTION_MASK_ISA_FXSR, CODE_FOR_nothing, "__builtin_ia32_fxrstor", IX86_BUILTIN_FXRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID },
31016  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xsave", IX86_BUILTIN_XSAVE, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31017  { OPTION_MASK_ISA_XSAVE, CODE_FOR_nothing, "__builtin_ia32_xrstor", IX86_BUILTIN_XRSTOR, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31018  { OPTION_MASK_ISA_XSAVEOPT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt", IX86_BUILTIN_XSAVEOPT, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31019  { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xsaves", IX86_BUILTIN_XSAVES, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31020  { OPTION_MASK_ISA_XSAVES, CODE_FOR_nothing, "__builtin_ia32_xrstors", IX86_BUILTIN_XRSTORS, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31021  { OPTION_MASK_ISA_XSAVEC, CODE_FOR_nothing, "__builtin_ia32_xsavec", IX86_BUILTIN_XSAVEC, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31022
31023  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxsave64", IX86_BUILTIN_FXSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31024  { OPTION_MASK_ISA_FXSR | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_fxrstor64", IX86_BUILTIN_FXRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID },
31025  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsave64", IX86_BUILTIN_XSAVE64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31026  { OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstor64", IX86_BUILTIN_XRSTOR64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31027  { OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaveopt64", IX86_BUILTIN_XSAVEOPT64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31028  { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsaves64", IX86_BUILTIN_XSAVES64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31029  { OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31030  { OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64 },
31031
31032  /* SSE */
31033  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storeups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31034  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31035  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31036
31037  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31038  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
31039  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31040  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
31041
31042  /* SSE or 3DNow!A  */
31043  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31044  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
31045
31046  /* SSE2 */
31047  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31048  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
31049  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storeupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31050  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_storedquv16qi, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
31051  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31052  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
31053  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntisi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
31054  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_movntidi, "__builtin_ia32_movnti64", IX86_BUILTIN_MOVNTI64, UNKNOWN, (int) VOID_FTYPE_PLONGLONG_LONGLONG },
31055  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
31056  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loaddquv16qi, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31057
31058  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31059  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
31060
31061  /* SSE3 */
31062  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
31063
31064  /* SSE4.1 */
31065  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
31066
31067  /* SSE4A */
31068  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
31069  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
31070
31071  /* AVX */
31072  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
31073  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
31074
31075  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
31076  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31077  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31078  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
31079  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
31080
31081  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
31082  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loadups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
31083  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31084  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storeups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31085  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_loaddquv32qi, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31086  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_storedquv32qi, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
31087  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
31088
31089  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
31090  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
31091  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
31092
31093  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
31094  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
31095  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
31096  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
31097  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
31098  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
31099  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
31100  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
31101
31102  /* AVX2 */
31103  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_movntdqa, "__builtin_ia32_movntdqa256", IX86_BUILTIN_MOVNTDQA256, UNKNOWN, (int) V4DI_FTYPE_PV4DI },
31104  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd, "__builtin_ia32_maskloadd", IX86_BUILTIN_MASKLOADD, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI },
31105  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq, "__builtin_ia32_maskloadq", IX86_BUILTIN_MASKLOADQ, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI },
31106  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadd256, "__builtin_ia32_maskloadd256", IX86_BUILTIN_MASKLOADD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI },
31107  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskloadq256, "__builtin_ia32_maskloadq256", IX86_BUILTIN_MASKLOADQ256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI },
31108  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored, "__builtin_ia32_maskstored", IX86_BUILTIN_MASKSTORED, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_V4SI },
31109  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq, "__builtin_ia32_maskstoreq", IX86_BUILTIN_MASKSTOREQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_V2DI },
31110  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstored256, "__builtin_ia32_maskstored256", IX86_BUILTIN_MASKSTORED256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_V8SI },
31111  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_maskstoreq256, "__builtin_ia32_maskstoreq256", IX86_BUILTIN_MASKSTOREQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_V4DI },
31112
31113  /* AVX512F */
31114  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16sf_mask, "__builtin_ia32_compressstoresf512_mask", IX86_BUILTIN_COMPRESSPSSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31115  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31116  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31117  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31118  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31119  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31120  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31121  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31122  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31123  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31124  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31125  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31126  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31127  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loaddquv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31128  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadupd512_mask, "__builtin_ia32_loadupd512_mask", IX86_BUILTIN_LOADUPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31129  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadups512_mask, "__builtin_ia32_loadups512_mask", IX86_BUILTIN_LOADUPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31130  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_loadaps512_mask", IX86_BUILTIN_LOADAPS512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_HI },
31131  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32load512_mask", IX86_BUILTIN_MOVDQA32LOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_HI },
31132  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_loadapd512_mask", IX86_BUILTIN_LOADAPD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_QI },
31133  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64load512_mask", IX86_BUILTIN_MOVDQA64LOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_QI },
31134  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv16sf, "__builtin_ia32_movntps512", IX86_BUILTIN_MOVNTPS512, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V16SF },
31135  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8df, "__builtin_ia32_movntpd512", IX86_BUILTIN_MOVNTPD512, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V8DF },
31136  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntv8di, "__builtin_ia32_movntdq512", IX86_BUILTIN_MOVNTDQ512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI },
31137  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movntdqa, "__builtin_ia32_movntdqa512", IX86_BUILTIN_MOVNTDQA512, UNKNOWN, (int) V8DI_FTYPE_PV8DI },
31138  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv16si_mask, "__builtin_ia32_storedqusi512_mask", IX86_BUILTIN_STOREDQUSI512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31139  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storedquv8di_mask, "__builtin_ia32_storedqudi512_mask", IX86_BUILTIN_STOREDQUDI512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31140  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeupd512_mask, "__builtin_ia32_storeupd512_mask", IX86_BUILTIN_STOREUPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31141  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask_store, "__builtin_ia32_pmovusqd512mem_mask", IX86_BUILTIN_PMOVUSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31142  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask_store, "__builtin_ia32_pmovsqd512mem_mask", IX86_BUILTIN_PMOVSQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31143  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask_store, "__builtin_ia32_pmovqd512mem_mask", IX86_BUILTIN_PMOVQD512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8DI_QI },
31144  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovusqw512mem_mask", IX86_BUILTIN_PMOVUSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31145  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovsqw512mem_mask", IX86_BUILTIN_PMOVSQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31146  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask_store, "__builtin_ia32_pmovqw512mem_mask", IX86_BUILTIN_PMOVQW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8DI_QI },
31147  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovusdw512mem_mask", IX86_BUILTIN_PMOVUSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31148  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovsdw512mem_mask", IX86_BUILTIN_PMOVSDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31149  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask_store, "__builtin_ia32_pmovdw512mem_mask", IX86_BUILTIN_PMOVDW512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16SI_HI },
31150  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovqb512mem_mask", IX86_BUILTIN_PMOVQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31151  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovusqb512mem_mask", IX86_BUILTIN_PMOVUSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31152  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask_store, "__builtin_ia32_pmovsqb512mem_mask", IX86_BUILTIN_PMOVSQB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8DI_QI },
31153  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovusdb512mem_mask", IX86_BUILTIN_PMOVUSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31154  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovsdb512mem_mask", IX86_BUILTIN_PMOVSDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31155  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask_store, "__builtin_ia32_pmovdb512mem_mask", IX86_BUILTIN_PMOVDB512_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16SI_HI },
31156  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storeups512_mask, "__builtin_ia32_storeups512_mask", IX86_BUILTIN_STOREUPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31157  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16sf_mask, "__builtin_ia32_storeaps512_mask", IX86_BUILTIN_STOREAPS512, UNKNOWN, (int) VOID_FTYPE_PV16SF_V16SF_HI },
31158  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev16si_mask, "__builtin_ia32_movdqa32store512_mask", IX86_BUILTIN_MOVDQA32STORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_HI },
31159  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8df_mask, "__builtin_ia32_storeapd512_mask", IX86_BUILTIN_STOREAPD512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_QI },
31160  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_storev8di_mask, "__builtin_ia32_movdqa64store512_mask", IX86_BUILTIN_MOVDQA64STORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_QI },
31161
31162  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
31163  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
31164  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
31165  { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
31166  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
31167  { OPTION_MASK_ISA_LWP | OPTION_MASK_ISA_64BIT, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
31168
31169  /* FSGSBASE */
31170  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31171  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31172  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31173  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
31174  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31175  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31176  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
31177  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
31178
31179  /* RTM */
31180  { OPTION_MASK_ISA_RTM, CODE_FOR_xbegin, "__builtin_ia32_xbegin", IX86_BUILTIN_XBEGIN, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
31181  { OPTION_MASK_ISA_RTM, CODE_FOR_xend, "__builtin_ia32_xend", IX86_BUILTIN_XEND, UNKNOWN, (int) VOID_FTYPE_VOID },
31182  { OPTION_MASK_ISA_RTM, CODE_FOR_xtest, "__builtin_ia32_xtest", IX86_BUILTIN_XTEST, UNKNOWN, (int) INT_FTYPE_VOID },
31183
31184  /* AVX512BW */
31185  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_SI },
31186  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_DI },
31187  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv32hi_mask, "__builtin_ia32_storedquhi512_mask", IX86_BUILTIN_STOREDQUHI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV32HI_V32HI_SI },
31188  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_storedquv64qi_mask, "__builtin_ia32_storedquqi512_mask", IX86_BUILTIN_STOREDQUQI512_MASK, UNKNOWN, (int) VOID_FTYPE_PV64QI_V64QI_DI },
31189
31190  /* AVX512VL */
31191  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_loaddquhi256_mask", IX86_BUILTIN_LOADDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_HI },
31192  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_loaddquhi128_mask", IX86_BUILTIN_LOADDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_QI },
31193  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_loaddquqi256_mask", IX86_BUILTIN_LOADDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_SI },
31194  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_loaddquqi128_mask", IX86_BUILTIN_LOADDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_HI },
31195  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64load256_mask", IX86_BUILTIN_MOVDQA64LOAD256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31196  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64load128_mask", IX86_BUILTIN_MOVDQA64LOAD128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31197  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32load256_mask", IX86_BUILTIN_MOVDQA32LOAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31198  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32load128_mask", IX86_BUILTIN_MOVDQA32LOAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31199  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4di_mask, "__builtin_ia32_movdqa64store256_mask", IX86_BUILTIN_MOVDQA64STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31200  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2di_mask, "__builtin_ia32_movdqa64store128_mask", IX86_BUILTIN_MOVDQA64STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31201  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8si_mask, "__builtin_ia32_movdqa32store256_mask", IX86_BUILTIN_MOVDQA32STORE256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31202  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4si_mask, "__builtin_ia32_movdqa32store128_mask", IX86_BUILTIN_MOVDQA32STORE128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31203  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_loadapd256_mask", IX86_BUILTIN_LOADAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31204  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_loadapd128_mask", IX86_BUILTIN_LOADAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31205  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_loadaps256_mask", IX86_BUILTIN_LOADAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31206  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_loadaps128_mask", IX86_BUILTIN_LOADAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31207  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4df_mask, "__builtin_ia32_storeapd256_mask", IX86_BUILTIN_STOREAPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31208  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev2df_mask, "__builtin_ia32_storeapd128_mask", IX86_BUILTIN_STOREAPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31209  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev8sf_mask, "__builtin_ia32_storeaps256_mask", IX86_BUILTIN_STOREAPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31210  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storev4sf_mask, "__builtin_ia32_storeaps128_mask", IX86_BUILTIN_STOREAPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31211  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadupd256_mask, "__builtin_ia32_loadupd256_mask", IX86_BUILTIN_LOADUPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31212  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loadupd_mask, "__builtin_ia32_loadupd128_mask", IX86_BUILTIN_LOADUPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31213  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loadups256_mask, "__builtin_ia32_loadups256_mask", IX86_BUILTIN_LOADUPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31214  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_loadups_mask, "__builtin_ia32_loadups128_mask", IX86_BUILTIN_LOADUPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31215  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd256_mask, "__builtin_ia32_storeupd256_mask", IX86_BUILTIN_STOREUPD256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31216  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeupd_mask, "__builtin_ia32_storeupd128_mask", IX86_BUILTIN_STOREUPD128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31217  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups256_mask, "__builtin_ia32_storeups256_mask", IX86_BUILTIN_STOREUPS256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31218  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storeups_mask, "__builtin_ia32_storeups128_mask", IX86_BUILTIN_STOREUPS128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31219  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv4di_mask, "__builtin_ia32_loaddqudi256_mask", IX86_BUILTIN_LOADDQUDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31220  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv2di_mask, "__builtin_ia32_loaddqudi128_mask", IX86_BUILTIN_LOADDQUDI128_MASK, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31221  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv8si_mask, "__builtin_ia32_loaddqusi256_mask", IX86_BUILTIN_LOADDQUSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31222  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv4si_mask, "__builtin_ia32_loaddqusi128_mask", IX86_BUILTIN_LOADDQUSI128_MASK, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31223  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4di_mask, "__builtin_ia32_storedqudi256_mask", IX86_BUILTIN_STOREDQUDI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31224  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv2di_mask, "__builtin_ia32_storedqudi128_mask", IX86_BUILTIN_STOREDQUDI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31225  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8si_mask, "__builtin_ia32_storedqusi256_mask", IX86_BUILTIN_STOREDQUSI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31226  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv4si_mask, "__builtin_ia32_storedqusi128_mask", IX86_BUILTIN_STOREDQUSI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31227  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16hi_mask, "__builtin_ia32_storedquhi256_mask", IX86_BUILTIN_STOREDQUHI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV16HI_V16HI_HI },
31228  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv8hi_mask, "__builtin_ia32_storedquhi128_mask", IX86_BUILTIN_STOREDQUHI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8HI_QI },
31229  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv32qi_mask, "__builtin_ia32_storedquqi256_mask", IX86_BUILTIN_STOREDQUQI256_MASK, UNKNOWN, (int) VOID_FTYPE_PV32QI_V32QI_SI },
31230  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_storedquv16qi_mask, "__builtin_ia32_storedquqi128_mask", IX86_BUILTIN_STOREDQUQI128_MASK, UNKNOWN, (int) VOID_FTYPE_PV16QI_V16QI_HI },
31231  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4df_mask, "__builtin_ia32_compressstoredf256_mask", IX86_BUILTIN_COMPRESSPDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_QI },
31232  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2df_mask, "__builtin_ia32_compressstoredf128_mask", IX86_BUILTIN_COMPRESSPDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_QI },
31233  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8sf_mask, "__builtin_ia32_compressstoresf256_mask", IX86_BUILTIN_COMPRESSPSSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_QI },
31234  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4sf_mask, "__builtin_ia32_compressstoresf128_mask", IX86_BUILTIN_COMPRESSPSSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_QI },
31235  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4di_mask, "__builtin_ia32_compressstoredi256_mask", IX86_BUILTIN_PCOMPRESSQSTORE256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI_QI },
31236  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_QI },
31237  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_QI },
31238  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_QI },
31239  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31240  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31241  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31242  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31243  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31244  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31245  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31246  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31247  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_QI },
31248  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_QI },
31249  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_QI },
31250  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandloadsf128_maskz", IX86_BUILTIN_EXPANDPSLOAD128Z, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_QI },
31251  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expandloaddi256_maskz", IX86_BUILTIN_PEXPANDQLOAD256Z, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_QI },
31252  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expandloaddi128_maskz", IX86_BUILTIN_PEXPANDQLOAD128Z, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_QI },
31253  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandloadsi256_maskz", IX86_BUILTIN_PEXPANDDLOAD256Z, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_QI },
31254  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandloadsi128_maskz", IX86_BUILTIN_PEXPANDDLOAD128Z, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_QI },
31255  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask_store, "__builtin_ia32_pmovqd256mem_mask", IX86_BUILTIN_PMOVQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31256  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask_store, "__builtin_ia32_pmovqd128mem_mask", IX86_BUILTIN_PMOVQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31257  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask_store, "__builtin_ia32_pmovsqd256mem_mask", IX86_BUILTIN_PMOVSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31258  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask_store, "__builtin_ia32_pmovsqd128mem_mask", IX86_BUILTIN_PMOVSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31259  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask_store, "__builtin_ia32_pmovusqd256mem_mask", IX86_BUILTIN_PMOVUSQD256_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4DI_QI },
31260  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask_store, "__builtin_ia32_pmovusqd128mem_mask", IX86_BUILTIN_PMOVUSQD128_MEM, UNKNOWN, (int) VOID_FTYPE_PV4SI_V2DI_QI },
31261  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovqw256mem_mask", IX86_BUILTIN_PMOVQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31262  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovqw128mem_mask", IX86_BUILTIN_PMOVQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31263  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovsqw256mem_mask", IX86_BUILTIN_PMOVSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31264  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovsqw128mem_mask", IX86_BUILTIN_PMOVSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31265  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask_store, "__builtin_ia32_pmovusqw256mem_mask", IX86_BUILTIN_PMOVUSQW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4DI_QI },
31266  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask_store, "__builtin_ia32_pmovusqw128mem_mask", IX86_BUILTIN_PMOVUSQW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V2DI_QI },
31267  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovqb256mem_mask", IX86_BUILTIN_PMOVQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31268  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovqb128mem_mask", IX86_BUILTIN_PMOVQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31269  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovsqb256mem_mask", IX86_BUILTIN_PMOVSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31270  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovsqb128mem_mask", IX86_BUILTIN_PMOVSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31271  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask_store, "__builtin_ia32_pmovusqb256mem_mask", IX86_BUILTIN_PMOVUSQB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4DI_QI },
31272  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask_store, "__builtin_ia32_pmovusqb128mem_mask", IX86_BUILTIN_PMOVUSQB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V2DI_QI },
31273  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovdb256mem_mask", IX86_BUILTIN_PMOVDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31274  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovdb128mem_mask", IX86_BUILTIN_PMOVDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31275  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovsdb256mem_mask", IX86_BUILTIN_PMOVSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31276  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovsdb128mem_mask", IX86_BUILTIN_PMOVSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31277  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask_store, "__builtin_ia32_pmovusdb256mem_mask", IX86_BUILTIN_PMOVUSDB256_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V8SI_QI },
31278  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask_store, "__builtin_ia32_pmovusdb128mem_mask", IX86_BUILTIN_PMOVUSDB128_MEM, UNKNOWN, (int) VOID_FTYPE_PV16QI_V4SI_QI },
31279  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovdw256mem_mask", IX86_BUILTIN_PMOVDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31280  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovdw128mem_mask", IX86_BUILTIN_PMOVDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31281  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovsdw256mem_mask", IX86_BUILTIN_PMOVSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31282  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovsdw128mem_mask", IX86_BUILTIN_PMOVSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31283  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask_store, "__builtin_ia32_pmovusdw256mem_mask", IX86_BUILTIN_PMOVUSDW256_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V8SI_QI },
31284  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask_store, "__builtin_ia32_pmovusdw128mem_mask", IX86_BUILTIN_PMOVUSDW128_MEM, UNKNOWN, (int) VOID_FTYPE_PV8HI_V4SI_QI },
31285
31286  /* PCOMMIT.  */
31287  { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
31288};
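
/* A note on reading these tables: the *_FTYPE_* tag at the end of each
   entry names the builtin's prototype, with the part before "_FTYPE_"
   giving the return type and the parts after it the argument types
   ("PC" standing for pointer-to-const, "P" for pointer, and QI/HI/SI/DI
   for the narrow integer and mask widths).  For example, the
   V8DF_FTYPE_PCV8DF_V8DF_QI entries above correspond, roughly, to

       v8df builtin (const v8df *src, v8df passthru, unsigned char mask);

   This is only an illustrative sketch of the naming convention; the
   actual prototypes are built from the enumerators in
   i386-builtin-types.def.  */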
31289
31290/* Builtins with variable number of arguments.  */
31291static const struct builtin_description bdesc_args[] =
31292{
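  /* Each initializer below follows the same column layout as the tables
     above: the ISA option mask that must be enabled, the CODE_FOR_* insn
     pattern that implements the builtin, the user-visible builtin name,
     its IX86_BUILTIN_* enumerator, an RTX comparison code (UNKNOWN when
     the entry is not a comparison), and the prototype enumerator cast to
     int.  The comparison column is what allows several __builtin_ia32_cmp*
     entries further down to share a single CODE_FOR_*maskcmp* pattern.  */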
31293  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
31294  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
31295  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_nothing, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
31296  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31297  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31298  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
31299  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
31300
31301  /* MMX */
31302  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31303  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31304  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31305  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31306  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31307  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31308
31309  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31310  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31311  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31312  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31313  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31314  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31315  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31316  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31317
31318  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31319  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31320
31321  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31322  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31323  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31324  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31325
31326  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31327  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31328  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31329  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31330  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31331  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31332
31333  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31334  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31335  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31336  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31337  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31338  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
31339
31340  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31341  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
31342  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
31343
31344  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
31345
31346  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31347  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31348  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31349  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31350  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31351  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31352
31353  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31354  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31355  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
31356  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31357  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31358  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
31359
31360  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
31361  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
31362  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
31363  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
31364
31365  /* 3DNow! */
31366  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31367  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31368  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31369  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31370
31371  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31372  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31373  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31374  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31375  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31376  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
31377  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31378  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31379  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31380  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31381  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31382  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31383  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31384  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31385  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31386
31387  /* 3DNow!A */
31388  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
31389  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
31390  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
31391  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
31392  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31393  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
31394
31395  /* SSE */
31396  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
31397  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31398  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31399  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31400  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31401  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
31402  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31403  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31404  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31405  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
31406  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
31407  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
31408
31409  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
31410
31411  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31412  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31413  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31414  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31415  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31416  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31417  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31418  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31419
31420  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31421  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31422  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31423  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31424  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31425  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31426  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31427  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31428  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31429  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31430  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
31431  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31432  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
31433  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
31434  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
31435  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
31436  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
31437  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
31438  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
31439  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
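  /* The "greater" comparisons above do not introduce new comparison codes:
     cmpgt/cmpge and their negated forms reuse LT, LE, UNGE and UNGT
     together with the *_SWAP prototype variants, which exchange the two
     operands when the builtin is expanded.  */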
31440
31441  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31442  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31443  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31444  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31445
31446  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31447  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31448  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31449  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31450
31451  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3,  "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31452
31453  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31454  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31455  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31456  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31457  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
31458
31459  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
31460  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
31461  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
31462
31463  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
31464
31465  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31466  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31467  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
31468
31469  { OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
31470  { OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
31471
31472  /* SSE MMX or 3DNow!A */
31473  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31474  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31475  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31476
31477  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31478  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31479  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
31480  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
31481
31482  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
31483  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
31484
31485  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
31486
31487  /* SSE2 */
31488  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
31489
31490  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
31491  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
31492  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
31493  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
31494  { OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
31495
31496  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31497  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31498  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
31499  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
31500  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
31501
31502  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
31503
31504  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31505  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
31506  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31507  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
31508
31509  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_fix_notruncv4sfv4si, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31510  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
31511  { OPTION_MASK_ISA_SSE2, CODE_FOR_fix_truncv4sfv4si2, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
31512
31513  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31514  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31515  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31516  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31517  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31518  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31519  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31520  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
31521
31522  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
31523  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
31524  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
31525  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31526  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
31527  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
31528  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
31529  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
31530  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
31531  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3,  "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI  },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI  },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN,  (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  /* SSSE3.  */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  /* SSE4.1 */
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_floorpd", IX86_BUILTIN_FLOORPD, (enum rtx_code) ROUND_FLOOR, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_ceilpd", IX86_BUILTIN_CEILPD, (enum rtx_code) ROUND_CEIL, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_truncpd", IX86_BUILTIN_TRUNCPD, (enum rtx_code) ROUND_TRUNC, (int) V2DF_FTYPE_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_rintpd", IX86_BUILTIN_RINTPD, (enum rtx_code) ROUND_MXCSR, (int) V2DF_FTYPE_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_floorpd_vec_pack_sfix", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd_vec_pack_sfix, "__builtin_ia32_ceilpd_vec_pack_sfix", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V2DF_V2DF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2, "__builtin_ia32_roundpd_az", IX86_BUILTIN_ROUNDPD_AZ, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv2df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_floorps", IX86_BUILTIN_FLOORPS, (enum rtx_code) ROUND_FLOOR, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_ceilps", IX86_BUILTIN_CEILPS, (enum rtx_code) ROUND_CEIL, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_truncps", IX86_BUILTIN_TRUNCPS, (enum rtx_code) ROUND_TRUNC, (int) V4SF_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_rintps", IX86_BUILTIN_RINTPS, (enum rtx_code) ROUND_MXCSR, (int) V4SF_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_floorps_sfix", IX86_BUILTIN_FLOORPS_SFIX, (enum rtx_code) ROUND_FLOOR, (int) V4SI_FTYPE_V4SF_ROUND },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps_sfix, "__builtin_ia32_ceilps_sfix", IX86_BUILTIN_CEILPS_SFIX, (enum rtx_code) ROUND_CEIL, (int) V4SI_FTYPE_V4SF_ROUND },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2, "__builtin_ia32_roundps_az", IX86_BUILTIN_ROUNDPS_AZ, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_roundv4sf2_sfix, "__builtin_ia32_roundps_az_sfix", IX86_BUILTIN_ROUNDPS_AZ_SFIX, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vmcmpv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv4siv4df2, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_floatv8siv8sf2, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_fix_notruncv8sfv8si, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv4dfv4si2, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_fix_truncv8sfv8si2, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_floorpd256", IX86_BUILTIN_FLOORPD256, (enum rtx_code) ROUND_FLOOR, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_ceilpd256", IX86_BUILTIN_CEILPD256, (enum rtx_code) ROUND_CEIL, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_truncpd256", IX86_BUILTIN_TRUNCPD256, (enum rtx_code) ROUND_TRUNC, (int) V4DF_FTYPE_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_rintpd256", IX86_BUILTIN_RINTPD256, (enum rtx_code) ROUND_MXCSR, (int) V4DF_FTYPE_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2, "__builtin_ia32_roundpd_az256", IX86_BUILTIN_ROUNDPD_AZ256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv4df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix256", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_floorpd_vec_pack_sfix256", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd_vec_pack_sfix256, "__builtin_ia32_ceilpd_vec_pack_sfix256", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V4DF_V4DF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_floorps256", IX86_BUILTIN_FLOORPS256, (enum rtx_code) ROUND_FLOOR, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_ceilps256", IX86_BUILTIN_CEILPS256, (enum rtx_code) ROUND_CEIL, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_truncps256", IX86_BUILTIN_TRUNCPS256, (enum rtx_code) ROUND_TRUNC, (int) V8SF_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_rintps256", IX86_BUILTIN_RINTPS256, (enum rtx_code) ROUND_MXCSR, (int) V8SF_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_floorps_sfix256", IX86_BUILTIN_FLOORPS_SFIX256, (enum rtx_code) ROUND_FLOOR, (int) V8SI_FTYPE_V8SF_ROUND },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },

  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF  },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3,  "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3,  "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },

  /* AVX2 */
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv16hi2, "__builtin_ia32_pabsw256", IX86_BUILTIN_PABSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_absv8si2, "__builtin_ia32_pabsd256", IX86_BUILTIN_PABSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packssdw, "__builtin_ia32_packssdw256",  IX86_BUILTIN_PACKSSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packsswb, "__builtin_ia32_packsswb256",  IX86_BUILTIN_PACKSSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packusdw, "__builtin_ia32_packusdw256",  IX86_BUILTIN_PACKUSDW256, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_packuswb, "__builtin_ia32_packuswb256",  IX86_BUILTIN_PACKUSWB256, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv32qi3, "__builtin_ia32_paddb256", IX86_BUILTIN_PADDB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv16hi3, "__builtin_ia32_paddw256", IX86_BUILTIN_PADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv8si3, "__builtin_ia32_paddd256", IX86_BUILTIN_PADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_addv4di3, "__builtin_ia32_paddq256", IX86_BUILTIN_PADDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv32qi3, "__builtin_ia32_paddsb256", IX86_BUILTIN_PADDSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ssaddv16hi3, "__builtin_ia32_paddsw256", IX86_BUILTIN_PADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv32qi3, "__builtin_ia32_paddusb256", IX86_BUILTIN_PADDUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_usaddv16hi3, "__builtin_ia32_paddusw256", IX86_BUILTIN_PADDUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_palignrv2ti, "__builtin_ia32_palignr256", IX86_BUILTIN_PALIGNR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_andv4di3, "__builtin_ia32_andsi256", IX86_BUILTIN_AND256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_andnotv4di3, "__builtin_ia32_andnotsi256", IX86_BUILTIN_ANDNOT256I, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv32qi3, "__builtin_ia32_pavgb256",  IX86_BUILTIN_PAVGB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_uavgv16hi3, "__builtin_ia32_pavgw256",  IX86_BUILTIN_PAVGW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendvb, "__builtin_ia32_pblendvb256", IX86_BUILTIN_PBLENDVB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblendw, "__builtin_ia32_pblendw256", IX86_BUILTIN_PBLENDVW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv32qi3, "__builtin_ia32_pcmpeqb256", IX86_BUILTIN_PCMPEQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv16hi3, "__builtin_ia32_pcmpeqw256", IX86_BUILTIN_PCMPEQW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv8si3, "__builtin_ia32_pcmpeqd256", IX86_BUILTIN_PCMPEQD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI  },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_eqv4di3, "__builtin_ia32_pcmpeqq256", IX86_BUILTIN_PCMPEQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI  },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv32qi3, "__builtin_ia32_pcmpgtb256", IX86_BUILTIN_PCMPGTB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv16hi3, "__builtin_ia32_pcmpgtw256", IX86_BUILTIN_PCMPGTW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv8si3, "__builtin_ia32_pcmpgtd256", IX86_BUILTIN_PCMPGTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI  },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_gtv4di3, "__builtin_ia32_pcmpgtq256", IX86_BUILTIN_PCMPGTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI  },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddwv16hi3, "__builtin_ia32_phaddw256", IX86_BUILTIN_PHADDW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phadddv8si3, "__builtin_ia32_phaddd256", IX86_BUILTIN_PHADDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phaddswv16hi3, "__builtin_ia32_phaddsw256", IX86_BUILTIN_PHADDSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubwv16hi3, "__builtin_ia32_phsubw256", IX86_BUILTIN_PHSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubdv8si3, "__builtin_ia32_phsubd256", IX86_BUILTIN_PHSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_phsubswv16hi3, "__builtin_ia32_phsubsw256", IX86_BUILTIN_PHSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddubsw256, "__builtin_ia32_pmaddubsw256", IX86_BUILTIN_PMADDUBSW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmaddwd, "__builtin_ia32_pmaddwd256", IX86_BUILTIN_PMADDWD256, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv32qi3, "__builtin_ia32_pmaxsb256", IX86_BUILTIN_PMAXSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv16hi3, "__builtin_ia32_pmaxsw256", IX86_BUILTIN_PMAXSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smaxv8si3 , "__builtin_ia32_pmaxsd256", IX86_BUILTIN_PMAXSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv32qi3, "__builtin_ia32_pmaxub256", IX86_BUILTIN_PMAXUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv16hi3, "__builtin_ia32_pmaxuw256", IX86_BUILTIN_PMAXUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umaxv8si3 , "__builtin_ia32_pmaxud256", IX86_BUILTIN_PMAXUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv32qi3, "__builtin_ia32_pminsb256", IX86_BUILTIN_PMINSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv16hi3, "__builtin_ia32_pminsw256", IX86_BUILTIN_PMINSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_sminv8si3 , "__builtin_ia32_pminsd256", IX86_BUILTIN_PMINSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv32qi3, "__builtin_ia32_pminub256", IX86_BUILTIN_PMINUB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv16hi3, "__builtin_ia32_pminuw256", IX86_BUILTIN_PMINUW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_uminv8si3 , "__builtin_ia32_pminud256", IX86_BUILTIN_PMINUD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmovmskb, "__builtin_ia32_pmovmskb256", IX86_BUILTIN_PMOVMSKB256, UNKNOWN, (int) INT_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv16qiv16hi2, "__builtin_ia32_pmovsxbw256", IX86_BUILTIN_PMOVSXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8qiv8si2  , "__builtin_ia32_pmovsxbd256", IX86_BUILTIN_PMOVSXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4qiv4di2  , "__builtin_ia32_pmovsxbq256", IX86_BUILTIN_PMOVSXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv8hiv8si2  , "__builtin_ia32_pmovsxwd256", IX86_BUILTIN_PMOVSXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4hiv4di2  , "__builtin_ia32_pmovsxwq256", IX86_BUILTIN_PMOVSXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sign_extendv4siv4di2  , "__builtin_ia32_pmovsxdq256", IX86_BUILTIN_PMOVSXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv16qiv16hi2, "__builtin_ia32_pmovzxbw256", IX86_BUILTIN_PMOVZXBW256, UNKNOWN, (int) V16HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8qiv8si2  , "__builtin_ia32_pmovzxbd256", IX86_BUILTIN_PMOVZXBD256, UNKNOWN, (int) V8SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4qiv4di2  , "__builtin_ia32_pmovzxbq256", IX86_BUILTIN_PMOVZXBQ256, UNKNOWN, (int) V4DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2  , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2  , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2  , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pmulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256"  , IX86_BUILTIN_PMULHW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256"  , IX86_BUILTIN_PMULLW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256"  , IX86_BUILTIN_PMULLD256  , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufdv3, "__builtin_ia32_pshufd256", IX86_BUILTIN_PSHUFD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufhwv3, "__builtin_ia32_pshufhw256", IX86_BUILTIN_PSHUFHW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshuflwv3, "__builtin_ia32_pshuflw256", IX86_BUILTIN_PSHUFLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv32qi3, "__builtin_ia32_psignb256", IX86_BUILTIN_PSIGNB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv16hi3, "__builtin_ia32_psignw256", IX86_BUILTIN_PSIGNW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psignv8si3 , "__builtin_ia32_psignd256", IX86_BUILTIN_PSIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlv2ti3, "__builtin_ia32_pslldqi256", IX86_BUILTIN_PSLLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllwi256", IX86_BUILTIN_PSLLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv16hi3, "__builtin_ia32_psllw256", IX86_BUILTIN_PSLLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslldi256", IX86_BUILTIN_PSLLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv8si3, "__builtin_ia32_pslld256", IX86_BUILTIN_PSLLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllqi256", IX86_BUILTIN_PSLLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashlv4di3, "__builtin_ia32_psllq256", IX86_BUILTIN_PSLLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32034  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psrawi256", IX86_BUILTIN_PSRAWI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32035  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv16hi3, "__builtin_ia32_psraw256", IX86_BUILTIN_PSRAW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32036  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psradi256", IX86_BUILTIN_PSRADI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32037  { OPTION_MASK_ISA_AVX2, CODE_FOR_ashrv8si3, "__builtin_ia32_psrad256", IX86_BUILTIN_PSRAD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32038  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrv2ti3, "__builtin_ia32_psrldqi256", IX86_BUILTIN_PSRLDQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_CONVERT },
32039  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlwi256", IX86_BUILTIN_PSRLWI256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_SI_COUNT },
32040  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv16hi3, "__builtin_ia32_psrlw256", IX86_BUILTIN_PSRLW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_COUNT },
32041  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrldi256", IX86_BUILTIN_PSRLDI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_SI_COUNT },
32042  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv8si3, "__builtin_ia32_psrld256", IX86_BUILTIN_PSRLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_COUNT },
32043  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlqi256", IX86_BUILTIN_PSRLQI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_COUNT },
32044  { OPTION_MASK_ISA_AVX2, CODE_FOR_lshrv4di3, "__builtin_ia32_psrlq256", IX86_BUILTIN_PSRLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_COUNT },
32045  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv32qi3, "__builtin_ia32_psubb256", IX86_BUILTIN_PSUBB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32046  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv16hi3, "__builtin_ia32_psubw256", IX86_BUILTIN_PSUBW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32047  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv8si3, "__builtin_ia32_psubd256", IX86_BUILTIN_PSUBD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32048  { OPTION_MASK_ISA_AVX2, CODE_FOR_subv4di3, "__builtin_ia32_psubq256", IX86_BUILTIN_PSUBQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32049  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv32qi3, "__builtin_ia32_psubsb256", IX86_BUILTIN_PSUBSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32050  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_sssubv16hi3, "__builtin_ia32_psubsw256", IX86_BUILTIN_PSUBSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32051  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv32qi3, "__builtin_ia32_psubusb256", IX86_BUILTIN_PSUBUSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32052  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ussubv16hi3, "__builtin_ia32_psubusw256", IX86_BUILTIN_PSUBUSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32053  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv32qi, "__builtin_ia32_punpckhbw256", IX86_BUILTIN_PUNPCKHBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32054  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv16hi, "__builtin_ia32_punpckhwd256", IX86_BUILTIN_PUNPCKHWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI  },
32055  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv8si, "__builtin_ia32_punpckhdq256", IX86_BUILTIN_PUNPCKHDQ256, UNKNOWN,  (int) V8SI_FTYPE_V8SI_V8SI },
32056  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_highv4di, "__builtin_ia32_punpckhqdq256", IX86_BUILTIN_PUNPCKHQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32057  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv32qi, "__builtin_ia32_punpcklbw256", IX86_BUILTIN_PUNPCKLBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
32058  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv16hi, "__builtin_ia32_punpcklwd256", IX86_BUILTIN_PUNPCKLWD256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
32059  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv8si, "__builtin_ia32_punpckldq256", IX86_BUILTIN_PUNPCKLDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32060  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_interleave_lowv4di, "__builtin_ia32_punpcklqdq256", IX86_BUILTIN_PUNPCKLQDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32061  { OPTION_MASK_ISA_AVX2, CODE_FOR_xorv4di3, "__builtin_ia32_pxor256", IX86_BUILTIN_PXOR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32062  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4sf, "__builtin_ia32_vbroadcastss_ps", IX86_BUILTIN_VBROADCASTSS_PS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
32063  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv8sf, "__builtin_ia32_vbroadcastss_ps256", IX86_BUILTIN_VBROADCASTSS_PS256, UNKNOWN, (int) V8SF_FTYPE_V4SF },
32064  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vec_dupv4df, "__builtin_ia32_vbroadcastsd_pd256", IX86_BUILTIN_VBROADCASTSD_PD256, UNKNOWN, (int) V4DF_FTYPE_V2DF },
32065  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_vbroadcasti128_v4di, "__builtin_ia32_vbroadcastsi256", IX86_BUILTIN_VBROADCASTSI256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32066  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv4si, "__builtin_ia32_pblendd128", IX86_BUILTIN_PBLENDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
32067  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pblenddv8si, "__builtin_ia32_pblendd256", IX86_BUILTIN_PBLENDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
32068  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv32qi, "__builtin_ia32_pbroadcastb256", IX86_BUILTIN_PBROADCASTB256, UNKNOWN, (int) V32QI_FTYPE_V16QI },
32069  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16hi, "__builtin_ia32_pbroadcastw256", IX86_BUILTIN_PBROADCASTW256, UNKNOWN, (int) V16HI_FTYPE_V8HI },
32070  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8si, "__builtin_ia32_pbroadcastd256", IX86_BUILTIN_PBROADCASTD256, UNKNOWN, (int) V8SI_FTYPE_V4SI },
32071  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4di, "__builtin_ia32_pbroadcastq256", IX86_BUILTIN_PBROADCASTQ256, UNKNOWN, (int) V4DI_FTYPE_V2DI },
32072  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv16qi, "__builtin_ia32_pbroadcastb128", IX86_BUILTIN_PBROADCASTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
32073  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv8hi, "__builtin_ia32_pbroadcastw128", IX86_BUILTIN_PBROADCASTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
32074  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
32075  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
32076  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32077  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
32078  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
32079  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
32080  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
32081  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vextractf128v4di, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
32082  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx_vinsertf128v4di, "__builtin_ia32_insert128i256", IX86_BUILTIN_VINSERT128I256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT },
32083  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4di, "__builtin_ia32_psllv4di", IX86_BUILTIN_PSLLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32084  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv2di, "__builtin_ia32_psllv2di", IX86_BUILTIN_PSLLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32085  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv8si, "__builtin_ia32_psllv8si", IX86_BUILTIN_PSLLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32086  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashlvv4si, "__builtin_ia32_psllv4si", IX86_BUILTIN_PSLLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32087  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv8si, "__builtin_ia32_psrav8si", IX86_BUILTIN_PSRAVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32088  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_ashrvv4si, "__builtin_ia32_psrav4si", IX86_BUILTIN_PSRAVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
32089  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4di, "__builtin_ia32_psrlv4di", IX86_BUILTIN_PSRLVV4DI, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
32090  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv2di, "__builtin_ia32_psrlv2di", IX86_BUILTIN_PSRLVV2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
32091  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv8si, "__builtin_ia32_psrlv8si", IX86_BUILTIN_PSRLVV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
32092  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_lshrvv4si, "__builtin_ia32_psrlv4si", IX86_BUILTIN_PSRLVV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
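  /* Example (assuming the usual wrappers in avx2intrin.h): with -mavx2,
     user code such as

	__m256i m = _mm256_max_epi32 (a, b);

     reaches __builtin_ia32_pmaxsd256 and is expanded through the
     CODE_FOR_smaxv8si3 pattern recorded in this table.  */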

  { OPTION_MASK_ISA_LZCNT, CODE_FOR_clzhi2_lzcnt,   "__builtin_clzs",   IX86_BUILTIN_CLZS,    UNKNOWN,     (int) UINT16_FTYPE_UINT16 },

  /* BMI */
  { OPTION_MASK_ISA_BMI, CODE_FOR_bmi_bextr_si, "__builtin_ia32_bextr_u32", IX86_BUILTIN_BEXTR32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi_bextr_di, "__builtin_ia32_bextr_u64", IX86_BUILTIN_BEXTR64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI, CODE_FOR_ctzhi2,       "__builtin_ctzs",           IX86_BUILTIN_CTZS,    UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* TBM */
  { OPTION_MASK_ISA_TBM, CODE_FOR_tbm_bextri_si, "__builtin_ia32_bextri_u32", IX86_BUILTIN_BEXTRI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_TBM | OPTION_MASK_ISA_64BIT, CODE_FOR_tbm_bextri_di, "__builtin_ia32_bextri_u64", IX86_BUILTIN_BEXTRI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
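  /* Example (assuming the wrappers in f16cintrin.h): with -mf16c the
     _mm_cvtph_ps intrinsic forwards its argument to
     __builtin_ia32_vcvtph2ps, i.e. the CODE_FOR_vcvtph2ps entry above.  */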

  /* BMI2 */
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_BMI2 | OPTION_MASK_ISA_64BIT, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
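  /* Example (assuming the wrappers in bmi2intrin.h): _pdep_u32 (src, mask)
     is simply __builtin_ia32_pdep_si (src, mask); the 64-bit forms above
     additionally require OPTION_MASK_ISA_64BIT.  */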

  /* AVX512F */
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_256si, "__builtin_ia32_si512_256si", IX86_BUILTIN_SI512_SI256, UNKNOWN, (int) V16SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_256ps, "__builtin_ia32_ps512_256ps", IX86_BUILTIN_PS512_PS256, UNKNOWN, (int) V16SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_256pd, "__builtin_ia32_pd512_256pd", IX86_BUILTIN_PD512_PD256, UNKNOWN, (int) V8DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_si512_si, "__builtin_ia32_si512_si", IX86_BUILTIN_SI512_SI, UNKNOWN, (int) V16SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ps512_ps, "__builtin_ia32_ps512_ps", IX86_BUILTIN_PS512_PS, UNKNOWN, (int) V16SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pd512_pd, "__builtin_ia32_pd512_pd", IX86_BUILTIN_PD512_PD, UNKNOWN, (int) V8DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv16si_mask, "__builtin_ia32_alignd512_mask", IX86_BUILTIN_ALIGND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_alignv8di_mask, "__builtin_ia32_alignq512_mask", IX86_BUILTIN_ALIGNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16si, "__builtin_ia32_blendmd_512_mask", IX86_BUILTIN_BLENDMD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8df, "__builtin_ia32_blendmpd_512_mask", IX86_BUILTIN_BLENDMPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv16sf, "__builtin_ia32_blendmps_512_mask", IX86_BUILTIN_BLENDMPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_blendmv8di, "__builtin_ia32_blendmq_512_mask", IX86_BUILTIN_BLENDMQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x4_512", IX86_BUILTIN_BROADCASTF32X4_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8df_mask, "__builtin_ia32_broadcastf64x4_512", IX86_BUILTIN_BROADCASTF64X4_512, UNKNOWN, (int) V8DF_FTYPE_V4DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv16si_mask, "__builtin_ia32_broadcasti32x4_512", IX86_BUILTIN_BROADCASTI32X4_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_broadcastv8di_mask, "__builtin_ia32_broadcasti64x4_512", IX86_BUILTIN_BROADCASTI64X4_512, UNKNOWN, (int) V8DI_FTYPE_V4DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8df_mask, "__builtin_ia32_broadcastsd512", IX86_BUILTIN_BROADCASTSD512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16sf_mask, "__builtin_ia32_broadcastss512", IX86_BUILTIN_BROADCASTSS512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16si3_mask, "__builtin_ia32_cmpd512_mask", IX86_BUILTIN_CMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8di3_mask, "__builtin_ia32_cmpq512_mask", IX86_BUILTIN_CMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8df_mask, "__builtin_ia32_compressdf512_mask", IX86_BUILTIN_COMPRESSPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16sf_mask, "__builtin_ia32_compresssf512_mask", IX86_BUILTIN_COMPRESSPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32_cvtdq2pd512_mask", IX86_BUILTIN_CVTDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtps2ph512_mask,  "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti32x4_mask, "__builtin_ia32_extracti32x4_mask", IX86_BUILTIN_EXTRACTI32X4, UNKNOWN, (int) V4SI_FTYPE_V16SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vextracti64x4_mask, "__builtin_ia32_extracti64x4_mask", IX86_BUILTIN_EXTRACTI64X4, UNKNOWN, (int) V4DI_FTYPE_V8DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf32x4_mask, "__builtin_ia32_insertf32x4_mask", IX86_BUILTIN_INSERTF32X4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinsertf64x4_mask, "__builtin_ia32_insertf64x4_mask", IX86_BUILTIN_INSERTF64X4, UNKNOWN, (int) V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti32x4_mask, "__builtin_ia32_inserti32x4_mask", IX86_BUILTIN_INSERTI32X4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vinserti64x4_mask, "__builtin_ia32_inserti64x4_mask", IX86_BUILTIN_INSERTI64X4, UNKNOWN, (int) V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8df_mask, "__builtin_ia32_movapd512_mask", IX86_BUILTIN_MOVAPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16sf_mask, "__builtin_ia32_movaps512_mask", IX86_BUILTIN_MOVAPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movddup512_mask, "__builtin_ia32_movddup512_mask", IX86_BUILTIN_MOVDDUP512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_movdqa32_512_mask", IX86_BUILTIN_MOVDQA32_512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_movdqa64_512_mask", IX86_BUILTIN_MOVDQA64_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movshdup512_mask, "__builtin_ia32_movshdup512_mask", IX86_BUILTIN_MOVSHDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_movsldup512_mask, "__builtin_ia32_movsldup512_mask", IX86_BUILTIN_MOVSLDUP512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv16si2_mask, "__builtin_ia32_pabsd512_mask", IX86_BUILTIN_PABSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_absv8di2_mask, "__builtin_ia32_pabsq512_mask", IX86_BUILTIN_PABSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16si3_mask, "__builtin_ia32_paddd512_mask", IX86_BUILTIN_PADDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8di3_mask, "__builtin_ia32_paddq512_mask", IX86_BUILTIN_PADDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv16si3_mask, "__builtin_ia32_pandd512_mask", IX86_BUILTIN_PANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv16si3_mask, "__builtin_ia32_pandnd512_mask", IX86_BUILTIN_PANDND512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_andnotv8di3_mask, "__builtin_ia32_pandnq512_mask", IX86_BUILTIN_PANDNQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andv8di3_mask, "__builtin_ia32_pandq512_mask", IX86_BUILTIN_PANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv16si_mask, "__builtin_ia32_pbroadcastd512", IX86_BUILTIN_PBROADCASTD512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv16si_mask, "__builtin_ia32_pbroadcastd512_gpr_mask", IX86_BUILTIN_PBROADCASTD512_GPR, UNKNOWN, (int) V16SI_FTYPE_SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv8di, "__builtin_ia32_broadcastmb512", IX86_BUILTIN_PBROADCASTMB512, UNKNOWN, (int) V8DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv16si, "__builtin_ia32_broadcastmw512", IX86_BUILTIN_PBROADCASTMW512, UNKNOWN, (int) V16SI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dupv8di_mask, "__builtin_ia32_pbroadcastq512", IX86_BUILTIN_PBROADCASTQ512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_dup_gprv8di_mask, "__builtin_ia32_pbroadcastq512_gpr_mask", IX86_BUILTIN_PBROADCASTQ512_GPR, UNKNOWN, (int) V8DI_FTYPE_DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv16si3_mask, "__builtin_ia32_pcmpeqd512_mask", IX86_BUILTIN_PCMPEQD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_eqv8di3_mask, "__builtin_ia32_pcmpeqq512_mask", IX86_BUILTIN_PCMPEQQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia32_pcmpgtd512_mask", IX86_BUILTIN_PCMPGTD512_MASK, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv16si3_mask, "__builtin_ia32_pmaxud512_mask", IX86_BUILTIN_PMAXUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_umaxv8di3_mask, "__builtin_ia32_pmaxuq512_mask", IX86_BUILTIN_PMAXUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16si3_mask, "__builtin_ia32_pminsd512_mask", IX86_BUILTIN_PMINSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8di3_mask, "__builtin_ia32_pminsq512_mask", IX86_BUILTIN_PMINSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv16si3_mask, "__builtin_ia32_pminud512_mask", IX86_BUILTIN_PMINUD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_uminv8di3_mask, "__builtin_ia32_pminuq512_mask", IX86_BUILTIN_PMINUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16qi2_mask, "__builtin_ia32_pmovdb512_mask", IX86_BUILTIN_PMOVDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev16siv16hi2_mask, "__builtin_ia32_pmovdw512_mask", IX86_BUILTIN_PMOVDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div16qi2_mask, "__builtin_ia32_pmovqb512_mask", IX86_BUILTIN_PMOVQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8si2_mask, "__builtin_ia32_pmovqd512_mask", IX86_BUILTIN_PMOVQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_truncatev8div8hi2_mask, "__builtin_ia32_pmovqw512_mask", IX86_BUILTIN_PMOVQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16qi2_mask, "__builtin_ia32_pmovsdb512_mask", IX86_BUILTIN_PMOVSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev16siv16hi2_mask, "__builtin_ia32_pmovsdw512_mask", IX86_BUILTIN_PMOVSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div16qi2_mask, "__builtin_ia32_pmovsqb512_mask", IX86_BUILTIN_PMOVSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8si2_mask, "__builtin_ia32_pmovsqd512_mask", IX86_BUILTIN_PMOVSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ss_truncatev8div8hi2_mask, "__builtin_ia32_pmovsqw512_mask", IX86_BUILTIN_PMOVSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16qiv16si2_mask, "__builtin_ia32_pmovsxbd512_mask", IX86_BUILTIN_PMOVSXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8qiv8di2_mask, "__builtin_ia32_pmovsxbq512_mask", IX86_BUILTIN_PMOVSXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8siv8di2_mask, "__builtin_ia32_pmovsxdq512_mask", IX86_BUILTIN_PMOVSXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv16hiv16si2_mask, "__builtin_ia32_pmovsxwd512_mask", IX86_BUILTIN_PMOVSXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sign_extendv8hiv8di2_mask, "__builtin_ia32_pmovsxwq512_mask", IX86_BUILTIN_PMOVSXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16qi2_mask, "__builtin_ia32_pmovusdb512_mask", IX86_BUILTIN_PMOVUSDB512, UNKNOWN, (int) V16QI_FTYPE_V16SI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev16siv16hi2_mask, "__builtin_ia32_pmovusdw512_mask", IX86_BUILTIN_PMOVUSDW512, UNKNOWN, (int) V16HI_FTYPE_V16SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div16qi2_mask, "__builtin_ia32_pmovusqb512_mask", IX86_BUILTIN_PMOVUSQB512, UNKNOWN, (int) V16QI_FTYPE_V8DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8si2_mask, "__builtin_ia32_pmovusqd512_mask", IX86_BUILTIN_PMOVUSQD512, UNKNOWN, (int) V8SI_FTYPE_V8DI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_us_truncatev8div8hi2_mask, "__builtin_ia32_pmovusqw512_mask", IX86_BUILTIN_PMOVUSQW512, UNKNOWN, (int) V8HI_FTYPE_V8DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16qiv16si2_mask, "__builtin_ia32_pmovzxbd512_mask", IX86_BUILTIN_PMOVZXBD512, UNKNOWN, (int) V16SI_FTYPE_V16QI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8qiv8di2_mask, "__builtin_ia32_pmovzxbq512_mask", IX86_BUILTIN_PMOVZXBQ512, UNKNOWN, (int) V8DI_FTYPE_V16QI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8siv8di2_mask, "__builtin_ia32_pmovzxdq512_mask", IX86_BUILTIN_PMOVZXDQ512, UNKNOWN, (int) V8DI_FTYPE_V8SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv16hiv16si2_mask, "__builtin_ia32_pmovzxwd512_mask", IX86_BUILTIN_PMOVZXWD512, UNKNOWN, (int) V16SI_FTYPE_V16HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_zero_extendv8hiv8di2_mask, "__builtin_ia32_pmovzxwq512_mask", IX86_BUILTIN_PMOVZXWQ512, UNKNOWN, (int) V8DI_FTYPE_V8HI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_smult_even_v16si_mask, "__builtin_ia32_pmuldq512_mask", IX86_BUILTIN_PMULDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16si3_mask, "__builtin_ia32_pmulld512_mask"  , IX86_BUILTIN_PMULLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_vec_widen_umult_even_v16si_mask, "__builtin_ia32_pmuludq512_mask", IX86_BUILTIN_PMULUDQ512, UNKNOWN, (int) V8DI_FTYPE_V16SI_V16SI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv16si3_mask, "__builtin_ia32_pord512_mask", IX86_BUILTIN_PORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorv8di3_mask, "__builtin_ia32_porq512_mask", IX86_BUILTIN_PORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv16si_mask, "__builtin_ia32_prold512_mask", IX86_BUILTIN_PROLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolv8di_mask, "__builtin_ia32_prolq512_mask", IX86_BUILTIN_PROLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv16si_mask, "__builtin_ia32_prolvd512_mask", IX86_BUILTIN_PROLVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rolvv8di_mask, "__builtin_ia32_prolvq512_mask", IX86_BUILTIN_PROLVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv16si_mask, "__builtin_ia32_prord512_mask", IX86_BUILTIN_PRORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorv8di_mask, "__builtin_ia32_prorq512_mask", IX86_BUILTIN_PRORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv16si_mask, "__builtin_ia32_prorvd512_mask", IX86_BUILTIN_PRORVD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rorvv8di_mask, "__builtin_ia32_prorvq512_mask", IX86_BUILTIN_PRORVQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_pshufdv3_mask, "__builtin_ia32_pshufd512_mask", IX86_BUILTIN_PSHUFD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslld512_mask", IX86_BUILTIN_PSLLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv16si3_mask, "__builtin_ia32_pslldi512_mask", IX86_BUILTIN_PSLLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllq512_mask", IX86_BUILTIN_PSLLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashlv8di3_mask, "__builtin_ia32_psllqi512_mask", IX86_BUILTIN_PSLLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv16si_mask, "__builtin_ia32_psllv16si_mask", IX86_BUILTIN_PSLLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashlvv8di_mask, "__builtin_ia32_psllv8di_mask", IX86_BUILTIN_PSLLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psrad512_mask", IX86_BUILTIN_PSRAD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv16si3_mask, "__builtin_ia32_psradi512_mask", IX86_BUILTIN_PSRADI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraq512_mask", IX86_BUILTIN_PSRAQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ashrv8di3_mask, "__builtin_ia32_psraqi512_mask", IX86_BUILTIN_PSRAQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv16si_mask, "__builtin_ia32_psrav16si_mask", IX86_BUILTIN_PSRAVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ashrvv8di_mask, "__builtin_ia32_psrav8di_mask", IX86_BUILTIN_PSRAVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrld512_mask", IX86_BUILTIN_PSRLD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv16si3_mask, "__builtin_ia32_psrldi512_mask", IX86_BUILTIN_PSRLDI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlq512_mask", IX86_BUILTIN_PSRLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_lshrv8di3_mask, "__builtin_ia32_psrlqi512_mask", IX86_BUILTIN_PSRLQI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv16si_mask, "__builtin_ia32_psrlv16si_mask", IX86_BUILTIN_PSRLVV16SI, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_lshrvv8di_mask, "__builtin_ia32_psrlv8di_mask", IX86_BUILTIN_PSRLVV8DI, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16si3_mask, "__builtin_ia32_psubd512_mask", IX86_BUILTIN_PSUBD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8di3_mask, "__builtin_ia32_psubq512_mask", IX86_BUILTIN_PSUBQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv16si3_mask, "__builtin_ia32_ptestmd512", IX86_BUILTIN_PTESTMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testmv8di3_mask, "__builtin_ia32_ptestmq512", IX86_BUILTIN_PTESTMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv16si3_mask, "__builtin_ia32_ptestnmd512", IX86_BUILTIN_PTESTNMD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_testnmv8di3_mask, "__builtin_ia32_ptestnmq512", IX86_BUILTIN_PTESTNMQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv16si_mask, "__builtin_ia32_punpckhdq512_mask", IX86_BUILTIN_PUNPCKHDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_highv8di_mask, "__builtin_ia32_punpckhqdq512_mask", IX86_BUILTIN_PUNPCKHQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv16si_mask, "__builtin_ia32_punpckldq512_mask", IX86_BUILTIN_PUNPCKLDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_interleave_lowv8di_mask, "__builtin_ia32_punpcklqdq512_mask", IX86_BUILTIN_PUNPCKLQDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv16si3_mask, "__builtin_ia32_pxord512_mask", IX86_BUILTIN_PXORD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorv8di3_mask, "__builtin_ia32_pxorq512_mask", IX86_BUILTIN_PXORQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v8df_mask, "__builtin_ia32_rcp14pd512_mask", IX86_BUILTIN_RCP14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rcp14v16sf_mask, "__builtin_ia32_rcp14ps512_mask", IX86_BUILTIN_RCP14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v2df, "__builtin_ia32_rcp14sd", IX86_BUILTIN_RCP14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_srcp14v4sf, "__builtin_ia32_rcp14ss", IX86_BUILTIN_RCP14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v8df_mask, "__builtin_ia32_rsqrt14pd512_mask", IX86_BUILTIN_RSQRT14PD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v16sf_mask, "__builtin_ia32_rsqrt14ps512_mask", IX86_BUILTIN_RSQRT14PS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v2df, "__builtin_ia32_rsqrt14sd", IX86_BUILTIN_RSQRT14SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_rsqrt14v4sf, "__builtin_ia32_rsqrt14ss", IX86_BUILTIN_RSQRT14SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufpd512_mask, "__builtin_ia32_shufpd512_mask", IX86_BUILTIN_SHUFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shufps512_mask, "__builtin_ia32_shufps512_mask", IX86_BUILTIN_SHUFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_mask", IX86_BUILTIN_SHUF_F32x4, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_mask", IX86_BUILTIN_SHUF_F64x2, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_mask", IX86_BUILTIN_SHUF_I32x4, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_mask", IX86_BUILTIN_SHUF_I64x2, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv16si3_mask, "__builtin_ia32_ucmpd512_mask", IX86_BUILTIN_UCMPD512, UNKNOWN, (int) HI_FTYPE_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ucmpv8di3_mask, "__builtin_ia32_ucmpq512_mask", IX86_BUILTIN_UCMPQ512, UNKNOWN, (int) QI_FTYPE_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhpd512_mask, "__builtin_ia32_unpckhpd512_mask", IX86_BUILTIN_UNPCKHPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpckhps512_mask, "__builtin_ia32_unpckhps512_mask", IX86_BUILTIN_UNPCKHPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklpd512_mask, "__builtin_ia32_unpcklpd512_mask", IX86_BUILTIN_UNPCKLPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_unpcklps512_mask,  "__builtin_ia32_unpcklps512_mask", IX86_BUILTIN_UNPCKLPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv16si2_mask, "__builtin_ia32_vplzcntd_512_mask", IX86_BUILTIN_VPCLZCNTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_clzv8di2_mask, "__builtin_ia32_vplzcntq_512_mask", IX86_BUILTIN_VPCLZCNTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv16si_mask, "__builtin_ia32_vpconflictsi_512_mask", IX86_BUILTIN_VPCONFLICTD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512CD, CODE_FOR_conflictv8di_mask, "__builtin_ia32_vpconflictdi_512_mask", IX86_BUILTIN_VPCONFLICTQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8df_mask, "__builtin_ia32_permdf512_mask", IX86_BUILTIN_VPERMDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permv8di_mask, "__builtin_ia32_permdi512_mask", IX86_BUILTIN_VPERMDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16si3_mask, "__builtin_ia32_vpermi2vard512_mask", IX86_BUILTIN_VPERMI2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8df3_mask, "__builtin_ia32_vpermi2varpd512_mask", IX86_BUILTIN_VPERMI2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv16sf3_mask, "__builtin_ia32_vpermi2varps512_mask", IX86_BUILTIN_VPERMI2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermi2varv8di3_mask, "__builtin_ia32_vpermi2varq512_mask", IX86_BUILTIN_VPERMI2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv8df_mask, "__builtin_ia32_vpermilpd512_mask", IX86_BUILTIN_VPERMILPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilv16sf_mask, "__builtin_ia32_vpermilps512_mask", IX86_BUILTIN_VPERMILPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv8df3_mask, "__builtin_ia32_vpermilvarpd512_mask", IX86_BUILTIN_VPERMILVARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermilvarv16sf3_mask, "__builtin_ia32_vpermilvarps512_mask", IX86_BUILTIN_VPERMILVARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_mask, "__builtin_ia32_vpermt2vard512_mask", IX86_BUILTIN_VPERMT2VARD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16si3_maskz, "__builtin_ia32_vpermt2vard512_maskz", IX86_BUILTIN_VPERMT2VARD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_mask, "__builtin_ia32_vpermt2varpd512_mask", IX86_BUILTIN_VPERMT2VARPD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8df3_maskz, "__builtin_ia32_vpermt2varpd512_maskz", IX86_BUILTIN_VPERMT2VARPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_mask, "__builtin_ia32_vpermt2varps512_mask", IX86_BUILTIN_VPERMT2VARPS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv16sf3_maskz, "__builtin_ia32_vpermt2varps512_maskz", IX86_BUILTIN_VPERMT2VARPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_mask, "__builtin_ia32_vpermt2varq512_mask", IX86_BUILTIN_VPERMT2VARQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vpermt2varv8di3_maskz, "__builtin_ia32_vpermt2varq512_maskz", IX86_BUILTIN_VPERMT2VARQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8df_mask, "__builtin_ia32_permvardf512_mask", IX86_BUILTIN_VPERMVARDF512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DI_V8DF_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv8di_mask, "__builtin_ia32_permvardi512_mask", IX86_BUILTIN_VPERMVARDI512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16sf_mask, "__builtin_ia32_permvarsf512_mask", IX86_BUILTIN_VPERMVARSF512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SI_V16SF_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_permvarv16si_mask, "__builtin_ia32_permvarsi512_mask", IX86_BUILTIN_VPERMVARSI512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_mask, "__builtin_ia32_pternlogd512_mask", IX86_BUILTIN_VTERNLOGD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv16si_maskz, "__builtin_ia32_pternlogd512_maskz", IX86_BUILTIN_VTERNLOGD512_MASKZ, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_mask, "__builtin_ia32_pternlogq512_mask", IX86_BUILTIN_VTERNLOGQ512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vternlogv8di_maskz, "__builtin_ia32_pternlogq512_maskz", IX86_BUILTIN_VTERNLOGQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI },
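  /* Note on the "_mask" entries above: the trailing vector operand is the
     merge (pass-through) source and the final HI/QI operand is the write
     mask.  For instance, assuming the avx512fintrin.h wrappers,
     _mm512_mask_add_epi32 (w, u, a, b) becomes
     __builtin_ia32_paddd512_mask (a, b, w, u), matching
     V16SI_FTYPE_V16SI_V16SI_V16SI_HI.  */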

  { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv16sf3,  "__builtin_ia32_copysignps512", IX86_BUILTIN_CPYSGNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_copysignv8df3,  "__builtin_ia32_copysignpd512", IX86_BUILTIN_CPYSGNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },

  /* Mask arithmetic operations */
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_andhi3, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_one_cmplhi2, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) HI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_iorhi3, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_xorhi3, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) HI_FTYPE_HI_HI },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) HI_FTYPE_HI },
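  /* These operate on 16-bit mask registers; e.g., assuming the
     avx512fintrin.h wrappers, _mm512_kand on two __mmask16 values reduces
     to __builtin_ia32_kandhi, i.e. the CODE_FOR_andhi3 entry above.  */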

  /* SHA */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg1, 0, IX86_BUILTIN_SHA1MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1msg2, 0, IX86_BUILTIN_SHA1MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1nexte, 0, IX86_BUILTIN_SHA1NEXTE, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha1rnds4, 0, IX86_BUILTIN_SHA1RNDS4, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg1, 0, IX86_BUILTIN_SHA256MSG1, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256msg2, 0, IX86_BUILTIN_SHA256MSG2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sha256rnds2, 0, IX86_BUILTIN_SHA256RNDS2, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
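  /* The SHA entries carry no builtin name in this table; the user-visible
     builtins behind the _mm_sha1msg1_epu32 family in shaintrin.h are
     declared separately elsewhere in this file.  */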
32346
32347  /* AVX512VL.  */
32348  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_palignrv32qi_mask, "__builtin_ia32_palignr256_mask", IX86_BUILTIN_PALIGNR256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT },
32349  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_palignrv16qi_mask, "__builtin_ia32_palignr128_mask", IX86_BUILTIN_PALIGNR128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT },
32350  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4di_mask, "__builtin_ia32_movdqa64_256_mask", IX86_BUILTIN_MOVDQA64_256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
32351  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2di_mask, "__builtin_ia32_movdqa64_128_mask", IX86_BUILTIN_MOVDQA64_128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
32352  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8si_mask, "__builtin_ia32_movdqa32_256_mask", IX86_BUILTIN_MOVDQA32_256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
32353  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4si_mask, "__builtin_ia32_movdqa32_128_mask", IX86_BUILTIN_MOVDQA32_128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
32354  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4df_mask, "__builtin_ia32_movapd256_mask", IX86_BUILTIN_MOVAPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
32355  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv2df_mask, "__builtin_ia32_movapd128_mask", IX86_BUILTIN_MOVAPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
32356  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv8sf_mask, "__builtin_ia32_movaps256_mask", IX86_BUILTIN_MOVAPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
32357  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loadv4sf_mask, "__builtin_ia32_movaps128_mask", IX86_BUILTIN_MOVAPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
32358  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv16hi_mask, "__builtin_ia32_movdquhi256_mask", IX86_BUILTIN_MOVDQUHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
32359  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_loaddquv8hi_mask, "__builtin_ia32_movdquhi128_mask", IX86_BUILTIN_MOVDQUHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
32360  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_loaddquv32qi_mask, "__builtin_ia32_movdquqi256_mask", IX86_BUILTIN_MOVDQUQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
32361  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_loaddquv16qi_mask, "__builtin_ia32_movdquqi128_mask", IX86_BUILTIN_MOVDQUQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4sf3_mask, "__builtin_ia32_minps_mask", IX86_BUILTIN_MINPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4sf3_mask, "__builtin_ia32_maxps_mask", IX86_BUILTIN_MAXPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2df3_mask, "__builtin_ia32_minpd_mask", IX86_BUILTIN_MINPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2df3_mask, "__builtin_ia32_maxpd_mask", IX86_BUILTIN_MAXPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4df3_mask, "__builtin_ia32_maxpd256_mask", IX86_BUILTIN_MAXPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8sf3_mask, "__builtin_ia32_maxps256_mask", IX86_BUILTIN_MAXPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4df3_mask, "__builtin_ia32_minpd256_mask", IX86_BUILTIN_MINPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8sf3_mask, "__builtin_ia32_minps256_mask", IX86_BUILTIN_MINPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4sf3_mask, "__builtin_ia32_mulps_mask", IX86_BUILTIN_MULPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_divv4sf3_mask, "__builtin_ia32_divps_mask", IX86_BUILTIN_DIVPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv2df3_mask, "__builtin_ia32_mulpd_mask", IX86_BUILTIN_MULPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_divv2df3_mask, "__builtin_ia32_divpd_mask", IX86_BUILTIN_DIVPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv4df3_mask, "__builtin_ia32_divpd256_mask", IX86_BUILTIN_DIVPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_divv8sf3_mask, "__builtin_ia32_divps256_mask", IX86_BUILTIN_DIVPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4df3_mask, "__builtin_ia32_mulpd256_mask", IX86_BUILTIN_MULPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8sf3_mask, "__builtin_ia32_mulps256_mask", IX86_BUILTIN_MULPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2df3_mask, "__builtin_ia32_addpd128_mask", IX86_BUILTIN_ADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4df3_mask, "__builtin_ia32_addpd256_mask", IX86_BUILTIN_ADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4sf3_mask, "__builtin_ia32_addps128_mask", IX86_BUILTIN_ADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8sf3_mask, "__builtin_ia32_addps256_mask", IX86_BUILTIN_ADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2df3_mask, "__builtin_ia32_subpd128_mask", IX86_BUILTIN_SUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4df3_mask, "__builtin_ia32_subpd256_mask", IX86_BUILTIN_SUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4sf3_mask, "__builtin_ia32_subps128_mask", IX86_BUILTIN_SUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8sf3_mask, "__builtin_ia32_subps256_mask", IX86_BUILTIN_SUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
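  /* Masked packed floating-point XOR/OR logic (AVX-512DQ forms).  */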
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4df3_mask, "__builtin_ia32_xorpd256_mask", IX86_BUILTIN_XORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2df3_mask, "__builtin_ia32_xorpd128_mask", IX86_BUILTIN_XORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8sf3_mask, "__builtin_ia32_xorps256_mask", IX86_BUILTIN_XORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4sf3_mask, "__builtin_ia32_xorps128_mask", IX86_BUILTIN_XORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4df3_mask, "__builtin_ia32_orpd256_mask", IX86_BUILTIN_ORPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2df3_mask, "__builtin_ia32_orpd128_mask", IX86_BUILTIN_ORPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8sf3_mask, "__builtin_ia32_orps256_mask", IX86_BUILTIN_ORPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4sf3_mask, "__builtin_ia32_orps128_mask", IX86_BUILTIN_ORPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8sf_mask, "__builtin_ia32_broadcastf32x2_256_mask", IX86_BUILTIN_BROADCASTF32x2_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv8si_mask, "__builtin_ia32_broadcasti32x2_256_mask", IX86_BUILTIN_BROADCASTI32x2_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4si_mask, "__builtin_ia32_broadcasti32x2_128_mask", IX86_BUILTIN_BROADCASTI32x2_128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4df_mask_1, "__builtin_ia32_broadcastf64x2_256_mask", IX86_BUILTIN_BROADCASTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_broadcastv4di_mask_1, "__builtin_ia32_broadcasti64x2_256_mask", IX86_BUILTIN_BROADCASTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8sf_mask_1, "__builtin_ia32_broadcastf32x4_256_mask", IX86_BUILTIN_BROADCASTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_broadcastv8si_mask_1, "__builtin_ia32_broadcasti32x4_256_mask", IX86_BUILTIN_BROADCASTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8sf, "__builtin_ia32_extractf32x4_256_mask", IX86_BUILTIN_EXTRACTF32X4_256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v8si, "__builtin_ia32_extracti32x4_256_mask", IX86_BUILTIN_EXTRACTI32X4_256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv16hi_mask, "__builtin_ia32_dbpsadbw256_mask", IX86_BUILTIN_DBPSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_dbpsadbwv8hi_mask, "__builtin_ia32_dbpsadbw128_mask", IX86_BUILTIN_DBPSADBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI },
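  /* Masked conversions between floating-point and integer vectors.  */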
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2qq256_mask", IX86_BUILTIN_CVTTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2qq128_mask", IX86_BUILTIN_CVTTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4di2_mask, "__builtin_ia32_cvttpd2uqq256_mask", IX86_BUILTIN_CVTTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2di2_mask, "__builtin_ia32_cvttpd2uqq128_mask", IX86_BUILTIN_CVTTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2qq256_mask", IX86_BUILTIN_CVTPD2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2qq128_mask", IX86_BUILTIN_CVTPD2QQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4di2_mask, "__builtin_ia32_cvtpd2uqq256_mask", IX86_BUILTIN_CVTPD2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4DF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2di2_mask, "__builtin_ia32_cvtpd2uqq128_mask", IX86_BUILTIN_CVTPD2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V2DF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv4dfv4si2_mask, "__builtin_ia32_cvtpd2udq256_mask", IX86_BUILTIN_CVTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_notruncv2dfv2si2_mask, "__builtin_ia32_cvtpd2udq128_mask", IX86_BUILTIN_CVTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2qq256_mask", IX86_BUILTIN_CVTTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2qq128_mask", IX86_BUILTIN_CVTTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4di2_mask, "__builtin_ia32_cvttps2uqq256_mask", IX86_BUILTIN_CVTTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2sfv2di2_mask, "__builtin_ia32_cvttps2uqq128_mask", IX86_BUILTIN_CVTTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2dq256_mask", IX86_BUILTIN_CVTTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2dq128_mask", IX86_BUILTIN_CVTTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv8sfv8si2_mask, "__builtin_ia32_cvttps2udq256_mask", IX86_BUILTIN_CVTTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4sfv4si2_mask, "__builtin_ia32_cvttps2udq128_mask", IX86_BUILTIN_CVTTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_fix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2dq256_mask", IX86_BUILTIN_CVTTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvttpd2dq_mask, "__builtin_ia32_cvttpd2dq128_mask", IX86_BUILTIN_CVTTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv4dfv4si2_mask, "__builtin_ia32_cvttpd2udq256_mask", IX86_BUILTIN_CVTTPD2UDQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufix_truncv2dfv2si2_mask, "__builtin_ia32_cvttpd2udq128_mask", IX86_BUILTIN_CVTTPD2UDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2dq256_mask, "__builtin_ia32_cvtpd2dq256_mask", IX86_BUILTIN_CVTPD2DQ256_MASK, UNKNOWN, (int) V4SI_FTYPE_V4DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2dq_mask, "__builtin_ia32_cvtpd2dq128_mask", IX86_BUILTIN_CVTPD2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V2DF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4df2_mask, "__builtin_ia32_cvtdq2pd256_mask", IX86_BUILTIN_CVTDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtdq2pd_mask, "__builtin_ia32_cvtdq2pd128_mask", IX86_BUILTIN_CVTDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4df2_mask, "__builtin_ia32_cvtudq2pd256_mask", IX86_BUILTIN_CVTUDQ2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SI_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2siv2df2_mask, "__builtin_ia32_cvtudq2pd128_mask", IX86_BUILTIN_CVTUDQ2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SI_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv8siv8sf2_mask, "__builtin_ia32_cvtdq2ps256_mask", IX86_BUILTIN_CVTDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4siv4sf2_mask, "__builtin_ia32_cvtdq2ps128_mask", IX86_BUILTIN_CVTDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv8siv8sf2_mask, "__builtin_ia32_cvtudq2ps256_mask", IX86_BUILTIN_CVTUDQ2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4siv4sf2_mask, "__builtin_ia32_cvtudq2ps128_mask", IX86_BUILTIN_CVTUDQ2PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtps2pd256_mask, "__builtin_ia32_cvtps2pd256_mask", IX86_BUILTIN_CVTPS2PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4SF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtps2pd_mask, "__builtin_ia32_cvtps2pd128_mask", IX86_BUILTIN_CVTPS2PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V4SF_V2DF_QI },
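  /* Masked broadcasts of a single element from a vector or general-purpose register.  */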
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv32qi_mask, "__builtin_ia32_pbroadcastb256_mask", IX86_BUILTIN_PBROADCASTB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv32qi_mask, "__builtin_ia32_pbroadcastb256_gpr_mask", IX86_BUILTIN_PBROADCASTB256_GPR_MASK, UNKNOWN, (int) V32QI_FTYPE_QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16qi_mask, "__builtin_ia32_pbroadcastb128_mask", IX86_BUILTIN_PBROADCASTB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16qi_mask, "__builtin_ia32_pbroadcastb128_gpr_mask", IX86_BUILTIN_PBROADCASTB128_GPR_MASK, UNKNOWN, (int) V16QI_FTYPE_QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv16hi_mask, "__builtin_ia32_pbroadcastw256_mask", IX86_BUILTIN_PBROADCASTW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv16hi_mask, "__builtin_ia32_pbroadcastw256_gpr_mask", IX86_BUILTIN_PBROADCASTW256_GPR_MASK, UNKNOWN, (int) V16HI_FTYPE_HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8hi_mask, "__builtin_ia32_pbroadcastw128_mask", IX86_BUILTIN_PBROADCASTW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8hi_mask, "__builtin_ia32_pbroadcastw128_gpr_mask", IX86_BUILTIN_PBROADCASTW128_GPR_MASK, UNKNOWN, (int) V8HI_FTYPE_HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8si_mask, "__builtin_ia32_pbroadcastd256_mask", IX86_BUILTIN_PBROADCASTD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv8si_mask, "__builtin_ia32_pbroadcastd256_gpr_mask", IX86_BUILTIN_PBROADCASTD256_GPR_MASK, UNKNOWN, (int) V8SI_FTYPE_SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4si_mask, "__builtin_ia32_pbroadcastd128_mask", IX86_BUILTIN_PBROADCASTD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4si_mask, "__builtin_ia32_pbroadcastd128_gpr_mask", IX86_BUILTIN_PBROADCASTD128_GPR_MASK, UNKNOWN, (int) V4SI_FTYPE_SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4di_mask, "__builtin_ia32_pbroadcastq256_mask", IX86_BUILTIN_PBROADCASTQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V2DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv4di_mask, "__builtin_ia32_pbroadcastq256_gpr_mask", IX86_BUILTIN_PBROADCASTQ256_GPR_MASK, UNKNOWN, (int) V4DI_FTYPE_DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv2di_mask, "__builtin_ia32_pbroadcastq128_mask", IX86_BUILTIN_PBROADCASTQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dup_gprv2di_mask, "__builtin_ia32_pbroadcastq128_gpr_mask", IX86_BUILTIN_PBROADCASTQ128_GPR_MASK, UNKNOWN, (int) V2DI_FTYPE_DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv8sf_mask, "__builtin_ia32_broadcastss256_mask", IX86_BUILTIN_BROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_V4SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4sf_mask, "__builtin_ia32_broadcastss128_mask", IX86_BUILTIN_BROADCASTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vec_dupv4df_mask, "__builtin_ia32_broadcastsd256_mask", IX86_BUILTIN_BROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_V2DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4df, "__builtin_ia32_extractf64x2_256_mask", IX86_BUILTIN_EXTRACTF64X2_256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vextractf128v4di, "__builtin_ia32_extracti64x2_256_mask", IX86_BUILTIN_EXTRACTI64X2_256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8sf, "__builtin_ia32_insertf32x4_256_mask", IX86_BUILTIN_INSERTF32X4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv8si, "__builtin_ia32_inserti32x4_256_mask", IX86_BUILTIN_INSERTI32X4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI },
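  /* Masked sign- and zero-extending element conversions (pmovsx/pmovzx).  */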
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv16qiv16hi2_mask, "__builtin_ia32_pmovsxbw256_mask", IX86_BUILTIN_PMOVSXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv8qiv8hi2_mask, "__builtin_ia32_pmovsxbw128_mask", IX86_BUILTIN_PMOVSXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8qiv8si2_mask, "__builtin_ia32_pmovsxbd256_mask", IX86_BUILTIN_PMOVSXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4qiv4si2_mask, "__builtin_ia32_pmovsxbd128_mask", IX86_BUILTIN_PMOVSXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4qiv4di2_mask, "__builtin_ia32_pmovsxbq256_mask", IX86_BUILTIN_PMOVSXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2qiv2di2_mask, "__builtin_ia32_pmovsxbq128_mask", IX86_BUILTIN_PMOVSXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv8hiv8si2_mask, "__builtin_ia32_pmovsxwd256_mask", IX86_BUILTIN_PMOVSXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv4hiv4si2_mask, "__builtin_ia32_pmovsxwd128_mask", IX86_BUILTIN_PMOVSXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4hiv4di2_mask, "__builtin_ia32_pmovsxwq256_mask", IX86_BUILTIN_PMOVSXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2hiv2di2_mask, "__builtin_ia32_pmovsxwq128_mask", IX86_BUILTIN_PMOVSXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sign_extendv4siv4di2_mask, "__builtin_ia32_pmovsxdq256_mask", IX86_BUILTIN_PMOVSXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_sign_extendv2siv2di2_mask, "__builtin_ia32_pmovsxdq128_mask", IX86_BUILTIN_PMOVSXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv16qiv16hi2_mask, "__builtin_ia32_pmovzxbw256_mask", IX86_BUILTIN_PMOVZXBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16QI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv8qiv8hi2_mask, "__builtin_ia32_pmovzxbw128_mask", IX86_BUILTIN_PMOVZXBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8qiv8si2_mask, "__builtin_ia32_pmovzxbd256_mask", IX86_BUILTIN_PMOVZXBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16QI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4qiv4si2_mask, "__builtin_ia32_pmovzxbd128_mask", IX86_BUILTIN_PMOVZXBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V16QI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4qiv4di2_mask, "__builtin_ia32_pmovzxbq256_mask", IX86_BUILTIN_PMOVZXBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V16QI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2qiv2di2_mask, "__builtin_ia32_pmovzxbq128_mask", IX86_BUILTIN_PMOVZXBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V16QI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv8hiv8si2_mask, "__builtin_ia32_pmovzxwd256_mask", IX86_BUILTIN_PMOVZXWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8HI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv4hiv4si2_mask, "__builtin_ia32_pmovzxwd128_mask", IX86_BUILTIN_PMOVZXWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4hiv4di2_mask, "__builtin_ia32_pmovzxwq256_mask", IX86_BUILTIN_PMOVZXWQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8HI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2hiv2di2_mask, "__builtin_ia32_pmovzxwq128_mask", IX86_BUILTIN_PMOVZXWQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V8HI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_zero_extendv4siv4di2_mask, "__builtin_ia32_pmovzxdq256_mask", IX86_BUILTIN_PMOVZXDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4SI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_zero_extendv2siv2di2_mask, "__builtin_ia32_pmovzxdq128_mask", IX86_BUILTIN_PMOVZXDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V2DI_QI },
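  /* Masked reduce builtins and variable permutes on 16-bit elements.  */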
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4df_mask, "__builtin_ia32_reducepd256_mask", IX86_BUILTIN_REDUCEPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv2df_mask, "__builtin_ia32_reducepd128_mask", IX86_BUILTIN_REDUCEPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv8sf_mask, "__builtin_ia32_reduceps256_mask", IX86_BUILTIN_REDUCEPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_reducepv4sf_mask, "__builtin_ia32_reduceps128_mask", IX86_BUILTIN_REDUCEPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv2df, "__builtin_ia32_reducesd", IX86_BUILTIN_REDUCESD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducesv4sf, "__builtin_ia32_reducess", IX86_BUILTIN_REDUCESS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16hi_mask, "__builtin_ia32_permvarhi256_mask", IX86_BUILTIN_VPERMVARHI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv8hi_mask, "__builtin_ia32_permvarhi128_mask", IX86_BUILTIN_VPERMVARHI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_mask, "__builtin_ia32_vpermt2varhi256_mask", IX86_BUILTIN_VPERMT2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16hi3_maskz, "__builtin_ia32_vpermt2varhi256_maskz", IX86_BUILTIN_VPERMT2VARHI256_MASKZ, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_mask, "__builtin_ia32_vpermt2varhi128_mask", IX86_BUILTIN_VPERMT2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8hi3_maskz, "__builtin_ia32_vpermt2varhi128_maskz", IX86_BUILTIN_VPERMT2VARHI128_MASKZ, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16hi3_mask, "__builtin_ia32_vpermi2varhi256_mask", IX86_BUILTIN_VPERMI2VARHI256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8hi3_mask, "__builtin_ia32_vpermi2varhi128_mask", IX86_BUILTIN_VPERMI2VARHI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
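  /* Masked rcp14/rsqrt14 approximations and full-precision square roots.  */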
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4df_mask, "__builtin_ia32_rcp14pd256_mask", IX86_BUILTIN_RCP14PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v2df_mask, "__builtin_ia32_rcp14pd128_mask", IX86_BUILTIN_RCP14PD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v8sf_mask, "__builtin_ia32_rcp14ps256_mask", IX86_BUILTIN_RCP14PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rcp14v4sf_mask, "__builtin_ia32_rcp14ps128_mask", IX86_BUILTIN_RCP14PS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4df_mask, "__builtin_ia32_rsqrt14pd256_mask", IX86_BUILTIN_RSQRT14PD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v2df_mask, "__builtin_ia32_rsqrt14pd128_mask", IX86_BUILTIN_RSQRT14PD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v8sf_mask, "__builtin_ia32_rsqrt14ps256_mask", IX86_BUILTIN_RSQRT14PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_rsqrt14v4sf_mask, "__builtin_ia32_rsqrt14ps128_mask", IX86_BUILTIN_RSQRT14PS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv4df2_mask, "__builtin_ia32_sqrtpd256_mask", IX86_BUILTIN_SQRTPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sqrtv2df2_mask, "__builtin_ia32_sqrtpd128_mask", IX86_BUILTIN_SQRTPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_sqrtv8sf2_mask, "__builtin_ia32_sqrtps256_mask", IX86_BUILTIN_SQRTPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_sqrtv4sf2_mask, "__builtin_ia32_sqrtps128_mask", IX86_BUILTIN_SQRTPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
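  /* Masked packed integer add/subtract, including saturating forms.  */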
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16qi3_mask, "__builtin_ia32_paddb128_mask", IX86_BUILTIN_PADDB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8hi3_mask, "__builtin_ia32_paddw128_mask", IX86_BUILTIN_PADDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4si3_mask, "__builtin_ia32_paddd128_mask", IX86_BUILTIN_PADDD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv2di3_mask, "__builtin_ia32_paddq128_mask", IX86_BUILTIN_PADDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16qi3_mask, "__builtin_ia32_psubb128_mask", IX86_BUILTIN_PSUBB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8hi3_mask, "__builtin_ia32_psubw128_mask", IX86_BUILTIN_PSUBW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4si3_mask, "__builtin_ia32_psubd128_mask", IX86_BUILTIN_PSUBD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv2di3_mask, "__builtin_ia32_psubq128_mask", IX86_BUILTIN_PSUBQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv16qi3_mask, "__builtin_ia32_paddsb128_mask", IX86_BUILTIN_PADDSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ssaddv8hi3_mask, "__builtin_ia32_paddsw128_mask", IX86_BUILTIN_PADDSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv16qi3_mask, "__builtin_ia32_psubsb128_mask", IX86_BUILTIN_PSUBSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_sssubv8hi3_mask, "__builtin_ia32_psubsw128_mask", IX86_BUILTIN_PSUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv16qi3_mask, "__builtin_ia32_paddusb128_mask", IX86_BUILTIN_PADDUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_usaddv8hi3_mask, "__builtin_ia32_paddusw128_mask", IX86_BUILTIN_PADDUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv16qi3_mask, "__builtin_ia32_psubusb128_mask", IX86_BUILTIN_PSUBUSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_ussubv8hi3_mask, "__builtin_ia32_psubusw128_mask", IX86_BUILTIN_PSUBUSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv32qi3_mask, "__builtin_ia32_paddb256_mask", IX86_BUILTIN_PADDB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv16hi3_mask, "__builtin_ia32_paddw256_mask", IX86_BUILTIN_PADDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv8si3_mask, "__builtin_ia32_paddd256_mask", IX86_BUILTIN_PADDD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_addv4di3_mask, "__builtin_ia32_paddq256_mask", IX86_BUILTIN_PADDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv32qi3_mask, "__builtin_ia32_paddsb256_mask", IX86_BUILTIN_PADDSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ssaddv16hi3_mask, "__builtin_ia32_paddsw256_mask", IX86_BUILTIN_PADDSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv32qi3_mask, "__builtin_ia32_paddusb256_mask", IX86_BUILTIN_PADDUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_usaddv16hi3_mask, "__builtin_ia32_paddusw256_mask", IX86_BUILTIN_PADDUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv32qi3_mask, "__builtin_ia32_psubb256_mask", IX86_BUILTIN_PSUBB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv16hi3_mask, "__builtin_ia32_psubw256_mask", IX86_BUILTIN_PSUBW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv8si3_mask, "__builtin_ia32_psubd256_mask", IX86_BUILTIN_PSUBD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_subv4di3_mask, "__builtin_ia32_psubq256_mask", IX86_BUILTIN_PSUBQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv32qi3_mask, "__builtin_ia32_psubsb256_mask", IX86_BUILTIN_PSUBSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_sssubv16hi3_mask, "__builtin_ia32_psubsw256_mask", IX86_BUILTIN_PSUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv32qi3_mask, "__builtin_ia32_psubusb256_mask", IX86_BUILTIN_PSUBUSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ussubv16hi3_mask, "__builtin_ia32_psubusw256_mask", IX86_BUILTIN_PSUBUSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_f64x2_mask, "__builtin_ia32_shuf_f64x2_256_mask", IX86_BUILTIN_SHUF_F64x2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_shuf_i64x2_mask, "__builtin_ia32_shuf_i64x2_256_mask", IX86_BUILTIN_SHUF_I64x2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_i32x4_mask, "__builtin_ia32_shuf_i32x4_256_mask", IX86_BUILTIN_SHUF_I32x4_256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_shuf_f32x4_mask, "__builtin_ia32_shuf_f32x4_256_mask", IX86_BUILTIN_SHUF_F32x4_256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
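  /* Masked truncating down-conversions, with signed and unsigned saturation.  */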
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovwb128_mask", IX86_BUILTIN_PMOVWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovwb256_mask", IX86_BUILTIN_PMOVWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovswb128_mask", IX86_BUILTIN_PMOVSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovswb256_mask", IX86_BUILTIN_PMOVSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8hiv8qi2_mask, "__builtin_ia32_pmovuswb128_mask", IX86_BUILTIN_PMOVUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev16hiv16qi2_mask, "__builtin_ia32_pmovuswb256_mask", IX86_BUILTIN_PMOVUSWB256, UNKNOWN, (int) V16QI_FTYPE_V16HI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4qi2_mask, "__builtin_ia32_pmovdb128_mask", IX86_BUILTIN_PMOVDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8qi2_mask, "__builtin_ia32_pmovdb256_mask", IX86_BUILTIN_PMOVDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4qi2_mask, "__builtin_ia32_pmovsdb128_mask", IX86_BUILTIN_PMOVSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8qi2_mask, "__builtin_ia32_pmovsdb256_mask", IX86_BUILTIN_PMOVSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4qi2_mask, "__builtin_ia32_pmovusdb128_mask", IX86_BUILTIN_PMOVUSDB128, UNKNOWN, (int) V16QI_FTYPE_V4SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8qi2_mask, "__builtin_ia32_pmovusdb256_mask", IX86_BUILTIN_PMOVUSDB256, UNKNOWN, (int) V16QI_FTYPE_V8SI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4siv4hi2_mask, "__builtin_ia32_pmovdw128_mask", IX86_BUILTIN_PMOVDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev8siv8hi2_mask, "__builtin_ia32_pmovdw256_mask", IX86_BUILTIN_PMOVDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4siv4hi2_mask, "__builtin_ia32_pmovsdw128_mask", IX86_BUILTIN_PMOVSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev8siv8hi2_mask, "__builtin_ia32_pmovsdw256_mask", IX86_BUILTIN_PMOVSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4siv4hi2_mask, "__builtin_ia32_pmovusdw128_mask", IX86_BUILTIN_PMOVUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev8siv8hi2_mask, "__builtin_ia32_pmovusdw256_mask", IX86_BUILTIN_PMOVUSDW256, UNKNOWN, (int) V8HI_FTYPE_V8SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2qi2_mask, "__builtin_ia32_pmovqb128_mask", IX86_BUILTIN_PMOVQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4qi2_mask, "__builtin_ia32_pmovqb256_mask", IX86_BUILTIN_PMOVQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2qi2_mask, "__builtin_ia32_pmovsqb128_mask", IX86_BUILTIN_PMOVSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4qi2_mask, "__builtin_ia32_pmovsqb256_mask", IX86_BUILTIN_PMOVSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2qi2_mask, "__builtin_ia32_pmovusqb128_mask", IX86_BUILTIN_PMOVUSQB128, UNKNOWN, (int) V16QI_FTYPE_V2DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4qi2_mask, "__builtin_ia32_pmovusqb256_mask", IX86_BUILTIN_PMOVUSQB256, UNKNOWN, (int) V16QI_FTYPE_V4DI_V16QI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2hi2_mask, "__builtin_ia32_pmovqw128_mask", IX86_BUILTIN_PMOVQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4hi2_mask, "__builtin_ia32_pmovqw256_mask", IX86_BUILTIN_PMOVQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2hi2_mask, "__builtin_ia32_pmovsqw128_mask", IX86_BUILTIN_PMOVSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4hi2_mask, "__builtin_ia32_pmovsqw256_mask", IX86_BUILTIN_PMOVSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2hi2_mask, "__builtin_ia32_pmovusqw128_mask", IX86_BUILTIN_PMOVUSQW128, UNKNOWN, (int) V8HI_FTYPE_V2DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4hi2_mask, "__builtin_ia32_pmovusqw256_mask", IX86_BUILTIN_PMOVUSQW256, UNKNOWN, (int) V8HI_FTYPE_V4DI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev2div2si2_mask, "__builtin_ia32_pmovqd128_mask", IX86_BUILTIN_PMOVQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_truncatev4div4si2_mask, "__builtin_ia32_pmovqd256_mask", IX86_BUILTIN_PMOVQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev2div2si2_mask, "__builtin_ia32_pmovsqd128_mask", IX86_BUILTIN_PMOVSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ss_truncatev4div4si2_mask, "__builtin_ia32_pmovsqd256_mask", IX86_BUILTIN_PMOVSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev2div2si2_mask, "__builtin_ia32_pmovusqd128_mask", IX86_BUILTIN_PMOVUSQD128, UNKNOWN, (int) V4SI_FTYPE_V2DI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_us_truncatev4div4si2_mask, "__builtin_ia32_pmovusqd256_mask", IX86_BUILTIN_PMOVUSQD256, UNKNOWN, (int) V4SI_FTYPE_V4DI_V4SI_QI },
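  /* Masked range, getexp, fixupimm and absolute-value builtins.  */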
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4df_mask, "__builtin_ia32_rangepd256_mask", IX86_BUILTIN_RANGEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv2df_mask, "__builtin_ia32_rangepd128_mask", IX86_BUILTIN_RANGEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv8sf_mask, "__builtin_ia32_rangeps256_mask", IX86_BUILTIN_RANGEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_rangepv4sf_mask, "__builtin_ia32_rangeps128_mask", IX86_BUILTIN_RANGEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv8sf_mask, "__builtin_ia32_getexpps256_mask", IX86_BUILTIN_GETEXPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4df_mask, "__builtin_ia32_getexppd256_mask", IX86_BUILTIN_GETEXPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv4sf_mask, "__builtin_ia32_getexpps128_mask", IX86_BUILTIN_GETEXPPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getexpv2df_mask, "__builtin_ia32_getexppd128_mask", IX86_BUILTIN_GETEXPPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_mask, "__builtin_ia32_fixupimmpd256_mask", IX86_BUILTIN_FIXUPIMMPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4df_maskz, "__builtin_ia32_fixupimmpd256_maskz", IX86_BUILTIN_FIXUPIMMPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_mask, "__builtin_ia32_fixupimmps256_mask", IX86_BUILTIN_FIXUPIMMPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv8sf_maskz, "__builtin_ia32_fixupimmps256_maskz", IX86_BUILTIN_FIXUPIMMPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_mask, "__builtin_ia32_fixupimmpd128_mask", IX86_BUILTIN_FIXUPIMMPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv2df_maskz, "__builtin_ia32_fixupimmpd128_maskz", IX86_BUILTIN_FIXUPIMMPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_mask, "__builtin_ia32_fixupimmps128_mask", IX86_BUILTIN_FIXUPIMMPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fixupimmv4sf_maskz, "__builtin_ia32_fixupimmps128_maskz", IX86_BUILTIN_FIXUPIMMPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4di2_mask, "__builtin_ia32_pabsq256_mask", IX86_BUILTIN_PABSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv2di2_mask, "__builtin_ia32_pabsq128_mask", IX86_BUILTIN_PABSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8si2_mask, "__builtin_ia32_pabsd256_mask", IX86_BUILTIN_PABSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv4si2_mask, "__builtin_ia32_pabsd128_mask", IX86_BUILTIN_PABSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
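  /* Masked packed integer multiplies and packed FP AND/ANDN logic.  */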
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pmulhrswv16hi3_mask, "__builtin_ia32_pmulhrsw256_mask", IX86_BUILTIN_PMULHRSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pmulhrswv8hi3_mask, "__builtin_ia32_pmulhrsw128_mask", IX86_BUILTIN_PMULHRSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv8hi3_highpart_mask, "__builtin_ia32_pmulhuw128_mask", IX86_BUILTIN_PMULHUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umulv16hi3_highpart_mask, "__builtin_ia32_pmulhuw256_mask", IX86_BUILTIN_PMULHUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv16hi3_highpart_mask, "__builtin_ia32_pmulhw256_mask", IX86_BUILTIN_PMULHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smulv8hi3_highpart_mask, "__builtin_ia32_pmulhw128_mask", IX86_BUILTIN_PMULHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv16hi3_mask, "__builtin_ia32_pmullw256_mask", IX86_BUILTIN_PMULLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8hi3_mask, "__builtin_ia32_pmullw128_mask", IX86_BUILTIN_PMULLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv4di3_mask, "__builtin_ia32_pmullq256_mask", IX86_BUILTIN_PMULLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_mulv2di3_mask, "__builtin_ia32_pmullq128_mask", IX86_BUILTIN_PMULLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4df3_mask, "__builtin_ia32_andpd256_mask", IX86_BUILTIN_ANDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2df3_mask, "__builtin_ia32_andpd128_mask", IX86_BUILTIN_ANDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8sf3_mask, "__builtin_ia32_andps256_mask", IX86_BUILTIN_ANDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4sf3_mask, "__builtin_ia32_andps128_mask", IX86_BUILTIN_ANDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv4df3_mask, "__builtin_ia32_andnpd256_mask", IX86_BUILTIN_ANDNPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2df3_mask, "__builtin_ia32_andnpd128_mask", IX86_BUILTIN_ANDNPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_andnotv8sf3_mask, "__builtin_ia32_andnps256_mask", IX86_BUILTIN_ANDNPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_andnotv4sf3_mask, "__builtin_ia32_andnps128_mask", IX86_BUILTIN_ANDNPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
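  /* Masked shifts and bitwise integer logic.  */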
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllwi128_mask", IX86_BUILTIN_PSLLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslldi128_mask", IX86_BUILTIN_PSLLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllqi128_mask", IX86_BUILTIN_PSLLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8hi3_mask, "__builtin_ia32_psllw128_mask", IX86_BUILTIN_PSLLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4si3_mask, "__builtin_ia32_pslld128_mask", IX86_BUILTIN_PSLLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv2di3_mask, "__builtin_ia32_psllq128_mask", IX86_BUILTIN_PSLLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllwi256_mask", IX86_BUILTIN_PSLLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv16hi3_mask, "__builtin_ia32_psllw256_mask", IX86_BUILTIN_PSLLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslldi256_mask", IX86_BUILTIN_PSLLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv8si3_mask, "__builtin_ia32_pslld256_mask", IX86_BUILTIN_PSLLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllqi256_mask", IX86_BUILTIN_PSLLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashlv4di3_mask, "__builtin_ia32_psllq256_mask", IX86_BUILTIN_PSLLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psradi128_mask", IX86_BUILTIN_PSRADI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4si3_mask, "__builtin_ia32_psrad128_mask", IX86_BUILTIN_PSRAD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psradi256_mask", IX86_BUILTIN_PSRADI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8si3_mask, "__builtin_ia32_psrad256_mask", IX86_BUILTIN_PSRAD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraqi128_mask", IX86_BUILTIN_PSRAQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv2di3_mask, "__builtin_ia32_psraq128_mask", IX86_BUILTIN_PSRAQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraqi256_mask", IX86_BUILTIN_PSRAQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv4di3_mask, "__builtin_ia32_psraq256_mask", IX86_BUILTIN_PSRAQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv8si3_mask, "__builtin_ia32_pandd256_mask", IX86_BUILTIN_PANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4si3_mask, "__builtin_ia32_pandd128_mask", IX86_BUILTIN_PANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrldi128_mask", IX86_BUILTIN_PSRLDI128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4si3_mask, "__builtin_ia32_psrld128_mask", IX86_BUILTIN_PSRLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrldi256_mask", IX86_BUILTIN_PSRLDI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8si3_mask, "__builtin_ia32_psrld256_mask", IX86_BUILTIN_PSRLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlqi128_mask", IX86_BUILTIN_PSRLQI128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv2di3_mask, "__builtin_ia32_psrlq128_mask", IX86_BUILTIN_PSRLQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlqi256_mask", IX86_BUILTIN_PSRLQI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv4di3_mask, "__builtin_ia32_psrlq256_mask", IX86_BUILTIN_PSRLQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv4di3_mask, "__builtin_ia32_pandq256_mask", IX86_BUILTIN_PANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_andv2di3_mask, "__builtin_ia32_pandq128_mask", IX86_BUILTIN_PANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv8si3_mask, "__builtin_ia32_pandnd256_mask", IX86_BUILTIN_PANDND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv4si3_mask, "__builtin_ia32_pandnd128_mask", IX86_BUILTIN_PANDND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_andnotv4di3_mask, "__builtin_ia32_pandnq256_mask", IX86_BUILTIN_PANDNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_andnotv2di3_mask, "__builtin_ia32_pandnq128_mask", IX86_BUILTIN_PANDNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv8si3_mask, "__builtin_ia32_pord256_mask", IX86_BUILTIN_PORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4si3_mask, "__builtin_ia32_pord128_mask", IX86_BUILTIN_PORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv4di3_mask, "__builtin_ia32_porq256_mask", IX86_BUILTIN_PORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_iorv2di3_mask, "__builtin_ia32_porq128_mask", IX86_BUILTIN_PORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv8si3_mask, "__builtin_ia32_pxord256_mask", IX86_BUILTIN_PXORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4si3_mask, "__builtin_ia32_pxord128_mask", IX86_BUILTIN_PXORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv4di3_mask, "__builtin_ia32_pxorq256_mask", IX86_BUILTIN_PXORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_xorv2di3_mask, "__builtin_ia32_pxorq128_mask", IX86_BUILTIN_PXORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
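  /* Masked pack, round-scale, ternary-logic and scalef builtins.  */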
32666  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packsswb_mask, "__builtin_ia32_packsswb256_mask",  IX86_BUILTIN_PACKSSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32667  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packsswb_mask, "__builtin_ia32_packsswb128_mask",  IX86_BUILTIN_PACKSSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32668  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packuswb_mask, "__builtin_ia32_packuswb256_mask",  IX86_BUILTIN_PACKUSWB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V16HI_V16HI_V32QI_SI },
32669  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packuswb_mask, "__builtin_ia32_packuswb128_mask",  IX86_BUILTIN_PACKUSWB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI_V16QI_HI },
32670  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev8sf_mask, "__builtin_ia32_rndscaleps_256_mask", IX86_BUILTIN_RNDSCALEPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
32671  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4df_mask, "__builtin_ia32_rndscalepd_256_mask", IX86_BUILTIN_RNDSCALEPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
32672  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev4sf_mask, "__builtin_ia32_rndscaleps_128_mask", IX86_BUILTIN_RNDSCALEPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
32673  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rndscalev2df_mask, "__builtin_ia32_rndscalepd_128_mask", IX86_BUILTIN_RNDSCALEPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
32674  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_mask, "__builtin_ia32_pternlogq256_mask", IX86_BUILTIN_VTERNLOGQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32675  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4di_maskz, "__builtin_ia32_pternlogq256_maskz", IX86_BUILTIN_VTERNLOGQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI },
32676  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_mask, "__builtin_ia32_pternlogd256_mask", IX86_BUILTIN_VTERNLOGD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32677  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv8si_maskz, "__builtin_ia32_pternlogd256_maskz", IX86_BUILTIN_VTERNLOGD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI },
32678  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_mask, "__builtin_ia32_pternlogq128_mask", IX86_BUILTIN_VTERNLOGQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32679  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv2di_maskz, "__builtin_ia32_pternlogq128_maskz", IX86_BUILTIN_VTERNLOGQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI },
32680  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_mask, "__builtin_ia32_pternlogd128_mask", IX86_BUILTIN_VTERNLOGD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32681  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vternlogv4si_maskz, "__builtin_ia32_pternlogd128_maskz", IX86_BUILTIN_VTERNLOGD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI },
32682  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4df_mask, "__builtin_ia32_scalefpd256_mask", IX86_BUILTIN_SCALEFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
32683  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv8sf_mask, "__builtin_ia32_scalefps256_mask", IX86_BUILTIN_SCALEFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
32684  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv2df_mask, "__builtin_ia32_scalefpd128_mask", IX86_BUILTIN_SCALEFPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
32685  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_scalefv4sf_mask, "__builtin_ia32_scalefps128_mask", IX86_BUILTIN_SCALEFPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
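  /* Masked FMA variants (fmadd/fmsub/fnmadd/fnmsub/fmaddsub/fmsubadd).  */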
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask, "__builtin_ia32_vfmaddpd256_mask", IX86_BUILTIN_VFMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_mask3, "__builtin_ia32_vfmaddpd256_mask3", IX86_BUILTIN_VFMADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4df_maskz, "__builtin_ia32_vfmaddpd256_maskz", IX86_BUILTIN_VFMADDPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask, "__builtin_ia32_vfmaddpd128_mask", IX86_BUILTIN_VFMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_mask3, "__builtin_ia32_vfmaddpd128_mask3", IX86_BUILTIN_VFMADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v2df_maskz, "__builtin_ia32_vfmaddpd128_maskz", IX86_BUILTIN_VFMADDPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask, "__builtin_ia32_vfmaddps256_mask", IX86_BUILTIN_VFMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_mask3, "__builtin_ia32_vfmaddps256_mask3", IX86_BUILTIN_VFMADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v8sf_maskz, "__builtin_ia32_vfmaddps256_maskz", IX86_BUILTIN_VFMADDPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask, "__builtin_ia32_vfmaddps128_mask", IX86_BUILTIN_VFMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_mask3, "__builtin_ia32_vfmaddps128_mask3", IX86_BUILTIN_VFMADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmadd_v4sf_maskz, "__builtin_ia32_vfmaddps128_maskz", IX86_BUILTIN_VFMADDPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4df_mask3, "__builtin_ia32_vfmsubpd256_mask3", IX86_BUILTIN_VFMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v2df_mask3, "__builtin_ia32_vfmsubpd128_mask3", IX86_BUILTIN_VFMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v8sf_mask3, "__builtin_ia32_vfmsubps256_mask3", IX86_BUILTIN_VFMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsub_v4sf_mask3, "__builtin_ia32_vfmsubps128_mask3", IX86_BUILTIN_VFMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4df_mask, "__builtin_ia32_vfnmaddpd256_mask", IX86_BUILTIN_VFNMADDPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v2df_mask, "__builtin_ia32_vfnmaddpd128_mask", IX86_BUILTIN_VFNMADDPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v8sf_mask, "__builtin_ia32_vfnmaddps256_mask", IX86_BUILTIN_VFNMADDPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmadd_v4sf_mask, "__builtin_ia32_vfnmaddps128_mask", IX86_BUILTIN_VFNMADDPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask, "__builtin_ia32_vfnmsubpd256_mask", IX86_BUILTIN_VFNMSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4df_mask3, "__builtin_ia32_vfnmsubpd256_mask3", IX86_BUILTIN_VFNMSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask, "__builtin_ia32_vfnmsubpd128_mask", IX86_BUILTIN_VFNMSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v2df_mask3, "__builtin_ia32_vfnmsubpd128_mask3", IX86_BUILTIN_VFNMSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask, "__builtin_ia32_vfnmsubps256_mask", IX86_BUILTIN_VFNMSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v8sf_mask3, "__builtin_ia32_vfnmsubps256_mask3", IX86_BUILTIN_VFNMSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask, "__builtin_ia32_vfnmsubps128_mask", IX86_BUILTIN_VFNMSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fnmsub_v4sf_mask3, "__builtin_ia32_vfnmsubps128_mask3", IX86_BUILTIN_VFNMSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask, "__builtin_ia32_vfmaddsubpd256_mask", IX86_BUILTIN_VFMADDSUBPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_mask3, "__builtin_ia32_vfmaddsubpd256_mask3", IX86_BUILTIN_VFMADDSUBPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4df_maskz, "__builtin_ia32_vfmaddsubpd256_maskz", IX86_BUILTIN_VFMADDSUBPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask, "__builtin_ia32_vfmaddsubpd128_mask", IX86_BUILTIN_VFMADDSUBPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_mask3, "__builtin_ia32_vfmaddsubpd128_mask3", IX86_BUILTIN_VFMADDSUBPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v2df_maskz, "__builtin_ia32_vfmaddsubpd128_maskz", IX86_BUILTIN_VFMADDSUBPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask, "__builtin_ia32_vfmaddsubps256_mask", IX86_BUILTIN_VFMADDSUBPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_mask3, "__builtin_ia32_vfmaddsubps256_mask3", IX86_BUILTIN_VFMADDSUBPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v8sf_maskz, "__builtin_ia32_vfmaddsubps256_maskz", IX86_BUILTIN_VFMADDSUBPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask, "__builtin_ia32_vfmaddsubps128_mask", IX86_BUILTIN_VFMADDSUBPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_mask3, "__builtin_ia32_vfmaddsubps128_mask3", IX86_BUILTIN_VFMADDSUBPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmaddsub_v4sf_maskz, "__builtin_ia32_vfmaddsubps128_maskz", IX86_BUILTIN_VFMADDSUBPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4df_mask3, "__builtin_ia32_vfmsubaddpd256_mask3", IX86_BUILTIN_VFMSUBADDPD256_MASK3, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v2df_mask3, "__builtin_ia32_vfmsubaddpd128_mask3", IX86_BUILTIN_VFMSUBADDPD128_MASK3, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v8sf_mask3, "__builtin_ia32_vfmsubaddps256_mask3", IX86_BUILTIN_VFMSUBADDPS256_MASK3, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_fmsubadd_v4sf_mask3, "__builtin_ia32_vfmsubaddps128_mask3", IX86_BUILTIN_VFMSUBADDPS128_MASK3, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4df, "__builtin_ia32_insertf64x2_256_mask", IX86_BUILTIN_INSERTF64X2_256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vinsertv4di, "__builtin_ia32_inserti64x2_256_mask", IX86_BUILTIN_INSERTI64X2_256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv16hi_mask, "__builtin_ia32_psrav16hi_mask", IX86_BUILTIN_PSRAVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashrvv8hi_mask, "__builtin_ia32_psrav8hi_mask", IX86_BUILTIN_PSRAVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v16hi_mask, "__builtin_ia32_pmaddubsw256_mask", IX86_BUILTIN_PMADDUBSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddubsw512v8hi_mask, "__builtin_ia32_pmaddubsw128_mask", IX86_BUILTIN_PMADDUBSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v16hi_mask, "__builtin_ia32_pmaddwd256_mask", IX86_BUILTIN_PMADDWD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V16HI_V16HI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512bw_pmaddwd512v8hi_mask, "__builtin_ia32_pmaddwd128_mask", IX86_BUILTIN_PMADDWD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv16hi_mask, "__builtin_ia32_psrlv16hi_mask", IX86_BUILTIN_PSRLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_lshrvv8hi_mask, "__builtin_ia32_psrlv8hi_mask", IX86_BUILTIN_PSRLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_fix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2dq256_mask", IX86_BUILTIN_CVTPS2DQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_fix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2dq128_mask", IX86_BUILTIN_CVTPS2DQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv8sfv8si_mask, "__builtin_ia32_cvtps2udq256_mask", IX86_BUILTIN_CVTPS2UDQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ufix_notruncv4sfv4si_mask, "__builtin_ia32_cvtps2udq128_mask", IX86_BUILTIN_CVTPS2UDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SF_V4SI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv4di_mask, "__builtin_ia32_cvtps2qq256_mask", IX86_BUILTIN_CVTPS2QQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2qqv2di_mask, "__builtin_ia32_cvtps2qq128_mask", IX86_BUILTIN_CVTPS2QQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv4di_mask, "__builtin_ia32_cvtps2uqq256_mask", IX86_BUILTIN_CVTPS2UQQ256, UNKNOWN, (int) V4DI_FTYPE_V4SF_V4DI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_cvtps2uqqv2di_mask, "__builtin_ia32_cvtps2uqq128_mask", IX86_BUILTIN_CVTPS2UQQ128, UNKNOWN, (int) V2DI_FTYPE_V4SF_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv8sf_mask, "__builtin_ia32_getmantps256_mask", IX86_BUILTIN_GETMANTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4sf_mask, "__builtin_ia32_getmantps128_mask", IX86_BUILTIN_GETMANTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv4df_mask, "__builtin_ia32_getmantpd256_mask", IX86_BUILTIN_GETMANTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_getmantv2df_mask, "__builtin_ia32_getmantpd128_mask", IX86_BUILTIN_GETMANTPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movddup256_mask, "__builtin_ia32_movddup256_mask", IX86_BUILTIN_MOVDDUP256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_dupv2df_mask, "__builtin_ia32_movddup128_mask", IX86_BUILTIN_MOVDDUP128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movshdup256_mask, "__builtin_ia32_movshdup256_mask", IX86_BUILTIN_MOVSHDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movshdup_mask, "__builtin_ia32_movshdup128_mask", IX86_BUILTIN_MOVSHDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_movsldup256_mask, "__builtin_ia32_movsldup256_mask", IX86_BUILTIN_MOVSLDUP256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse3_movsldup_mask, "__builtin_ia32_movsldup128_mask", IX86_BUILTIN_MOVSLDUP128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4sf2_mask, "__builtin_ia32_cvtqq2ps256_mask", IX86_BUILTIN_CVTQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2sf2_mask, "__builtin_ia32_cvtqq2ps128_mask", IX86_BUILTIN_CVTQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4sf2_mask, "__builtin_ia32_cvtuqq2ps256_mask", IX86_BUILTIN_CVTUQQ2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2sf2_mask, "__builtin_ia32_cvtuqq2ps128_mask", IX86_BUILTIN_CVTUQQ2PS128, UNKNOWN, (int) V4SF_FTYPE_V2DI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv4div4df2_mask, "__builtin_ia32_cvtqq2pd256_mask", IX86_BUILTIN_CVTQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_floatv2div2df2_mask, "__builtin_ia32_cvtqq2pd128_mask", IX86_BUILTIN_CVTQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv4div4df2_mask, "__builtin_ia32_cvtuqq2pd256_mask", IX86_BUILTIN_CVTUQQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ufloatv2div2df2_mask, "__builtin_ia32_cvtuqq2pd128_mask", IX86_BUILTIN_CVTUQQ2PD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_QI },
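  /* Two-source variable permutes (vpermt2var/vpermi2var).  */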
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_mask, "__builtin_ia32_vpermt2varq256_mask", IX86_BUILTIN_VPERMT2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4di3_maskz, "__builtin_ia32_vpermt2varq256_maskz", IX86_BUILTIN_VPERMT2VARQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_mask, "__builtin_ia32_vpermt2vard256_mask", IX86_BUILTIN_VPERMT2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8si3_maskz, "__builtin_ia32_vpermt2vard256_maskz", IX86_BUILTIN_VPERMT2VARD256_MASKZ, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4di3_mask, "__builtin_ia32_vpermi2varq256_mask", IX86_BUILTIN_VPERMI2VARQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8si3_mask, "__builtin_ia32_vpermi2vard256_mask", IX86_BUILTIN_VPERMI2VARD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_mask, "__builtin_ia32_vpermt2varpd256_mask", IX86_BUILTIN_VPERMT2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4df3_maskz, "__builtin_ia32_vpermt2varpd256_maskz", IX86_BUILTIN_VPERMT2VARPD256_MASKZ, UNKNOWN, (int) V4DF_FTYPE_V4DI_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_mask, "__builtin_ia32_vpermt2varps256_mask", IX86_BUILTIN_VPERMT2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv8sf3_maskz, "__builtin_ia32_vpermt2varps256_maskz", IX86_BUILTIN_VPERMT2VARPS256_MASKZ, UNKNOWN, (int) V8SF_FTYPE_V8SI_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4df3_mask, "__builtin_ia32_vpermi2varpd256_mask", IX86_BUILTIN_VPERMI2VARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv8sf3_mask, "__builtin_ia32_vpermi2varps256_mask", IX86_BUILTIN_VPERMI2VARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_mask, "__builtin_ia32_vpermt2varq128_mask", IX86_BUILTIN_VPERMT2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2di3_maskz, "__builtin_ia32_vpermt2varq128_maskz", IX86_BUILTIN_VPERMT2VARQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_mask, "__builtin_ia32_vpermt2vard128_mask", IX86_BUILTIN_VPERMT2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4si3_maskz, "__builtin_ia32_vpermt2vard128_maskz", IX86_BUILTIN_VPERMT2VARD128_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2di3_mask, "__builtin_ia32_vpermi2varq128_mask", IX86_BUILTIN_VPERMI2VARQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4si3_mask, "__builtin_ia32_vpermi2vard128_mask", IX86_BUILTIN_VPERMI2VARD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_mask, "__builtin_ia32_vpermt2varpd128_mask", IX86_BUILTIN_VPERMT2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv2df3_maskz, "__builtin_ia32_vpermt2varpd128_maskz", IX86_BUILTIN_VPERMT2VARPD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DI_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_mask, "__builtin_ia32_vpermt2varps128_mask", IX86_BUILTIN_VPERMT2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv4sf3_maskz, "__builtin_ia32_vpermt2varps128_maskz", IX86_BUILTIN_VPERMT2VARPS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SI_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv2df3_mask, "__builtin_ia32_vpermi2varpd128_mask", IX86_BUILTIN_VPERMI2VARPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv4sf3_mask, "__builtin_ia32_vpermi2varps128_mask", IX86_BUILTIN_VPERMI2VARPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_pshufbv32qi3_mask, "__builtin_ia32_pshufb256_mask", IX86_BUILTIN_PSHUFB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ssse3_pshufbv16qi3_mask, "__builtin_ia32_pshufb128_mask", IX86_BUILTIN_PSHUFB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhwv3_mask, "__builtin_ia32_pshufhw256_mask", IX86_BUILTIN_PSHUFHW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufhw_mask, "__builtin_ia32_pshufhw128_mask", IX86_BUILTIN_PSHUFHW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflwv3_mask, "__builtin_ia32_pshuflw256_mask", IX86_BUILTIN_PSHUFLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshuflw_mask, "__builtin_ia32_pshuflw128_mask", IX86_BUILTIN_PSHUFLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufdv3_mask, "__builtin_ia32_pshufd256_mask", IX86_BUILTIN_PSHUFD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_pshufd_mask, "__builtin_ia32_pshufd128_mask", IX86_BUILTIN_PSHUFD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufpd256_mask, "__builtin_ia32_shufpd256_mask", IX86_BUILTIN_SHUFPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_shufpd_mask, "__builtin_ia32_shufpd128_mask", IX86_BUILTIN_SHUFPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_shufps256_mask, "__builtin_ia32_shufps256_mask", IX86_BUILTIN_SHUFPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse_shufps_mask, "__builtin_ia32_shufps128_mask", IX86_BUILTIN_SHUFPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4di_mask, "__builtin_ia32_prolvq256_mask", IX86_BUILTIN_PROLVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv2di_mask, "__builtin_ia32_prolvq128_mask", IX86_BUILTIN_PROLVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4di_mask, "__builtin_ia32_prolq256_mask", IX86_BUILTIN_PROLQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv2di_mask, "__builtin_ia32_prolq128_mask", IX86_BUILTIN_PROLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4di_mask, "__builtin_ia32_prorvq256_mask", IX86_BUILTIN_PRORVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv2di_mask, "__builtin_ia32_prorvq128_mask", IX86_BUILTIN_PRORVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4di_mask, "__builtin_ia32_prorq256_mask", IX86_BUILTIN_PRORQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv2di_mask, "__builtin_ia32_prorq128_mask", IX86_BUILTIN_PRORQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv2di_mask, "__builtin_ia32_psravq128_mask", IX86_BUILTIN_PSRAVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4di_mask, "__builtin_ia32_psravq256_mask", IX86_BUILTIN_PSRAVQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4di_mask, "__builtin_ia32_psllv4di_mask", IX86_BUILTIN_PSLLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv2di_mask, "__builtin_ia32_psllv2di_mask", IX86_BUILTIN_PSLLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv8si_mask, "__builtin_ia32_psllv8si_mask", IX86_BUILTIN_PSLLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashlvv4si_mask, "__builtin_ia32_psllv4si_mask", IX86_BUILTIN_PSLLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv8si_mask, "__builtin_ia32_psrav8si_mask", IX86_BUILTIN_PSRAVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_ashrvv4si_mask, "__builtin_ia32_psrav4si_mask", IX86_BUILTIN_PSRAVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4di_mask, "__builtin_ia32_psrlv4di_mask", IX86_BUILTIN_PSRLVV4DI_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv2di_mask, "__builtin_ia32_psrlv2di_mask", IX86_BUILTIN_PSRLVV2DI_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv8si_mask, "__builtin_ia32_psrlv8si_mask", IX86_BUILTIN_PSRLVV8SI_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_lshrvv4si_mask, "__builtin_ia32_psrlv4si_mask", IX86_BUILTIN_PSRLVV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psrawi256_mask", IX86_BUILTIN_PSRAWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv16hi3_mask, "__builtin_ia32_psraw256_mask", IX86_BUILTIN_PSRAW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psrawi128_mask", IX86_BUILTIN_PSRAWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_ashrv8hi3_mask, "__builtin_ia32_psraw128_mask", IX86_BUILTIN_PSRAW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlwi256_mask", IX86_BUILTIN_PSRLWI256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_INT_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv16hi3_mask, "__builtin_ia32_psrlw256_mask", IX86_BUILTIN_PSRLW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V8HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlwi128_mask", IX86_BUILTIN_PSRLWI128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_lshrv8hi3_mask, "__builtin_ia32_psrlw128_mask", IX86_BUILTIN_PSRLW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv8si_mask, "__builtin_ia32_prorvd256_mask", IX86_BUILTIN_PRORVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv8si_mask, "__builtin_ia32_prolvd256_mask", IX86_BUILTIN_PROLVD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv8si_mask, "__builtin_ia32_prord256_mask", IX86_BUILTIN_PRORD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv8si_mask, "__builtin_ia32_prold256_mask", IX86_BUILTIN_PROLD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorvv4si_mask, "__builtin_ia32_prorvd128_mask", IX86_BUILTIN_PRORVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolvv4si_mask, "__builtin_ia32_prolvd128_mask", IX86_BUILTIN_PROLVD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rorv4si_mask, "__builtin_ia32_prord128_mask", IX86_BUILTIN_PRORD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_rolv4si_mask, "__builtin_ia32_prold128_mask", IX86_BUILTIN_PROLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4df_mask, "__builtin_ia32_fpclasspd256_mask", IX86_BUILTIN_FPCLASSPD256, UNKNOWN, (int) QI_FTYPE_V4DF_INT_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv2df_mask, "__builtin_ia32_fpclasspd128_mask", IX86_BUILTIN_FPCLASSPD128, UNKNOWN, (int) QI_FTYPE_V2DF_INT_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv2df, "__builtin_ia32_fpclasssd", IX86_BUILTIN_FPCLASSSD, UNKNOWN, (int) QI_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv8sf_mask, "__builtin_ia32_fpclassps256_mask", IX86_BUILTIN_FPCLASSPS256, UNKNOWN, (int) QI_FTYPE_V8SF_INT_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512dq_fpclassv4sf_mask, "__builtin_ia32_fpclassps128_mask", IX86_BUILTIN_FPCLASSPS128, UNKNOWN, (int) QI_FTYPE_V4SF_INT_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vmfpclassv4sf, "__builtin_ia32_fpclassss", IX86_BUILTIN_FPCLASSSS, UNKNOWN, (int) QI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv16qi, "__builtin_ia32_cvtb2mask128", IX86_BUILTIN_CVTB2MASK128, UNKNOWN, (int) HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtb2maskv32qi, "__builtin_ia32_cvtb2mask256", IX86_BUILTIN_CVTB2MASK256, UNKNOWN, (int) SI_FTYPE_V32QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv8hi, "__builtin_ia32_cvtw2mask128", IX86_BUILTIN_CVTW2MASK128, UNKNOWN, (int) QI_FTYPE_V8HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtw2maskv16hi, "__builtin_ia32_cvtw2mask256", IX86_BUILTIN_CVTW2MASK256, UNKNOWN, (int) HI_FTYPE_V16HI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv4si, "__builtin_ia32_cvtd2mask128", IX86_BUILTIN_CVTD2MASK128, UNKNOWN, (int) QI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtd2maskv8si, "__builtin_ia32_cvtd2mask256", IX86_BUILTIN_CVTD2MASK256, UNKNOWN, (int) QI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv2di, "__builtin_ia32_cvtq2mask128", IX86_BUILTIN_CVTQ2MASK128, UNKNOWN, (int) QI_FTYPE_V2DI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtq2maskv4di, "__builtin_ia32_cvtq2mask256", IX86_BUILTIN_CVTQ2MASK256, UNKNOWN, (int) QI_FTYPE_V4DI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv16qi, "__builtin_ia32_cvtmask2b128", IX86_BUILTIN_CVTMASK2B128, UNKNOWN, (int) V16QI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2bv32qi, "__builtin_ia32_cvtmask2b256", IX86_BUILTIN_CVTMASK2B256, UNKNOWN, (int) V32QI_FTYPE_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv8hi, "__builtin_ia32_cvtmask2w128", IX86_BUILTIN_CVTMASK2W128, UNKNOWN, (int) V8HI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2wv16hi, "__builtin_ia32_cvtmask2w256", IX86_BUILTIN_CVTMASK2W256, UNKNOWN, (int) V16HI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv4si, "__builtin_ia32_cvtmask2d128", IX86_BUILTIN_CVTMASK2D128, UNKNOWN, (int) V4SI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2dv8si, "__builtin_ia32_cvtmask2d256", IX86_BUILTIN_CVTMASK2D256, UNKNOWN, (int) V8SI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv2di, "__builtin_ia32_cvtmask2q128", IX86_BUILTIN_CVTMASK2Q128, UNKNOWN, (int) V2DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cvtmask2qv4di, "__builtin_ia32_cvtmask2q256", IX86_BUILTIN_CVTMASK2Q256, UNKNOWN, (int) V4DI_FTYPE_QI },
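  /* Vector comparisons and tests producing mask registers.  */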
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16qi3_mask, "__builtin_ia32_pcmpeqb128_mask", IX86_BUILTIN_PCMPEQB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv32qi3_mask, "__builtin_ia32_pcmpeqb256_mask", IX86_BUILTIN_PCMPEQB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8hi3_mask, "__builtin_ia32_pcmpeqw128_mask", IX86_BUILTIN_PCMPEQW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv16hi3_mask, "__builtin_ia32_pcmpeqw256_mask", IX86_BUILTIN_PCMPEQW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4si3_mask, "__builtin_ia32_pcmpeqd128_mask", IX86_BUILTIN_PCMPEQD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv8si3_mask, "__builtin_ia32_pcmpeqd256_mask", IX86_BUILTIN_PCMPEQD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv2di3_mask, "__builtin_ia32_pcmpeqq128_mask", IX86_BUILTIN_PCMPEQQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_eqv4di3_mask, "__builtin_ia32_pcmpeqq256_mask", IX86_BUILTIN_PCMPEQQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16qi3_mask, "__builtin_ia32_pcmpgtb128_mask", IX86_BUILTIN_PCMPGTB128_MASK, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv32qi3_mask, "__builtin_ia32_pcmpgtb256_mask", IX86_BUILTIN_PCMPGTB256_MASK, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8hi3_mask, "__builtin_ia32_pcmpgtw128_mask", IX86_BUILTIN_PCMPGTW128_MASK, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv16hi3_mask, "__builtin_ia32_pcmpgtw256_mask", IX86_BUILTIN_PCMPGTW256_MASK, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4si3_mask, "__builtin_ia32_pcmpgtd128_mask", IX86_BUILTIN_PCMPGTD128_MASK, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv8si3_mask, "__builtin_ia32_pcmpgtd256_mask", IX86_BUILTIN_PCMPGTD256_MASK, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv2di3_mask, "__builtin_ia32_pcmpgtq128_mask", IX86_BUILTIN_PCMPGTQ128_MASK, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_gtv4di3_mask, "__builtin_ia32_pcmpgtq256_mask", IX86_BUILTIN_PCMPGTQ256_MASK, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16qi3_mask, "__builtin_ia32_ptestmb128", IX86_BUILTIN_PTESTMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv32qi3_mask, "__builtin_ia32_ptestmb256", IX86_BUILTIN_PTESTMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8hi3_mask, "__builtin_ia32_ptestmw128", IX86_BUILTIN_PTESTMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv16hi3_mask, "__builtin_ia32_ptestmw256", IX86_BUILTIN_PTESTMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4si3_mask, "__builtin_ia32_ptestmd128", IX86_BUILTIN_PTESTMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv8si3_mask, "__builtin_ia32_ptestmd256", IX86_BUILTIN_PTESTMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv2di3_mask, "__builtin_ia32_ptestmq128", IX86_BUILTIN_PTESTMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testmv4di3_mask, "__builtin_ia32_ptestmq256", IX86_BUILTIN_PTESTMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16qi3_mask, "__builtin_ia32_ptestnmb128", IX86_BUILTIN_PTESTNMB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv32qi3_mask, "__builtin_ia32_ptestnmb256", IX86_BUILTIN_PTESTNMB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8hi3_mask, "__builtin_ia32_ptestnmw128", IX86_BUILTIN_PTESTNMW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv16hi3_mask, "__builtin_ia32_ptestnmw256", IX86_BUILTIN_PTESTNMW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4si3_mask, "__builtin_ia32_ptestnmd128", IX86_BUILTIN_PTESTNMD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv8si3_mask, "__builtin_ia32_ptestnmd256", IX86_BUILTIN_PTESTNMD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv2di3_mask, "__builtin_ia32_ptestnmq128", IX86_BUILTIN_PTESTNMQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_testnmv4di3_mask, "__builtin_ia32_ptestnmq256", IX86_BUILTIN_PTESTNMQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_QI },
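  /* Broadcast from mask register.  */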
  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv2di, "__builtin_ia32_broadcastmb128", IX86_BUILTIN_PBROADCASTMB128, UNKNOWN, (int) V2DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskb_vec_dupv4di, "__builtin_ia32_broadcastmb256", IX86_BUILTIN_PBROADCASTMB256, UNKNOWN, (int) V4DI_FTYPE_QI },
  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv4si, "__builtin_ia32_broadcastmw128", IX86_BUILTIN_PBROADCASTMW128, UNKNOWN, (int) V4SI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512CD, CODE_FOR_avx512cd_maskw_vec_dupv8si, "__builtin_ia32_broadcastmw256", IX86_BUILTIN_PBROADCASTMW256, UNKNOWN, (int) V8SI_FTYPE_HI },
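  /* Masked compress and expand.  */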
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4df_mask, "__builtin_ia32_compressdf256_mask", IX86_BUILTIN_COMPRESSPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2df_mask, "__builtin_ia32_compressdf128_mask", IX86_BUILTIN_COMPRESSPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8sf_mask, "__builtin_ia32_compresssf256_mask", IX86_BUILTIN_COMPRESSPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4sf_mask, "__builtin_ia32_compresssf128_mask", IX86_BUILTIN_COMPRESSPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4di_mask, "__builtin_ia32_compressdi256_mask", IX86_BUILTIN_PCOMPRESSQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4sf_maskz, "__builtin_ia32_expandsf128_maskz", IX86_BUILTIN_EXPANDPS128Z, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4di_maskz, "__builtin_ia32_expanddi256_maskz", IX86_BUILTIN_PEXPANDQ256Z, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv2di_maskz, "__builtin_ia32_expanddi128_maskz", IX86_BUILTIN_PEXPANDQ128Z, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv8si_maskz, "__builtin_ia32_expandsi256_maskz", IX86_BUILTIN_PEXPANDD256Z, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_expandv4si_maskz, "__builtin_ia32_expandsi128_maskz", IX86_BUILTIN_PEXPANDD128Z, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
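  /* Masked integer min/max.  */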
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8si3_mask, "__builtin_ia32_pmaxsd256_mask", IX86_BUILTIN_PMAXSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8si3_mask, "__builtin_ia32_pminsd256_mask", IX86_BUILTIN_PMINSD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8si3_mask, "__builtin_ia32_pmaxud256_mask", IX86_BUILTIN_PMAXUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8si3_mask, "__builtin_ia32_pminud256_mask", IX86_BUILTIN_PMINUD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4si3_mask, "__builtin_ia32_pmaxsd128_mask", IX86_BUILTIN_PMAXSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4si3_mask, "__builtin_ia32_pminsd128_mask", IX86_BUILTIN_PMINSD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4si3_mask, "__builtin_ia32_pmaxud128_mask", IX86_BUILTIN_PMAXUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4si3_mask, "__builtin_ia32_pminud128_mask", IX86_BUILTIN_PMINUD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv4di3_mask, "__builtin_ia32_pmaxsq256_mask", IX86_BUILTIN_PMAXSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv4di3_mask, "__builtin_ia32_pminsq256_mask", IX86_BUILTIN_PMINSQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv4di3_mask, "__builtin_ia32_pmaxuq256_mask", IX86_BUILTIN_PMAXUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv4di3_mask, "__builtin_ia32_pminuq256_mask", IX86_BUILTIN_PMINUQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv2di3_mask, "__builtin_ia32_pmaxsq128_mask", IX86_BUILTIN_PMAXSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv2di3_mask, "__builtin_ia32_pminsq128_mask", IX86_BUILTIN_PMINSQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv2di3_mask, "__builtin_ia32_pmaxuq128_mask", IX86_BUILTIN_PMAXUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv2di3_mask, "__builtin_ia32_pminuq128_mask", IX86_BUILTIN_PMINUQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv32qi3_mask, "__builtin_ia32_pminsb256_mask", IX86_BUILTIN_PMINSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv32qi3_mask, "__builtin_ia32_pminub256_mask", IX86_BUILTIN_PMINUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv32qi3_mask, "__builtin_ia32_pmaxsb256_mask", IX86_BUILTIN_PMAXSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv32qi3_mask, "__builtin_ia32_pmaxub256_mask", IX86_BUILTIN_PMAXUB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16qi3_mask, "__builtin_ia32_pminsb128_mask", IX86_BUILTIN_PMINSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16qi3_mask, "__builtin_ia32_pminub128_mask", IX86_BUILTIN_PMINUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16qi3_mask, "__builtin_ia32_pmaxsb128_mask", IX86_BUILTIN_PMAXSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16qi3_mask, "__builtin_ia32_pmaxub128_mask", IX86_BUILTIN_PMAXUB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv16hi3_mask, "__builtin_ia32_pminsw256_mask", IX86_BUILTIN_PMINSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv16hi3_mask, "__builtin_ia32_pminuw256_mask", IX86_BUILTIN_PMINUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv16hi3_mask, "__builtin_ia32_pmaxsw256_mask", IX86_BUILTIN_PMAXSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv16hi3_mask, "__builtin_ia32_pmaxuw256_mask", IX86_BUILTIN_PMAXUW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sminv8hi3_mask, "__builtin_ia32_pminsw128_mask", IX86_BUILTIN_PMINSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_uminv8hi3_mask, "__builtin_ia32_pminuw128_mask", IX86_BUILTIN_PMINUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_smaxv8hi3_mask, "__builtin_ia32_pmaxsw128_mask", IX86_BUILTIN_PMAXSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_umaxv8hi3_mask, "__builtin_ia32_pmaxuw128_mask", IX86_BUILTIN_PMAXUW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4di_mask, "__builtin_ia32_vpconflictdi_256_mask", IX86_BUILTIN_VPCONFLICTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv8si_mask, "__builtin_ia32_vpconflictsi_256_mask", IX86_BUILTIN_VPCONFLICTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4di2_mask, "__builtin_ia32_vplzcntq_256_mask", IX86_BUILTIN_VPCLZCNTQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv8si2_mask, "__builtin_ia32_vplzcntd_256_mask", IX86_BUILTIN_VPCLZCNTD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhpd256_mask, "__builtin_ia32_unpckhpd256_mask", IX86_BUILTIN_UNPCKHPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpckhpd128_mask, "__builtin_ia32_unpckhpd128_mask", IX86_BUILTIN_UNPCKHPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpckhps256_mask, "__builtin_ia32_unpckhps256_mask", IX86_BUILTIN_UNPCKHPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4sf_mask, "__builtin_ia32_unpckhps128_mask", IX86_BUILTIN_UNPCKHPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklpd256_mask, "__builtin_ia32_unpcklpd256_mask", IX86_BUILTIN_UNPCKLPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_unpcklpd128_mask, "__builtin_ia32_unpcklpd128_mask", IX86_BUILTIN_UNPCKLPD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_unpcklps256_mask,  "__builtin_ia32_unpcklps256_mask", IX86_BUILTIN_UNPCKLPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv2di_mask, "__builtin_ia32_vpconflictdi_128_mask", IX86_BUILTIN_VPCONFLICTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_conflictv4si_mask, "__builtin_ia32_vpconflictsi_128_mask", IX86_BUILTIN_VPCONFLICTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv2di2_mask, "__builtin_ia32_vplzcntq_128_mask", IX86_BUILTIN_VPCLZCNTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512VL, CODE_FOR_clzv4si2_mask, "__builtin_ia32_vplzcntd_128_mask", IX86_BUILTIN_VPCLZCNTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_unpcklps128_mask,  "__builtin_ia32_unpcklps128_mask", IX86_BUILTIN_UNPCKLPS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv8si_mask, "__builtin_ia32_alignd256_mask", IX86_BUILTIN_ALIGND256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4di_mask, "__builtin_ia32_alignq256_mask", IX86_BUILTIN_ALIGNQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv4si_mask, "__builtin_ia32_alignd128_mask", IX86_BUILTIN_ALIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_alignv2di_mask, "__builtin_ia32_alignq128_mask", IX86_BUILTIN_ALIGNQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph256_mask,  "__builtin_ia32_vcvtps2ph256_mask", IX86_BUILTIN_CVTPS2PH256_MASK, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtps2ph_mask,  "__builtin_ia32_vcvtps2ph_mask", IX86_BUILTIN_CVTPS2PH_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT_V8HI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps_mask, "__builtin_ia32_vcvtph2ps_mask", IX86_BUILTIN_CVTPH2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V8HI_V4SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vcvtph2ps256_mask, "__builtin_ia32_vcvtph2ps256_mask", IX86_BUILTIN_CVTPH2PS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8HI_V8SF_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv4si_mask, "__builtin_ia32_punpckhdq128_mask", IX86_BUILTIN_PUNPCKHDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv8si_mask, "__builtin_ia32_punpckhdq256_mask", IX86_BUILTIN_PUNPCKHDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv2di_mask, "__builtin_ia32_punpckhqdq128_mask", IX86_BUILTIN_PUNPCKHQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv4di_mask, "__builtin_ia32_punpckhqdq256_mask", IX86_BUILTIN_PUNPCKHQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv4si_mask, "__builtin_ia32_punpckldq128_mask", IX86_BUILTIN_PUNPCKLDQ128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv8si_mask, "__builtin_ia32_punpckldq256_mask", IX86_BUILTIN_PUNPCKLDQ256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv2di_mask, "__builtin_ia32_punpcklqdq128_mask", IX86_BUILTIN_PUNPCKLQDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv4di_mask, "__builtin_ia32_punpcklqdq256_mask", IX86_BUILTIN_PUNPCKLQDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv16qi_mask, "__builtin_ia32_punpckhbw128_mask", IX86_BUILTIN_PUNPCKHBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv32qi_mask, "__builtin_ia32_punpckhbw256_mask", IX86_BUILTIN_PUNPCKHBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_highv8hi_mask, "__builtin_ia32_punpckhwd128_mask", IX86_BUILTIN_PUNPCKHWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_highv16hi_mask, "__builtin_ia32_punpckhwd256_mask", IX86_BUILTIN_PUNPCKHWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv16qi_mask, "__builtin_ia32_punpcklbw128_mask", IX86_BUILTIN_PUNPCKLBW128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv32qi_mask, "__builtin_ia32_punpcklbw256_mask", IX86_BUILTIN_PUNPCKLBW256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_interleave_lowv8hi_mask, "__builtin_ia32_punpcklwd128_mask", IX86_BUILTIN_PUNPCKLWD128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_interleave_lowv16hi_mask, "__builtin_ia32_punpcklwd256_mask", IX86_BUILTIN_PUNPCKLWD256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv16hi_mask, "__builtin_ia32_psllv16hi_mask", IX86_BUILTIN_PSLLVV16HI, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ashlvv8hi_mask, "__builtin_ia32_psllv8hi_mask", IX86_BUILTIN_PSLLVV8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packssdw_mask, "__builtin_ia32_packssdw256_mask",  IX86_BUILTIN_PACKSSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_packssdw_mask, "__builtin_ia32_packssdw128_mask",  IX86_BUILTIN_PACKSSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_packusdw_mask, "__builtin_ia32_packusdw256_mask",  IX86_BUILTIN_PACKUSDW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V8SI_V8SI_V16HI_HI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_packusdw_mask, "__builtin_ia32_packusdw128_mask",  IX86_BUILTIN_PACKUSDW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI_V8HI_QI },
  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv32qi3_mask, "__builtin_ia32_pavgb256_mask", IX86_BUILTIN_PAVGB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
32999  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_uavgv16hi3_mask, "__builtin_ia32_pavgw256_mask", IX86_BUILTIN_PAVGW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_V16HI_HI },
33000  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv16qi3_mask, "__builtin_ia32_pavgb128_mask", IX86_BUILTIN_PAVGB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
33001  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_uavgv8hi3_mask, "__builtin_ia32_pavgw128_mask", IX86_BUILTIN_PAVGW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI_QI },
33002  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8sf_mask, "__builtin_ia32_permvarsf256_mask", IX86_BUILTIN_VPERMVARSF256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33003  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4df_mask, "__builtin_ia32_permvardf256_mask", IX86_BUILTIN_VPERMVARDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33004  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4df_mask, "__builtin_ia32_permdf256_mask", IX86_BUILTIN_VPERMDF256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33005  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv32qi2_mask, "__builtin_ia32_pabsb256_mask", IX86_BUILTIN_PABSB256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33006  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16qi2_mask, "__builtin_ia32_pabsb128_mask", IX86_BUILTIN_PABSB128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33007  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv16hi2_mask, "__builtin_ia32_pabsw256_mask", IX86_BUILTIN_PABSW256_MASK, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33008  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_absv8hi2_mask, "__builtin_ia32_pabsw128_mask", IX86_BUILTIN_PABSW128_MASK, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33009  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv2df3_mask, "__builtin_ia32_vpermilvarpd_mask", IX86_BUILTIN_VPERMILVARPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI_V2DF_QI },
33010  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4sf3_mask, "__builtin_ia32_vpermilvarps_mask", IX86_BUILTIN_VPERMILVARPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI_V4SF_QI },
33011  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv4df3_mask, "__builtin_ia32_vpermilvarpd256_mask", IX86_BUILTIN_VPERMILVARPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI_V4DF_QI },
33012  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilvarv8sf3_mask, "__builtin_ia32_vpermilvarps256_mask", IX86_BUILTIN_VPERMILVARPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI_V8SF_QI },
33013  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv2df_mask, "__builtin_ia32_vpermilpd_mask", IX86_BUILTIN_VPERMILPD_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT_V2DF_QI },
33014  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4sf_mask, "__builtin_ia32_vpermilps_mask", IX86_BUILTIN_VPERMILPS_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_V4SF_QI },
33015  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv4df_mask, "__builtin_ia32_vpermilpd256_mask", IX86_BUILTIN_VPERMILPD256_MASK, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT_V4DF_QI },
33016  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_vpermilv8sf_mask, "__builtin_ia32_vpermilps256_mask", IX86_BUILTIN_VPERMILPS256_MASK, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT_V8SF_QI },
33017  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4di, "__builtin_ia32_blendmq_256_mask", IX86_BUILTIN_BLENDMQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_QI },
33018  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8si, "__builtin_ia32_blendmd_256_mask", IX86_BUILTIN_BLENDMD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_QI },
33019  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4df, "__builtin_ia32_blendmpd_256_mask", IX86_BUILTIN_BLENDMPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_QI },
33020  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8sf, "__builtin_ia32_blendmps_256_mask", IX86_BUILTIN_BLENDMPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_QI },
33021  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2di, "__builtin_ia32_blendmq_128_mask", IX86_BUILTIN_BLENDMQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_QI },
33022  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4si, "__builtin_ia32_blendmd_128_mask", IX86_BUILTIN_BLENDMD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_QI },
33023  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv2df, "__builtin_ia32_blendmpd_128_mask", IX86_BUILTIN_BLENDMPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_QI },
33024  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv4sf, "__builtin_ia32_blendmps_128_mask", IX86_BUILTIN_BLENDMPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_QI },
33025  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16hi, "__builtin_ia32_blendmw_256_mask", IX86_BUILTIN_BLENDMW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI_HI },
33026  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv32qi, "__builtin_ia32_blendmb_256_mask", IX86_BUILTIN_BLENDMB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_SI },
33027  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv8hi, "__builtin_ia32_blendmw_128_mask", IX86_BUILTIN_BLENDMW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_QI },
33028  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_blendmv16qi, "__builtin_ia32_blendmb_128_mask", IX86_BUILTIN_BLENDMB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_HI },
33029  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv8si3_mask, "__builtin_ia32_pmulld256_mask", IX86_BUILTIN_PMULLD256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33030  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_mulv4si3_mask, "__builtin_ia32_pmulld128_mask", IX86_BUILTIN_PMULLD128_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_QI },
33031  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v8si_mask, "__builtin_ia32_pmuludq256_mask", IX86_BUILTIN_PMULUDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33032  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_smult_even_v8si_mask, "__builtin_ia32_pmuldq256_mask", IX86_BUILTIN_PMULDQ256_MASK, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI_V4DI_QI },
33033  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse4_1_mulv2siv2di3_mask, "__builtin_ia32_pmuldq128_mask", IX86_BUILTIN_PMULDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33034  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_vec_widen_umult_even_v4si_mask, "__builtin_ia32_pmuludq128_mask", IX86_BUILTIN_PMULUDQ128_MASK, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI_V2DI_QI },
33035  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx_cvtpd2ps256_mask, "__builtin_ia32_cvtpd2ps256_mask", IX86_BUILTIN_CVTPD2PS256_MASK, UNKNOWN, (int) V4SF_FTYPE_V4DF_V4SF_QI },
33036  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_sse2_cvtpd2ps_mask, "__builtin_ia32_cvtpd2ps_mask", IX86_BUILTIN_CVTPD2PS_MASK, UNKNOWN, (int) V4SF_FTYPE_V2DF_V4SF_QI },
33037  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv8si_mask, "__builtin_ia32_permvarsi256_mask", IX86_BUILTIN_VPERMVARSI256_MASK, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI_QI },
33038  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx2_permvarv4di_mask, "__builtin_ia32_permvardi256_mask", IX86_BUILTIN_VPERMVARDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
33039  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permv4di_mask, "__builtin_ia32_permdi256_mask", IX86_BUILTIN_VPERMDI256_MASK, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT_V4DI_QI },
33040  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4di3_mask, "__builtin_ia32_cmpq256_mask", IX86_BUILTIN_CMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33041  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8si3_mask, "__builtin_ia32_cmpd256_mask", IX86_BUILTIN_CMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33042  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4di3_mask, "__builtin_ia32_ucmpq256_mask", IX86_BUILTIN_UCMPQ256, UNKNOWN, (int) QI_FTYPE_V4DI_V4DI_INT_QI },
33043  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8si3_mask, "__builtin_ia32_ucmpd256_mask", IX86_BUILTIN_UCMPD256, UNKNOWN, (int) QI_FTYPE_V8SI_V8SI_INT_QI },
33044  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv32qi3_mask, "__builtin_ia32_cmpb256_mask", IX86_BUILTIN_CMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33045  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16hi3_mask, "__builtin_ia32_cmpw256_mask", IX86_BUILTIN_CMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33046  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv32qi3_mask, "__builtin_ia32_ucmpb256_mask", IX86_BUILTIN_UCMPB256, UNKNOWN, (int) SI_FTYPE_V32QI_V32QI_INT_SI },
33047  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16hi3_mask, "__builtin_ia32_ucmpw256_mask", IX86_BUILTIN_UCMPW256, UNKNOWN, (int) HI_FTYPE_V16HI_V16HI_INT_HI },
33048  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4df3_mask, "__builtin_ia32_cmppd256_mask", IX86_BUILTIN_CMPPD256_MASK, UNKNOWN, (int) QI_FTYPE_V4DF_V4DF_INT_QI },
33049  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8sf3_mask, "__builtin_ia32_cmpps256_mask", IX86_BUILTIN_CMPPS256_MASK, UNKNOWN, (int) QI_FTYPE_V8SF_V8SF_INT_QI },
33050  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2di3_mask, "__builtin_ia32_cmpq128_mask", IX86_BUILTIN_CMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33051  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4si3_mask, "__builtin_ia32_cmpd128_mask", IX86_BUILTIN_CMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33052  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv2di3_mask, "__builtin_ia32_ucmpq128_mask", IX86_BUILTIN_UCMPQ128, UNKNOWN, (int) QI_FTYPE_V2DI_V2DI_INT_QI },
33053  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv4si3_mask, "__builtin_ia32_ucmpd128_mask", IX86_BUILTIN_UCMPD128, UNKNOWN, (int) QI_FTYPE_V4SI_V4SI_INT_QI },
33054  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv16qi3_mask, "__builtin_ia32_cmpb128_mask", IX86_BUILTIN_CMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33055  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv8hi3_mask, "__builtin_ia32_cmpw128_mask", IX86_BUILTIN_CMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33056  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv16qi3_mask, "__builtin_ia32_ucmpb128_mask", IX86_BUILTIN_UCMPB128, UNKNOWN, (int) HI_FTYPE_V16QI_V16QI_INT_HI },
33057  { OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_ucmpv8hi3_mask, "__builtin_ia32_ucmpw128_mask", IX86_BUILTIN_UCMPW128, UNKNOWN, (int) QI_FTYPE_V8HI_V8HI_INT_QI },
33058  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv2df3_mask, "__builtin_ia32_cmppd128_mask", IX86_BUILTIN_CMPPD128_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI },
33059  { OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_cmpv4sf3_mask, "__builtin_ia32_cmpps128_mask", IX86_BUILTIN_CMPPS128_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI },

  /* AVX512DQ.  */
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask, "__builtin_ia32_broadcastf32x2_512_mask", IX86_BUILTIN_BROADCASTF32x2_512, UNKNOWN, (int) V16SF_FTYPE_V4SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask, "__builtin_ia32_broadcasti32x2_512_mask", IX86_BUILTIN_BROADCASTI32x2_512, UNKNOWN, (int) V16SI_FTYPE_V4SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8df_mask_1, "__builtin_ia32_broadcastf64x2_512_mask", IX86_BUILTIN_BROADCASTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V2DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv8di_mask_1, "__builtin_ia32_broadcasti64x2_512_mask", IX86_BUILTIN_BROADCASTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V2DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16sf_mask_1, "__builtin_ia32_broadcastf32x8_512_mask", IX86_BUILTIN_BROADCASTF32X8_512, UNKNOWN, (int) V16SF_FTYPE_V8SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_broadcastv16si_mask_1, "__builtin_ia32_broadcasti32x8_512_mask", IX86_BUILTIN_BROADCASTI32X8_512, UNKNOWN, (int) V16SI_FTYPE_V8SI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf64x2_mask, "__builtin_ia32_extractf64x2_512_mask", IX86_BUILTIN_EXTRACTF64X2_512, UNKNOWN, (int) V2DF_FTYPE_V8DF_INT_V2DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextractf32x8_mask, "__builtin_ia32_extractf32x8_mask", IX86_BUILTIN_EXTRACTF32X8, UNKNOWN, (int) V8SF_FTYPE_V16SF_INT_V8SF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti64x2_mask, "__builtin_ia32_extracti64x2_512_mask", IX86_BUILTIN_EXTRACTI64X2_512, UNKNOWN, (int) V2DI_FTYPE_V8DI_INT_V2DI_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vextracti32x8_mask, "__builtin_ia32_extracti32x8_mask", IX86_BUILTIN_EXTRACTI32X8, UNKNOWN, (int) V8SI_FTYPE_V16SI_INT_V8SI_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv8df_mask, "__builtin_ia32_reducepd512_mask", IX86_BUILTIN_REDUCEPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_reducepv16sf_mask, "__builtin_ia32_reduceps512_mask", IX86_BUILTIN_REDUCEPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_mulv8di3_mask, "__builtin_ia32_pmullq512_mask", IX86_BUILTIN_PMULLQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv8df3_mask, "__builtin_ia32_xorpd512_mask", IX86_BUILTIN_XORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_xorv16sf3_mask, "__builtin_ia32_xorps512_mask", IX86_BUILTIN_XORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv8df3_mask, "__builtin_ia32_orpd512_mask", IX86_BUILTIN_ORPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_iorv16sf3_mask, "__builtin_ia32_orps512_mask", IX86_BUILTIN_ORPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv8df3_mask, "__builtin_ia32_andpd512_mask", IX86_BUILTIN_ANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_andv16sf3_mask, "__builtin_ia32_andps512_mask", IX86_BUILTIN_ANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv8df3_mask, "__builtin_ia32_andnpd512_mask", IX86_BUILTIN_ANDNPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_andnotv16sf3_mask, "__builtin_ia32_andnps512_mask", IX86_BUILTIN_ANDNPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf32x8_mask, "__builtin_ia32_insertf32x8_mask", IX86_BUILTIN_INSERTF32X8, UNKNOWN, (int) V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti32x8_mask, "__builtin_ia32_inserti32x8_mask", IX86_BUILTIN_INSERTI32X8, UNKNOWN, (int) V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinsertf64x2_mask, "__builtin_ia32_insertf64x2_512_mask", IX86_BUILTIN_INSERTF64X2_512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_vinserti64x2_mask, "__builtin_ia32_inserti64x2_512_mask", IX86_BUILTIN_INSERTI64X2_512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv8df_mask, "__builtin_ia32_fpclasspd512_mask", IX86_BUILTIN_FPCLASSPD512, UNKNOWN, (int) QI_FTYPE_V8DF_INT_QI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_fpclassv16sf_mask, "__builtin_ia32_fpclassps512_mask", IX86_BUILTIN_FPCLASSPS512, UNKNOWN, (int) HI_FTYPE_V16SF_INT_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtd2maskv16si, "__builtin_ia32_cvtd2mask512", IX86_BUILTIN_CVTD2MASK512, UNKNOWN, (int) HI_FTYPE_V16SI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtq2maskv8di, "__builtin_ia32_cvtq2mask512", IX86_BUILTIN_CVTQ2MASK512, UNKNOWN, (int) QI_FTYPE_V8DI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2dv16si, "__builtin_ia32_cvtmask2d512", IX86_BUILTIN_CVTMASK2D512, UNKNOWN, (int) V16SI_FTYPE_HI },
  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512f_cvtmask2qv8di, "__builtin_ia32_cvtmask2q512", IX86_BUILTIN_CVTMASK2Q512, UNKNOWN, (int) V8DI_FTYPE_QI },

  /* AVX512BW.  */
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpcksi, "__builtin_ia32_kunpcksi", IX86_BUILTIN_KUNPCKWD, UNKNOWN, (int) SI_FTYPE_SI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_kunpckdi, "__builtin_ia32_kunpckdi", IX86_BUILTIN_KUNPCKDQ, UNKNOWN, (int) DI_FTYPE_DI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packusdw_mask, "__builtin_ia32_packusdw512_mask",  IX86_BUILTIN_PACKUSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlv4ti3, "__builtin_ia32_pslldq512", IX86_BUILTIN_PSLLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrv4ti3, "__builtin_ia32_psrldq512", IX86_BUILTIN_PSRLDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packssdw_mask, "__builtin_ia32_packssdw512_mask",  IX86_BUILTIN_PACKSSDW512, UNKNOWN, (int) V32HI_FTYPE_V16SI_V16SI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv4ti, "__builtin_ia32_palignr512", IX86_BUILTIN_PALIGNR512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_CONVERT },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_palignrv64qi_mask, "__builtin_ia32_palignr512_mask", IX86_BUILTIN_PALIGNR512_MASK, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_loaddquv32hi_mask, "__builtin_ia32_movdquhi512_mask", IX86_BUILTIN_MOVDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_loaddquv64qi_mask, "__builtin_ia32_movdquqi512_mask", IX86_BUILTIN_MOVDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512f_psadbw, "__builtin_ia32_psadbw512", IX86_BUILTIN_PSADBW512, UNKNOWN, (int) V8DI_FTYPE_V64QI_V64QI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_dbpsadbwv32hi_mask, "__builtin_ia32_dbpsadbw512_mask", IX86_BUILTIN_DBPSADBW512, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv64qi_mask, "__builtin_ia32_pbroadcastb512_mask", IX86_BUILTIN_PBROADCASTB512, UNKNOWN, (int) V64QI_FTYPE_V16QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv64qi_mask, "__builtin_ia32_pbroadcastb512_gpr_mask", IX86_BUILTIN_PBROADCASTB512_GPR, UNKNOWN, (int) V64QI_FTYPE_QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dupv32hi_mask, "__builtin_ia32_pbroadcastw512_mask", IX86_BUILTIN_PBROADCASTW512, UNKNOWN, (int) V32HI_FTYPE_V8HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vec_dup_gprv32hi_mask, "__builtin_ia32_pbroadcastw512_gpr_mask", IX86_BUILTIN_PBROADCASTW512_GPR, UNKNOWN, (int) V32HI_FTYPE_HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sign_extendv32qiv32hi2_mask, "__builtin_ia32_pmovsxbw512_mask", IX86_BUILTIN_PMOVSXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_zero_extendv32qiv32hi2_mask, "__builtin_ia32_pmovzxbw512_mask", IX86_BUILTIN_PMOVZXBW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32QI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_permvarv32hi_mask, "__builtin_ia32_permvarhi512_mask", IX86_BUILTIN_VPERMVARHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_mask, "__builtin_ia32_vpermt2varhi512_mask", IX86_BUILTIN_VPERMT2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermt2varv32hi3_maskz, "__builtin_ia32_vpermt2varhi512_maskz", IX86_BUILTIN_VPERMT2VARHI512_MASKZ, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_vpermi2varv32hi3_mask, "__builtin_ia32_vpermi2varhi512_mask", IX86_BUILTIN_VPERMI2VARHI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv64qi3_mask, "__builtin_ia32_pavgb512_mask", IX86_BUILTIN_PAVGB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_uavgv32hi3_mask, "__builtin_ia32_pavgw512_mask", IX86_BUILTIN_PAVGW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv64qi3_mask, "__builtin_ia32_paddb512_mask", IX86_BUILTIN_PADDB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv64qi3_mask, "__builtin_ia32_psubb512_mask", IX86_BUILTIN_PSUBB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv64qi3_mask, "__builtin_ia32_psubsb512_mask", IX86_BUILTIN_PSUBSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv64qi3_mask, "__builtin_ia32_paddsb512_mask", IX86_BUILTIN_PADDSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv64qi3_mask, "__builtin_ia32_psubusb512_mask", IX86_BUILTIN_PSUBUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv64qi3_mask, "__builtin_ia32_paddusb512_mask", IX86_BUILTIN_PADDUSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_subv32hi3_mask, "__builtin_ia32_psubw512_mask", IX86_BUILTIN_PSUBW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_addv32hi3_mask, "__builtin_ia32_paddw512_mask", IX86_BUILTIN_PADDW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_sssubv32hi3_mask, "__builtin_ia32_psubsw512_mask", IX86_BUILTIN_PSUBSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ssaddv32hi3_mask, "__builtin_ia32_paddsw512_mask", IX86_BUILTIN_PADDSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ussubv32hi3_mask, "__builtin_ia32_psubusw512_mask", IX86_BUILTIN_PSUBUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_usaddv32hi3_mask, "__builtin_ia32_paddusw512_mask", IX86_BUILTIN_PADDUSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv32hi3_mask, "__builtin_ia32_pmaxuw512_mask", IX86_BUILTIN_PMAXUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv32hi3_mask, "__builtin_ia32_pmaxsw512_mask", IX86_BUILTIN_PMAXSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv32hi3_mask, "__builtin_ia32_pminuw512_mask", IX86_BUILTIN_PMINUW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv32hi3_mask, "__builtin_ia32_pminsw512_mask", IX86_BUILTIN_PMINSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umaxv64qi3_mask, "__builtin_ia32_pmaxub512_mask", IX86_BUILTIN_PMAXUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smaxv64qi3_mask, "__builtin_ia32_pmaxsb512_mask", IX86_BUILTIN_PMAXSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_uminv64qi3_mask, "__builtin_ia32_pminub512_mask", IX86_BUILTIN_PMINUB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_sminv64qi3_mask, "__builtin_ia32_pminsb512_mask", IX86_BUILTIN_PMINSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovwb512_mask", IX86_BUILTIN_PMOVWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ss_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovswb512_mask", IX86_BUILTIN_PMOVSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_us_truncatev32hiv32qi2_mask, "__builtin_ia32_pmovuswb512_mask", IX86_BUILTIN_PMOVUSWB512, UNKNOWN, (int) V32QI_FTYPE_V32HI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_umulhrswv32hi3_mask, "__builtin_ia32_pmulhrsw512_mask", IX86_BUILTIN_PMULHRSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_umulv32hi3_highpart_mask, "__builtin_ia32_pmulhuw512_mask", IX86_BUILTIN_PMULHUW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_smulv32hi3_highpart_mask, "__builtin_ia32_pmulhw512_mask", IX86_BUILTIN_PMULHW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_mulv32hi3_mask, "__builtin_ia32_pmullw512_mask", IX86_BUILTIN_PMULLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllwi512_mask", IX86_BUILTIN_PSLLWI512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashlv32hi3_mask, "__builtin_ia32_psllw512_mask", IX86_BUILTIN_PSLLW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packsswb_mask, "__builtin_ia32_packsswb512_mask",  IX86_BUILTIN_PACKSSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_packuswb_mask, "__builtin_ia32_packuswb512_mask",  IX86_BUILTIN_PACKUSWB512, UNKNOWN, (int) V64QI_FTYPE_V32HI_V32HI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashrvv32hi_mask, "__builtin_ia32_psrav32hi_mask", IX86_BUILTIN_PSRAVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddubsw512v32hi_mask, "__builtin_ia32_pmaddubsw512_mask", IX86_BUILTIN_PMADDUBSW512_MASK, UNKNOWN, (int) V32HI_FTYPE_V64QI_V64QI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pmaddwd512v32hi_mask, "__builtin_ia32_pmaddwd512_mask", IX86_BUILTIN_PMADDWD512_MASK, UNKNOWN, (int) V16SI_FTYPE_V32HI_V32HI_V16SI_HI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_lshrvv32hi_mask, "__builtin_ia32_psrlv32hi_mask", IX86_BUILTIN_PSRLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv64qi_mask, "__builtin_ia32_punpckhbw512_mask", IX86_BUILTIN_PUNPCKHBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_highv32hi_mask, "__builtin_ia32_punpckhwd512_mask", IX86_BUILTIN_PUNPCKHWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv64qi_mask, "__builtin_ia32_punpcklbw512_mask", IX86_BUILTIN_PUNPCKLBW512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_interleave_lowv32hi_mask, "__builtin_ia32_punpcklwd512_mask", IX86_BUILTIN_PUNPCKLWD512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufbv64qi3_mask, "__builtin_ia32_pshufb512_mask", IX86_BUILTIN_PSHUFB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshufhwv32hi_mask, "__builtin_ia32_pshufhw512_mask", IX86_BUILTIN_PSHUFHW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_pshuflwv32hi_mask, "__builtin_ia32_pshuflw512_mask", IX86_BUILTIN_PSHUFLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psrawi512_mask", IX86_BUILTIN_PSRAWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_ashrv32hi3_mask, "__builtin_ia32_psraw512_mask", IX86_BUILTIN_PSRAW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlwi512_mask", IX86_BUILTIN_PSRLWI512, UNKNOWN, (int) V32HI_FTYPE_V32HI_INT_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_lshrv32hi3_mask, "__builtin_ia32_psrlw512_mask", IX86_BUILTIN_PSRLW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V8HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtb2maskv64qi, "__builtin_ia32_cvtb2mask512", IX86_BUILTIN_CVTB2MASK512, UNKNOWN, (int) DI_FTYPE_V64QI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtw2maskv32hi, "__builtin_ia32_cvtw2mask512", IX86_BUILTIN_CVTW2MASK512, UNKNOWN, (int) SI_FTYPE_V32HI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2bv64qi, "__builtin_ia32_cvtmask2b512", IX86_BUILTIN_CVTMASK2B512, UNKNOWN, (int) V64QI_FTYPE_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cvtmask2wv32hi, "__builtin_ia32_cvtmask2w512", IX86_BUILTIN_CVTMASK2W512, UNKNOWN, (int) V32HI_FTYPE_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv64qi3_mask, "__builtin_ia32_pcmpeqb512_mask", IX86_BUILTIN_PCMPEQB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_eqv32hi3_mask, "__builtin_ia32_pcmpeqw512_mask", IX86_BUILTIN_PCMPEQW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv64qi3_mask, "__builtin_ia32_pcmpgtb512_mask", IX86_BUILTIN_PCMPGTB512_MASK, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_gtv32hi3_mask, "__builtin_ia32_pcmpgtw512_mask", IX86_BUILTIN_PCMPGTW512_MASK, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv64qi3_mask, "__builtin_ia32_ptestmb512", IX86_BUILTIN_PTESTMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testmv32hi3_mask, "__builtin_ia32_ptestmw512", IX86_BUILTIN_PTESTMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv64qi3_mask, "__builtin_ia32_ptestnmb512", IX86_BUILTIN_PTESTNMB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_testnmv32hi3_mask, "__builtin_ia32_ptestnmw512", IX86_BUILTIN_PTESTNMW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ashlvv32hi_mask, "__builtin_ia32_psllv32hi_mask", IX86_BUILTIN_PSLLVV32HI, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv64qi2_mask, "__builtin_ia32_pabsb512_mask", IX86_BUILTIN_PABSB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_absv32hi2_mask, "__builtin_ia32_pabsw512_mask", IX86_BUILTIN_PABSW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv32hi, "__builtin_ia32_blendmw_512_mask", IX86_BUILTIN_BLENDMW512, UNKNOWN, (int) V32HI_FTYPE_V32HI_V32HI_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_blendmv64qi, "__builtin_ia32_blendmb_512_mask", IX86_BUILTIN_BLENDMB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv64qi3_mask, "__builtin_ia32_cmpb512_mask", IX86_BUILTIN_CMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_cmpv32hi3_mask, "__builtin_ia32_cmpw512_mask", IX86_BUILTIN_CMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv64qi3_mask, "__builtin_ia32_ucmpb512_mask", IX86_BUILTIN_UCMPB512, UNKNOWN, (int) DI_FTYPE_V64QI_V64QI_INT_DI },
  { OPTION_MASK_ISA_AVX512BW, CODE_FOR_avx512bw_ucmpv32hi3_mask, "__builtin_ia32_ucmpw512_mask", IX86_BUILTIN_UCMPW512, UNKNOWN, (int) SI_FTYPE_V32HI_V32HI_INT_SI },

  /* AVX512IFMA */
  { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_mask, "__builtin_ia32_vpmadd52luq512_mask", IX86_BUILTIN_VPMADD52LUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52luqv8di_maskz, "__builtin_ia32_vpmadd52luq512_maskz", IX86_BUILTIN_VPMADD52LUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_mask, "__builtin_ia32_vpmadd52huq512_mask", IX86_BUILTIN_VPMADD52HUQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA, CODE_FOR_vpamdd52huqv8di_maskz, "__builtin_ia32_vpmadd52huq512_maskz", IX86_BUILTIN_VPMADD52HUQ512_MASKZ, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_mask, "__builtin_ia32_vpmadd52luq256_mask", IX86_BUILTIN_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv4di_maskz, "__builtin_ia32_vpmadd52luq256_maskz", IX86_BUILTIN_VPMADD52LUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_mask, "__builtin_ia32_vpmadd52huq256_mask", IX86_BUILTIN_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv4di_maskz, "__builtin_ia32_vpmadd52huq256_maskz", IX86_BUILTIN_VPMADD52HUQ256_MASKZ, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_mask, "__builtin_ia32_vpmadd52luq128_mask", IX86_BUILTIN_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52luqv2di_maskz, "__builtin_ia32_vpmadd52luq128_maskz", IX86_BUILTIN_VPMADD52LUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },
  { OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_QI },

  /* AVX512VBMI */
  { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_vpmultishiftqbv16qi_mask, "__builtin_ia32_vpmultishiftqb128_mask", IX86_BUILTIN_VPMULTISHIFTQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_permvarv64qi_mask, "__builtin_ia32_permvarqi512_mask", IX86_BUILTIN_VPERMVARQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_mask, "__builtin_ia32_vpermt2varqi512_mask", IX86_BUILTIN_VPERMT2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermt2varv64qi3_maskz, "__builtin_ia32_vpermt2varqi512_maskz", IX86_BUILTIN_VPERMT2VARQI512_MASKZ, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512VBMI, CODE_FOR_avx512bw_vpermi2varv64qi3_mask, "__builtin_ia32_vpermi2varqi512_mask", IX86_BUILTIN_VPERMI2VARQI512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_DI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv32qi_mask, "__builtin_ia32_permvarqi256_mask", IX86_BUILTIN_VPERMVARQI256_MASK, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_permvarv16qi_mask, "__builtin_ia32_permvarqi128_mask", IX86_BUILTIN_VPERMVARQI128_MASK, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_mask, "__builtin_ia32_vpermt2varqi256_mask", IX86_BUILTIN_VPERMT2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv32qi3_maskz, "__builtin_ia32_vpermt2varqi256_maskz", IX86_BUILTIN_VPERMT2VARQI256_MASKZ, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_mask, "__builtin_ia32_vpermt2varqi128_mask", IX86_BUILTIN_VPERMT2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermt2varv16qi3_maskz, "__builtin_ia32_vpermt2varqi128_maskz", IX86_BUILTIN_VPERMT2VARQI128_MASKZ, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv32qi3_mask, "__builtin_ia32_vpermi2varqi256_mask", IX86_BUILTIN_VPERMI2VARQI256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_SI },
  { OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, CODE_FOR_avx512vl_vpermi2varv16qi3_mask, "__builtin_ia32_vpermi2varqi128_mask", IX86_BUILTIN_VPERMI2VARQI128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI_HI },
};

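/* Note (editorial, inferred from the table layout rather than stated here):
   each entry in these bdesc tables pairs the ISA option mask that gates the
   builtin with the insn code used to expand it, the user-visible
   "__builtin_ia32_*" name, its IX86_BUILTIN enumerator, an rtx comparison
   code (UNKNOWN when unused) and the prototype enumerator cast to int.
   The rounding-support variants below take the rounding-mode immediate as
   their final argument, hence the trailing _INT in the prototype names.  */
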
/* Builtins with rounding support.  */
static const struct builtin_description bdesc_round_args[] =
{
  /* AVX512F */
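  /* For instance, the addpd512 entry below is presumably reached from
     intrinsic wrappers such as _mm512_add_round_pd in avx512fintrin.h,
     which pass an all-ones mask and a rounding immediate (e.g.
     _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) as the last operands.  */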
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv8df3_mask_round, "__builtin_ia32_addpd512_mask", IX86_BUILTIN_ADDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_addv16sf3_mask_round, "__builtin_ia32_addps512_mask", IX86_BUILTIN_ADDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmaddv2df3_round, "__builtin_ia32_addsd_round", IX86_BUILTIN_ADDSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmaddv4sf3_round, "__builtin_ia32_addss_round", IX86_BUILTIN_ADDSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv8df3_mask_round, "__builtin_ia32_cmppd512_mask", IX86_BUILTIN_CMPPD512, UNKNOWN, (int) QI_FTYPE_V8DF_V8DF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cmpv16sf3_mask_round, "__builtin_ia32_cmpps512_mask", IX86_BUILTIN_CMPPS512, UNKNOWN, (int) HI_FTYPE_V16SF_V16SF_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv2df3_mask_round, "__builtin_ia32_cmpsd_mask", IX86_BUILTIN_CMPSD_MASK, UNKNOWN, (int) QI_FTYPE_V2DF_V2DF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmcmpv4sf3_mask_round, "__builtin_ia32_cmpss_mask", IX86_BUILTIN_CMPSS_MASK, UNKNOWN, (int) QI_FTYPE_V4SF_V4SF_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_comi_round, "__builtin_ia32_vcomisd", IX86_BUILTIN_COMIDF, UNKNOWN, (int) INT_FTYPE_V2DF_V2DF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_comi_round, "__builtin_ia32_vcomiss", IX86_BUILTIN_COMISF, UNKNOWN, (int) INT_FTYPE_V4SF_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_floatv16siv16sf2_mask_round, "__builtin_ia32_cvtdq2ps512_mask", IX86_BUILTIN_CVTDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2dq512_mask_round, "__builtin_ia32_cvtpd2dq512_mask", IX86_BUILTIN_CVTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round,  "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round,  "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq_round, "__builtin_ia32_cvtsi2sd64", IX86_BUILTIN_CVTSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_cvtsi2ss32", IX86_BUILTIN_CVTSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufloatv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv8df3_mask_round, "__builtin_ia32_divpd512_mask", IX86_BUILTIN_DIVPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_divv16sf3_mask_round, "__builtin_ia32_divps512_mask", IX86_BUILTIN_DIVPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmdivv2df3_round, "__builtin_ia32_divsd_round", IX86_BUILTIN_DIVSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmdivv4sf3_round, "__builtin_ia32_divss_round", IX86_BUILTIN_DIVSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_mask_round, "__builtin_ia32_fixupimmpd512_mask", IX86_BUILTIN_FIXUPIMMPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv8df_maskz_round, "__builtin_ia32_fixupimmpd512_maskz", IX86_BUILTIN_FIXUPIMMPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_mask_round, "__builtin_ia32_fixupimmps512_mask", IX86_BUILTIN_FIXUPIMMPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fixupimmv16sf_maskz_round, "__builtin_ia32_fixupimmps512_maskz", IX86_BUILTIN_FIXUPIMMPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_mask_round, "__builtin_ia32_fixupimmsd_mask", IX86_BUILTIN_FIXUPIMMSD128_MASK, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv2df_maskz_round, "__builtin_ia32_fixupimmsd_maskz", IX86_BUILTIN_FIXUPIMMSD128_MASKZ, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_mask_round, "__builtin_ia32_fixupimmss_mask", IX86_BUILTIN_FIXUPIMMSS128_MASK, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sfixupimmv4sf_maskz_round, "__builtin_ia32_fixupimmss_maskz", IX86_BUILTIN_FIXUPIMMSS128_MASKZ, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv8df_mask_round, "__builtin_ia32_getexppd512_mask", IX86_BUILTIN_GETEXPPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getexpv16sf_mask_round, "__builtin_ia32_getexpps512_mask", IX86_BUILTIN_GETEXPPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv2df_round, "__builtin_ia32_getexpsd128_round", IX86_BUILTIN_GETEXPSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sgetexpv4sf_round, "__builtin_ia32_getexpss128_round", IX86_BUILTIN_GETEXPSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv8df_mask_round, "__builtin_ia32_getmantpd512_mask", IX86_BUILTIN_GETMANTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_getmantv16sf_mask_round, "__builtin_ia32_getmantps512_mask", IX86_BUILTIN_GETMANTPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv2df_round, "__builtin_ia32_getmantsd_round", IX86_BUILTIN_GETMANTSD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vgetmantv4sf_round, "__builtin_ia32_getmantss_round", IX86_BUILTIN_GETMANTSS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv8df3_mask_round, "__builtin_ia32_maxpd512_mask", IX86_BUILTIN_MAXPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_smaxv16sf3_mask_round, "__builtin_ia32_maxps512_mask", IX86_BUILTIN_MAXPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsmaxv2df3_round, "__builtin_ia32_maxsd_round", IX86_BUILTIN_MAXSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsmaxv4sf3_round, "__builtin_ia32_maxss_round", IX86_BUILTIN_MAXSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv8df3_mask_round, "__builtin_ia32_minpd512_mask", IX86_BUILTIN_MINPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sminv16sf3_mask_round, "__builtin_ia32_minps512_mask", IX86_BUILTIN_MINPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsminv2df3_round, "__builtin_ia32_minsd_round", IX86_BUILTIN_MINSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsminv4sf3_round, "__builtin_ia32_minss_round", IX86_BUILTIN_MINSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv8df3_mask_round, "__builtin_ia32_mulpd512_mask", IX86_BUILTIN_MULPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_mulv16sf3_mask_round, "__builtin_ia32_mulps512_mask", IX86_BUILTIN_MULPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmmulv2df3_round, "__builtin_ia32_mulsd_round", IX86_BUILTIN_MULSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmmulv4sf3_round, "__builtin_ia32_mulss_round", IX86_BUILTIN_MULSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev8df_mask_round, "__builtin_ia32_rndscalepd_mask", IX86_BUILTIN_RNDSCALEPD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev16sf_mask_round, "__builtin_ia32_rndscaleps_mask", IX86_BUILTIN_RNDSCALEPS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev2df_round, "__builtin_ia32_rndscalesd_round", IX86_BUILTIN_RNDSCALESD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_rndscalev4sf_round, "__builtin_ia32_rndscaless_round", IX86_BUILTIN_RNDSCALESS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv8df_mask_round, "__builtin_ia32_scalefpd512_mask", IX86_BUILTIN_SCALEFPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_scalefv16sf_mask_round, "__builtin_ia32_scalefps512_mask", IX86_BUILTIN_SCALEFPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv2df_round, "__builtin_ia32_scalefsd_round", IX86_BUILTIN_SCALEFSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vmscalefv4sf_round, "__builtin_ia32_scalefss_round", IX86_BUILTIN_SCALEFSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2_mask_round, "__builtin_ia32_sqrtpd512_mask", IX86_BUILTIN_SQRTPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv16sf2_mask_round, "__builtin_ia32_sqrtps512_mask", IX86_BUILTIN_SQRTPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsqrtv2df2_round, "__builtin_ia32_sqrtsd_round", IX86_BUILTIN_SQRTSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsqrtv4sf2_round, "__builtin_ia32_sqrtss_round", IX86_BUILTIN_SQRTSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv8df3_mask_round, "__builtin_ia32_subpd512_mask", IX86_BUILTIN_SUBPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_subv16sf3_mask_round, "__builtin_ia32_subps512_mask", IX86_BUILTIN_SUBPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_vmsubv2df3_round, "__builtin_ia32_subsd_round", IX86_BUILTIN_SUBSD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_vmsubv4sf3_round, "__builtin_ia32_subss_round", IX86_BUILTIN_SUBSS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2si_round, "__builtin_ia32_vcvtsd2si32", IX86_BUILTIN_VCVTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq_round, "__builtin_ia32_vcvtsd2si64", IX86_BUILTIN_VCVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtsd2usi_round, "__builtin_ia32_vcvtsd2usi32", IX86_BUILTIN_VCVTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtsd2usiq_round, "__builtin_ia32_vcvtsd2usi64", IX86_BUILTIN_VCVTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvtss2si_round, "__builtin_ia32_vcvtss2si32", IX86_BUILTIN_VCVTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq_round, "__builtin_ia32_vcvtss2si64", IX86_BUILTIN_VCVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtss2usi_round, "__builtin_ia32_vcvtss2usi32", IX86_BUILTIN_VCVTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvtss2usiq_round, "__builtin_ia32_vcvtss2usi64", IX86_BUILTIN_VCVTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvttsd2si_round, "__builtin_ia32_vcvttsd2si32", IX86_BUILTIN_VCVTTSD2SI32, UNKNOWN, (int) INT_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq_round, "__builtin_ia32_vcvttsd2si64", IX86_BUILTIN_VCVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttsd2usi_round, "__builtin_ia32_vcvttsd2usi32", IX86_BUILTIN_VCVTTSD2USI32, UNKNOWN, (int) UINT_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttsd2usiq_round, "__builtin_ia32_vcvttsd2usi64", IX86_BUILTIN_VCVTTSD2USI64, UNKNOWN, (int) UINT64_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse_cvttss2si_round, "__builtin_ia32_vcvttss2si32", IX86_BUILTIN_VCVTTSS2SI32, UNKNOWN, (int) INT_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq_round, "__builtin_ia32_vcvttss2si64", IX86_BUILTIN_VCVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvttss2usi_round, "__builtin_ia32_vcvttss2usi32", IX86_BUILTIN_VCVTTSS2USI32, UNKNOWN, (int) UINT_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, CODE_FOR_avx512f_vcvttss2usiq_round, "__builtin_ia32_vcvttss2usi64", IX86_BUILTIN_VCVTTSS2USI64, UNKNOWN, (int) UINT64_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask_round, "__builtin_ia32_vfmaddpd512_mask", IX86_BUILTIN_VFMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_mask3_round, "__builtin_ia32_vfmaddpd512_mask3", IX86_BUILTIN_VFMADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v8df_maskz_round, "__builtin_ia32_vfmaddpd512_maskz", IX86_BUILTIN_VFMADDPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask_round, "__builtin_ia32_vfmaddps512_mask", IX86_BUILTIN_VFMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_mask3_round, "__builtin_ia32_vfmaddps512_mask3", IX86_BUILTIN_VFMADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmadd_v16sf_maskz_round, "__builtin_ia32_vfmaddps512_maskz", IX86_BUILTIN_VFMADDPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v2df_round, "__builtin_ia32_vfmaddsd3_round", IX86_BUILTIN_VFMADDSD3_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX512F, CODE_FOR_fmai_vmfmadd_v4sf_round, "__builtin_ia32_vfmaddss3_round", IX86_BUILTIN_VFMADDSS3_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF_INT },
33326  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask_round, "__builtin_ia32_vfmaddsubpd512_mask", IX86_BUILTIN_VFMADDSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33327  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_mask3_round, "__builtin_ia32_vfmaddsubpd512_mask3", IX86_BUILTIN_VFMADDSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33328  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v8df_maskz_round, "__builtin_ia32_vfmaddsubpd512_maskz", IX86_BUILTIN_VFMADDSUBPD512_MASKZ, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33329  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask_round, "__builtin_ia32_vfmaddsubps512_mask", IX86_BUILTIN_VFMADDSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33330  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_mask3_round, "__builtin_ia32_vfmaddsubps512_mask3", IX86_BUILTIN_VFMADDSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33331  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmaddsub_v16sf_maskz_round, "__builtin_ia32_vfmaddsubps512_maskz", IX86_BUILTIN_VFMADDSUBPS512_MASKZ, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33332  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v8df_mask3_round, "__builtin_ia32_vfmsubaddpd512_mask3", IX86_BUILTIN_VFMSUBADDPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33333  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsubadd_v16sf_mask3_round, "__builtin_ia32_vfmsubaddps512_mask3", IX86_BUILTIN_VFMSUBADDPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33334  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v8df_mask3_round, "__builtin_ia32_vfmsubpd512_mask3", IX86_BUILTIN_VFMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33335  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fmsub_v16sf_mask3_round, "__builtin_ia32_vfmsubps512_mask3", IX86_BUILTIN_VFMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33336  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v8df_mask_round, "__builtin_ia32_vfnmaddpd512_mask", IX86_BUILTIN_VFNMADDPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33337  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmadd_v16sf_mask_round, "__builtin_ia32_vfnmaddps512_mask", IX86_BUILTIN_VFNMADDPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33338  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask_round, "__builtin_ia32_vfnmsubpd512_mask", IX86_BUILTIN_VFNMSUBPD512_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33339  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v8df_mask3_round, "__builtin_ia32_vfnmsubpd512_mask3", IX86_BUILTIN_VFNMSUBPD512_MASK3, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT },
33340  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask_round, "__builtin_ia32_vfnmsubps512_mask", IX86_BUILTIN_VFNMSUBPS512_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33341  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fnmsub_v16sf_mask3_round, "__builtin_ia32_vfnmsubps512_mask3", IX86_BUILTIN_VFNMSUBPS512_MASK3, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT },
33342
33343  /* AVX512ER */
33344  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33345  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33346  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33347  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33348  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33349  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33350  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
33351  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
33352  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
33353  { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
33354
33355  /* AVX512DQ.  */
33356  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv2df_round, "__builtin_ia32_rangesd128_round", IX86_BUILTIN_RANGESD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT_INT },
33357  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangesv4sf_round, "__builtin_ia32_rangess128_round", IX86_BUILTIN_RANGESS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT_INT },
33358  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2qq512_mask", IX86_BUILTIN_CVTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33359  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2qqv8di_mask_round, "__builtin_ia32_cvtps2qq512_mask", IX86_BUILTIN_CVTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33360  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_notruncv8dfv8di2_mask_round, "__builtin_ia32_cvtpd2uqq512_mask", IX86_BUILTIN_CVTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33361  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_cvtps2uqqv8di_mask_round, "__builtin_ia32_cvtps2uqq512_mask", IX86_BUILTIN_CVTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33362  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8sf2_mask_round, "__builtin_ia32_cvtqq2ps512_mask", IX86_BUILTIN_CVTQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33363  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8sf2_mask_round, "__builtin_ia32_cvtuqq2ps512_mask", IX86_BUILTIN_CVTUQQ2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DI_V8SF_QI_INT },
33364  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_floatv8div8df2_mask_round, "__builtin_ia32_cvtqq2pd512_mask", IX86_BUILTIN_CVTQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33365  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufloatv8div8df2_mask_round, "__builtin_ia32_cvtuqq2pd512_mask", IX86_BUILTIN_CVTUQQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8DI_V8DF_QI_INT },
33366  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2qq512_mask", IX86_BUILTIN_CVTTPS2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33367  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8sfv8di2_mask_round, "__builtin_ia32_cvttps2uqq512_mask", IX86_BUILTIN_CVTTPS2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8SF_V8DI_QI_INT },
33368  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_fix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2qq512_mask", IX86_BUILTIN_CVTTPD2QQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33369  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_ufix_truncv8dfv8di2_mask_round, "__builtin_ia32_cvttpd2uqq512_mask", IX86_BUILTIN_CVTTPD2UQQ512, UNKNOWN, (int) V8DI_FTYPE_V8DF_V8DI_QI_INT },
33370  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv16sf_mask_round, "__builtin_ia32_rangeps512_mask", IX86_BUILTIN_RANGEPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT },
33371  { OPTION_MASK_ISA_AVX512DQ, CODE_FOR_avx512dq_rangepv8df_mask_round, "__builtin_ia32_rangepd512_mask", IX86_BUILTIN_RANGEPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT },
33372};
33373
33374/* Builtins for MPX.  */
33375static const struct builtin_description bdesc_mpx[] =
33376{
33377  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndstx", IX86_BUILTIN_BNDSTX, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND_PCVOID },
33378  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcl", IX86_BUILTIN_BNDCL, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33379  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndcu", IX86_BUILTIN_BNDCU, UNKNOWN, (int) VOID_FTYPE_PCVOID_BND },
33380};
33381
33382/* Const builtins for MPX.  */
33383static const struct builtin_description bdesc_mpx_const[] =
33384{
33385  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndmk", IX86_BUILTIN_BNDMK, UNKNOWN, (int) BND_FTYPE_PCVOID_ULONG },
33386  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndldx", IX86_BUILTIN_BNDLDX, UNKNOWN, (int) BND_FTYPE_PCVOID_PCVOID },
33387  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_narrow_bounds", IX86_BUILTIN_BNDNARROW, UNKNOWN, (int) PVOID_FTYPE_PCVOID_BND_ULONG },
33388  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndint", IX86_BUILTIN_BNDINT, UNKNOWN, (int) BND_FTYPE_BND_BND },
33389  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_sizeof", IX86_BUILTIN_SIZEOF, UNKNOWN, (int) ULONG_FTYPE_VOID },
33390  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndlower", IX86_BUILTIN_BNDLOWER, UNKNOWN, (int) PVOID_FTYPE_BND },
33391  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndupper", IX86_BUILTIN_BNDUPPER, UNKNOWN, (int) PVOID_FTYPE_BND },
33392  { OPTION_MASK_ISA_MPX, (enum insn_code)0, "__builtin_ia32_bndret", IX86_BUILTIN_BNDRET, UNKNOWN, (int) BND_FTYPE_PCVOID },
33393};
33394
33395/* FMA4 and XOP.  */
33396#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
33397#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
33398#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
33399#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
33400#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
33401#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
33402#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
33403#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
33404#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
33405#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
33406#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
33407#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
33408#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
33409#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
33410#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
33411#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
33412#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
33413#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
33414#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
33415#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
33416#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
33417#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
33418#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
33419#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
33420#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
33421#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
33422#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
33423#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
33424#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
33425#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
33426#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
33427#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
33428#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
33429#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
33430#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
33431#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
33432#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
33433#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
33434#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
33435#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
33436#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
33437#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
33438#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
33439#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
33440#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
33441#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
33442#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
33443#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
33444#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
33445#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
33446#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
33447#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
33448
33449static const struct builtin_description bdesc_multi_arg[] =
33450{
33451  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v4sf,
33452    "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS,
33453    UNKNOWN, (int)MULTI_ARG_3_SF },
33454  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmadd_v2df,
33455    "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD,
33456    UNKNOWN, (int)MULTI_ARG_3_DF },
33457
33458  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v4sf,
33459    "__builtin_ia32_vfmaddss3", IX86_BUILTIN_VFMADDSS3,
33460    UNKNOWN, (int)MULTI_ARG_3_SF },
33461  { OPTION_MASK_ISA_FMA, CODE_FOR_fmai_vmfmadd_v2df,
33462    "__builtin_ia32_vfmaddsd3", IX86_BUILTIN_VFMADDSD3,
33463    UNKNOWN, (int)MULTI_ARG_3_DF },
33464
33465  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4sf,
33466    "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS,
33467    UNKNOWN, (int)MULTI_ARG_3_SF },
33468  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v2df,
33469    "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD,
33470    UNKNOWN, (int)MULTI_ARG_3_DF },
33471  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v8sf,
33472    "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256,
33473    UNKNOWN, (int)MULTI_ARG_3_SF2 },
33474  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmadd_v4df,
33475    "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256,
33476    UNKNOWN, (int)MULTI_ARG_3_DF2 },
33477
33478  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4sf,
33479    "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS,
33480    UNKNOWN, (int)MULTI_ARG_3_SF },
33481  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v2df,
33482    "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD,
33483    UNKNOWN, (int)MULTI_ARG_3_DF },
33484  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v8sf,
33485    "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256,
33486    UNKNOWN, (int)MULTI_ARG_3_SF2 },
33487  { OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_FMA4, CODE_FOR_fmaddsub_v4df,
33488    "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256,
33489    UNKNOWN, (int)MULTI_ARG_3_DF2 },
33490
33491  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov",      IX86_BUILTIN_VPCMOV,	 UNKNOWN,      (int)MULTI_ARG_3_DI },
33492  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di,        "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN,      (int)MULTI_ARG_3_DI },
33493  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si,        "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN,      (int)MULTI_ARG_3_SI },
33494  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi,        "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN,      (int)MULTI_ARG_3_HI },
33495  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi,       "__builtin_ia32_vpcmov_v16qi",IX86_BUILTIN_VPCMOV_V16QI,UNKNOWN,      (int)MULTI_ARG_3_QI },
33496  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df,        "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN,      (int)MULTI_ARG_3_DF },
33497  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf,        "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN,      (int)MULTI_ARG_3_SF },
33498
33499  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov256",       IX86_BUILTIN_VPCMOV256,       UNKNOWN,      (int)MULTI_ARG_3_DI2 },
33500  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256,        "__builtin_ia32_vpcmov_v4di256",  IX86_BUILTIN_VPCMOV_V4DI256,  UNKNOWN,      (int)MULTI_ARG_3_DI2 },
33501  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256,        "__builtin_ia32_vpcmov_v8si256",  IX86_BUILTIN_VPCMOV_V8SI256,  UNKNOWN,      (int)MULTI_ARG_3_SI2 },
33502  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256,       "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN,      (int)MULTI_ARG_3_HI2 },
33503  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256,       "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN,      (int)MULTI_ARG_3_QI2 },
33504  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256,        "__builtin_ia32_vpcmov_v4df256",  IX86_BUILTIN_VPCMOV_V4DF256,  UNKNOWN,      (int)MULTI_ARG_3_DF2 },
33505  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256,        "__builtin_ia32_vpcmov_v8sf256",  IX86_BUILTIN_VPCMOV_V8SF256,  UNKNOWN,      (int)MULTI_ARG_3_SF2 },
33506
33507  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm,             "__builtin_ia32_vpperm",      IX86_BUILTIN_VPPERM,      UNKNOWN,      (int)MULTI_ARG_3_QI },
33508
33509  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww,          "__builtin_ia32_vpmacssww",   IX86_BUILTIN_VPMACSSWW,   UNKNOWN,      (int)MULTI_ARG_3_HI },
33510  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww,           "__builtin_ia32_vpmacsww",    IX86_BUILTIN_VPMACSWW,    UNKNOWN,      (int)MULTI_ARG_3_HI },
33511  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd,          "__builtin_ia32_vpmacsswd",   IX86_BUILTIN_VPMACSSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33512  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd,           "__builtin_ia32_vpmacswd",    IX86_BUILTIN_VPMACSWD,    UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33513  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd,          "__builtin_ia32_vpmacssdd",   IX86_BUILTIN_VPMACSSDD,   UNKNOWN,      (int)MULTI_ARG_3_SI },
33514  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd,           "__builtin_ia32_vpmacsdd",    IX86_BUILTIN_VPMACSDD,    UNKNOWN,      (int)MULTI_ARG_3_SI },
33515  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql,         "__builtin_ia32_vpmacssdql",  IX86_BUILTIN_VPMACSSDQL,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33516  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh,         "__builtin_ia32_vpmacssdqh",  IX86_BUILTIN_VPMACSSDQH,  UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33517  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql,          "__builtin_ia32_vpmacsdql",   IX86_BUILTIN_VPMACSDQL,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33518  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh,          "__builtin_ia32_vpmacsdqh",   IX86_BUILTIN_VPMACSDQH,   UNKNOWN,      (int)MULTI_ARG_3_SI_DI },
33519  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd,         "__builtin_ia32_vpmadcsswd",  IX86_BUILTIN_VPMADCSSWD,  UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33520  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd,          "__builtin_ia32_vpmadcswd",   IX86_BUILTIN_VPMADCSWD,   UNKNOWN,      (int)MULTI_ARG_3_HI_SI },
33521
33522  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3,        "__builtin_ia32_vprotq",      IX86_BUILTIN_VPROTQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33523  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3,        "__builtin_ia32_vprotd",      IX86_BUILTIN_VPROTD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33524  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3,        "__builtin_ia32_vprotw",      IX86_BUILTIN_VPROTW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33525  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3,       "__builtin_ia32_vprotb",      IX86_BUILTIN_VPROTB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33526  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3,         "__builtin_ia32_vprotqi",     IX86_BUILTIN_VPROTQ_IMM,  UNKNOWN,      (int)MULTI_ARG_2_DI_IMM },
33527  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3,         "__builtin_ia32_vprotdi",     IX86_BUILTIN_VPROTD_IMM,  UNKNOWN,      (int)MULTI_ARG_2_SI_IMM },
33528  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3,         "__builtin_ia32_vprotwi",     IX86_BUILTIN_VPROTW_IMM,  UNKNOWN,      (int)MULTI_ARG_2_HI_IMM },
33529  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3,        "__builtin_ia32_vprotbi",     IX86_BUILTIN_VPROTB_IMM,  UNKNOWN,      (int)MULTI_ARG_2_QI_IMM },
33530  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav2di3,         "__builtin_ia32_vpshaq",      IX86_BUILTIN_VPSHAQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33531  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav4si3,         "__builtin_ia32_vpshad",      IX86_BUILTIN_VPSHAD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33532  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav8hi3,         "__builtin_ia32_vpshaw",      IX86_BUILTIN_VPSHAW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33533  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shav16qi3,        "__builtin_ia32_vpshab",      IX86_BUILTIN_VPSHAB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33534  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv2di3,         "__builtin_ia32_vpshlq",      IX86_BUILTIN_VPSHLQ,      UNKNOWN,      (int)MULTI_ARG_2_DI },
33535  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv4si3,         "__builtin_ia32_vpshld",      IX86_BUILTIN_VPSHLD,      UNKNOWN,      (int)MULTI_ARG_2_SI },
33536  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv8hi3,         "__builtin_ia32_vpshlw",      IX86_BUILTIN_VPSHLW,      UNKNOWN,      (int)MULTI_ARG_2_HI },
33537  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_shlv16qi3,        "__builtin_ia32_vpshlb",      IX86_BUILTIN_VPSHLB,      UNKNOWN,      (int)MULTI_ARG_2_QI },
33538
33539  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2,       "__builtin_ia32_vfrczss",     IX86_BUILTIN_VFRCZSS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
33540  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2,       "__builtin_ia32_vfrczsd",     IX86_BUILTIN_VFRCZSD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
33541  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2,         "__builtin_ia32_vfrczps",     IX86_BUILTIN_VFRCZPS,     UNKNOWN,      (int)MULTI_ARG_1_SF },
33542  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2,         "__builtin_ia32_vfrczpd",     IX86_BUILTIN_VFRCZPD,     UNKNOWN,      (int)MULTI_ARG_1_DF },
33543  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2,         "__builtin_ia32_vfrczps256",  IX86_BUILTIN_VFRCZPS256,  UNKNOWN,      (int)MULTI_ARG_1_SF2 },
33544  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2,         "__builtin_ia32_vfrczpd256",  IX86_BUILTIN_VFRCZPD256,  UNKNOWN,      (int)MULTI_ARG_1_DF2 },
33545
33546  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw,           "__builtin_ia32_vphaddbw",    IX86_BUILTIN_VPHADDBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33547  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd,           "__builtin_ia32_vphaddbd",    IX86_BUILTIN_VPHADDBD,    UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
33548  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq,           "__builtin_ia32_vphaddbq",    IX86_BUILTIN_VPHADDBQ,    UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
33549  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd,           "__builtin_ia32_vphaddwd",    IX86_BUILTIN_VPHADDWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33550  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq,           "__builtin_ia32_vphaddwq",    IX86_BUILTIN_VPHADDWQ,    UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
33551  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq,           "__builtin_ia32_vphadddq",    IX86_BUILTIN_VPHADDDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33552  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw,          "__builtin_ia32_vphaddubw",   IX86_BUILTIN_VPHADDUBW,   UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33553  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd,          "__builtin_ia32_vphaddubd",   IX86_BUILTIN_VPHADDUBD,   UNKNOWN,      (int)MULTI_ARG_1_QI_SI },
33554  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq,          "__builtin_ia32_vphaddubq",   IX86_BUILTIN_VPHADDUBQ,   UNKNOWN,      (int)MULTI_ARG_1_QI_DI },
33555  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd,          "__builtin_ia32_vphadduwd",   IX86_BUILTIN_VPHADDUWD,   UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33556  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq,          "__builtin_ia32_vphadduwq",   IX86_BUILTIN_VPHADDUWQ,   UNKNOWN,      (int)MULTI_ARG_1_HI_DI },
33557  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq,          "__builtin_ia32_vphaddudq",   IX86_BUILTIN_VPHADDUDQ,   UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33558  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw,           "__builtin_ia32_vphsubbw",    IX86_BUILTIN_VPHSUBBW,    UNKNOWN,      (int)MULTI_ARG_1_QI_HI },
33559  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd,           "__builtin_ia32_vphsubwd",    IX86_BUILTIN_VPHSUBWD,    UNKNOWN,      (int)MULTI_ARG_1_HI_SI },
33560  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq,           "__builtin_ia32_vphsubdq",    IX86_BUILTIN_VPHSUBDQ,    UNKNOWN,      (int)MULTI_ARG_1_SI_DI },
33561
33562  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomeqb",    IX86_BUILTIN_VPCOMEQB,    EQ,           (int)MULTI_ARG_2_QI_CMP },
33563  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneb",    IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
33564  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomneqb",   IX86_BUILTIN_VPCOMNEB,    NE,           (int)MULTI_ARG_2_QI_CMP },
33565  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomltb",    IX86_BUILTIN_VPCOMLTB,    LT,           (int)MULTI_ARG_2_QI_CMP },
33566  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomleb",    IX86_BUILTIN_VPCOMLEB,    LE,           (int)MULTI_ARG_2_QI_CMP },
33567  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgtb",    IX86_BUILTIN_VPCOMGTB,    GT,           (int)MULTI_ARG_2_QI_CMP },
33568  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3,     "__builtin_ia32_vpcomgeb",    IX86_BUILTIN_VPCOMGEB,    GE,           (int)MULTI_ARG_2_QI_CMP },
33569
33570  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomeqw",    IX86_BUILTIN_VPCOMEQW,    EQ,           (int)MULTI_ARG_2_HI_CMP },
33571  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomnew",    IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
33572  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomneqw",   IX86_BUILTIN_VPCOMNEW,    NE,           (int)MULTI_ARG_2_HI_CMP },
33573  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomltw",    IX86_BUILTIN_VPCOMLTW,    LT,           (int)MULTI_ARG_2_HI_CMP },
33574  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomlew",    IX86_BUILTIN_VPCOMLEW,    LE,           (int)MULTI_ARG_2_HI_CMP },
33575  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgtw",    IX86_BUILTIN_VPCOMGTW,    GT,           (int)MULTI_ARG_2_HI_CMP },
33576  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3,      "__builtin_ia32_vpcomgew",    IX86_BUILTIN_VPCOMGEW,    GE,           (int)MULTI_ARG_2_HI_CMP },
33577
33578  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomeqd",    IX86_BUILTIN_VPCOMEQD,    EQ,           (int)MULTI_ARG_2_SI_CMP },
33579  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomned",    IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
33580  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomneqd",   IX86_BUILTIN_VPCOMNED,    NE,           (int)MULTI_ARG_2_SI_CMP },
33581  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomltd",    IX86_BUILTIN_VPCOMLTD,    LT,           (int)MULTI_ARG_2_SI_CMP },
33582  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomled",    IX86_BUILTIN_VPCOMLED,    LE,           (int)MULTI_ARG_2_SI_CMP },
33583  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomgtd",    IX86_BUILTIN_VPCOMGTD,    GT,           (int)MULTI_ARG_2_SI_CMP },
33584  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3,      "__builtin_ia32_vpcomged",    IX86_BUILTIN_VPCOMGED,    GE,           (int)MULTI_ARG_2_SI_CMP },
33585
33586  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomeqq",    IX86_BUILTIN_VPCOMEQQ,    EQ,           (int)MULTI_ARG_2_DI_CMP },
33587  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneq",    IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
33588  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomneqq",   IX86_BUILTIN_VPCOMNEQ,    NE,           (int)MULTI_ARG_2_DI_CMP },
33589  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomltq",    IX86_BUILTIN_VPCOMLTQ,    LT,           (int)MULTI_ARG_2_DI_CMP },
33590  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomleq",    IX86_BUILTIN_VPCOMLEQ,    LE,           (int)MULTI_ARG_2_DI_CMP },
33591  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgtq",    IX86_BUILTIN_VPCOMGTQ,    GT,           (int)MULTI_ARG_2_DI_CMP },
33592  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3,      "__builtin_ia32_vpcomgeq",    IX86_BUILTIN_VPCOMGEQ,    GE,           (int)MULTI_ARG_2_DI_CMP },
33593
33594  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb",   IX86_BUILTIN_VPCOMEQUB,   EQ,           (int)MULTI_ARG_2_QI_CMP },
33595  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub",   IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
33596  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb",  IX86_BUILTIN_VPCOMNEUB,   NE,           (int)MULTI_ARG_2_QI_CMP },
33597  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub",   IX86_BUILTIN_VPCOMLTUB,   LTU,          (int)MULTI_ARG_2_QI_CMP },
33598  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub",   IX86_BUILTIN_VPCOMLEUB,   LEU,          (int)MULTI_ARG_2_QI_CMP },
33599  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub",   IX86_BUILTIN_VPCOMGTUB,   GTU,          (int)MULTI_ARG_2_QI_CMP },
33600  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub",   IX86_BUILTIN_VPCOMGEUB,   GEU,          (int)MULTI_ARG_2_QI_CMP },
33601
33602  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw",   IX86_BUILTIN_VPCOMEQUW,   EQ,           (int)MULTI_ARG_2_HI_CMP },
33603  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw",   IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
33604  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw",  IX86_BUILTIN_VPCOMNEUW,   NE,           (int)MULTI_ARG_2_HI_CMP },
33605  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomltuw",   IX86_BUILTIN_VPCOMLTUW,   LTU,          (int)MULTI_ARG_2_HI_CMP },
33606  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomleuw",   IX86_BUILTIN_VPCOMLEUW,   LEU,          (int)MULTI_ARG_2_HI_CMP },
33607  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgtuw",   IX86_BUILTIN_VPCOMGTUW,   GTU,          (int)MULTI_ARG_2_HI_CMP },
33608  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3,  "__builtin_ia32_vpcomgeuw",   IX86_BUILTIN_VPCOMGEUW,   GEU,          (int)MULTI_ARG_2_HI_CMP },
33609
33610  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd",   IX86_BUILTIN_VPCOMEQUD,   EQ,           (int)MULTI_ARG_2_SI_CMP },
33611  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud",   IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
33612  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd",  IX86_BUILTIN_VPCOMNEUD,   NE,           (int)MULTI_ARG_2_SI_CMP },
33613  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomltud",   IX86_BUILTIN_VPCOMLTUD,   LTU,          (int)MULTI_ARG_2_SI_CMP },
33614  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomleud",   IX86_BUILTIN_VPCOMLEUD,   LEU,          (int)MULTI_ARG_2_SI_CMP },
33615  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgtud",   IX86_BUILTIN_VPCOMGTUD,   GTU,          (int)MULTI_ARG_2_SI_CMP },
33616  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3,  "__builtin_ia32_vpcomgeud",   IX86_BUILTIN_VPCOMGEUD,   GEU,          (int)MULTI_ARG_2_SI_CMP },
33617
33618  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq",   IX86_BUILTIN_VPCOMEQUQ,   EQ,           (int)MULTI_ARG_2_DI_CMP },
33619  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq",   IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
33620  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq",  IX86_BUILTIN_VPCOMNEUQ,   NE,           (int)MULTI_ARG_2_DI_CMP },
33621  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomltuq",   IX86_BUILTIN_VPCOMLTUQ,   LTU,          (int)MULTI_ARG_2_DI_CMP },
33622  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomleuq",   IX86_BUILTIN_VPCOMLEUQ,   LEU,          (int)MULTI_ARG_2_DI_CMP },
33623  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgtuq",   IX86_BUILTIN_VPCOMGTUQ,   GTU,          (int)MULTI_ARG_2_DI_CMP },
33624  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3,  "__builtin_ia32_vpcomgeuq",   IX86_BUILTIN_VPCOMGEUQ,   GEU,          (int)MULTI_ARG_2_DI_CMP },
33625
33626  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
33627  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
33628  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
33629  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
33630  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_QI_TF },
33631  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_HI_TF },
33632  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_SI_TF },
33633  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE,   (int)MULTI_ARG_2_DI_TF },
33634
33635  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueb",  IX86_BUILTIN_VPCOMTRUEB,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
33636  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtruew",  IX86_BUILTIN_VPCOMTRUEW,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
33637  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrued",  IX86_BUILTIN_VPCOMTRUED,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
33638  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueq",  IX86_BUILTIN_VPCOMTRUEQ,  (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
33639  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3,     "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_QI_TF },
33640  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3,      "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_HI_TF },
33641  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3,      "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_SI_TF },
33642  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3,      "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE,    (int)MULTI_ARG_2_DI_TF },
33643
33644  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3,     "__builtin_ia32_vpermil2pd",  IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
33645  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3,     "__builtin_ia32_vpermil2ps",  IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
33646  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3,     "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
33647  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3,     "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
33648
33649};
33650
33651/* TM vector builtins.  */
33652
33653/* Reuse the existing x86-specific `struct builtin_description' because
33654   we're lazy.  Add casts to make them fit.  */
33655static const struct builtin_description bdesc_tm[] =
33656{
33657  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WM64", (enum ix86_builtins) BUILT_IN_TM_STORE_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33658  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaRM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33659  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_WaWM64", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M64, UNKNOWN, VOID_FTYPE_PV2SI_V2SI },
33660  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33661  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaRM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33662  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RaWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33663  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_RfWM64", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M64, UNKNOWN, V2SI_FTYPE_PCV2SI },
33664
33665  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WM128", (enum ix86_builtins) BUILT_IN_TM_STORE_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33666  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaRM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33667  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_WaWM128", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M128, UNKNOWN, VOID_FTYPE_PV4SF_V4SF },
33668  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33669  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaRM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33670  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RaWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33671  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_RfWM128", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M128, UNKNOWN, V4SF_FTYPE_PCV4SF },
33672
33673  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WM256", (enum ix86_builtins) BUILT_IN_TM_STORE_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33674  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaRM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAR_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33675  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_WaWM256", (enum ix86_builtins) BUILT_IN_TM_STORE_WAW_M256, UNKNOWN, VOID_FTYPE_PV8SF_V8SF },
33676  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33677  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaRM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAR_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33678  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RaWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RAW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33679  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_RfWM256", (enum ix86_builtins) BUILT_IN_TM_LOAD_RFW_M256, UNKNOWN, V8SF_FTYPE_PCV8SF },
33680
33681  { OPTION_MASK_ISA_MMX, CODE_FOR_nothing, "__builtin__ITM_LM64", (enum ix86_builtins) BUILT_IN_TM_LOG_M64, UNKNOWN, VOID_FTYPE_PCVOID },
33682  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin__ITM_LM128", (enum ix86_builtins) BUILT_IN_TM_LOG_M128, UNKNOWN, VOID_FTYPE_PCVOID },
33683  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin__ITM_LM256", (enum ix86_builtins) BUILT_IN_TM_LOG_M256, UNKNOWN, VOID_FTYPE_PCVOID },
33684};
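
/* An illustrative sketch, not part of the tables above: each bdesc_tm row is
   registered by ix86_init_tm_builtins below under its full "__builtin__ITM_*"
   name and, with the "__builtin_" prefix stripped, as the libitm entry point.
   So the "__builtin__ITM_WM128" row, whose type is VOID_FTYPE_PV4SF_V4SF,
   ends up callable roughly as

     extern void _ITM_WM128 (__m128 *, __m128);   // assumed prototype
     _ITM_WM128 (dst, value);

   where the exact parameter types follow the libitm ABI.  */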
33685
33686/* TM callbacks.  */
33687
33688/* Return the builtin decl needed to load a vector of TYPE.  */
33689
33690static tree
33691ix86_builtin_tm_load (tree type)
33692{
33693  if (TREE_CODE (type) == VECTOR_TYPE)
33694    {
33695      switch (tree_to_uhwi (TYPE_SIZE (type)))
33696	{
33697	case 64:
33698	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M64);
33699	case 128:
33700	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M128);
33701	case 256:
33702	  return builtin_decl_explicit (BUILT_IN_TM_LOAD_M256);
33703	}
33704    }
33705  return NULL_TREE;
33706}
33707
33708/* Return the builtin decl needed to store a vector of TYPE.  */
33709
33710static tree
33711ix86_builtin_tm_store (tree type)
33712{
33713  if (TREE_CODE (type) == VECTOR_TYPE)
33714    {
33715      switch (tree_to_uhwi (TYPE_SIZE (type)))
33716	{
33717	case 64:
33718	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M64);
33719	case 128:
33720	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M128);
33721	case 256:
33722	  return builtin_decl_explicit (BUILT_IN_TM_STORE_M256);
33723	}
33724    }
33725  return NULL_TREE;
33726}
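
/* For example (an illustrative sketch, with `vectype' standing for any
   128-bit vector type such as V4SF):

     tree decl = ix86_builtin_tm_load (vectype);

   returns the decl registered for BUILT_IN_TM_LOAD_M128, i.e. the
   __builtin__ITM_RM128 entry in bdesc_tm above, while
   ix86_builtin_tm_store (vectype) yields the matching
   BUILT_IN_TM_STORE_M128 decl.  */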
33727
33728/* Initialize the transactional memory vector load/store builtins.  */
33729
33730static void
33731ix86_init_tm_builtins (void)
33732{
33733  enum ix86_builtin_func_type ftype;
33734  const struct builtin_description *d;
33735  size_t i;
33736  tree decl;
33737  tree attrs_load, attrs_type_load, attrs_store, attrs_type_store;
33738  tree attrs_log, attrs_type_log;
33739
33740  if (!flag_tm)
33741    return;
33742
33743  /* If there are no builtins defined, we must be compiling in a
33744     language without trans-mem support.  */
33745  if (!builtin_decl_explicit_p (BUILT_IN_TM_LOAD_1))
33746    return;
33747
33748  /* Use whatever attributes a normal TM load has.  */
33749  decl = builtin_decl_explicit (BUILT_IN_TM_LOAD_1);
33750  attrs_load = DECL_ATTRIBUTES (decl);
33751  attrs_type_load = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33752  /* Use whatever attributes a normal TM store has.  */
33753  decl = builtin_decl_explicit (BUILT_IN_TM_STORE_1);
33754  attrs_store = DECL_ATTRIBUTES (decl);
33755  attrs_type_store = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33756  /* Use whatever attributes a normal TM log has.  */
33757  decl = builtin_decl_explicit (BUILT_IN_TM_LOG);
33758  attrs_log = DECL_ATTRIBUTES (decl);
33759  attrs_type_log = TYPE_ATTRIBUTES (TREE_TYPE (decl));
33760
33761  for (i = 0, d = bdesc_tm;
33762       i < ARRAY_SIZE (bdesc_tm);
33763       i++, d++)
33764    {
33765      if ((d->mask & ix86_isa_flags) != 0
33766	  || (lang_hooks.builtin_function
33767	      == lang_hooks.builtin_function_ext_scope))
33768	{
33769	  tree type, attrs, attrs_type;
33770	  enum built_in_function code = (enum built_in_function) d->code;
33771
33772	  ftype = (enum ix86_builtin_func_type) d->flag;
33773	  type = ix86_get_builtin_func_type (ftype);
33774
33775	  if (BUILTIN_TM_LOAD_P (code))
33776	    {
33777	      attrs = attrs_load;
33778	      attrs_type = attrs_type_load;
33779	    }
33780	  else if (BUILTIN_TM_STORE_P (code))
33781	    {
33782	      attrs = attrs_store;
33783	      attrs_type = attrs_type_store;
33784	    }
33785	  else
33786	    {
33787	      attrs = attrs_log;
33788	      attrs_type = attrs_type_log;
33789	    }
33790	  decl = add_builtin_function (d->name, type, code, BUILT_IN_NORMAL,
33791				       /* The builtin without the prefix for
33792					  calling it directly.  */
33793				       d->name + strlen ("__builtin_"),
33794				       attrs);
33795	  /* add_builtin_function() will set the DECL_ATTRIBUTES, now
33796	     set the TYPE_ATTRIBUTES.  */
33797	  decl_attributes (&TREE_TYPE (decl), attrs_type, ATTR_FLAG_BUILT_IN);
33798
33799	  set_builtin_decl (code, decl, false);
33800	}
33801    }
33802}
33803
33804/* Set up all the MMX/SSE builtins, including builtins for instructions that
33805   are not in the current target ISA, so that the user can compile particular
33806   modules with target-specific options that differ from the command-line
33807   options.  */
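/* For instance (an illustrative sketch, not code from this file): with
   per-function target options a single translation unit may mix ISAs, so the
   AVX2 builtins must already exist even when the unit as a whole is compiled
   without -mavx2.  With <immintrin.h> included:

     __attribute__((target ("avx2")))
     __m256i add_epi32 (__m256i a, __m256i b)   // hypothetical helper
     {
       return _mm256_add_epi32 (a, b);
     }
*/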
33808static void
33809ix86_init_mmx_sse_builtins (void)
33810{
33811  const struct builtin_description * d;
33812  enum ix86_builtin_func_type ftype;
33813  size_t i;
33814
33815  /* Add all special builtins with variable number of operands.  */
33816  for (i = 0, d = bdesc_special_args;
33817       i < ARRAY_SIZE (bdesc_special_args);
33818       i++, d++)
33819    {
33820      if (d->name == 0)
33821	continue;
33822
33823      ftype = (enum ix86_builtin_func_type) d->flag;
33824      def_builtin (d->mask, d->name, ftype, d->code);
33825    }
33826
33827  /* Add all builtins with variable number of operands.  */
33828  for (i = 0, d = bdesc_args;
33829       i < ARRAY_SIZE (bdesc_args);
33830       i++, d++)
33831    {
33832      if (d->name == 0)
33833	continue;
33834
33835      ftype = (enum ix86_builtin_func_type) d->flag;
33836      def_builtin_const (d->mask, d->name, ftype, d->code);
33837    }
33838
33839  /* Add all builtins with rounding.  */
33840  for (i = 0, d = bdesc_round_args;
33841       i < ARRAY_SIZE (bdesc_round_args);
33842       i++, d++)
33843    {
33844      if (d->name == 0)
33845	continue;
33846
33847      ftype = (enum ix86_builtin_func_type) d->flag;
33848      def_builtin_const (d->mask, d->name, ftype, d->code);
33849    }
33850
33851  /* pcmpestr[im] insns.  */
33852  for (i = 0, d = bdesc_pcmpestr;
33853       i < ARRAY_SIZE (bdesc_pcmpestr);
33854       i++, d++)
33855    {
33856      if (d->code == IX86_BUILTIN_PCMPESTRM128)
33857	ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
33858      else
33859	ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
33860      def_builtin_const (d->mask, d->name, ftype, d->code);
33861    }
33862
33863  /* pcmpistr[im] insns.  */
33864  for (i = 0, d = bdesc_pcmpistr;
33865       i < ARRAY_SIZE (bdesc_pcmpistr);
33866       i++, d++)
33867    {
33868      if (d->code == IX86_BUILTIN_PCMPISTRM128)
33869	ftype = V16QI_FTYPE_V16QI_V16QI_INT;
33870      else
33871	ftype = INT_FTYPE_V16QI_V16QI_INT;
33872      def_builtin_const (d->mask, d->name, ftype, d->code);
33873    }
33874
33875  /* comi/ucomi insns.  */
33876  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
33877    {
33878      if (d->mask == OPTION_MASK_ISA_SSE2)
33879	ftype = INT_FTYPE_V2DF_V2DF;
33880      else
33881	ftype = INT_FTYPE_V4SF_V4SF;
33882      def_builtin_const (d->mask, d->name, ftype, d->code);
33883    }
33884
33885  /* SSE */
33886  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
33887	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
33888  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
33889	       UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
33890
33891  /* SSE or 3DNow!A */
33892  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
33893	       "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
33894	       IX86_BUILTIN_MASKMOVQ);
33895
33896  /* SSE2 */
33897  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
33898	       VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
33899
33900  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
33901	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
33902  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
33903			    VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
33904
33905  /* SSE3.  */
33906  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
33907	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
33908  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
33909	       VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
33910
33911  /* AES */
33912  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
33913		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
33914  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
33915		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
33916  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
33917		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
33918  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
33919		     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
33920  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
33921		     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
33922  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
33923		     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
33924
33925  /* PCLMUL */
33926  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
33927		     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
33928
33929  /* RDRND */
33930  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand16_step",
33931	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDRAND16_STEP);
33932  def_builtin (OPTION_MASK_ISA_RDRND, "__builtin_ia32_rdrand32_step",
33933	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDRAND32_STEP);
33934  def_builtin (OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT,
33935	       "__builtin_ia32_rdrand64_step", INT_FTYPE_PULONGLONG,
33936	       IX86_BUILTIN_RDRAND64_STEP);
33937
33938  /* AVX2 */
33939  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2df",
33940	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_V2DF_INT,
33941	       IX86_BUILTIN_GATHERSIV2DF);
33942
33943  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4df",
33944	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_V4DF_INT,
33945	       IX86_BUILTIN_GATHERSIV4DF);
33946
33947  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2df",
33948	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_V2DF_INT,
33949	       IX86_BUILTIN_GATHERDIV2DF);
33950
33951  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4df",
33952	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_V4DF_INT,
33953	       IX86_BUILTIN_GATHERDIV4DF);
33954
33955  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4sf",
33956	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_V4SF_INT,
33957	       IX86_BUILTIN_GATHERSIV4SF);
33958
33959  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8sf",
33960	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_V8SF_INT,
33961	       IX86_BUILTIN_GATHERSIV8SF);
33962
33963  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf",
33964	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_V4SF_INT,
33965	       IX86_BUILTIN_GATHERDIV4SF);
33966
33967  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4sf256",
33968	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_V4SF_INT,
33969	       IX86_BUILTIN_GATHERDIV8SF);
33970
33971  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv2di",
33972	       V2DI_FTYPE_V2DI_PCINT64_V4SI_V2DI_INT,
33973	       IX86_BUILTIN_GATHERSIV2DI);
33974
33975  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4di",
33976	       V4DI_FTYPE_V4DI_PCINT64_V4SI_V4DI_INT,
33977	       IX86_BUILTIN_GATHERSIV4DI);
33978
33979  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv2di",
33980	       V2DI_FTYPE_V2DI_PCINT64_V2DI_V2DI_INT,
33981	       IX86_BUILTIN_GATHERDIV2DI);
33982
33983  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4di",
33984	       V4DI_FTYPE_V4DI_PCINT64_V4DI_V4DI_INT,
33985	       IX86_BUILTIN_GATHERDIV4DI);
33986
33987  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv4si",
33988	       V4SI_FTYPE_V4SI_PCINT_V4SI_V4SI_INT,
33989	       IX86_BUILTIN_GATHERSIV4SI);
33990
33991  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gathersiv8si",
33992	       V8SI_FTYPE_V8SI_PCINT_V8SI_V8SI_INT,
33993	       IX86_BUILTIN_GATHERSIV8SI);
33994
33995  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si",
33996	       V4SI_FTYPE_V4SI_PCINT_V2DI_V4SI_INT,
33997	       IX86_BUILTIN_GATHERDIV4SI);
33998
33999  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatherdiv4si256",
34000	       V4SI_FTYPE_V4SI_PCINT_V4DI_V4SI_INT,
34001	       IX86_BUILTIN_GATHERDIV8SI);
34002
34003  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4df ",
34004	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_V4DF_INT,
34005	       IX86_BUILTIN_GATHERALTSIV4DF);
34006
34007  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4sf256 ",
34008	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_V8SF_INT,
34009	       IX86_BUILTIN_GATHERALTDIV8SF);
34010
34011  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltsiv4di ",
34012	       V4DI_FTYPE_V4DI_PCINT64_V8SI_V4DI_INT,
34013	       IX86_BUILTIN_GATHERALTSIV4DI);
34014
34015  def_builtin (OPTION_MASK_ISA_AVX2, "__builtin_ia32_gatheraltdiv4si256 ",
34016	       V8SI_FTYPE_V8SI_PCINT_V4DI_V8SI_INT,
34017	       IX86_BUILTIN_GATHERALTDIV8SI);
34018
34019  /* AVX512F */
34020  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16sf",
34021	       V16SF_FTYPE_V16SF_PCFLOAT_V16SI_HI_INT,
34022	       IX86_BUILTIN_GATHER3SIV16SF);
34023
34024  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8df",
34025	       V8DF_FTYPE_V8DF_PCDOUBLE_V8SI_QI_INT,
34026	       IX86_BUILTIN_GATHER3SIV8DF);
34027
34028  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16sf",
34029	       V8SF_FTYPE_V8SF_PCFLOAT_V8DI_QI_INT,
34030	       IX86_BUILTIN_GATHER3DIV16SF);
34031
34032  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8df",
34033	       V8DF_FTYPE_V8DF_PCDOUBLE_V8DI_QI_INT,
34034	       IX86_BUILTIN_GATHER3DIV8DF);
34035
34036  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv16si",
34037	       V16SI_FTYPE_V16SI_PCINT_V16SI_HI_INT,
34038	       IX86_BUILTIN_GATHER3SIV16SI);
34039
34040  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gathersiv8di",
34041	       V8DI_FTYPE_V8DI_PCINT64_V8SI_QI_INT,
34042	       IX86_BUILTIN_GATHER3SIV8DI);
34043
34044  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv16si",
34045	       V8SI_FTYPE_V8SI_PCINT_V8DI_QI_INT,
34046	       IX86_BUILTIN_GATHER3DIV16SI);
34047
34048  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatherdiv8di",
34049	       V8DI_FTYPE_V8DI_PCINT64_V8DI_QI_INT,
34050	       IX86_BUILTIN_GATHER3DIV8DI);
34051
34052  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8df ",
34053	       V8DF_FTYPE_V8DF_PCDOUBLE_V16SI_QI_INT,
34054	       IX86_BUILTIN_GATHER3ALTSIV8DF);
34055
34056  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8sf ",
34057	       V16SF_FTYPE_V16SF_PCFLOAT_V8DI_HI_INT,
34058	       IX86_BUILTIN_GATHER3ALTDIV16SF);
34059
34060  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltsiv8di ",
34061	       V8DI_FTYPE_V8DI_PCINT64_V16SI_QI_INT,
34062	       IX86_BUILTIN_GATHER3ALTSIV8DI);
34063
34064  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_gatheraltdiv8si ",
34065	       V16SI_FTYPE_V16SI_PCINT_V8DI_HI_INT,
34066	       IX86_BUILTIN_GATHER3ALTDIV16SI);
34067
34068  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16sf",
34069	       VOID_FTYPE_PFLOAT_HI_V16SI_V16SF_INT,
34070	       IX86_BUILTIN_SCATTERSIV16SF);
34071
34072  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8df",
34073	       VOID_FTYPE_PDOUBLE_QI_V8SI_V8DF_INT,
34074	       IX86_BUILTIN_SCATTERSIV8DF);
34075
34076  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16sf",
34077	       VOID_FTYPE_PFLOAT_QI_V8DI_V8SF_INT,
34078	       IX86_BUILTIN_SCATTERDIV16SF);
34079
34080  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8df",
34081	       VOID_FTYPE_PDOUBLE_QI_V8DI_V8DF_INT,
34082	       IX86_BUILTIN_SCATTERDIV8DF);
34083
34084  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv16si",
34085	       VOID_FTYPE_PINT_HI_V16SI_V16SI_INT,
34086	       IX86_BUILTIN_SCATTERSIV16SI);
34087
34088  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scattersiv8di",
34089	       VOID_FTYPE_PLONGLONG_QI_V8SI_V8DI_INT,
34090	       IX86_BUILTIN_SCATTERSIV8DI);
34091
34092  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv16si",
34093	       VOID_FTYPE_PINT_QI_V8DI_V8SI_INT,
34094	       IX86_BUILTIN_SCATTERDIV16SI);
34095
34096  def_builtin (OPTION_MASK_ISA_AVX512F, "__builtin_ia32_scatterdiv8di",
34097	       VOID_FTYPE_PLONGLONG_QI_V8DI_V8DI_INT,
34098	       IX86_BUILTIN_SCATTERDIV8DI);
34099
34100  /* AVX512VL */
34101  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2df",
34102	       V2DF_FTYPE_V2DF_PCDOUBLE_V4SI_QI_INT,
34103	       IX86_BUILTIN_GATHER3SIV2DF);
34104
34105  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4df",
34106	       V4DF_FTYPE_V4DF_PCDOUBLE_V4SI_QI_INT,
34107	       IX86_BUILTIN_GATHER3SIV4DF);
34108
34109  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2df",
34110	       V2DF_FTYPE_V2DF_PCDOUBLE_V2DI_QI_INT,
34111	       IX86_BUILTIN_GATHER3DIV2DF);
34112
34113  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4df",
34114	       V4DF_FTYPE_V4DF_PCDOUBLE_V4DI_QI_INT,
34115	       IX86_BUILTIN_GATHER3DIV4DF);
34116
34117  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4sf",
34118	       V4SF_FTYPE_V4SF_PCFLOAT_V4SI_QI_INT,
34119	       IX86_BUILTIN_GATHER3SIV4SF);
34120
34121  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8sf",
34122	       V8SF_FTYPE_V8SF_PCFLOAT_V8SI_QI_INT,
34123	       IX86_BUILTIN_GATHER3SIV8SF);
34124
34125  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4sf",
34126	       V4SF_FTYPE_V4SF_PCFLOAT_V2DI_QI_INT,
34127	       IX86_BUILTIN_GATHER3DIV4SF);
34128
34129  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8sf",
34130	       V4SF_FTYPE_V4SF_PCFLOAT_V4DI_QI_INT,
34131	       IX86_BUILTIN_GATHER3DIV8SF);
34132
34133  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv2di",
34134	       V2DI_FTYPE_V2DI_PCINT64_V4SI_QI_INT,
34135	       IX86_BUILTIN_GATHER3SIV2DI);
34136
34137  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4di",
34138	       V4DI_FTYPE_V4DI_PCINT64_V4SI_QI_INT,
34139	       IX86_BUILTIN_GATHER3SIV4DI);
34140
34141  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div2di",
34142	       V2DI_FTYPE_V2DI_PCINT64_V2DI_QI_INT,
34143	       IX86_BUILTIN_GATHER3DIV2DI);
34144
34145  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4di",
34146	       V4DI_FTYPE_V4DI_PCINT64_V4DI_QI_INT,
34147	       IX86_BUILTIN_GATHER3DIV4DI);
34148
34149  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv4si",
34150	       V4SI_FTYPE_V4SI_PCINT_V4SI_QI_INT,
34151	       IX86_BUILTIN_GATHER3SIV4SI);
34152
34153  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3siv8si",
34154	       V8SI_FTYPE_V8SI_PCINT_V8SI_QI_INT,
34155	       IX86_BUILTIN_GATHER3SIV8SI);
34156
34157  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div4si",
34158	       V4SI_FTYPE_V4SI_PCINT_V2DI_QI_INT,
34159	       IX86_BUILTIN_GATHER3DIV4SI);
34160
34161  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3div8si",
34162	       V4SI_FTYPE_V4SI_PCINT_V4DI_QI_INT,
34163	       IX86_BUILTIN_GATHER3DIV8SI);
34164
34165  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4df ",
34166	       V4DF_FTYPE_V4DF_PCDOUBLE_V8SI_QI_INT,
34167	       IX86_BUILTIN_GATHER3ALTSIV4DF);
34168
34169  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8sf ",
34170	       V8SF_FTYPE_V8SF_PCFLOAT_V4DI_QI_INT,
34171	       IX86_BUILTIN_GATHER3ALTDIV8SF);
34172
34173  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altsiv4di ",
34174	       V4DI_FTYPE_V4DI_PCINT64_V8SI_QI_INT,
34175	       IX86_BUILTIN_GATHER3ALTSIV4DI);
34176
34177  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_gather3altdiv8si ",
34178	       V8SI_FTYPE_V8SI_PCINT_V4DI_QI_INT,
34179	       IX86_BUILTIN_GATHER3ALTDIV8SI);
34180
34181  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8sf",
34182	       VOID_FTYPE_PFLOAT_QI_V8SI_V8SF_INT,
34183	       IX86_BUILTIN_SCATTERSIV8SF);
34184
34185  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4sf",
34186	       VOID_FTYPE_PFLOAT_QI_V4SI_V4SF_INT,
34187	       IX86_BUILTIN_SCATTERSIV4SF);
34188
34189  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4df",
34190	       VOID_FTYPE_PDOUBLE_QI_V4SI_V4DF_INT,
34191	       IX86_BUILTIN_SCATTERSIV4DF);
34192
34193  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2df",
34194	       VOID_FTYPE_PDOUBLE_QI_V4SI_V2DF_INT,
34195	       IX86_BUILTIN_SCATTERSIV2DF);
34196
34197  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8sf",
34198	       VOID_FTYPE_PFLOAT_QI_V4DI_V4SF_INT,
34199	       IX86_BUILTIN_SCATTERDIV8SF);
34200
34201  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4sf",
34202	       VOID_FTYPE_PFLOAT_QI_V2DI_V4SF_INT,
34203	       IX86_BUILTIN_SCATTERDIV4SF);
34204
34205  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4df",
34206	       VOID_FTYPE_PDOUBLE_QI_V4DI_V4DF_INT,
34207	       IX86_BUILTIN_SCATTERDIV4DF);
34208
34209  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2df",
34210	       VOID_FTYPE_PDOUBLE_QI_V2DI_V2DF_INT,
34211	       IX86_BUILTIN_SCATTERDIV2DF);
34212
34213  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv8si",
34214	       VOID_FTYPE_PINT_QI_V8SI_V8SI_INT,
34215	       IX86_BUILTIN_SCATTERSIV8SI);
34216
34217  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4si",
34218	       VOID_FTYPE_PINT_QI_V4SI_V4SI_INT,
34219	       IX86_BUILTIN_SCATTERSIV4SI);
34220
34221  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv4di",
34222	       VOID_FTYPE_PLONGLONG_QI_V4SI_V4DI_INT,
34223	       IX86_BUILTIN_SCATTERSIV4DI);
34224
34225  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scattersiv2di",
34226	       VOID_FTYPE_PLONGLONG_QI_V4SI_V2DI_INT,
34227	       IX86_BUILTIN_SCATTERSIV2DI);
34228
34229  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv8si",
34230	       VOID_FTYPE_PINT_QI_V4DI_V4SI_INT,
34231	       IX86_BUILTIN_SCATTERDIV8SI);
34232
34233  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4si",
34234	       VOID_FTYPE_PINT_QI_V2DI_V4SI_INT,
34235	       IX86_BUILTIN_SCATTERDIV4SI);
34236
34237  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv4di",
34238	       VOID_FTYPE_PLONGLONG_QI_V4DI_V4DI_INT,
34239	       IX86_BUILTIN_SCATTERDIV4DI);
34240
34241  def_builtin (OPTION_MASK_ISA_AVX512VL, "__builtin_ia32_scatterdiv2di",
34242	       VOID_FTYPE_PLONGLONG_QI_V2DI_V2DI_INT,
34243	       IX86_BUILTIN_SCATTERDIV2DI);
34244
34245  /* AVX512PF */
34246  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdpd",
34247	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34248	       IX86_BUILTIN_GATHERPFDPD);
34249  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfdps",
34250	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34251	       IX86_BUILTIN_GATHERPFDPS);
34252  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqpd",
34253	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34254	       IX86_BUILTIN_GATHERPFQPD);
34255  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_gatherpfqps",
34256	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34257	       IX86_BUILTIN_GATHERPFQPS);
34258  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdpd",
34259	       VOID_FTYPE_QI_V8SI_PCINT64_INT_INT,
34260	       IX86_BUILTIN_SCATTERPFDPD);
34261  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfdps",
34262	       VOID_FTYPE_HI_V16SI_PCINT_INT_INT,
34263	       IX86_BUILTIN_SCATTERPFDPS);
34264  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqpd",
34265	       VOID_FTYPE_QI_V8DI_PCINT64_INT_INT,
34266	       IX86_BUILTIN_SCATTERPFQPD);
34267  def_builtin (OPTION_MASK_ISA_AVX512PF, "__builtin_ia32_scatterpfqps",
34268	       VOID_FTYPE_QI_V8DI_PCINT_INT_INT,
34269	       IX86_BUILTIN_SCATTERPFQPS);
34270
34271  /* SHA */
34272  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg1",
34273		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG1);
34274  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1msg2",
34275		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1MSG2);
34276  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1nexte",
34277		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA1NEXTE);
34278  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha1rnds4",
34279		     V4SI_FTYPE_V4SI_V4SI_INT, IX86_BUILTIN_SHA1RNDS4);
34280  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg1",
34281		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG1);
34282  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256msg2",
34283		     V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_SHA256MSG2);
34284  def_builtin_const (OPTION_MASK_ISA_SHA, "__builtin_ia32_sha256rnds2",
34285		     V4SI_FTYPE_V4SI_V4SI_V4SI, IX86_BUILTIN_SHA256RNDS2);
34286
34287  /* RTM.  */
34288  def_builtin (OPTION_MASK_ISA_RTM, "__builtin_ia32_xabort",
34289	       VOID_FTYPE_UNSIGNED, IX86_BUILTIN_XABORT);
34290
34291  /* MMX access to the vec_init patterns.  */
34292  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
34293		     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
34294
34295  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
34296		     V4HI_FTYPE_HI_HI_HI_HI,
34297		     IX86_BUILTIN_VEC_INIT_V4HI);
34298
34299  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
34300		     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
34301		     IX86_BUILTIN_VEC_INIT_V8QI);
34302
34303  /* Access to the vec_extract patterns.  */
34304  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
34305		     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
34306  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
34307		     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
34308  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
34309		     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
34310  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
34311		     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
34312  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
34313		     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
34314
34315  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34316		     "__builtin_ia32_vec_ext_v4hi",
34317		     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
34318
34319  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
34320		     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
34321
34322  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
34323		     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
34324
34325  /* Access to the vec_set patterns.  */
34326  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
34327		     "__builtin_ia32_vec_set_v2di",
34328		     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
34329
34330  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
34331		     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
34332
34333  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
34334		     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
34335
34336  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
34337		     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
34338
34339  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
34340		     "__builtin_ia32_vec_set_v4hi",
34341		     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
34342
34343  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
34344		     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
34345
34346  /* RDSEED */
34347  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_hi_step",
34348	       INT_FTYPE_PUSHORT, IX86_BUILTIN_RDSEED16_STEP);
34349  def_builtin (OPTION_MASK_ISA_RDSEED, "__builtin_ia32_rdseed_si_step",
34350	       INT_FTYPE_PUNSIGNED, IX86_BUILTIN_RDSEED32_STEP);
34351  def_builtin (OPTION_MASK_ISA_RDSEED | OPTION_MASK_ISA_64BIT,
34352	       "__builtin_ia32_rdseed_di_step",
34353	       INT_FTYPE_PULONGLONG, IX86_BUILTIN_RDSEED64_STEP);
34354
34355  /* ADCX */
34356  def_builtin (0, "__builtin_ia32_addcarryx_u32",
34357	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_ADDCARRYX32);
34358  def_builtin (OPTION_MASK_ISA_64BIT,
34359	       "__builtin_ia32_addcarryx_u64",
34360	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34361	       IX86_BUILTIN_ADDCARRYX64);
34362
34363  /* SBB */
34364  def_builtin (0, "__builtin_ia32_sbb_u32",
34365	       UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED, IX86_BUILTIN_SBB32);
34366  def_builtin (OPTION_MASK_ISA_64BIT,
34367	       "__builtin_ia32_sbb_u64",
34368	       UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG,
34369	       IX86_BUILTIN_SBB64);
34370
34371  /* Read/write FLAGS.  */
34372  def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u32",
34373               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34374  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_readeflags_u64",
34375               UINT64_FTYPE_VOID, IX86_BUILTIN_READ_FLAGS);
34376  def_builtin (~OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u32",
34377               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_WRITE_FLAGS);
34378  def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_ia32_writeeflags_u64",
34379               VOID_FTYPE_UINT64, IX86_BUILTIN_WRITE_FLAGS);
34380
34381  /* CLFLUSHOPT.  */
34382  def_builtin (OPTION_MASK_ISA_CLFLUSHOPT, "__builtin_ia32_clflushopt",
34383	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSHOPT);
34384
34385  /* CLWB.  */
34386  def_builtin (OPTION_MASK_ISA_CLWB, "__builtin_ia32_clwb",
34387	       VOID_FTYPE_PCVOID, IX86_BUILTIN_CLWB);
34388
34389  /* MONITORX and MWAITX.  */
34390  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_monitorx",
34391	       VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITORX);
34392  def_builtin (OPTION_MASK_ISA_MWAITX, "__builtin_ia32_mwaitx",
34393	       VOID_FTYPE_UNSIGNED_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAITX);
34394
34395  /* Add FMA4 multi-arg argument instructions */
34396  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
34397    {
34398      if (d->name == 0)
34399	continue;
34400
34401      ftype = (enum ix86_builtin_func_type) d->flag;
34402      def_builtin_const (d->mask, d->name, ftype, d->code);
34403    }
34404}
34405
34406static void
34407ix86_init_mpx_builtins ()
34408{
34409  const struct builtin_description * d;
34410  enum ix86_builtin_func_type ftype;
34411  tree decl;
34412  size_t i;
34413
34414  for (i = 0, d = bdesc_mpx;
34415       i < ARRAY_SIZE (bdesc_mpx);
34416       i++, d++)
34417    {
34418      if (d->name == 0)
34419	continue;
34420
34421      ftype = (enum ix86_builtin_func_type) d->flag;
34422      decl = def_builtin (d->mask, d->name, ftype, d->code);
34423
      /* Without the leaf and nothrow flags, abnormal edges may
	 follow calls to MPX builtins when setjmp is present in
	 the function.  Since we may have a lot of MPX builtin
	 calls, this would create many useless edges and enormous
	 PHI nodes.  To avoid this we mark MPX builtins as leaf
	 and nothrow.  */
34430      if (decl)
34431	{
34432	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34433						    NULL_TREE);
34434	  TREE_NOTHROW (decl) = 1;
34435	}
34436      else
34437	{
34438	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34439	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
34440	}
34441    }
34442
34443  for (i = 0, d = bdesc_mpx_const;
34444       i < ARRAY_SIZE (bdesc_mpx_const);
34445       i++, d++)
34446    {
34447      if (d->name == 0)
34448	continue;
34449
34450      ftype = (enum ix86_builtin_func_type) d->flag;
34451      decl = def_builtin_const (d->mask, d->name, ftype, d->code);
34452
34453      if (decl)
34454	{
34455	  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
34456						    NULL_TREE);
34457	  TREE_NOTHROW (decl) = 1;
34458	}
34459      else
34460	{
34461	  ix86_builtins_isa[(int)d->code].leaf_p = true;
34462	  ix86_builtins_isa[(int)d->code].nothrow_p = true;
34463	}
34464    }
34465}
34466
34467/* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL
34468   to return a pointer to VERSION_DECL if the outcome of the expression
34469   formed by PREDICATE_CHAIN is true.  This function will be called during
34470   version dispatch to decide which function version to execute.  It returns
34471   the basic block at the end, to which more conditions can be added.  */
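
/* Roughly, for a PREDICATE_CHAIN of { P1 (A1), P2 (A2) } the code added
   to NEW_BB below is equivalent to this sketch (illustrative names):

     c1 = P1 (A1);
     c2 = P2 (A2);
     c1 = MIN_EXPR <c2, c1>;
     if (c1 > 0)
       return (void *) &VERSION_DECL;

   with the false edge of the condition leading to the block that is
   returned.  */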
34472
34473static basic_block
34474add_condition_to_bb (tree function_decl, tree version_decl,
34475		     tree predicate_chain, basic_block new_bb)
34476{
34477  gimple return_stmt;
34478  tree convert_expr, result_var;
34479  gimple convert_stmt;
34480  gimple call_cond_stmt;
34481  gimple if_else_stmt;
34482
34483  basic_block bb1, bb2, bb3;
34484  edge e12, e23;
34485
34486  tree cond_var, and_expr_var = NULL_TREE;
34487  gimple_seq gseq;
34488
34489  tree predicate_decl, predicate_arg;
34490
34491  push_cfun (DECL_STRUCT_FUNCTION (function_decl));
34492
34493  gcc_assert (new_bb != NULL);
34494  gseq = bb_seq (new_bb);
34495
34496
34497  convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
34498	     		 build_fold_addr_expr (version_decl));
34499  result_var = create_tmp_var (ptr_type_node);
34500  convert_stmt = gimple_build_assign (result_var, convert_expr);
34501  return_stmt = gimple_build_return (result_var);
34502
34503  if (predicate_chain == NULL_TREE)
34504    {
34505      gimple_seq_add_stmt (&gseq, convert_stmt);
34506      gimple_seq_add_stmt (&gseq, return_stmt);
34507      set_bb_seq (new_bb, gseq);
34508      gimple_set_bb (convert_stmt, new_bb);
34509      gimple_set_bb (return_stmt, new_bb);
34510      pop_cfun ();
34511      return new_bb;
34512    }
34513
34514  while (predicate_chain != NULL)
34515    {
34516      cond_var = create_tmp_var (integer_type_node);
34517      predicate_decl = TREE_PURPOSE (predicate_chain);
34518      predicate_arg = TREE_VALUE (predicate_chain);
34519      call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
34520      gimple_call_set_lhs (call_cond_stmt, cond_var);
34521
34522      gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
34523      gimple_set_bb (call_cond_stmt, new_bb);
34524      gimple_seq_add_stmt (&gseq, call_cond_stmt);
34525
34526      predicate_chain = TREE_CHAIN (predicate_chain);
34527
34528      if (and_expr_var == NULL)
34529        and_expr_var = cond_var;
34530      else
34531	{
34532	  gimple assign_stmt;
	  /* Use MIN_EXPR to check whether any integer is zero:
	     and_expr_var = min_expr <cond_var, and_expr_var>.  */
34535	  assign_stmt = gimple_build_assign (and_expr_var,
34536			  build2 (MIN_EXPR, integer_type_node,
34537				  cond_var, and_expr_var));
34538
34539	  gimple_set_block (assign_stmt, DECL_INITIAL (function_decl));
34540	  gimple_set_bb (assign_stmt, new_bb);
34541	  gimple_seq_add_stmt (&gseq, assign_stmt);
34542	}
34543    }
34544
34545  if_else_stmt = gimple_build_cond (GT_EXPR, and_expr_var,
34546	  		            integer_zero_node,
34547				    NULL_TREE, NULL_TREE);
34548  gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
34549  gimple_set_bb (if_else_stmt, new_bb);
34550  gimple_seq_add_stmt (&gseq, if_else_stmt);
34551
34552  gimple_seq_add_stmt (&gseq, convert_stmt);
34553  gimple_seq_add_stmt (&gseq, return_stmt);
34554  set_bb_seq (new_bb, gseq);
34555
34556  bb1 = new_bb;
34557  e12 = split_block (bb1, if_else_stmt);
34558  bb2 = e12->dest;
34559  e12->flags &= ~EDGE_FALLTHRU;
34560  e12->flags |= EDGE_TRUE_VALUE;
34561
34562  e23 = split_block (bb2, return_stmt);
34563
34564  gimple_set_bb (convert_stmt, bb2);
34565  gimple_set_bb (return_stmt, bb2);
34566
34567  bb3 = e23->dest;
34568  make_edge (bb1, bb3, EDGE_FALSE_VALUE);
34569
34570  remove_edge (e23);
34571  make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
34572
34573  pop_cfun ();
34574
34575  return bb3;
34576}
34577
34578/* This parses the attribute arguments to target in DECL and determines
34579   the right builtin to use to match the platform specification.
34580   It returns the priority value for this version decl.  If PREDICATE_LIST
34581   is not NULL, it stores the list of cpu features that need to be checked
34582   before dispatching this function.  */
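
/* For example, a version declared with target ("sse4.2,avx") would
   (illustratively) get a PREDICATE_LIST checking
   __builtin_cpu_supports ("sse4.2") and __builtin_cpu_supports ("avx"),
   and the returned priority would be P_AVX, the larger of the two.  */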
34583
34584static unsigned int
34585get_builtin_code_for_version (tree decl, tree *predicate_list)
34586{
34587  tree attrs;
34588  struct cl_target_option cur_target;
34589  tree target_node;
34590  struct cl_target_option *new_target;
34591  const char *arg_str = NULL;
34592  const char *attrs_str = NULL;
34593  char *tok_str = NULL;
34594  char *token;
34595
  /* Priority of i386 features; a greater value means higher priority.  This
     is used to decide the order in which function dispatch must happen.  For
     instance, a version specialized for SSE4.2 should be checked for dispatch
     before a version for SSE3, as SSE4.2 implies SSE3.  */
34600  enum feature_priority
34601  {
34602    P_ZERO = 0,
34603    P_MMX,
34604    P_SSE,
34605    P_SSE2,
34606    P_SSE3,
34607    P_SSSE3,
34608    P_PROC_SSSE3,
34609    P_SSE4_A,
34610    P_PROC_SSE4_A,
34611    P_SSE4_1,
34612    P_SSE4_2,
34613    P_PROC_SSE4_2,
34614    P_POPCNT,
34615    P_AVX,
34616    P_PROC_AVX,
34617    P_BMI,
34618    P_PROC_BMI,
34619    P_FMA4,
34620    P_XOP,
34621    P_PROC_XOP,
34622    P_FMA,
34623    P_PROC_FMA,
34624    P_BMI2,
34625    P_AVX2,
34626    P_PROC_AVX2,
34627    P_AVX512F,
34628    P_PROC_AVX512F
34629  };
34630
  enum feature_priority priority = P_ZERO;
34632
34633  /* These are the target attribute strings for which a dispatcher is
34634     available, from fold_builtin_cpu.  */
34635
34636  static struct _feature_list
34637    {
34638      const char *const name;
34639      const enum feature_priority priority;
34640    }
34641  const feature_list[] =
34642    {
34643      {"mmx", P_MMX},
34644      {"sse", P_SSE},
34645      {"sse2", P_SSE2},
34646      {"sse3", P_SSE3},
34647      {"sse4a", P_SSE4_A},
34648      {"ssse3", P_SSSE3},
34649      {"sse4.1", P_SSE4_1},
34650      {"sse4.2", P_SSE4_2},
34651      {"popcnt", P_POPCNT},
34652      {"avx", P_AVX},
34653      {"bmi", P_BMI},
34654      {"fma4", P_FMA4},
34655      {"xop", P_XOP},
34656      {"fma", P_FMA},
34657      {"bmi2", P_BMI2},
34658      {"avx2", P_AVX2},
34659      {"avx512f", P_AVX512F}
34660    };
34661
34662
34663  static unsigned int NUM_FEATURES
34664    = sizeof (feature_list) / sizeof (struct _feature_list);
34665
34666  unsigned int i;
34667
34668  tree predicate_chain = NULL_TREE;
34669  tree predicate_decl, predicate_arg;
34670
34671  attrs = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
34672  gcc_assert (attrs != NULL);
34673
34674  attrs = TREE_VALUE (TREE_VALUE (attrs));
34675
34676  gcc_assert (TREE_CODE (attrs) == STRING_CST);
34677  attrs_str = TREE_STRING_POINTER (attrs);
34678
34679  /* Return priority zero for default function.  */
34680  if (strcmp (attrs_str, "default") == 0)
34681    return 0;
34682
34683  /* Handle arch= if specified.  For priority, set it to be 1 more than
34684     the best instruction set the processor can handle.  For instance, if
34685     there is a version for atom and a version for ssse3 (the highest ISA
34686     priority for atom), the atom version must be checked for dispatch
34687     before the ssse3 version. */
34688  if (strstr (attrs_str, "arch=") != NULL)
34689    {
34690      cl_target_option_save (&cur_target, &global_options);
34691      target_node = ix86_valid_target_attribute_tree (attrs, &global_options,
34692						      &global_options_set);
34693
34694      gcc_assert (target_node);
34695      new_target = TREE_TARGET_OPTION (target_node);
34696      gcc_assert (new_target);
34697
34698      if (new_target->arch_specified && new_target->arch > 0)
34699	{
34700	  switch (new_target->arch)
34701	    {
34702	    case PROCESSOR_CORE2:
34703	      arg_str = "core2";
34704	      priority = P_PROC_SSSE3;
34705	      break;
34706	    case PROCESSOR_NEHALEM:
34707	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_AES)
34708		arg_str = "westmere";
34709	      else
34710		/* We translate "arch=corei7" and "arch=nehalem" to
34711		   "corei7" so that it will be mapped to M_INTEL_COREI7
34712		   as cpu type to cover all M_INTEL_COREI7_XXXs.  */
34713		arg_str = "corei7";
34714	      priority = P_PROC_SSE4_2;
34715	      break;
34716	    case PROCESSOR_SANDYBRIDGE:
34717	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_F16C)
34718		arg_str = "ivybridge";
34719	      else
34720		arg_str = "sandybridge";
34721	      priority = P_PROC_AVX;
34722	      break;
34723	    case PROCESSOR_HASWELL:
34724	      if (new_target->x_ix86_isa_flags & OPTION_MASK_ISA_ADX)
34725		arg_str = "broadwell";
34726	      else
34727		arg_str = "haswell";
34728	      priority = P_PROC_AVX2;
34729	      break;
34730	    case PROCESSOR_BONNELL:
34731	      arg_str = "bonnell";
34732	      priority = P_PROC_SSSE3;
34733	      break;
34734	    case PROCESSOR_KNL:
34735	      arg_str = "knl";
34736	      priority = P_PROC_AVX512F;
34737	      break;
34738	    case PROCESSOR_SILVERMONT:
34739	      arg_str = "silvermont";
34740	      priority = P_PROC_SSE4_2;
34741	      break;
34742	    case PROCESSOR_AMDFAM10:
34743	      arg_str = "amdfam10h";
34744	      priority = P_PROC_SSE4_A;
34745	      break;
34746	    case PROCESSOR_BTVER1:
34747	      arg_str = "btver1";
34748	      priority = P_PROC_SSE4_A;
34749	      break;
34750	    case PROCESSOR_BTVER2:
34751	      arg_str = "btver2";
34752	      priority = P_PROC_BMI;
34753	      break;
34754	    case PROCESSOR_BDVER1:
34755	      arg_str = "bdver1";
34756	      priority = P_PROC_XOP;
34757	      break;
34758	    case PROCESSOR_BDVER2:
34759	      arg_str = "bdver2";
34760	      priority = P_PROC_FMA;
34761	      break;
34762	    case PROCESSOR_BDVER3:
34763	      arg_str = "bdver3";
34764	      priority = P_PROC_FMA;
34765	      break;
34766	    case PROCESSOR_BDVER4:
34767	      arg_str = "bdver4";
34768	      priority = P_PROC_AVX2;
34769	      break;
34770	    }
34771	}
34772
34773      cl_target_option_restore (&global_options, &cur_target);
34774
34775      if (predicate_list && arg_str == NULL)
34776	{
34777	  error_at (DECL_SOURCE_LOCATION (decl),
34778	    	"No dispatcher found for the versioning attributes");
34779	  return 0;
34780	}
34781
34782      if (predicate_list)
34783	{
34784          predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_IS];
34785          /* For a C string literal the length includes the trailing NULL.  */
34786          predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
34787          predicate_chain = tree_cons (predicate_decl, predicate_arg,
34788				       predicate_chain);
34789	}
34790    }
34791
34792  /* Process feature name.  */
  tok_str = (char *) xmalloc (strlen (attrs_str) + 1);
34794  strcpy (tok_str, attrs_str);
34795  token = strtok (tok_str, ",");
34796  predicate_decl = ix86_builtins [(int) IX86_BUILTIN_CPU_SUPPORTS];
34797
34798  while (token != NULL)
34799    {
      /* Do not process "arch=".  */
34801      if (strncmp (token, "arch=", 5) == 0)
34802	{
34803	  token = strtok (NULL, ",");
34804	  continue;
34805	}
34806      for (i = 0; i < NUM_FEATURES; ++i)
34807	{
34808	  if (strcmp (token, feature_list[i].name) == 0)
34809	    {
34810	      if (predicate_list)
34811		{
34812		  predicate_arg = build_string_literal (
34813				  strlen (feature_list[i].name) + 1,
34814				  feature_list[i].name);
34815		  predicate_chain = tree_cons (predicate_decl, predicate_arg,
34816					       predicate_chain);
34817		}
34818	      /* Find the maximum priority feature.  */
34819	      if (feature_list[i].priority > priority)
34820		priority = feature_list[i].priority;
34821
34822	      break;
34823	    }
34824	}
34825      if (predicate_list && i == NUM_FEATURES)
34826	{
34827	  error_at (DECL_SOURCE_LOCATION (decl),
34828		    "No dispatcher found for %s", token);
34829	  return 0;
34830	}
34831      token = strtok (NULL, ",");
34832    }
34833  free (tok_str);
34834
34835  if (predicate_list && predicate_chain == NULL_TREE)
34836    {
      error_at (DECL_SOURCE_LOCATION (decl),
	        "No dispatcher found for the versioning attributes: %s",
	        attrs_str);
34840      return 0;
34841    }
34842  else if (predicate_list)
34843    {
34844      predicate_chain = nreverse (predicate_chain);
34845      *predicate_list = predicate_chain;
34846    }
34847
34848  return priority;
34849}
34850
34851/* This compares the priority of target features in function DECL1
34852   and DECL2.  It returns positive value if DECL1 is higher priority,
34853   negative value if DECL2 is higher priority and 0 if they are the
34854   same.  */
34855
34856static int
34857ix86_compare_version_priority (tree decl1, tree decl2)
34858{
34859  unsigned int priority1 = get_builtin_code_for_version (decl1, NULL);
34860  unsigned int priority2 = get_builtin_code_for_version (decl2, NULL);
34861
34862  return (int)priority1 - (int)priority2;
34863}
34864
34865/* V1 and V2 point to function versions with different priorities
34866   based on the target ISA.  This function compares their priorities.  */
34867
34868static int
34869feature_compare (const void *v1, const void *v2)
34870{
34871  typedef struct _function_version_info
34872    {
34873      tree version_decl;
34874      tree predicate_chain;
34875      unsigned int dispatch_priority;
34876    } function_version_info;
34877
34878  const function_version_info c1 = *(const function_version_info *)v1;
34879  const function_version_info c2 = *(const function_version_info *)v2;
34880  return (c2.dispatch_priority - c1.dispatch_priority);
34881}
34882
/* This function generates the dispatch function for
   multi-versioned functions.  DISPATCH_DECL is the function which will
   contain the dispatch logic.  FNDECLS holds the function choices for
   dispatch and is passed as a vector of decls.  EMPTY_BB is the basic
   block pointer in DISPATCH_DECL in which the dispatch code is
   generated.  */
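
/* Roughly, the dispatch code assembled below looks like this sketch
   (pseudo-GIMPLE; version_1 .. version_n are sorted by descending
   dispatch priority):

     __builtin_cpu_init ();
     if (<predicates of version_1 hold>)
       return (void *) &version_1;
     ...
     if (<predicates of version_n hold>)
       return (void *) &version_n;
     return (void *) &default_version;  */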
34888
34889static int
34890dispatch_function_versions (tree dispatch_decl,
34891			    void *fndecls_p,
34892			    basic_block *empty_bb)
34893{
34894  tree default_decl;
34895  gimple ifunc_cpu_init_stmt;
34896  gimple_seq gseq;
34897  int ix;
34898  tree ele;
34899  vec<tree> *fndecls;
34900  unsigned int num_versions = 0;
34901  unsigned int actual_versions = 0;
34902  unsigned int i;
34903
34904  struct _function_version_info
34905    {
34906      tree version_decl;
34907      tree predicate_chain;
34908      unsigned int dispatch_priority;
34909    }*function_version_info;
34910
34911  gcc_assert (dispatch_decl != NULL
34912	      && fndecls_p != NULL
34913	      && empty_bb != NULL);
34914
  /* fndecls_p is actually a vector.  */
34916  fndecls = static_cast<vec<tree> *> (fndecls_p);
34917
34918  /* At least one more version other than the default.  */
34919  num_versions = fndecls->length ();
34920  gcc_assert (num_versions >= 2);
34921
34922  function_version_info = (struct _function_version_info *)
34923    XNEWVEC (struct _function_version_info, (num_versions - 1));
34924
34925  /* The first version in the vector is the default decl.  */
34926  default_decl = (*fndecls)[0];
34927
34928  push_cfun (DECL_STRUCT_FUNCTION (dispatch_decl));
34929
34930  gseq = bb_seq (*empty_bb);
  /* Function version dispatch is via IFUNC.  IFUNC resolvers fire before
     constructors, so explicitly call __builtin_cpu_init here.  */
34933  ifunc_cpu_init_stmt = gimple_build_call_vec (
34934                     ix86_builtins [(int) IX86_BUILTIN_CPU_INIT], vNULL);
34935  gimple_seq_add_stmt (&gseq, ifunc_cpu_init_stmt);
34936  gimple_set_bb (ifunc_cpu_init_stmt, *empty_bb);
34937  set_bb_seq (*empty_bb, gseq);
34938
34939  pop_cfun ();
34940
34941
34942  for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
34943    {
34944      tree version_decl = ele;
34945      tree predicate_chain = NULL_TREE;
34946      unsigned int priority;
34947      /* Get attribute string, parse it and find the right predicate decl.
34948         The predicate function could be a lengthy combination of many
34949	 features, like arch-type and various isa-variants.  */
34950      priority = get_builtin_code_for_version (version_decl,
34951	 			               &predicate_chain);
34952
34953      if (predicate_chain == NULL_TREE)
34954	continue;
34955
34956      function_version_info [actual_versions].version_decl = version_decl;
34957      function_version_info [actual_versions].predicate_chain
34958	 = predicate_chain;
34959      function_version_info [actual_versions].dispatch_priority = priority;
34960      actual_versions++;
34961    }
34962
  /* Sort the versions in descending order of dispatch priority.  The
     priority is based on the ISA.  This is not a perfect solution.  There
     could still be ambiguity.  If more than one function version is suitable
     to execute, which one should be dispatched?  In the future, allow the
     user to specify a dispatch priority next to the version.  */
34968  qsort (function_version_info, actual_versions,
34969         sizeof (struct _function_version_info), feature_compare);
34970
34971  for  (i = 0; i < actual_versions; ++i)
34972    *empty_bb = add_condition_to_bb (dispatch_decl,
34973				     function_version_info[i].version_decl,
34974				     function_version_info[i].predicate_chain,
34975				     *empty_bb);
34976
34977  /* dispatch default version at the end.  */
34978  *empty_bb = add_condition_to_bb (dispatch_decl, default_decl,
34979				   NULL, *empty_bb);
34980
34981  free (function_version_info);
34982  return 0;
34983}
34984
/* Comparator function used by the qsort routine to sort the attribute
   specification strings for "target".  */
34987
34988static int
34989attr_strcmp (const void *v1, const void *v2)
34990{
34991  const char *c1 = *(char *const*)v1;
34992  const char *c2 = *(char *const*)v2;
34993  return strcmp (c1, c2);
34994}
34995
/* ARGLIST is the argument to the target attribute.  This function tokenizes
   the comma separated arguments, sorts them and returns a string which is a
   unique identifier for them.  It also replaces non-identifier characters
   "=,-" with "_".  */
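
/* For example, the (illustrative) argument list of
   target ("sse4.1,arch=core2") would be returned as
   "arch_core2_sse4.1": the tokens are sorted and joined with '_', and
   '=' and '-' are also mapped to '_'.  */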
35000
35001static char *
35002sorted_attr_string (tree arglist)
35003{
35004  tree arg;
35005  size_t str_len_sum = 0;
35006  char **args = NULL;
35007  char *attr_str, *ret_str;
35008  char *attr = NULL;
35009  unsigned int argnum = 1;
35010  unsigned int i;
35011
35012  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35013    {
35014      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35015      size_t len = strlen (str);
35016      str_len_sum += len + 1;
35017      if (arg != arglist)
35018	argnum++;
35019      for (i = 0; i < strlen (str); i++)
35020	if (str[i] == ',')
35021	  argnum++;
35022    }
35023
35024  attr_str = XNEWVEC (char, str_len_sum);
35025  str_len_sum = 0;
35026  for (arg = arglist; arg; arg = TREE_CHAIN (arg))
35027    {
35028      const char *str = TREE_STRING_POINTER (TREE_VALUE (arg));
35029      size_t len = strlen (str);
35030      memcpy (attr_str + str_len_sum, str, len);
35031      attr_str[str_len_sum + len] = TREE_CHAIN (arg) ? ',' : '\0';
35032      str_len_sum += len + 1;
35033    }
35034
35035  /* Replace "=,-" with "_".  */
35036  for (i = 0; i < strlen (attr_str); i++)
35037    if (attr_str[i] == '=' || attr_str[i]== '-')
35038      attr_str[i] = '_';
35039
35040  if (argnum == 1)
35041    return attr_str;
35042
35043  args = XNEWVEC (char *, argnum);
35044
35045  i = 0;
35046  attr = strtok (attr_str, ",");
35047  while (attr != NULL)
35048    {
35049      args[i] = attr;
35050      i++;
35051      attr = strtok (NULL, ",");
35052    }
35053
35054  qsort (args, argnum, sizeof (char *), attr_strcmp);
35055
35056  ret_str = XNEWVEC (char, str_len_sum);
35057  str_len_sum = 0;
35058  for (i = 0; i < argnum; i++)
35059    {
35060      size_t len = strlen (args[i]);
35061      memcpy (ret_str + str_len_sum, args[i], len);
35062      ret_str[str_len_sum + len] = i < argnum - 1 ? '_' : '\0';
35063      str_len_sum += len + 1;
35064    }
35065
35066  XDELETEVEC (args);
35067  XDELETEVEC (attr_str);
35068  return ret_str;
35069}
35070
35071/* This function changes the assembler name for functions that are
35072   versions.  If DECL is a function version and has a "target"
35073   attribute, it appends the attribute string to its assembler name.  */
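
/* For instance, a version of an (illustrative) function foo declared
   with target ("avx") would get the assembler name "foo.avx".  */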
35074
35075static tree
35076ix86_mangle_function_version_assembler_name (tree decl, tree id)
35077{
35078  tree version_attr;
35079  const char *orig_name, *version_string;
35080  char *attr_str, *assembler_name;
35081
35082  if (DECL_DECLARED_INLINE_P (decl)
35083      && lookup_attribute ("gnu_inline",
35084			   DECL_ATTRIBUTES (decl)))
35085    error_at (DECL_SOURCE_LOCATION (decl),
35086	      "Function versions cannot be marked as gnu_inline,"
35087	      " bodies have to be generated");
35088
35089  if (DECL_VIRTUAL_P (decl)
35090      || DECL_VINDEX (decl))
35091    sorry ("Virtual function multiversioning not supported");
35092
35093  version_attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35094
35095  /* target attribute string cannot be NULL.  */
35096  gcc_assert (version_attr != NULL_TREE);
35097
35098  orig_name = IDENTIFIER_POINTER (id);
35099  version_string
35100    = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (version_attr)));
35101
35102  if (strcmp (version_string, "default") == 0)
35103    return id;
35104
35105  attr_str = sorted_attr_string (TREE_VALUE (version_attr));
35106  assembler_name = XNEWVEC (char, strlen (orig_name) + strlen (attr_str) + 2);
35107
35108  sprintf (assembler_name, "%s.%s", orig_name, attr_str);
35109
35110  /* Allow assembler name to be modified if already set.  */
35111  if (DECL_ASSEMBLER_NAME_SET_P (decl))
35112    SET_DECL_RTL (decl, NULL);
35113
35114  tree ret = get_identifier (assembler_name);
35115  XDELETEVEC (attr_str);
35116  XDELETEVEC (assembler_name);
35117  return ret;
35118}
35119
35120/* This function returns true if FN1 and FN2 are versions of the same function,
35121   that is, the target strings of the function decls are different.  This assumes
35122   that FN1 and FN2 have the same signature.  */
35123
35124static bool
35125ix86_function_versions (tree fn1, tree fn2)
35126{
35127  tree attr1, attr2;
35128  char *target1, *target2;
35129  bool result;
35130
35131  if (TREE_CODE (fn1) != FUNCTION_DECL
35132      || TREE_CODE (fn2) != FUNCTION_DECL)
35133    return false;
35134
35135  attr1 = lookup_attribute ("target", DECL_ATTRIBUTES (fn1));
35136  attr2 = lookup_attribute ("target", DECL_ATTRIBUTES (fn2));
35137
35138  /* At least one function decl should have the target attribute specified.  */
35139  if (attr1 == NULL_TREE && attr2 == NULL_TREE)
35140    return false;
35141
35142  /* Diagnose missing target attribute if one of the decls is already
35143     multi-versioned.  */
35144  if (attr1 == NULL_TREE || attr2 == NULL_TREE)
35145    {
35146      if (DECL_FUNCTION_VERSIONED (fn1) || DECL_FUNCTION_VERSIONED (fn2))
35147	{
35148	  if (attr2 != NULL_TREE)
35149	    {
35150	      tree tem = fn1;
35151	      fn1 = fn2;
35152	      fn2 = tem;
35153	      attr1 = attr2;
35154	    }
35155	  error_at (DECL_SOURCE_LOCATION (fn2),
35156		    "missing %<target%> attribute for multi-versioned %D",
35157		    fn2);
35158	  inform (DECL_SOURCE_LOCATION (fn1),
35159		  "previous declaration of %D", fn1);
35160	  /* Prevent diagnosing of the same error multiple times.  */
35161	  DECL_ATTRIBUTES (fn2)
35162	    = tree_cons (get_identifier ("target"),
35163			 copy_node (TREE_VALUE (attr1)),
35164			 DECL_ATTRIBUTES (fn2));
35165	}
35166      return false;
35167    }
35168
35169  target1 = sorted_attr_string (TREE_VALUE (attr1));
35170  target2 = sorted_attr_string (TREE_VALUE (attr2));
35171
35172  /* The sorted target strings must be different for fn1 and fn2
35173     to be versions.  */
35174  if (strcmp (target1, target2) == 0)
35175    result = false;
35176  else
35177    result = true;
35178
35179  XDELETEVEC (target1);
35180  XDELETEVEC (target2);
35181
35182  return result;
35183}
35184
35185static tree
35186ix86_mangle_decl_assembler_name (tree decl, tree id)
35187{
35188  /* For function version, add the target suffix to the assembler name.  */
35189  if (TREE_CODE (decl) == FUNCTION_DECL
35190      && DECL_FUNCTION_VERSIONED (decl))
35191    id = ix86_mangle_function_version_assembler_name (decl, id);
35192#ifdef SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME
35193  id = SUBTARGET_MANGLE_DECL_ASSEMBLER_NAME (decl, id);
35194#endif
35195
35196  return id;
35197}
35198
35199/* Return a new name by appending SUFFIX to the DECL name.  If make_unique
35200   is true, append the full path name of the source file.  */
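
/* E.g. make_name (decl, "ifunc", false) yields "NAME.ifunc", while
   make_name (decl, "resolver", true) yields roughly
   "NAME.<file-unique-string>.resolver" (a sketch of the two forms
   produced below).  */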
35201
35202static char *
35203make_name (tree decl, const char *suffix, bool make_unique)
35204{
35205  char *global_var_name;
35206  int name_len;
35207  const char *name;
35208  const char *unique_name = NULL;
35209
35210  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
35211
35212  /* Get a unique name that can be used globally without any chances
35213     of collision at link time.  */
35214  if (make_unique)
35215    unique_name = IDENTIFIER_POINTER (get_file_function_name ("\0"));
35216
35217  name_len = strlen (name) + strlen (suffix) + 2;
35218
35219  if (make_unique)
35220    name_len += strlen (unique_name) + 1;
35221  global_var_name = XNEWVEC (char, name_len);
35222
35223  /* Use '.' to concatenate names as it is demangler friendly.  */
35224  if (make_unique)
35225    snprintf (global_var_name, name_len, "%s.%s.%s", name, unique_name,
35226	      suffix);
35227  else
35228    snprintf (global_var_name, name_len, "%s.%s", name, suffix);
35229
35230  return global_var_name;
35231}
35232
35233#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35234
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL will be replaced with calls to the dispatcher
   by the front-end.  Return the decl created.  */
35238
35239static tree
35240make_dispatcher_decl (const tree decl)
35241{
35242  tree func_decl;
35243  char *func_name;
35244  tree fn_type, func_type;
35245  bool is_uniq = false;
35246
35247  if (TREE_PUBLIC (decl) == 0)
35248    is_uniq = true;
35249
35250  func_name = make_name (decl, "ifunc", is_uniq);
35251
35252  fn_type = TREE_TYPE (decl);
35253  func_type = build_function_type (TREE_TYPE (fn_type),
35254				   TYPE_ARG_TYPES (fn_type));
35255
35256  func_decl = build_fn_decl (func_name, func_type);
35257  XDELETEVEC (func_name);
35258  TREE_USED (func_decl) = 1;
35259  DECL_CONTEXT (func_decl) = NULL_TREE;
35260  DECL_INITIAL (func_decl) = error_mark_node;
35261  DECL_ARTIFICIAL (func_decl) = 1;
  /* Mark this function as external; the resolver will flip it again if
     it gets generated.  */
35264  DECL_EXTERNAL (func_decl) = 1;
  /* IFUNCs have to be externally visible.  */
35266  TREE_PUBLIC (func_decl) = 1;
35267
35268  return func_decl;
35269}
35270
35271#endif
35272
/* Returns true if DECL is multi-versioned and is the default version,
   that is, its target attribute is "default" rather than a
   target-specific specialization.  */
35275
35276static bool
35277is_function_default_version (const tree decl)
35278{
35279  if (TREE_CODE (decl) != FUNCTION_DECL
35280      || !DECL_FUNCTION_VERSIONED (decl))
35281    return false;
35282  tree attr = lookup_attribute ("target", DECL_ATTRIBUTES (decl));
35283  gcc_assert (attr);
35284  attr = TREE_VALUE (TREE_VALUE (attr));
35285  return (TREE_CODE (attr) == STRING_CST
35286	  && strcmp (TREE_STRING_POINTER (attr), "default") == 0);
35287}
35288
/* Make a dispatcher declaration for the multi-versioned function DECL.
   Calls to DECL will be replaced with calls to the dispatcher
   by the front-end.  Returns the decl of the dispatcher function.  */
35292
35293static tree
35294ix86_get_function_versions_dispatcher (void *decl)
35295{
35296  tree fn = (tree) decl;
35297  struct cgraph_node *node = NULL;
35298  struct cgraph_node *default_node = NULL;
35299  struct cgraph_function_version_info *node_v = NULL;
35300  struct cgraph_function_version_info *first_v = NULL;
35301
35302  tree dispatch_decl = NULL;
35303
35304  struct cgraph_function_version_info *default_version_info = NULL;
35305
35306  gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
35307
35308  node = cgraph_node::get (fn);
35309  gcc_assert (node != NULL);
35310
35311  node_v = node->function_version ();
35312  gcc_assert (node_v != NULL);
35313
35314  if (node_v->dispatcher_resolver != NULL)
35315    return node_v->dispatcher_resolver;
35316
35317  /* Find the default version and make it the first node.  */
35318  first_v = node_v;
35319  /* Go to the beginning of the chain.  */
35320  while (first_v->prev != NULL)
35321    first_v = first_v->prev;
35322  default_version_info = first_v;
35323  while (default_version_info != NULL)
35324    {
35325      if (is_function_default_version
35326	    (default_version_info->this_node->decl))
35327        break;
35328      default_version_info = default_version_info->next;
35329    }
35330
35331  /* If there is no default node, just return NULL.  */
35332  if (default_version_info == NULL)
35333    return NULL;
35334
35335  /* Make default info the first node.  */
35336  if (first_v != default_version_info)
35337    {
35338      default_version_info->prev->next = default_version_info->next;
35339      if (default_version_info->next)
35340        default_version_info->next->prev = default_version_info->prev;
35341      first_v->prev = default_version_info;
35342      default_version_info->next = first_v;
35343      default_version_info->prev = NULL;
35344    }
35345
35346  default_node = default_version_info->this_node;
35347
35348#if defined (ASM_OUTPUT_TYPE_DIRECTIVE)
35349  if (targetm.has_ifunc_p ())
35350    {
35351      struct cgraph_function_version_info *it_v = NULL;
35352      struct cgraph_node *dispatcher_node = NULL;
35353      struct cgraph_function_version_info *dispatcher_version_info = NULL;
35354
35355      /* Right now, the dispatching is done via ifunc.  */
35356      dispatch_decl = make_dispatcher_decl (default_node->decl);
35357
35358      dispatcher_node = cgraph_node::get_create (dispatch_decl);
35359      gcc_assert (dispatcher_node != NULL);
35360      dispatcher_node->dispatcher_function = 1;
35361      dispatcher_version_info
35362	= dispatcher_node->insert_new_function_version ();
35363      dispatcher_version_info->next = default_version_info;
35364      dispatcher_node->definition = 1;
35365
35366      /* Set the dispatcher for all the versions.  */
35367      it_v = default_version_info;
35368      while (it_v != NULL)
35369	{
35370	  it_v->dispatcher_resolver = dispatch_decl;
35371	  it_v = it_v->next;
35372	}
35373    }
35374  else
35375#endif
35376    {
35377      error_at (DECL_SOURCE_LOCATION (default_node->decl),
35378		"multiversioning needs ifunc which is not supported "
35379		"on this target");
35380    }
35381
35382  return dispatch_decl;
35383}
35384
35385/* Makes a function attribute of the form NAME(ARG_NAME) and chains
35386   it to CHAIN.  */
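
/* E.g. make_attribute ("ifunc", "foo.resolver", NULL_TREE) builds the
   attribute list for __attribute__ ((ifunc ("foo.resolver")))
   (illustrative names).  */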
35387
35388static tree
35389make_attribute (const char *name, const char *arg_name, tree chain)
35390{
35391  tree attr_name;
35392  tree attr_arg_name;
35393  tree attr_args;
35394  tree attr;
35395
35396  attr_name = get_identifier (name);
35397  attr_arg_name = build_string (strlen (arg_name), arg_name);
35398  attr_args = tree_cons (NULL_TREE, attr_arg_name, NULL_TREE);
35399  attr = tree_cons (attr_name, attr_args, chain);
35400  return attr;
35401}
35402
/* Make the resolver function decl to dispatch the versions of
   a multi-versioned function, DEFAULT_DECL.  Create an
   empty basic block in the resolver and store the pointer in
   EMPTY_BB.  Return the decl of the resolver function.  */
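
/* For an (illustrative) versioned function "foo", the pieces fit
   together roughly as follows: a resolver "foo.resolver" returning a
   void * is built here and its body is filled in later by
   dispatch_function_versions, while the dispatcher "foo.ifunc" is given
   an ifunc attribute naming "foo.resolver" and aliased to it; a
   file-unique component is inserted into the names when the versions
   are not externally visible.  */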
35407
35408static tree
35409make_resolver_func (const tree default_decl,
35410		    const tree dispatch_decl,
35411		    basic_block *empty_bb)
35412{
35413  char *resolver_name;
35414  tree decl, type, decl_name, t;
35415  bool is_uniq = false;
35416
  /* IFUNCs have to be globally visible.  So, if the default_decl is
     not, then the name of the IFUNC should be made unique.  */
35419  if (TREE_PUBLIC (default_decl) == 0)
35420    is_uniq = true;
35421
35422  /* Append the filename to the resolver function if the versions are
35423     not externally visible.  This is because the resolver function has
35424     to be externally visible for the loader to find it.  So, appending
35425     the filename will prevent conflicts with a resolver function from
35426     another module which is based on the same version name.  */
35427  resolver_name = make_name (default_decl, "resolver", is_uniq);
35428
35429  /* The resolver function should return a (void *). */
35430  type = build_function_type_list (ptr_type_node, NULL_TREE);
35431
35432  decl = build_fn_decl (resolver_name, type);
35433  decl_name = get_identifier (resolver_name);
35434  SET_DECL_ASSEMBLER_NAME (decl, decl_name);
35435
35436  DECL_NAME (decl) = decl_name;
35437  TREE_USED (decl) = 1;
35438  DECL_ARTIFICIAL (decl) = 1;
35439  DECL_IGNORED_P (decl) = 0;
35440  /* IFUNC resolvers have to be externally visible.  */
35441  TREE_PUBLIC (decl) = 1;
35442  DECL_UNINLINABLE (decl) = 1;
35443
35444  /* Resolver is not external, body is generated.  */
35445  DECL_EXTERNAL (decl) = 0;
35446  DECL_EXTERNAL (dispatch_decl) = 0;
35447
35448  DECL_CONTEXT (decl) = NULL_TREE;
35449  DECL_INITIAL (decl) = make_node (BLOCK);
35450  DECL_STATIC_CONSTRUCTOR (decl) = 0;
35451
35452  if (DECL_COMDAT_GROUP (default_decl)
35453      || TREE_PUBLIC (default_decl))
35454    {
35455      /* In this case, each translation unit with a call to this
35456	 versioned function will put out a resolver.  Ensure it
35457	 is comdat to keep just one copy.  */
35458      DECL_COMDAT (decl) = 1;
35459      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
35460    }
35461  /* Build result decl and add to function_decl. */
35462  t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
35463  DECL_ARTIFICIAL (t) = 1;
35464  DECL_IGNORED_P (t) = 1;
35465  DECL_RESULT (decl) = t;
35466
35467  gimplify_function_tree (decl);
35468  push_cfun (DECL_STRUCT_FUNCTION (decl));
35469  *empty_bb = init_lowered_empty_function (decl, false, 0);
35470
35471  cgraph_node::add_new_function (decl, true);
35472  symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
35473
35474  pop_cfun ();
35475
35476  gcc_assert (dispatch_decl != NULL);
35477  /* Mark dispatch_decl as "ifunc" with resolver as resolver_name.  */
35478  DECL_ATTRIBUTES (dispatch_decl)
35479    = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
35480
35481  /* Create the alias for dispatch to resolver here.  */
35482  /*cgraph_create_function_alias (dispatch_decl, decl);*/
35483  cgraph_node::create_same_body_alias (dispatch_decl, decl);
35484  XDELETEVEC (resolver_name);
35485  return decl;
35486}
35487
35488/* Generate the dispatching code body to dispatch multi-versioned function
35489   DECL.  The target hook is called to process the "target" attributes and
35490   provide the code to dispatch the right function at run-time.  NODE points
35491   to the dispatcher decl whose body will be created.  */
35492
35493static tree
35494ix86_generate_version_dispatcher_body (void *node_p)
35495{
35496  tree resolver_decl;
35497  basic_block empty_bb;
35498  tree default_ver_decl;
35499  struct cgraph_node *versn;
35500  struct cgraph_node *node;
35501
35502  struct cgraph_function_version_info *node_version_info = NULL;
35503  struct cgraph_function_version_info *versn_info = NULL;
35504
35505  node = (cgraph_node *)node_p;
35506
35507  node_version_info = node->function_version ();
35508  gcc_assert (node->dispatcher_function
35509	      && node_version_info != NULL);
35510
35511  if (node_version_info->dispatcher_resolver)
35512    return node_version_info->dispatcher_resolver;
35513
35514  /* The first version in the chain corresponds to the default version.  */
35515  default_ver_decl = node_version_info->next->this_node->decl;
35516
35517  /* node is going to be an alias, so remove the finalized bit.  */
35518  node->definition = false;
35519
35520  resolver_decl = make_resolver_func (default_ver_decl,
35521				      node->decl, &empty_bb);
35522
35523  node_version_info->dispatcher_resolver = resolver_decl;
35524
35525  push_cfun (DECL_STRUCT_FUNCTION (resolver_decl));
35526
35527  auto_vec<tree, 2> fn_ver_vec;
35528
35529  for (versn_info = node_version_info->next; versn_info;
35530       versn_info = versn_info->next)
35531    {
35532      versn = versn_info->this_node;
35533      /* Check for virtual functions here again, as by this time it should
35534	 have been determined if this function needs a vtable index or
35535	 not.  This happens for methods in derived classes that override
35536	 virtual methods in base classes but are not explicitly marked as
35537	 virtual.  */
35538      if (DECL_VINDEX (versn->decl))
35539	sorry ("Virtual function multiversioning not supported");
35540
35541      fn_ver_vec.safe_push (versn->decl);
35542    }
35543
35544  dispatch_function_versions (resolver_decl, &fn_ver_vec, &empty_bb);
35545  cgraph_edge::rebuild_edges ();
35546  pop_cfun ();
35547  return resolver_decl;
35548}
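
/* Illustrative only (hypothetical user code, not GCC internals): the
   machinery above supports C++ function multiversioning of the form

     __attribute__ ((target ("default"))) int foo (void) { return 0; }
     __attribute__ ((target ("avx2")))    int foo (void) { return 1; }

   make_resolver_func builds the externally visible IFUNC resolver, and
   ix86_generate_version_dispatcher_body fills it in so that a plain call
   to foo () binds to the best version for the running CPU at load time.  */
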
35549/* This builds the processor_model struct type defined in
35550   libgcc/config/i386/cpuinfo.c.  */
35551
35552static tree
35553build_processor_model_struct (void)
35554{
35555  const char *field_name[] = {"__cpu_vendor", "__cpu_type", "__cpu_subtype",
35556			      "__cpu_features"};
35557  tree field = NULL_TREE, field_chain = NULL_TREE;
35558  int i;
35559  tree type = make_node (RECORD_TYPE);
35560
35561  /* The first 3 fields are unsigned int.  */
35562  for (i = 0; i < 3; ++i)
35563    {
35564      field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35565			  get_identifier (field_name[i]), unsigned_type_node);
35566      if (field_chain != NULL_TREE)
35567	DECL_CHAIN (field) = field_chain;
35568      field_chain = field;
35569    }
35570
35571  /* The last field is an array of unsigned integers of size one.  */
35572  field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
35573		      get_identifier (field_name[3]),
35574		      build_array_type (unsigned_type_node,
35575					build_index_type (size_one_node)));
35576  if (field_chain != NULL_TREE)
35577    DECL_CHAIN (field) = field_chain;
35578  field_chain = field;
35579
35580  finish_builtin_struct (type, "__processor_model", field_chain, NULL_TREE);
35581  return type;
35582}
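
/* For reference, the layout mirrored above is (see
   libgcc/config/i386/cpuinfo.c for the authoritative definition):

     struct __processor_model
     {
       unsigned int __cpu_vendor;
       unsigned int __cpu_type;
       unsigned int __cpu_subtype;
       unsigned int __cpu_features[1];
     };

   The RECORD_TYPE built here must stay layout-compatible with it.  */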
35583
35584/* Returns an extern, comdat VAR_DECL of type TYPE and name NAME.  */
35585
35586static tree
35587make_var_decl (tree type, const char *name)
35588{
35589  tree new_decl;
35590
35591  new_decl = build_decl (UNKNOWN_LOCATION,
35592			 VAR_DECL,
35593			 get_identifier (name),
35594			 type);
35595
35596  DECL_EXTERNAL (new_decl) = 1;
35597  TREE_STATIC (new_decl) = 1;
35598  TREE_PUBLIC (new_decl) = 1;
35599  DECL_INITIAL (new_decl) = 0;
35600  DECL_ARTIFICIAL (new_decl) = 0;
35601  DECL_PRESERVE_P (new_decl) = 1;
35602
35603  make_decl_one_only (new_decl, DECL_ASSEMBLER_NAME (new_decl));
35604  assemble_variable (new_decl, 0, 0, 0);
35605
35606  return new_decl;
35607}
35608
35609/* FNDECL is a __builtin_cpu_is or __builtin_cpu_supports call; fold it into
35610   a check against __cpu_model, defined in libgcc/config/i386/cpuinfo.c.  */
35611
35612static tree
35613fold_builtin_cpu (tree fndecl, tree *args)
35614{
35615  unsigned int i;
35616  enum ix86_builtins fn_code = (enum ix86_builtins)
35617				DECL_FUNCTION_CODE (fndecl);
35618  tree param_string_cst = NULL;
35619
35620  /* This is the order of bit-fields in __processor_features in cpuinfo.c.  */
35621  enum processor_features
35622  {
35623    F_CMOV = 0,
35624    F_MMX,
35625    F_POPCNT,
35626    F_SSE,
35627    F_SSE2,
35628    F_SSE3,
35629    F_SSSE3,
35630    F_SSE4_1,
35631    F_SSE4_2,
35632    F_AVX,
35633    F_AVX2,
35634    F_SSE4_A,
35635    F_FMA4,
35636    F_XOP,
35637    F_FMA,
35638    F_AVX512F,
35639    F_BMI,
35640    F_BMI2,
35641    F_MAX
35642  };
35643
35644  /* These are the values for vendor types, CPU types and subtypes
35645     in cpuinfo.c.  CPU types and subtypes must have the corresponding
35646     start value subtracted before they are used.  */
35647  enum processor_model
35648  {
35649    M_INTEL = 1,
35650    M_AMD,
35651    M_CPU_TYPE_START,
35652    M_INTEL_BONNELL,
35653    M_INTEL_CORE2,
35654    M_INTEL_COREI7,
35655    M_AMDFAM10H,
35656    M_AMDFAM15H,
35657    M_INTEL_SILVERMONT,
35658    M_INTEL_KNL,
35659    M_AMD_BTVER1,
35660    M_AMD_BTVER2,
35661    M_CPU_SUBTYPE_START,
35662    M_INTEL_COREI7_NEHALEM,
35663    M_INTEL_COREI7_WESTMERE,
35664    M_INTEL_COREI7_SANDYBRIDGE,
35665    M_AMDFAM10H_BARCELONA,
35666    M_AMDFAM10H_SHANGHAI,
35667    M_AMDFAM10H_ISTANBUL,
35668    M_AMDFAM15H_BDVER1,
35669    M_AMDFAM15H_BDVER2,
35670    M_AMDFAM15H_BDVER3,
35671    M_AMDFAM15H_BDVER4,
35672    M_INTEL_COREI7_IVYBRIDGE,
35673    M_INTEL_COREI7_HASWELL,
35674    M_INTEL_COREI7_BROADWELL
35675  };
35676
35677  static struct _arch_names_table
35678    {
35679      const char *const name;
35680      const enum processor_model model;
35681    }
35682  const arch_names_table[] =
35683    {
35684      {"amd", M_AMD},
35685      {"intel", M_INTEL},
35686      {"atom", M_INTEL_BONNELL},
35687      {"slm", M_INTEL_SILVERMONT},
35688      {"core2", M_INTEL_CORE2},
35689      {"corei7", M_INTEL_COREI7},
35690      {"nehalem", M_INTEL_COREI7_NEHALEM},
35691      {"westmere", M_INTEL_COREI7_WESTMERE},
35692      {"sandybridge", M_INTEL_COREI7_SANDYBRIDGE},
35693      {"ivybridge", M_INTEL_COREI7_IVYBRIDGE},
35694      {"haswell", M_INTEL_COREI7_HASWELL},
35695      {"broadwell", M_INTEL_COREI7_BROADWELL},
35696      {"bonnell", M_INTEL_BONNELL},
35697      {"silvermont", M_INTEL_SILVERMONT},
35698      {"knl", M_INTEL_KNL},
35699      {"amdfam10h", M_AMDFAM10H},
35700      {"barcelona", M_AMDFAM10H_BARCELONA},
35701      {"shanghai", M_AMDFAM10H_SHANGHAI},
35702      {"istanbul", M_AMDFAM10H_ISTANBUL},
35703      {"btver1", M_AMD_BTVER1},
35704      {"amdfam15h", M_AMDFAM15H},
35705      {"bdver1", M_AMDFAM15H_BDVER1},
35706      {"bdver2", M_AMDFAM15H_BDVER2},
35707      {"bdver3", M_AMDFAM15H_BDVER3},
35708      {"bdver4", M_AMDFAM15H_BDVER4},
35709      {"btver2", M_AMD_BTVER2},
35710    };
35711
35712  static struct _isa_names_table
35713    {
35714      const char *const name;
35715      const enum processor_features feature;
35716    }
35717  const isa_names_table[] =
35718    {
35719      {"cmov",   F_CMOV},
35720      {"mmx",    F_MMX},
35721      {"popcnt", F_POPCNT},
35722      {"sse",    F_SSE},
35723      {"sse2",   F_SSE2},
35724      {"sse3",   F_SSE3},
35725      {"ssse3",  F_SSSE3},
35726      {"sse4a",  F_SSE4_A},
35727      {"sse4.1", F_SSE4_1},
35728      {"sse4.2", F_SSE4_2},
35729      {"avx",    F_AVX},
35730      {"fma4",   F_FMA4},
35731      {"xop",    F_XOP},
35732      {"fma",    F_FMA},
35733      {"avx2",   F_AVX2},
35734      {"avx512f",F_AVX512F},
35735      {"bmi",    F_BMI},
35736      {"bmi2",   F_BMI2}
35737    };
35738
35739  tree __processor_model_type = build_processor_model_struct ();
35740  tree __cpu_model_var = make_var_decl (__processor_model_type,
35741					"__cpu_model");
35742
35743
35744  varpool_node::add (__cpu_model_var);
35745
35746  gcc_assert ((args != NULL) && (*args != NULL));
35747
35748  param_string_cst = *args;
35749  while (param_string_cst
35750	 && TREE_CODE (param_string_cst) != STRING_CST)
35751    {
35752      /* *args must be an expr that can contain other EXPRs leading to a
35753	 STRING_CST.  */
35754      if (!EXPR_P (param_string_cst))
35755 	{
35756	  error ("Parameter to builtin must be a string constant or literal");
35757	  return integer_zero_node;
35758	}
35759      param_string_cst = TREE_OPERAND (EXPR_CHECK (param_string_cst), 0);
35760    }
35761
35762  gcc_assert (param_string_cst);
35763
35764  if (fn_code == IX86_BUILTIN_CPU_IS)
35765    {
35766      tree ref;
35767      tree field;
35768      tree final;
35769
35770      unsigned int field_val = 0;
35771      unsigned int NUM_ARCH_NAMES
35772	= sizeof (arch_names_table) / sizeof (struct _arch_names_table);
35773
35774      for (i = 0; i < NUM_ARCH_NAMES; i++)
35775	if (strcmp (arch_names_table[i].name,
35776	    TREE_STRING_POINTER (param_string_cst)) == 0)
35777	  break;
35778
35779      if (i == NUM_ARCH_NAMES)
35780	{
35781	  error ("Parameter to builtin not valid: %s",
35782	         TREE_STRING_POINTER (param_string_cst));
35783	  return integer_zero_node;
35784	}
35785
35786      field = TYPE_FIELDS (__processor_model_type);
35787      field_val = arch_names_table[i].model;
35788
35789      /* CPU types are stored in the next field.  */
35790      if (field_val > M_CPU_TYPE_START
35791	  && field_val < M_CPU_SUBTYPE_START)
35792	{
35793	  field = DECL_CHAIN (field);
35794	  field_val -= M_CPU_TYPE_START;
35795	}
35796
35797      /* CPU subtypes are stored in the next field.  */
35798      if (field_val > M_CPU_SUBTYPE_START)
35799	{
35800	  field = DECL_CHAIN (DECL_CHAIN (field));
35801	  field_val -= M_CPU_SUBTYPE_START;
35802	}
35803
35804      /* Get the appropriate field in __cpu_model.  */
35805      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35806		     field, NULL_TREE);
35807
35808      /* Check the value.  */
35809      final = build2 (EQ_EXPR, unsigned_type_node, ref,
35810		      build_int_cstu (unsigned_type_node, field_val));
35811      return build1 (CONVERT_EXPR, integer_type_node, final);
35812    }
35813  else if (fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35814    {
35815      tree ref;
35816      tree array_elt;
35817      tree field;
35818      tree final;
35819
35820      unsigned int field_val = 0;
35821      unsigned int NUM_ISA_NAMES
35822	= sizeof (isa_names_table) / sizeof (struct _isa_names_table);
35823
35824      for (i = 0; i < NUM_ISA_NAMES; i++)
35825	if (strcmp (isa_names_table[i].name,
35826	    TREE_STRING_POINTER (param_string_cst)) == 0)
35827	  break;
35828
35829      if (i == NUM_ISA_NAMES)
35830	{
35831	  error ("Parameter to builtin not valid: %s",
35832	       	 TREE_STRING_POINTER (param_string_cst));
35833	  return integer_zero_node;
35834	}
35835
35836      field = TYPE_FIELDS (__processor_model_type);
35837      /* Get the last field, which is __cpu_features.  */
35838      while (DECL_CHAIN (field))
35839        field = DECL_CHAIN (field);
35840
35841      /* Get the appropriate field: __cpu_model.__cpu_features  */
35842      ref =  build3 (COMPONENT_REF, TREE_TYPE (field), __cpu_model_var,
35843		     field, NULL_TREE);
35844
35845      /* Access the 0th element of __cpu_features array.  */
35846      array_elt = build4 (ARRAY_REF, unsigned_type_node, ref,
35847			  integer_zero_node, NULL_TREE, NULL_TREE);
35848
35849      field_val = (1 << isa_names_table[i].feature);
35850      /* Return __cpu_model.__cpu_features[0] & field_val  */
35851      final = build2 (BIT_AND_EXPR, unsigned_type_node, array_elt,
35852		      build_int_cstu (unsigned_type_node, field_val));
35853      return build1 (CONVERT_EXPR, integer_type_node, final);
35854    }
35855  gcc_unreachable ();
35856}
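
/* A rough sketch of the trees built above, in terms of the local enums
   (illustrative, not literal GENERIC dumps):

     __builtin_cpu_is ("amd")
       => (int) (__cpu_model.__cpu_vendor == M_AMD)
     __builtin_cpu_is ("haswell")
       => (int) (__cpu_model.__cpu_subtype
                 == M_INTEL_COREI7_HASWELL - M_CPU_SUBTYPE_START)
     __builtin_cpu_supports ("avx2")
       => (int) (__cpu_model.__cpu_features[0] & (1 << F_AVX2))  */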
35857
35858static tree
35859ix86_fold_builtin (tree fndecl, int n_args,
35860		   tree *args, bool ignore ATTRIBUTE_UNUSED)
35861{
35862  if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
35863    {
35864      enum ix86_builtins fn_code = (enum ix86_builtins)
35865				   DECL_FUNCTION_CODE (fndecl);
35866      if (fn_code ==  IX86_BUILTIN_CPU_IS
35867	  || fn_code == IX86_BUILTIN_CPU_SUPPORTS)
35868	{
35869	  gcc_assert (n_args == 1);
35870          return fold_builtin_cpu (fndecl, args);
35871	}
35872    }
35873
35874#ifdef SUBTARGET_FOLD_BUILTIN
35875  return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore);
35876#endif
35877
35878  return NULL_TREE;
35879}
35880
35881/* Make builtins to detect cpu type and features supported.  NAME is
35882   the builtin name, CODE is the builtin code, and FTYPE is the function
35883   type of the builtin.  */
35884
35885static void
35886make_cpu_type_builtin (const char* name, int code,
35887		       enum ix86_builtin_func_type ftype, bool is_const)
35888{
35889  tree decl;
35890  tree type;
35891
35892  type = ix86_get_builtin_func_type (ftype);
35893  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
35894			       NULL, NULL_TREE);
35895  gcc_assert (decl != NULL_TREE);
35896  ix86_builtins[(int) code] = decl;
35897  TREE_READONLY (decl) = is_const;
35898}
35899
35900/* Make builtins to get CPU type and features supported.  The created
35901   builtins are:
35902
35903   __builtin_cpu_init (), to detect cpu type and features,
35904   __builtin_cpu_is ("<CPUNAME>"), to check if cpu is of type <CPUNAME>,
35905   __builtin_cpu_supports ("<FEATURE>"), to check if cpu supports <FEATURE>
35906   */
35907
35908static void
35909ix86_init_platform_type_builtins (void)
35910{
35911  make_cpu_type_builtin ("__builtin_cpu_init", IX86_BUILTIN_CPU_INIT,
35912			 INT_FTYPE_VOID, false);
35913  make_cpu_type_builtin ("__builtin_cpu_is", IX86_BUILTIN_CPU_IS,
35914			 INT_FTYPE_PCCHAR, true);
35915  make_cpu_type_builtin ("__builtin_cpu_supports", IX86_BUILTIN_CPU_SUPPORTS,
35916			 INT_FTYPE_PCCHAR, true);
35917}
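
/* Illustrative usage of the builtins created above (hypothetical user
   code; use_avx2_path and use_generic_path are made-up names):

     if (__builtin_cpu_supports ("avx2"))
       use_avx2_path ();
     else
       use_generic_path ();

   __builtin_cpu_init () fills in __cpu_model; the underlying libgcc
   initializer also runs as a constructor, so an explicit call is normally
   only needed from code that can execute before constructors, such as
   IFUNC resolvers.  */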
35918
35919/* Internal method for ix86_init_builtins.  */
35920
35921static void
35922ix86_init_builtins_va_builtins_abi (void)
35923{
35924  tree ms_va_ref, sysv_va_ref;
35925  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
35926  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
35927  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
35928  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
35929
35930  if (!TARGET_64BIT)
35931    return;
35932  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
35933  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
35934  ms_va_ref = build_reference_type (ms_va_list_type_node);
35935  sysv_va_ref =
35936    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
35937
35938  fnvoid_va_end_ms =
35939    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35940  fnvoid_va_start_ms =
35941    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
35942  fnvoid_va_end_sysv =
35943    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
35944  fnvoid_va_start_sysv =
35945    build_varargs_function_type_list (void_type_node, sysv_va_ref,
35946    				       NULL_TREE);
35947  fnvoid_va_copy_ms =
35948    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
35949    			      NULL_TREE);
35950  fnvoid_va_copy_sysv =
35951    build_function_type_list (void_type_node, sysv_va_ref,
35952    			      sysv_va_ref, NULL_TREE);
35953
35954  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
35955  			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
35956  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
35957  			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
35958  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
35959			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
35960  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
35961  			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35962  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
35963  			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35964  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
35965			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
35966}
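
/* Illustrative usage of the ABI-specific varargs builtins registered
   above (hypothetical user code, not GCC internals):

     void __attribute__ ((ms_abi))
     ms_vararg_fn (const char *fmt, ...)
     {
       __builtin_ms_va_list ap;
       __builtin_ms_va_start (ap, fmt);
       int i = __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
     }

   The __builtin_sysv_va_* builtins serve the mirror case of an explicitly
   sysv_abi function.  */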
35967
35968static void
35969ix86_init_builtin_types (void)
35970{
35971  tree float128_type_node, float80_type_node;
35972
35973  /* The __float80 type.  */
35974  float80_type_node = long_double_type_node;
35975  if (TYPE_MODE (float80_type_node) != XFmode)
35976    {
35977      /* long double is not XFmode here; build __float80 as its own type.  */
35978      float80_type_node = make_node (REAL_TYPE);
35979
35980      TYPE_PRECISION (float80_type_node) = 80;
35981      layout_type (float80_type_node);
35982    }
35983  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
35984
35985  /* The __float128 type.  */
35986  float128_type_node = make_node (REAL_TYPE);
35987  TYPE_PRECISION (float128_type_node) = 128;
35988  layout_type (float128_type_node);
35989  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
35990
35991  /* This macro is built by i386-builtin-types.awk.  */
35992  DEFINE_BUILTIN_PRIMITIVE_TYPES;
35993}
35994
35995static void
35996ix86_init_builtins (void)
35997{
35998  tree t;
35999
36000  ix86_init_builtin_types ();
36001
36002  /* Builtins to get CPU type and features. */
36003  ix86_init_platform_type_builtins ();
36004
36005  /* TFmode support builtins.  */
36006  def_builtin_const (0, "__builtin_infq",
36007		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
36008  def_builtin_const (0, "__builtin_huge_valq",
36009		     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
36010
36011  /* We will expand them to a normal call if SSE isn't available, since
36012     they are used by libgcc.  */
36013  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
36014  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
36015			    BUILT_IN_MD, "__fabstf2", NULL_TREE);
36016  TREE_READONLY (t) = 1;
36017  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
36018
36019  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
36020  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
36021			    BUILT_IN_MD, "__copysigntf3", NULL_TREE);
36022  TREE_READONLY (t) = 1;
36023  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
36024
36025  ix86_init_tm_builtins ();
36026  ix86_init_mmx_sse_builtins ();
36027  ix86_init_mpx_builtins ();
36028
36029  if (TARGET_LP64)
36030    ix86_init_builtins_va_builtins_abi ();
36031
36032#ifdef SUBTARGET_INIT_BUILTINS
36033  SUBTARGET_INIT_BUILTINS;
36034#endif
36035}
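
/* Illustrative usage of the TFmode builtins registered above (hypothetical
   user code, not GCC internals):

     __float128 x   = 2.0Q;
     __float128 inf = __builtin_infq ();
     __float128 mag = __builtin_fabsq (-x);
     __float128 neg = __builtin_copysignq (mag, -1.0Q);

   As noted above, when SSE is unavailable the last two expand to calls to
   __fabstf2 and __copysigntf3 in libgcc rather than to inline code.  */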
36036
36037/* Return the ix86 builtin for CODE.  */
36038
36039static tree
36040ix86_builtin_decl (unsigned code, bool)
36041{
36042  if (code >= IX86_BUILTIN_MAX)
36043    return error_mark_node;
36044
36045  return ix86_builtins[code];
36046}
36047
36048/* Errors in the source file can cause expand_expr to return const0_rtx
36049   where we expect a vector.  To avoid crashing, use one of the vector
36050   clear instructions.  */
36051static rtx
36052safe_vector_operand (rtx x, machine_mode mode)
36053{
36054  if (x == const0_rtx)
36055    x = CONST0_RTX (mode);
36056  return x;
36057}
36058
36059/* Fixup modeless constants to fit required mode.  */
36060static rtx
36061fixup_modeless_constant (rtx x, machine_mode mode)
36062{
36063  if (GET_MODE (x) == VOIDmode)
36064    x = convert_to_mode (mode, x, 1);
36065  return x;
36066}
36067
36068/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
36069
36070static rtx
36071ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
36072{
36073  rtx pat;
36074  tree arg0 = CALL_EXPR_ARG (exp, 0);
36075  tree arg1 = CALL_EXPR_ARG (exp, 1);
36076  rtx op0 = expand_normal (arg0);
36077  rtx op1 = expand_normal (arg1);
36078  machine_mode tmode = insn_data[icode].operand[0].mode;
36079  machine_mode mode0 = insn_data[icode].operand[1].mode;
36080  machine_mode mode1 = insn_data[icode].operand[2].mode;
36081
36082  if (VECTOR_MODE_P (mode0))
36083    op0 = safe_vector_operand (op0, mode0);
36084  if (VECTOR_MODE_P (mode1))
36085    op1 = safe_vector_operand (op1, mode1);
36086
36087  if (optimize || !target
36088      || GET_MODE (target) != tmode
36089      || !insn_data[icode].operand[0].predicate (target, tmode))
36090    target = gen_reg_rtx (tmode);
36091
36092  if (GET_MODE (op1) == SImode && mode1 == TImode)
36093    {
36094      rtx x = gen_reg_rtx (V4SImode);
36095      emit_insn (gen_sse2_loadd (x, op1));
36096      op1 = gen_lowpart (TImode, x);
36097    }
36098
36099  if (!insn_data[icode].operand[1].predicate (op0, mode0))
36100    op0 = copy_to_mode_reg (mode0, op0);
36101  if (!insn_data[icode].operand[2].predicate (op1, mode1))
36102    op1 = copy_to_mode_reg (mode1, op1);
36103
36104  pat = GEN_FCN (icode) (target, op0, op1);
36105  if (! pat)
36106    return 0;
36107
36108  emit_insn (pat);
36109
36110  return target;
36111}
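
/* For example, __builtin_ia32_paddb128, registered in bdesc_args with
   icode CODE_FOR_addv16qi3 and type V16QI_FTYPE_V16QI_V16QI, reaches this
   function (illustrative sketch, assuming that descriptor):

     __v16qi a, b;
     __v16qi sum = __builtin_ia32_paddb128 (a, b);

   ix86_expand_args_builtin sees comparison == UNKNOWN for that two-operand
   signature and hands the call off to this routine, which forces the
   operands into registers as the insn predicates require.  */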
36112
36113/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
36114
36115static rtx
36116ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
36117			       enum ix86_builtin_func_type m_type,
36118			       enum rtx_code sub_code)
36119{
36120  rtx pat;
36121  int i;
36122  int nargs;
36123  bool comparison_p = false;
36124  bool tf_p = false;
36125  bool last_arg_constant = false;
36126  int num_memory = 0;
36127  struct {
36128    rtx op;
36129    machine_mode mode;
36130  } args[4];
36131
36132  machine_mode tmode = insn_data[icode].operand[0].mode;
36133
36134  switch (m_type)
36135    {
36136    case MULTI_ARG_4_DF2_DI_I:
36137    case MULTI_ARG_4_DF2_DI_I1:
36138    case MULTI_ARG_4_SF2_SI_I:
36139    case MULTI_ARG_4_SF2_SI_I1:
36140      nargs = 4;
36141      last_arg_constant = true;
36142      break;
36143
36144    case MULTI_ARG_3_SF:
36145    case MULTI_ARG_3_DF:
36146    case MULTI_ARG_3_SF2:
36147    case MULTI_ARG_3_DF2:
36148    case MULTI_ARG_3_DI:
36149    case MULTI_ARG_3_SI:
36150    case MULTI_ARG_3_SI_DI:
36151    case MULTI_ARG_3_HI:
36152    case MULTI_ARG_3_HI_SI:
36153    case MULTI_ARG_3_QI:
36154    case MULTI_ARG_3_DI2:
36155    case MULTI_ARG_3_SI2:
36156    case MULTI_ARG_3_HI2:
36157    case MULTI_ARG_3_QI2:
36158      nargs = 3;
36159      break;
36160
36161    case MULTI_ARG_2_SF:
36162    case MULTI_ARG_2_DF:
36163    case MULTI_ARG_2_DI:
36164    case MULTI_ARG_2_SI:
36165    case MULTI_ARG_2_HI:
36166    case MULTI_ARG_2_QI:
36167      nargs = 2;
36168      break;
36169
36170    case MULTI_ARG_2_DI_IMM:
36171    case MULTI_ARG_2_SI_IMM:
36172    case MULTI_ARG_2_HI_IMM:
36173    case MULTI_ARG_2_QI_IMM:
36174      nargs = 2;
36175      last_arg_constant = true;
36176      break;
36177
36178    case MULTI_ARG_1_SF:
36179    case MULTI_ARG_1_DF:
36180    case MULTI_ARG_1_SF2:
36181    case MULTI_ARG_1_DF2:
36182    case MULTI_ARG_1_DI:
36183    case MULTI_ARG_1_SI:
36184    case MULTI_ARG_1_HI:
36185    case MULTI_ARG_1_QI:
36186    case MULTI_ARG_1_SI_DI:
36187    case MULTI_ARG_1_HI_DI:
36188    case MULTI_ARG_1_HI_SI:
36189    case MULTI_ARG_1_QI_DI:
36190    case MULTI_ARG_1_QI_SI:
36191    case MULTI_ARG_1_QI_HI:
36192      nargs = 1;
36193      break;
36194
36195    case MULTI_ARG_2_DI_CMP:
36196    case MULTI_ARG_2_SI_CMP:
36197    case MULTI_ARG_2_HI_CMP:
36198    case MULTI_ARG_2_QI_CMP:
36199      nargs = 2;
36200      comparison_p = true;
36201      break;
36202
36203    case MULTI_ARG_2_SF_TF:
36204    case MULTI_ARG_2_DF_TF:
36205    case MULTI_ARG_2_DI_TF:
36206    case MULTI_ARG_2_SI_TF:
36207    case MULTI_ARG_2_HI_TF:
36208    case MULTI_ARG_2_QI_TF:
36209      nargs = 2;
36210      tf_p = true;
36211      break;
36212
36213    default:
36214      gcc_unreachable ();
36215    }
36216
36217  if (optimize || !target
36218      || GET_MODE (target) != tmode
36219      || !insn_data[icode].operand[0].predicate (target, tmode))
36220    target = gen_reg_rtx (tmode);
36221
36222  gcc_assert (nargs <= 4);
36223
36224  for (i = 0; i < nargs; i++)
36225    {
36226      tree arg = CALL_EXPR_ARG (exp, i);
36227      rtx op = expand_normal (arg);
36228      int adjust = (comparison_p) ? 1 : 0;
36229      machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
36230
36231      if (last_arg_constant && i == nargs - 1)
36232	{
36233	  if (!insn_data[icode].operand[i + 1].predicate (op, mode))
36234	    {
36235	      enum insn_code new_icode = icode;
36236	      switch (icode)
36237		{
36238		case CODE_FOR_xop_vpermil2v2df3:
36239		case CODE_FOR_xop_vpermil2v4sf3:
36240		case CODE_FOR_xop_vpermil2v4df3:
36241		case CODE_FOR_xop_vpermil2v8sf3:
36242		  error ("the last argument must be a 2-bit immediate");
36243		  return gen_reg_rtx (tmode);
36244		case CODE_FOR_xop_rotlv2di3:
36245		  new_icode = CODE_FOR_rotlv2di3;
36246		  goto xop_rotl;
36247		case CODE_FOR_xop_rotlv4si3:
36248		  new_icode = CODE_FOR_rotlv4si3;
36249		  goto xop_rotl;
36250		case CODE_FOR_xop_rotlv8hi3:
36251		  new_icode = CODE_FOR_rotlv8hi3;
36252		  goto xop_rotl;
36253		case CODE_FOR_xop_rotlv16qi3:
36254		  new_icode = CODE_FOR_rotlv16qi3;
36255		xop_rotl:
36256		  if (CONST_INT_P (op))
36257		    {
36258		      int mask = GET_MODE_BITSIZE (GET_MODE_INNER (tmode)) - 1;
36259		      op = GEN_INT (INTVAL (op) & mask);
36260		      gcc_checking_assert
36261			(insn_data[icode].operand[i + 1].predicate (op, mode));
36262		    }
36263		  else
36264		    {
36265		      gcc_checking_assert
36266			(nargs == 2
36267			 && insn_data[new_icode].operand[0].mode == tmode
36268			 && insn_data[new_icode].operand[1].mode == tmode
36269			 && insn_data[new_icode].operand[2].mode == mode
36270			 && insn_data[new_icode].operand[0].predicate
36271			    == insn_data[icode].operand[0].predicate
36272			 && insn_data[new_icode].operand[1].predicate
36273			    == insn_data[icode].operand[1].predicate);
36274		      icode = new_icode;
36275		      goto non_constant;
36276		    }
36277		  break;
36278		default:
36279		  gcc_unreachable ();
36280		}
36281	    }
36282	}
36283      else
36284	{
36285	non_constant:
36286	  if (VECTOR_MODE_P (mode))
36287	    op = safe_vector_operand (op, mode);
36288
36289	  /* If we aren't optimizing, only allow one memory operand to be
36290	     generated.  */
36291	  if (memory_operand (op, mode))
36292	    num_memory++;
36293
36294	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
36295
36296	  if (optimize
36297	      || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
36298	      || num_memory > 1)
36299	    op = force_reg (mode, op);
36300	}
36301
36302      args[i].op = op;
36303      args[i].mode = mode;
36304    }
36305
36306  switch (nargs)
36307    {
36308    case 1:
36309      pat = GEN_FCN (icode) (target, args[0].op);
36310      break;
36311
36312    case 2:
36313      if (tf_p)
36314	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
36315			       GEN_INT ((int)sub_code));
36316      else if (! comparison_p)
36317	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
36318      else
36319	{
36320	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
36321				       args[0].op,
36322				       args[1].op);
36323
36324	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
36325	}
36326      break;
36327
36328    case 3:
36329      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
36330      break;
36331
36332    case 4:
36333      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
36334      break;
36335
36336    default:
36337      gcc_unreachable ();
36338    }
36339
36340  if (! pat)
36341    return 0;
36342
36343  emit_insn (pat);
36344  return target;
36345}
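
/* A concrete case of the last_arg_constant fallback above (illustrative;
   _mm_roti_epi32 lives in xopintrin.h):

     __m128i r = _mm_roti_epi32 (x, n);

   where n is not a compile-time constant: the XOP immediate-rotate
   pattern CODE_FOR_xop_rotlv4si3 rejects the count operand, so the code
   above falls back to the generic CODE_FOR_rotlv4si3 pattern, which
   accepts a variable rotate count.  */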
36346
36347/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
36348   insns with vec_merge.  */
36349
36350static rtx
36351ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
36352				    rtx target)
36353{
36354  rtx pat;
36355  tree arg0 = CALL_EXPR_ARG (exp, 0);
36356  rtx op1, op0 = expand_normal (arg0);
36357  machine_mode tmode = insn_data[icode].operand[0].mode;
36358  machine_mode mode0 = insn_data[icode].operand[1].mode;
36359
36360  if (optimize || !target
36361      || GET_MODE (target) != tmode
36362      || !insn_data[icode].operand[0].predicate (target, tmode))
36363    target = gen_reg_rtx (tmode);
36364
36365  if (VECTOR_MODE_P (mode0))
36366    op0 = safe_vector_operand (op0, mode0);
36367
36368  if ((optimize && !register_operand (op0, mode0))
36369      || !insn_data[icode].operand[1].predicate (op0, mode0))
36370    op0 = copy_to_mode_reg (mode0, op0);
36371
36372  op1 = op0;
36373  if (!insn_data[icode].operand[2].predicate (op1, mode0))
36374    op1 = copy_to_mode_reg (mode0, op1);
36375
36376  pat = GEN_FCN (icode) (target, op0, op1);
36377  if (! pat)
36378    return 0;
36379  emit_insn (pat);
36380  return target;
36381}
36382
36383/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
36384
36385static rtx
36386ix86_expand_sse_compare (const struct builtin_description *d,
36387			 tree exp, rtx target, bool swap)
36388{
36389  rtx pat;
36390  tree arg0 = CALL_EXPR_ARG (exp, 0);
36391  tree arg1 = CALL_EXPR_ARG (exp, 1);
36392  rtx op0 = expand_normal (arg0);
36393  rtx op1 = expand_normal (arg1);
36394  rtx op2;
36395  machine_mode tmode = insn_data[d->icode].operand[0].mode;
36396  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36397  machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36398  enum rtx_code comparison = d->comparison;
36399
36400  if (VECTOR_MODE_P (mode0))
36401    op0 = safe_vector_operand (op0, mode0);
36402  if (VECTOR_MODE_P (mode1))
36403    op1 = safe_vector_operand (op1, mode1);
36404
36405  /* Swap operands if we have a comparison that isn't available in
36406     hardware.  */
36407  if (swap)
36408    std::swap (op0, op1);
36409
36410  if (optimize || !target
36411      || GET_MODE (target) != tmode
36412      || !insn_data[d->icode].operand[0].predicate (target, tmode))
36413    target = gen_reg_rtx (tmode);
36414
36415  if ((optimize && !register_operand (op0, mode0))
36416      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
36417    op0 = copy_to_mode_reg (mode0, op0);
36418  if ((optimize && !register_operand (op1, mode1))
36419      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
36420    op1 = copy_to_mode_reg (mode1, op1);
36421
36422  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
36423  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36424  if (! pat)
36425    return 0;
36426  emit_insn (pat);
36427  return target;
36428}
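
/* For instance, the SSE cmpps instruction only encodes the "less than"
   family of predicates, so __builtin_ia32_cmpgtps is described in
   bdesc_args with comparison LT and a _SWAP function type; the swap above
   makes

     __builtin_ia32_cmpgtps (a, b)

   emit the maskcmp pattern computing b < a (illustrative summary of that
   descriptor).  */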
36429
36430/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
36431
36432static rtx
36433ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
36434		      rtx target)
36435{
36436  rtx pat;
36437  tree arg0 = CALL_EXPR_ARG (exp, 0);
36438  tree arg1 = CALL_EXPR_ARG (exp, 1);
36439  rtx op0 = expand_normal (arg0);
36440  rtx op1 = expand_normal (arg1);
36441  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36442  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36443  enum rtx_code comparison = d->comparison;
36444
36445  if (VECTOR_MODE_P (mode0))
36446    op0 = safe_vector_operand (op0, mode0);
36447  if (VECTOR_MODE_P (mode1))
36448    op1 = safe_vector_operand (op1, mode1);
36449
36450  /* Swap operands if we have a comparison that isn't available in
36451     hardware.  */
36452  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
36453    std::swap (op0, op1);
36454
36455  target = gen_reg_rtx (SImode);
36456  emit_move_insn (target, const0_rtx);
36457  target = gen_rtx_SUBREG (QImode, target, 0);
36458
36459  if ((optimize && !register_operand (op0, mode0))
36460      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36461    op0 = copy_to_mode_reg (mode0, op0);
36462  if ((optimize && !register_operand (op1, mode1))
36463      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36464    op1 = copy_to_mode_reg (mode1, op1);
36465
36466  pat = GEN_FCN (d->icode) (op0, op1);
36467  if (! pat)
36468    return 0;
36469  emit_insn (pat);
36470  emit_insn (gen_rtx_SET (VOIDmode,
36471			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36472			  gen_rtx_fmt_ee (comparison, QImode,
36473					  SET_DEST (pat),
36474					  const0_rtx)));
36475
36476  return SUBREG_REG (target);
36477}
36478
36479/* Subroutines of ix86_expand_args_builtin to take care of round insns.  */
36480
36481static rtx
36482ix86_expand_sse_round (const struct builtin_description *d, tree exp,
36483		       rtx target)
36484{
36485  rtx pat;
36486  tree arg0 = CALL_EXPR_ARG (exp, 0);
36487  rtx op1, op0 = expand_normal (arg0);
36488  machine_mode tmode = insn_data[d->icode].operand[0].mode;
36489  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36490
36491  if (optimize || target == 0
36492      || GET_MODE (target) != tmode
36493      || !insn_data[d->icode].operand[0].predicate (target, tmode))
36494    target = gen_reg_rtx (tmode);
36495
36496  if (VECTOR_MODE_P (mode0))
36497    op0 = safe_vector_operand (op0, mode0);
36498
36499  if ((optimize && !register_operand (op0, mode0))
36500      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36501    op0 = copy_to_mode_reg (mode0, op0);
36502
36503  op1 = GEN_INT (d->comparison);
36504
36505  pat = GEN_FCN (d->icode) (target, op0, op1);
36506  if (! pat)
36507    return 0;
36508  emit_insn (pat);
36509  return target;
36510}
36511
36512static rtx
36513ix86_expand_sse_round_vec_pack_sfix (const struct builtin_description *d,
36514				     tree exp, rtx target)
36515{
36516  rtx pat;
36517  tree arg0 = CALL_EXPR_ARG (exp, 0);
36518  tree arg1 = CALL_EXPR_ARG (exp, 1);
36519  rtx op0 = expand_normal (arg0);
36520  rtx op1 = expand_normal (arg1);
36521  rtx op2;
36522  machine_mode tmode = insn_data[d->icode].operand[0].mode;
36523  machine_mode mode0 = insn_data[d->icode].operand[1].mode;
36524  machine_mode mode1 = insn_data[d->icode].operand[2].mode;
36525
36526  if (optimize || target == 0
36527      || GET_MODE (target) != tmode
36528      || !insn_data[d->icode].operand[0].predicate (target, tmode))
36529    target = gen_reg_rtx (tmode);
36530
36531  op0 = safe_vector_operand (op0, mode0);
36532  op1 = safe_vector_operand (op1, mode1);
36533
36534  if ((optimize && !register_operand (op0, mode0))
36535      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36536    op0 = copy_to_mode_reg (mode0, op0);
36537  if ((optimize && !register_operand (op1, mode1))
36538      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36539    op1 = copy_to_mode_reg (mode1, op1);
36540
36541  op2 = GEN_INT (d->comparison);
36542
36543  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
36544  if (! pat)
36545    return 0;
36546  emit_insn (pat);
36547  return target;
36548}
36549
36550/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
36551
36552static rtx
36553ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
36554		       rtx target)
36555{
36556  rtx pat;
36557  tree arg0 = CALL_EXPR_ARG (exp, 0);
36558  tree arg1 = CALL_EXPR_ARG (exp, 1);
36559  rtx op0 = expand_normal (arg0);
36560  rtx op1 = expand_normal (arg1);
36561  machine_mode mode0 = insn_data[d->icode].operand[0].mode;
36562  machine_mode mode1 = insn_data[d->icode].operand[1].mode;
36563  enum rtx_code comparison = d->comparison;
36564
36565  if (VECTOR_MODE_P (mode0))
36566    op0 = safe_vector_operand (op0, mode0);
36567  if (VECTOR_MODE_P (mode1))
36568    op1 = safe_vector_operand (op1, mode1);
36569
36570  target = gen_reg_rtx (SImode);
36571  emit_move_insn (target, const0_rtx);
36572  target = gen_rtx_SUBREG (QImode, target, 0);
36573
36574  if ((optimize && !register_operand (op0, mode0))
36575      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
36576    op0 = copy_to_mode_reg (mode0, op0);
36577  if ((optimize && !register_operand (op1, mode1))
36578      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
36579    op1 = copy_to_mode_reg (mode1, op1);
36580
36581  pat = GEN_FCN (d->icode) (op0, op1);
36582  if (! pat)
36583    return 0;
36584  emit_insn (pat);
36585  emit_insn (gen_rtx_SET (VOIDmode,
36586			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36587			  gen_rtx_fmt_ee (comparison, QImode,
36588					  SET_DEST (pat),
36589					  const0_rtx)));
36590
36591  return SUBREG_REG (target);
36592}
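
/* Sketch of the expansion for e.g. __builtin_ia32_ptestz128 (comparison
   EQ in its descriptor); the exact assembly depends on register
   allocation:

     ptest   %xmm1, %xmm0
     sete    %al

   The SImode pseudo is cleared first so that setting its low QImode part
   through STRICT_LOW_PART yields a zero-extended 0/1 result.  */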
36593
36594/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
36595
36596static rtx
36597ix86_expand_sse_pcmpestr (const struct builtin_description *d,
36598			  tree exp, rtx target)
36599{
36600  rtx pat;
36601  tree arg0 = CALL_EXPR_ARG (exp, 0);
36602  tree arg1 = CALL_EXPR_ARG (exp, 1);
36603  tree arg2 = CALL_EXPR_ARG (exp, 2);
36604  tree arg3 = CALL_EXPR_ARG (exp, 3);
36605  tree arg4 = CALL_EXPR_ARG (exp, 4);
36606  rtx scratch0, scratch1;
36607  rtx op0 = expand_normal (arg0);
36608  rtx op1 = expand_normal (arg1);
36609  rtx op2 = expand_normal (arg2);
36610  rtx op3 = expand_normal (arg3);
36611  rtx op4 = expand_normal (arg4);
36612  machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
36613
36614  tmode0 = insn_data[d->icode].operand[0].mode;
36615  tmode1 = insn_data[d->icode].operand[1].mode;
36616  modev2 = insn_data[d->icode].operand[2].mode;
36617  modei3 = insn_data[d->icode].operand[3].mode;
36618  modev4 = insn_data[d->icode].operand[4].mode;
36619  modei5 = insn_data[d->icode].operand[5].mode;
36620  modeimm = insn_data[d->icode].operand[6].mode;
36621
36622  if (VECTOR_MODE_P (modev2))
36623    op0 = safe_vector_operand (op0, modev2);
36624  if (VECTOR_MODE_P (modev4))
36625    op2 = safe_vector_operand (op2, modev4);
36626
36627  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36628    op0 = copy_to_mode_reg (modev2, op0);
36629  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
36630    op1 = copy_to_mode_reg (modei3, op1);
36631  if ((optimize && !register_operand (op2, modev4))
36632      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
36633    op2 = copy_to_mode_reg (modev4, op2);
36634  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
36635    op3 = copy_to_mode_reg (modei5, op3);
36636
36637  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
36638    {
36639      error ("the fifth argument must be an 8-bit immediate");
36640      return const0_rtx;
36641    }
36642
36643  if (d->code == IX86_BUILTIN_PCMPESTRI128)
36644    {
36645      if (optimize || !target
36646	  || GET_MODE (target) != tmode0
36647	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36648	target = gen_reg_rtx (tmode0);
36649
36650      scratch1 = gen_reg_rtx (tmode1);
36651
36652      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
36653    }
36654  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
36655    {
36656      if (optimize || !target
36657	  || GET_MODE (target) != tmode1
36658	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36659	target = gen_reg_rtx (tmode1);
36660
36661      scratch0 = gen_reg_rtx (tmode0);
36662
36663      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
36664    }
36665  else
36666    {
36667      gcc_assert (d->flag);
36668
36669      scratch0 = gen_reg_rtx (tmode0);
36670      scratch1 = gen_reg_rtx (tmode1);
36671
36672      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
36673    }
36674
36675  if (! pat)
36676    return 0;
36677
36678  emit_insn (pat);
36679
36680  if (d->flag)
36681    {
36682      target = gen_reg_rtx (SImode);
36683      emit_move_insn (target, const0_rtx);
36684      target = gen_rtx_SUBREG (QImode, target, 0);
36685
36686      emit_insn
36687	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36688		      gen_rtx_fmt_ee (EQ, QImode,
36689				      gen_rtx_REG ((machine_mode) d->flag,
36690						   FLAGS_REG),
36691				      const0_rtx)));
36692      return SUBREG_REG (target);
36693    }
36694  else
36695    return target;
36696}
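
/* Illustrative mapping for the explicit-length string compares handled
   here (hypothetical user code; see smmintrin.h for the real wrappers):

     int idx  = _mm_cmpestri (a, la, b, lb, 0x0c);
     int zero = _mm_cmpestrz (a, la, b, lb, 0x0c);

   The index form corresponds to IX86_BUILTIN_PCMPESTRI128 and returns the
   first (index) result computed above; flag forms such as _mm_cmpestrz
   have d->flag set and are materialised from the FLAGS_REG comparison
   emitted at the end.  */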
36697
36698
36699/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */
36700
36701static rtx
36702ix86_expand_sse_pcmpistr (const struct builtin_description *d,
36703			  tree exp, rtx target)
36704{
36705  rtx pat;
36706  tree arg0 = CALL_EXPR_ARG (exp, 0);
36707  tree arg1 = CALL_EXPR_ARG (exp, 1);
36708  tree arg2 = CALL_EXPR_ARG (exp, 2);
36709  rtx scratch0, scratch1;
36710  rtx op0 = expand_normal (arg0);
36711  rtx op1 = expand_normal (arg1);
36712  rtx op2 = expand_normal (arg2);
36713  machine_mode tmode0, tmode1, modev2, modev3, modeimm;
36714
36715  tmode0 = insn_data[d->icode].operand[0].mode;
36716  tmode1 = insn_data[d->icode].operand[1].mode;
36717  modev2 = insn_data[d->icode].operand[2].mode;
36718  modev3 = insn_data[d->icode].operand[3].mode;
36719  modeimm = insn_data[d->icode].operand[4].mode;
36720
36721  if (VECTOR_MODE_P (modev2))
36722    op0 = safe_vector_operand (op0, modev2);
36723  if (VECTOR_MODE_P (modev3))
36724    op1 = safe_vector_operand (op1, modev3);
36725
36726  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
36727    op0 = copy_to_mode_reg (modev2, op0);
36728  if ((optimize && !register_operand (op1, modev3))
36729      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
36730    op1 = copy_to_mode_reg (modev3, op1);
36731
36732  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
36733    {
36734      error ("the third argument must be an 8-bit immediate");
36735      return const0_rtx;
36736    }
36737
36738  if (d->code == IX86_BUILTIN_PCMPISTRI128)
36739    {
36740      if (optimize || !target
36741	  || GET_MODE (target) != tmode0
36742	  || !insn_data[d->icode].operand[0].predicate (target, tmode0))
36743	target = gen_reg_rtx (tmode0);
36744
36745      scratch1 = gen_reg_rtx (tmode1);
36746
36747      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
36748    }
36749  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
36750    {
36751      if (optimize || !target
36752	  || GET_MODE (target) != tmode1
36753	  || !insn_data[d->icode].operand[1].predicate (target, tmode1))
36754	target = gen_reg_rtx (tmode1);
36755
36756      scratch0 = gen_reg_rtx (tmode0);
36757
36758      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
36759    }
36760  else
36761    {
36762      gcc_assert (d->flag);
36763
36764      scratch0 = gen_reg_rtx (tmode0);
36765      scratch1 = gen_reg_rtx (tmode1);
36766
36767      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
36768    }
36769
36770  if (! pat)
36771    return 0;
36772
36773  emit_insn (pat);
36774
36775  if (d->flag)
36776    {
36777      target = gen_reg_rtx (SImode);
36778      emit_move_insn (target, const0_rtx);
36779      target = gen_rtx_SUBREG (QImode, target, 0);
36780
36781      emit_insn
36782	(gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
36783		      gen_rtx_fmt_ee (EQ, QImode,
36784				      gen_rtx_REG ((machine_mode) d->flag,
36785						   FLAGS_REG),
36786				      const0_rtx)));
36787      return SUBREG_REG (target);
36788    }
36789  else
36790    return target;
36791}
36792
36793/* Subroutine of ix86_expand_builtin to take care of insns with
36794   variable number of operands.  */
36795
36796static rtx
36797ix86_expand_args_builtin (const struct builtin_description *d,
36798			  tree exp, rtx target)
36799{
36800  rtx pat, real_target;
36801  unsigned int i, nargs;
36802  unsigned int nargs_constant = 0;
36803  unsigned int mask_pos = 0;
36804  int num_memory = 0;
36805  struct
36806    {
36807      rtx op;
36808      machine_mode mode;
36809    } args[6];
36810  bool last_arg_count = false;
36811  enum insn_code icode = d->icode;
36812  const struct insn_data_d *insn_p = &insn_data[icode];
36813  machine_mode tmode = insn_p->operand[0].mode;
36814  machine_mode rmode = VOIDmode;
36815  bool swap = false;
36816  enum rtx_code comparison = d->comparison;
36817
36818  switch ((enum ix86_builtin_func_type) d->flag)
36819    {
36820    case V2DF_FTYPE_V2DF_ROUND:
36821    case V4DF_FTYPE_V4DF_ROUND:
36822    case V4SF_FTYPE_V4SF_ROUND:
36823    case V8SF_FTYPE_V8SF_ROUND:
36824    case V4SI_FTYPE_V4SF_ROUND:
36825    case V8SI_FTYPE_V8SF_ROUND:
36826      return ix86_expand_sse_round (d, exp, target);
36827    case V4SI_FTYPE_V2DF_V2DF_ROUND:
36828    case V8SI_FTYPE_V4DF_V4DF_ROUND:
36829    case V16SI_FTYPE_V8DF_V8DF_ROUND:
36830      return ix86_expand_sse_round_vec_pack_sfix (d, exp, target);
36831    case INT_FTYPE_V8SF_V8SF_PTEST:
36832    case INT_FTYPE_V4DI_V4DI_PTEST:
36833    case INT_FTYPE_V4DF_V4DF_PTEST:
36834    case INT_FTYPE_V4SF_V4SF_PTEST:
36835    case INT_FTYPE_V2DI_V2DI_PTEST:
36836    case INT_FTYPE_V2DF_V2DF_PTEST:
36837      return ix86_expand_sse_ptest (d, exp, target);
36838    case FLOAT128_FTYPE_FLOAT128:
36839    case FLOAT_FTYPE_FLOAT:
36840    case INT_FTYPE_INT:
36841    case UINT64_FTYPE_INT:
36842    case UINT16_FTYPE_UINT16:
36843    case INT64_FTYPE_INT64:
36844    case INT64_FTYPE_V4SF:
36845    case INT64_FTYPE_V2DF:
36846    case INT_FTYPE_V16QI:
36847    case INT_FTYPE_V8QI:
36848    case INT_FTYPE_V8SF:
36849    case INT_FTYPE_V4DF:
36850    case INT_FTYPE_V4SF:
36851    case INT_FTYPE_V2DF:
36852    case INT_FTYPE_V32QI:
36853    case V16QI_FTYPE_V16QI:
36854    case V8SI_FTYPE_V8SF:
36855    case V8SI_FTYPE_V4SI:
36856    case V8HI_FTYPE_V8HI:
36857    case V8HI_FTYPE_V16QI:
36858    case V8QI_FTYPE_V8QI:
36859    case V8SF_FTYPE_V8SF:
36860    case V8SF_FTYPE_V8SI:
36861    case V8SF_FTYPE_V4SF:
36862    case V8SF_FTYPE_V8HI:
36863    case V4SI_FTYPE_V4SI:
36864    case V4SI_FTYPE_V16QI:
36865    case V4SI_FTYPE_V4SF:
36866    case V4SI_FTYPE_V8SI:
36867    case V4SI_FTYPE_V8HI:
36868    case V4SI_FTYPE_V4DF:
36869    case V4SI_FTYPE_V2DF:
36870    case V4HI_FTYPE_V4HI:
36871    case V4DF_FTYPE_V4DF:
36872    case V4DF_FTYPE_V4SI:
36873    case V4DF_FTYPE_V4SF:
36874    case V4DF_FTYPE_V2DF:
36875    case V4SF_FTYPE_V4SF:
36876    case V4SF_FTYPE_V4SI:
36877    case V4SF_FTYPE_V8SF:
36878    case V4SF_FTYPE_V4DF:
36879    case V4SF_FTYPE_V8HI:
36880    case V4SF_FTYPE_V2DF:
36881    case V2DI_FTYPE_V2DI:
36882    case V2DI_FTYPE_V16QI:
36883    case V2DI_FTYPE_V8HI:
36884    case V2DI_FTYPE_V4SI:
36885    case V2DF_FTYPE_V2DF:
36886    case V2DF_FTYPE_V4SI:
36887    case V2DF_FTYPE_V4DF:
36888    case V2DF_FTYPE_V4SF:
36889    case V2DF_FTYPE_V2SI:
36890    case V2SI_FTYPE_V2SI:
36891    case V2SI_FTYPE_V4SF:
36892    case V2SI_FTYPE_V2SF:
36893    case V2SI_FTYPE_V2DF:
36894    case V2SF_FTYPE_V2SF:
36895    case V2SF_FTYPE_V2SI:
36896    case V32QI_FTYPE_V32QI:
36897    case V32QI_FTYPE_V16QI:
36898    case V16HI_FTYPE_V16HI:
36899    case V16HI_FTYPE_V8HI:
36900    case V8SI_FTYPE_V8SI:
36901    case V16HI_FTYPE_V16QI:
36902    case V8SI_FTYPE_V16QI:
36903    case V4DI_FTYPE_V16QI:
36904    case V8SI_FTYPE_V8HI:
36905    case V4DI_FTYPE_V8HI:
36906    case V4DI_FTYPE_V4SI:
36907    case V4DI_FTYPE_V2DI:
36908    case HI_FTYPE_HI:
36909    case HI_FTYPE_V16QI:
36910    case SI_FTYPE_V32QI:
36911    case DI_FTYPE_V64QI:
36912    case V16QI_FTYPE_HI:
36913    case V32QI_FTYPE_SI:
36914    case V64QI_FTYPE_DI:
36915    case V8HI_FTYPE_QI:
36916    case V16HI_FTYPE_HI:
36917    case V32HI_FTYPE_SI:
36918    case V4SI_FTYPE_QI:
36919    case V8SI_FTYPE_QI:
36920    case V4SI_FTYPE_HI:
36921    case V8SI_FTYPE_HI:
36922    case QI_FTYPE_V8HI:
36923    case HI_FTYPE_V16HI:
36924    case SI_FTYPE_V32HI:
36925    case QI_FTYPE_V4SI:
36926    case QI_FTYPE_V8SI:
36927    case HI_FTYPE_V16SI:
36928    case QI_FTYPE_V2DI:
36929    case QI_FTYPE_V4DI:
36930    case QI_FTYPE_V8DI:
36931    case UINT_FTYPE_V2DF:
36932    case UINT_FTYPE_V4SF:
36933    case UINT64_FTYPE_V2DF:
36934    case UINT64_FTYPE_V4SF:
36935    case V16QI_FTYPE_V8DI:
36936    case V16HI_FTYPE_V16SI:
36937    case V16SI_FTYPE_HI:
36938    case V2DI_FTYPE_QI:
36939    case V4DI_FTYPE_QI:
36940    case V16SI_FTYPE_V16SI:
36941    case V16SI_FTYPE_INT:
36942    case V16SF_FTYPE_FLOAT:
36943    case V16SF_FTYPE_V8SF:
36944    case V16SI_FTYPE_V8SI:
36945    case V16SF_FTYPE_V4SF:
36946    case V16SI_FTYPE_V4SI:
36947    case V16SF_FTYPE_V16SF:
36948    case V8HI_FTYPE_V8DI:
36949    case V8UHI_FTYPE_V8UHI:
36950    case V8SI_FTYPE_V8DI:
36951    case V8SF_FTYPE_V8DF:
36952    case V8DI_FTYPE_QI:
36953    case V8DI_FTYPE_INT64:
36954    case V8DI_FTYPE_V4DI:
36955    case V8DI_FTYPE_V8DI:
36956    case V8DF_FTYPE_DOUBLE:
36957    case V8DF_FTYPE_V4DF:
36958    case V8DF_FTYPE_V2DF:
36959    case V8DF_FTYPE_V8DF:
36960    case V8DF_FTYPE_V8SI:
36961      nargs = 1;
36962      break;
36963    case V4SF_FTYPE_V4SF_VEC_MERGE:
36964    case V2DF_FTYPE_V2DF_VEC_MERGE:
36965      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
36966    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
36967    case V16QI_FTYPE_V16QI_V16QI:
36968    case V16QI_FTYPE_V8HI_V8HI:
36969    case V16SI_FTYPE_V16SI_V16SI:
36970    case V16SF_FTYPE_V16SF_V16SF:
36971    case V16SF_FTYPE_V16SF_V16SI:
36972    case V8QI_FTYPE_V8QI_V8QI:
36973    case V8QI_FTYPE_V4HI_V4HI:
36974    case V8HI_FTYPE_V8HI_V8HI:
36975    case V8HI_FTYPE_V16QI_V16QI:
36976    case V8HI_FTYPE_V4SI_V4SI:
36977    case V8SF_FTYPE_V8SF_V8SF:
36978    case V8SF_FTYPE_V8SF_V8SI:
36979    case V8DI_FTYPE_V8DI_V8DI:
36980    case V8DF_FTYPE_V8DF_V8DF:
36981    case V8DF_FTYPE_V8DF_V8DI:
36982    case V4SI_FTYPE_V4SI_V4SI:
36983    case V4SI_FTYPE_V8HI_V8HI:
36984    case V4SI_FTYPE_V4SF_V4SF:
36985    case V4SI_FTYPE_V2DF_V2DF:
36986    case V4HI_FTYPE_V4HI_V4HI:
36987    case V4HI_FTYPE_V8QI_V8QI:
36988    case V4HI_FTYPE_V2SI_V2SI:
36989    case V4DF_FTYPE_V4DF_V4DF:
36990    case V4DF_FTYPE_V4DF_V4DI:
36991    case V4SF_FTYPE_V4SF_V4SF:
36992    case V4SF_FTYPE_V4SF_V4SI:
36993    case V4SF_FTYPE_V4SF_V2SI:
36994    case V4SF_FTYPE_V4SF_V2DF:
36995    case V4SF_FTYPE_V4SF_UINT:
36996    case V4SF_FTYPE_V4SF_UINT64:
36997    case V4SF_FTYPE_V4SF_DI:
36998    case V4SF_FTYPE_V4SF_SI:
36999    case V2DI_FTYPE_V2DI_V2DI:
37000    case V2DI_FTYPE_V16QI_V16QI:
37001    case V2DI_FTYPE_V4SI_V4SI:
37002    case V2UDI_FTYPE_V4USI_V4USI:
37003    case V2DI_FTYPE_V2DI_V16QI:
37004    case V2DI_FTYPE_V2DF_V2DF:
37005    case V2SI_FTYPE_V2SI_V2SI:
37006    case V2SI_FTYPE_V4HI_V4HI:
37007    case V2SI_FTYPE_V2SF_V2SF:
37008    case V2DF_FTYPE_V2DF_V2DF:
37009    case V2DF_FTYPE_V2DF_V4SF:
37010    case V2DF_FTYPE_V2DF_V2DI:
37011    case V2DF_FTYPE_V2DF_DI:
37012    case V2DF_FTYPE_V2DF_SI:
37013    case V2DF_FTYPE_V2DF_UINT:
37014    case V2DF_FTYPE_V2DF_UINT64:
37015    case V2SF_FTYPE_V2SF_V2SF:
37016    case V1DI_FTYPE_V1DI_V1DI:
37017    case V1DI_FTYPE_V8QI_V8QI:
37018    case V1DI_FTYPE_V2SI_V2SI:
37019    case V32QI_FTYPE_V16HI_V16HI:
37020    case V16HI_FTYPE_V8SI_V8SI:
37021    case V32QI_FTYPE_V32QI_V32QI:
37022    case V16HI_FTYPE_V32QI_V32QI:
37023    case V16HI_FTYPE_V16HI_V16HI:
37024    case V8SI_FTYPE_V4DF_V4DF:
37025    case V8SI_FTYPE_V8SI_V8SI:
37026    case V8SI_FTYPE_V16HI_V16HI:
37027    case V4DI_FTYPE_V4DI_V4DI:
37028    case V4DI_FTYPE_V8SI_V8SI:
37029    case V4UDI_FTYPE_V8USI_V8USI:
37030    case QI_FTYPE_V8DI_V8DI:
37031    case V8DI_FTYPE_V64QI_V64QI:
37032    case HI_FTYPE_V16SI_V16SI:
37033      if (comparison == UNKNOWN)
37034	return ix86_expand_binop_builtin (icode, exp, target);
37035      nargs = 2;
37036      break;
37037    case V4SF_FTYPE_V4SF_V4SF_SWAP:
37038    case V2DF_FTYPE_V2DF_V2DF_SWAP:
37039      gcc_assert (comparison != UNKNOWN);
37040      nargs = 2;
37041      swap = true;
37042      break;
37043    case V16HI_FTYPE_V16HI_V8HI_COUNT:
37044    case V16HI_FTYPE_V16HI_SI_COUNT:
37045    case V8SI_FTYPE_V8SI_V4SI_COUNT:
37046    case V8SI_FTYPE_V8SI_SI_COUNT:
37047    case V4DI_FTYPE_V4DI_V2DI_COUNT:
37048    case V4DI_FTYPE_V4DI_INT_COUNT:
37049    case V8HI_FTYPE_V8HI_V8HI_COUNT:
37050    case V8HI_FTYPE_V8HI_SI_COUNT:
37051    case V4SI_FTYPE_V4SI_V4SI_COUNT:
37052    case V4SI_FTYPE_V4SI_SI_COUNT:
37053    case V4HI_FTYPE_V4HI_V4HI_COUNT:
37054    case V4HI_FTYPE_V4HI_SI_COUNT:
37055    case V2DI_FTYPE_V2DI_V2DI_COUNT:
37056    case V2DI_FTYPE_V2DI_SI_COUNT:
37057    case V2SI_FTYPE_V2SI_V2SI_COUNT:
37058    case V2SI_FTYPE_V2SI_SI_COUNT:
37059    case V1DI_FTYPE_V1DI_V1DI_COUNT:
37060    case V1DI_FTYPE_V1DI_SI_COUNT:
37061      nargs = 2;
37062      last_arg_count = true;
37063      break;
37064    case UINT64_FTYPE_UINT64_UINT64:
37065    case UINT_FTYPE_UINT_UINT:
37066    case UINT_FTYPE_UINT_USHORT:
37067    case UINT_FTYPE_UINT_UCHAR:
37068    case UINT16_FTYPE_UINT16_INT:
37069    case UINT8_FTYPE_UINT8_INT:
37070    case HI_FTYPE_HI_HI:
37071    case SI_FTYPE_SI_SI:
37072    case DI_FTYPE_DI_DI:
37073    case V16SI_FTYPE_V8DF_V8DF:
37074      nargs = 2;
37075      break;
37076    case V2DI_FTYPE_V2DI_INT_CONVERT:
37077      nargs = 2;
37078      rmode = V1TImode;
37079      nargs_constant = 1;
37080      break;
37081    case V4DI_FTYPE_V4DI_INT_CONVERT:
37082      nargs = 2;
37083      rmode = V2TImode;
37084      nargs_constant = 1;
37085      break;
37086    case V8DI_FTYPE_V8DI_INT_CONVERT:
37087      nargs = 2;
37088      rmode = V4TImode;
37089      nargs_constant = 1;
37090      break;
37091    case V8HI_FTYPE_V8HI_INT:
37092    case V8HI_FTYPE_V8SF_INT:
37093    case V16HI_FTYPE_V16SF_INT:
37094    case V8HI_FTYPE_V4SF_INT:
37095    case V8SF_FTYPE_V8SF_INT:
37096    case V4SF_FTYPE_V16SF_INT:
37097    case V16SF_FTYPE_V16SF_INT:
37098    case V4SI_FTYPE_V4SI_INT:
37099    case V4SI_FTYPE_V8SI_INT:
37100    case V4HI_FTYPE_V4HI_INT:
37101    case V4DF_FTYPE_V4DF_INT:
37102    case V4DF_FTYPE_V8DF_INT:
37103    case V4SF_FTYPE_V4SF_INT:
37104    case V4SF_FTYPE_V8SF_INT:
37105    case V2DI_FTYPE_V2DI_INT:
37106    case V2DF_FTYPE_V2DF_INT:
37107    case V2DF_FTYPE_V4DF_INT:
37108    case V16HI_FTYPE_V16HI_INT:
37109    case V8SI_FTYPE_V8SI_INT:
37110    case V16SI_FTYPE_V16SI_INT:
37111    case V4SI_FTYPE_V16SI_INT:
37112    case V4DI_FTYPE_V4DI_INT:
37113    case V2DI_FTYPE_V4DI_INT:
37114    case V4DI_FTYPE_V8DI_INT:
37115    case HI_FTYPE_HI_INT:
37116    case QI_FTYPE_V4SF_INT:
37117    case QI_FTYPE_V2DF_INT:
37118      nargs = 2;
37119      nargs_constant = 1;
37120      break;
37121    case V16QI_FTYPE_V16QI_V16QI_V16QI:
37122    case V8SF_FTYPE_V8SF_V8SF_V8SF:
37123    case V4DF_FTYPE_V4DF_V4DF_V4DF:
37124    case V4SF_FTYPE_V4SF_V4SF_V4SF:
37125    case V2DF_FTYPE_V2DF_V2DF_V2DF:
37126    case V32QI_FTYPE_V32QI_V32QI_V32QI:
37127    case HI_FTYPE_V16SI_V16SI_HI:
37128    case QI_FTYPE_V8DI_V8DI_QI:
37129    case V16HI_FTYPE_V16SI_V16HI_HI:
37130    case V16QI_FTYPE_V16SI_V16QI_HI:
37131    case V16QI_FTYPE_V8DI_V16QI_QI:
37132    case V16SF_FTYPE_V16SF_V16SF_HI:
37133    case V16SF_FTYPE_V16SF_V16SF_V16SF:
37134    case V16SF_FTYPE_V16SF_V16SI_V16SF:
37135    case V16SF_FTYPE_V16SI_V16SF_HI:
37136    case V16SF_FTYPE_V16SI_V16SF_V16SF:
37137    case V16SF_FTYPE_V4SF_V16SF_HI:
37138    case V16SI_FTYPE_SI_V16SI_HI:
37139    case V16SI_FTYPE_V16HI_V16SI_HI:
37140    case V16SI_FTYPE_V16QI_V16SI_HI:
37141    case V16SI_FTYPE_V16SF_V16SI_HI:
37142    case V8SF_FTYPE_V4SF_V8SF_QI:
37143    case V4DF_FTYPE_V2DF_V4DF_QI:
37144    case V8SI_FTYPE_V4SI_V8SI_QI:
37145    case V8SI_FTYPE_SI_V8SI_QI:
37146    case V4SI_FTYPE_V4SI_V4SI_QI:
37147    case V4SI_FTYPE_SI_V4SI_QI:
37148    case V4DI_FTYPE_V2DI_V4DI_QI:
37149    case V4DI_FTYPE_DI_V4DI_QI:
37150    case V2DI_FTYPE_V2DI_V2DI_QI:
37151    case V2DI_FTYPE_DI_V2DI_QI:
37152    case V64QI_FTYPE_V64QI_V64QI_DI:
37153    case V64QI_FTYPE_V16QI_V64QI_DI:
37154    case V64QI_FTYPE_QI_V64QI_DI:
37155    case V32QI_FTYPE_V32QI_V32QI_SI:
37156    case V32QI_FTYPE_V16QI_V32QI_SI:
37157    case V32QI_FTYPE_QI_V32QI_SI:
37158    case V16QI_FTYPE_V16QI_V16QI_HI:
37159    case V16QI_FTYPE_QI_V16QI_HI:
37160    case V32HI_FTYPE_V8HI_V32HI_SI:
37161    case V32HI_FTYPE_HI_V32HI_SI:
37162    case V16HI_FTYPE_V8HI_V16HI_HI:
37163    case V16HI_FTYPE_HI_V16HI_HI:
37164    case V8HI_FTYPE_V8HI_V8HI_QI:
37165    case V8HI_FTYPE_HI_V8HI_QI:
37166    case V8SF_FTYPE_V8HI_V8SF_QI:
37167    case V4SF_FTYPE_V8HI_V4SF_QI:
37168    case V8SI_FTYPE_V8SF_V8SI_QI:
37169    case V4SI_FTYPE_V4SF_V4SI_QI:
37170    case V8DI_FTYPE_V8SF_V8DI_QI:
37171    case V4DI_FTYPE_V4SF_V4DI_QI:
37172    case V2DI_FTYPE_V4SF_V2DI_QI:
37173    case V8SF_FTYPE_V8DI_V8SF_QI:
37174    case V4SF_FTYPE_V4DI_V4SF_QI:
37175    case V4SF_FTYPE_V2DI_V4SF_QI:
37176    case V8DF_FTYPE_V8DI_V8DF_QI:
37177    case V4DF_FTYPE_V4DI_V4DF_QI:
37178    case V2DF_FTYPE_V2DI_V2DF_QI:
37179    case V16QI_FTYPE_V8HI_V16QI_QI:
37180    case V16QI_FTYPE_V16HI_V16QI_HI:
37181    case V16QI_FTYPE_V4SI_V16QI_QI:
37182    case V16QI_FTYPE_V8SI_V16QI_QI:
37183    case V8HI_FTYPE_V4SI_V8HI_QI:
37184    case V8HI_FTYPE_V8SI_V8HI_QI:
37185    case V16QI_FTYPE_V2DI_V16QI_QI:
37186    case V16QI_FTYPE_V4DI_V16QI_QI:
37187    case V8HI_FTYPE_V2DI_V8HI_QI:
37188    case V8HI_FTYPE_V4DI_V8HI_QI:
37189    case V4SI_FTYPE_V2DI_V4SI_QI:
37190    case V4SI_FTYPE_V4DI_V4SI_QI:
37191    case V32QI_FTYPE_V32HI_V32QI_SI:
37192    case HI_FTYPE_V16QI_V16QI_HI:
37193    case SI_FTYPE_V32QI_V32QI_SI:
37194    case DI_FTYPE_V64QI_V64QI_DI:
37195    case QI_FTYPE_V8HI_V8HI_QI:
37196    case HI_FTYPE_V16HI_V16HI_HI:
37197    case SI_FTYPE_V32HI_V32HI_SI:
37198    case QI_FTYPE_V4SI_V4SI_QI:
37199    case QI_FTYPE_V8SI_V8SI_QI:
37200    case QI_FTYPE_V2DI_V2DI_QI:
37201    case QI_FTYPE_V4DI_V4DI_QI:
37202    case V4SF_FTYPE_V2DF_V4SF_QI:
37203    case V4SF_FTYPE_V4DF_V4SF_QI:
37204    case V16SI_FTYPE_V16SI_V16SI_HI:
37205    case V16SI_FTYPE_V16SI_V16SI_V16SI:
37206    case V16SI_FTYPE_V4SI_V16SI_HI:
37207    case V2DI_FTYPE_V2DI_V2DI_V2DI:
37208    case V2DI_FTYPE_V4SI_V2DI_QI:
37209    case V2DI_FTYPE_V8HI_V2DI_QI:
37210    case V2DI_FTYPE_V16QI_V2DI_QI:
37211    case V4DI_FTYPE_V4DI_V4DI_QI:
37212    case V4DI_FTYPE_V4SI_V4DI_QI:
37213    case V4DI_FTYPE_V8HI_V4DI_QI:
37214    case V4DI_FTYPE_V16QI_V4DI_QI:
37215    case V8DI_FTYPE_V8DF_V8DI_QI:
37216    case V4DI_FTYPE_V4DF_V4DI_QI:
37217    case V2DI_FTYPE_V2DF_V2DI_QI:
37218    case V4SI_FTYPE_V4DF_V4SI_QI:
37219    case V4SI_FTYPE_V2DF_V4SI_QI:
37220    case V4SI_FTYPE_V8HI_V4SI_QI:
37221    case V4SI_FTYPE_V16QI_V4SI_QI:
37222    case V8SI_FTYPE_V8SI_V8SI_V8SI:
37223    case V4DI_FTYPE_V4DI_V4DI_V4DI:
37224    case V8DF_FTYPE_V2DF_V8DF_QI:
37225    case V8DF_FTYPE_V4DF_V8DF_QI:
37226    case V8DF_FTYPE_V8DF_V8DF_QI:
37227    case V8DF_FTYPE_V8DF_V8DF_V8DF:
37228    case V8SF_FTYPE_V8SF_V8SF_QI:
37229    case V8SF_FTYPE_V8SI_V8SF_QI:
37230    case V4DF_FTYPE_V4DF_V4DF_QI:
37231    case V4SF_FTYPE_V4SF_V4SF_QI:
37232    case V2DF_FTYPE_V2DF_V2DF_QI:
37233    case V2DF_FTYPE_V4SF_V2DF_QI:
37234    case V2DF_FTYPE_V4SI_V2DF_QI:
37235    case V4SF_FTYPE_V4SI_V4SF_QI:
37236    case V4DF_FTYPE_V4SF_V4DF_QI:
37237    case V4DF_FTYPE_V4SI_V4DF_QI:
37238    case V8SI_FTYPE_V8SI_V8SI_QI:
37239    case V8SI_FTYPE_V8HI_V8SI_QI:
37240    case V8SI_FTYPE_V16QI_V8SI_QI:
37241    case V8DF_FTYPE_V8DF_V8DI_V8DF:
37242    case V8DF_FTYPE_V8DI_V8DF_V8DF:
37243    case V8DF_FTYPE_V8SF_V8DF_QI:
37244    case V8DF_FTYPE_V8SI_V8DF_QI:
37245    case V8DI_FTYPE_DI_V8DI_QI:
37246    case V16SF_FTYPE_V8SF_V16SF_HI:
37247    case V16SI_FTYPE_V8SI_V16SI_HI:
37248    case V16HI_FTYPE_V16HI_V16HI_HI:
37249    case V8HI_FTYPE_V16QI_V8HI_QI:
37250    case V16HI_FTYPE_V16QI_V16HI_HI:
37251    case V32HI_FTYPE_V32HI_V32HI_SI:
37252    case V32HI_FTYPE_V32QI_V32HI_SI:
37253    case V8DI_FTYPE_V16QI_V8DI_QI:
37254    case V8DI_FTYPE_V2DI_V8DI_QI:
37255    case V8DI_FTYPE_V4DI_V8DI_QI:
37256    case V8DI_FTYPE_V8DI_V8DI_QI:
37257    case V8DI_FTYPE_V8DI_V8DI_V8DI:
37258    case V8DI_FTYPE_V8HI_V8DI_QI:
37259    case V8DI_FTYPE_V8SI_V8DI_QI:
37260    case V8HI_FTYPE_V8DI_V8HI_QI:
37261    case V8SF_FTYPE_V8DF_V8SF_QI:
37262    case V8SI_FTYPE_V8DF_V8SI_QI:
37263    case V8SI_FTYPE_V8DI_V8SI_QI:
37264    case V4SI_FTYPE_V4SI_V4SI_V4SI:
37265      nargs = 3;
37266      break;
37267    case V32QI_FTYPE_V32QI_V32QI_INT:
37268    case V16HI_FTYPE_V16HI_V16HI_INT:
37269    case V16QI_FTYPE_V16QI_V16QI_INT:
37270    case V4DI_FTYPE_V4DI_V4DI_INT:
37271    case V8HI_FTYPE_V8HI_V8HI_INT:
37272    case V8SI_FTYPE_V8SI_V8SI_INT:
37273    case V8SI_FTYPE_V8SI_V4SI_INT:
37274    case V8SF_FTYPE_V8SF_V8SF_INT:
37275    case V8SF_FTYPE_V8SF_V4SF_INT:
37276    case V4SI_FTYPE_V4SI_V4SI_INT:
37277    case V4DF_FTYPE_V4DF_V4DF_INT:
37278    case V16SF_FTYPE_V16SF_V16SF_INT:
37279    case V16SF_FTYPE_V16SF_V4SF_INT:
37280    case V16SI_FTYPE_V16SI_V4SI_INT:
37281    case V4DF_FTYPE_V4DF_V2DF_INT:
37282    case V4SF_FTYPE_V4SF_V4SF_INT:
37283    case V2DI_FTYPE_V2DI_V2DI_INT:
37284    case V4DI_FTYPE_V4DI_V2DI_INT:
37285    case V2DF_FTYPE_V2DF_V2DF_INT:
37286    case QI_FTYPE_V8DI_V8DI_INT:
37287    case QI_FTYPE_V8DF_V8DF_INT:
37288    case QI_FTYPE_V2DF_V2DF_INT:
37289    case QI_FTYPE_V4SF_V4SF_INT:
37290    case HI_FTYPE_V16SI_V16SI_INT:
37291    case HI_FTYPE_V16SF_V16SF_INT:
37292      nargs = 3;
37293      nargs_constant = 1;
37294      break;
37295    case V4DI_FTYPE_V4DI_V4DI_INT_CONVERT:
37296      nargs = 3;
37297      rmode = V4DImode;
37298      nargs_constant = 1;
37299      break;
37300    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
37301      nargs = 3;
37302      rmode = V2DImode;
37303      nargs_constant = 1;
37304      break;
37305    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
37306      nargs = 3;
37307      rmode = DImode;
37308      nargs_constant = 1;
37309      break;
37310    case V2DI_FTYPE_V2DI_UINT_UINT:
37311      nargs = 3;
37312      nargs_constant = 2;
37313      break;
37314    case V8DI_FTYPE_V8DI_V8DI_INT_CONVERT:
37315      nargs = 3;
37316      rmode = V8DImode;
37317      nargs_constant = 1;
37318      break;
37319    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_DI_CONVERT:
37320      nargs = 5;
37321      rmode = V8DImode;
37322      mask_pos = 2;
37323      nargs_constant = 1;
37324      break;
37325    case QI_FTYPE_V8DF_INT_QI:
37326    case QI_FTYPE_V4DF_INT_QI:
37327    case QI_FTYPE_V2DF_INT_QI:
37328    case HI_FTYPE_V16SF_INT_HI:
37329    case QI_FTYPE_V8SF_INT_QI:
37330    case QI_FTYPE_V4SF_INT_QI:
37331      nargs = 3;
37332      mask_pos = 1;
37333      nargs_constant = 1;
37334      break;
37335    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_SI_CONVERT:
37336      nargs = 5;
37337      rmode = V4DImode;
37338      mask_pos = 2;
37339      nargs_constant = 1;
37340      break;
37341    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_HI_CONVERT:
37342      nargs = 5;
37343      rmode = V2DImode;
37344      mask_pos = 2;
37345      nargs_constant = 1;
37346      break;
37347    case V32QI_FTYPE_V32QI_V32QI_V32QI_SI:
37348    case V32HI_FTYPE_V32HI_V32HI_V32HI_SI:
37349    case V32HI_FTYPE_V64QI_V64QI_V32HI_SI:
37350    case V16SI_FTYPE_V32HI_V32HI_V16SI_HI:
37351    case V64QI_FTYPE_V64QI_V64QI_V64QI_DI:
37352    case V32HI_FTYPE_V32HI_V8HI_V32HI_SI:
37353    case V16HI_FTYPE_V16HI_V8HI_V16HI_HI:
37354    case V8SI_FTYPE_V8SI_V4SI_V8SI_QI:
37355    case V4DI_FTYPE_V4DI_V2DI_V4DI_QI:
37356    case V64QI_FTYPE_V32HI_V32HI_V64QI_DI:
37357    case V32QI_FTYPE_V16HI_V16HI_V32QI_SI:
37358    case V16QI_FTYPE_V8HI_V8HI_V16QI_HI:
37359    case V32HI_FTYPE_V16SI_V16SI_V32HI_SI:
37360    case V16HI_FTYPE_V8SI_V8SI_V16HI_HI:
37361    case V8HI_FTYPE_V4SI_V4SI_V8HI_QI:
37362    case V4DF_FTYPE_V4DF_V4DI_V4DF_QI:
37363    case V8SF_FTYPE_V8SF_V8SI_V8SF_QI:
37364    case V4SF_FTYPE_V4SF_V4SI_V4SF_QI:
37365    case V2DF_FTYPE_V2DF_V2DI_V2DF_QI:
37366    case V2DI_FTYPE_V4SI_V4SI_V2DI_QI:
37367    case V4DI_FTYPE_V8SI_V8SI_V4DI_QI:
37368    case V4DF_FTYPE_V4DI_V4DF_V4DF_QI:
37369    case V8SF_FTYPE_V8SI_V8SF_V8SF_QI:
37370    case V2DF_FTYPE_V2DI_V2DF_V2DF_QI:
37371    case V4SF_FTYPE_V4SI_V4SF_V4SF_QI:
37372    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI:
37373    case V16SF_FTYPE_V16SF_V16SI_V16SF_HI:
37374    case V16SF_FTYPE_V16SI_V16SF_V16SF_HI:
37375    case V16SI_FTYPE_V16SI_V16SI_V16SI_HI:
37376    case V16SI_FTYPE_V16SI_V4SI_V16SI_HI:
37377    case V8HI_FTYPE_V8HI_V8HI_V8HI_QI:
37378    case V8SI_FTYPE_V8SI_V8SI_V8SI_QI:
37379    case V4SI_FTYPE_V4SI_V4SI_V4SI_QI:
37380    case V8SF_FTYPE_V8SF_V8SF_V8SF_QI:
37381    case V16QI_FTYPE_V16QI_V16QI_V16QI_HI:
37382    case V16HI_FTYPE_V16HI_V16HI_V16HI_HI:
37383    case V2DI_FTYPE_V2DI_V2DI_V2DI_QI:
37384    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI:
37385    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI:
37386    case V4DI_FTYPE_V4DI_V4DI_V4DI_QI:
37387    case V4DF_FTYPE_V4DF_V4DF_V4DF_QI:
37388    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI:
37389    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI:
37390    case V8DF_FTYPE_V8DF_V8DF_V8DF_QI:
37391    case V8DF_FTYPE_V8DF_V8DI_V8DF_QI:
37392    case V8DF_FTYPE_V8DI_V8DF_V8DF_QI:
37393    case V8DI_FTYPE_V16SI_V16SI_V8DI_QI:
37394    case V8DI_FTYPE_V8DI_SI_V8DI_V8DI:
37395    case V8DI_FTYPE_V8DI_V2DI_V8DI_QI:
37396    case V8DI_FTYPE_V8DI_V8DI_V8DI_QI:
37397    case V8HI_FTYPE_V16QI_V16QI_V8HI_QI:
37398    case V16HI_FTYPE_V32QI_V32QI_V16HI_HI:
37399    case V8SI_FTYPE_V16HI_V16HI_V8SI_QI:
37400    case V4SI_FTYPE_V8HI_V8HI_V4SI_QI:
37401      nargs = 4;
37402      break;
37403    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
37404    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
37405    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
37406    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
37407    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT:
37408      nargs = 4;
37409      nargs_constant = 1;
37410      break;
37411    case QI_FTYPE_V4DI_V4DI_INT_QI:
37412    case QI_FTYPE_V8SI_V8SI_INT_QI:
37413    case QI_FTYPE_V4DF_V4DF_INT_QI:
37414    case QI_FTYPE_V8SF_V8SF_INT_QI:
37415    case QI_FTYPE_V2DI_V2DI_INT_QI:
37416    case QI_FTYPE_V4SI_V4SI_INT_QI:
37417    case QI_FTYPE_V2DF_V2DF_INT_QI:
37418    case QI_FTYPE_V4SF_V4SF_INT_QI:
37419    case DI_FTYPE_V64QI_V64QI_INT_DI:
37420    case SI_FTYPE_V32QI_V32QI_INT_SI:
37421    case HI_FTYPE_V16QI_V16QI_INT_HI:
37422    case SI_FTYPE_V32HI_V32HI_INT_SI:
37423    case HI_FTYPE_V16HI_V16HI_INT_HI:
37424    case QI_FTYPE_V8HI_V8HI_INT_QI:
37425      nargs = 4;
37426      mask_pos = 1;
37427      nargs_constant = 1;
37428      break;
37429    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
37430      nargs = 4;
37431      nargs_constant = 2;
37432      break;
37433    case UCHAR_FTYPE_UCHAR_UINT_UINT_PUNSIGNED:
37434    case UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PULONGLONG:
37435      nargs = 4;
37436      break;
37437    case QI_FTYPE_V8DI_V8DI_INT_QI:
37438    case HI_FTYPE_V16SI_V16SI_INT_HI:
37439    case QI_FTYPE_V8DF_V8DF_INT_QI:
37440    case HI_FTYPE_V16SF_V16SF_INT_HI:
37441      mask_pos = 1;
37442      nargs = 4;
37443      nargs_constant = 1;
37444      break;
37445    case V8SF_FTYPE_V8SF_INT_V8SF_QI:
37446    case V4SF_FTYPE_V4SF_INT_V4SF_QI:
37447    case V2DF_FTYPE_V4DF_INT_V2DF_QI:
37448    case V2DI_FTYPE_V4DI_INT_V2DI_QI:
37449    case V8SF_FTYPE_V16SF_INT_V8SF_QI:
37450    case V8SI_FTYPE_V16SI_INT_V8SI_QI:
37451    case V2DF_FTYPE_V8DF_INT_V2DF_QI:
37452    case V2DI_FTYPE_V8DI_INT_V2DI_QI:
37453    case V4SF_FTYPE_V8SF_INT_V4SF_QI:
37454    case V4SI_FTYPE_V8SI_INT_V4SI_QI:
37455    case V8HI_FTYPE_V8SF_INT_V8HI_QI:
37456    case V8HI_FTYPE_V4SF_INT_V8HI_QI:
37457    case V32HI_FTYPE_V32HI_INT_V32HI_SI:
37458    case V16HI_FTYPE_V16HI_INT_V16HI_HI:
37459    case V8HI_FTYPE_V8HI_INT_V8HI_QI:
37460    case V4DI_FTYPE_V4DI_INT_V4DI_QI:
37461    case V2DI_FTYPE_V2DI_INT_V2DI_QI:
37462    case V8SI_FTYPE_V8SI_INT_V8SI_QI:
37463    case V4SI_FTYPE_V4SI_INT_V4SI_QI:
37464    case V4DF_FTYPE_V4DF_INT_V4DF_QI:
37465    case V2DF_FTYPE_V2DF_INT_V2DF_QI:
37466    case V8DF_FTYPE_V8DF_INT_V8DF_QI:
37467    case V16SF_FTYPE_V16SF_INT_V16SF_HI:
37468    case V16HI_FTYPE_V16SF_INT_V16HI_HI:
37469    case V16SI_FTYPE_V16SI_INT_V16SI_HI:
37470    case V4SI_FTYPE_V16SI_INT_V4SI_QI:
37471    case V4DI_FTYPE_V8DI_INT_V4DI_QI:
37472    case V4DF_FTYPE_V8DF_INT_V4DF_QI:
37473    case V4SF_FTYPE_V16SF_INT_V4SF_QI:
37474    case V8DI_FTYPE_V8DI_INT_V8DI_QI:
37475      nargs = 4;
37476      mask_pos = 2;
37477      nargs_constant = 1;
37478      break;
37479    case V16SF_FTYPE_V16SF_V4SF_INT_V16SF_HI:
37480    case V16SI_FTYPE_V16SI_V4SI_INT_V16SI_HI:
37481    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI:
37482    case V8DI_FTYPE_V8DI_V8DI_INT_V8DI_QI:
37483    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI:
37484    case V16SI_FTYPE_V16SI_V16SI_INT_V16SI_HI:
37485    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI:
37486    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI:
37487    case V8DF_FTYPE_V8DF_V4DF_INT_V8DF_QI:
37488    case V8DI_FTYPE_V8DI_V4DI_INT_V8DI_QI:
37489    case V4DF_FTYPE_V4DF_V4DF_INT_V4DF_QI:
37490    case V8SF_FTYPE_V8SF_V8SF_INT_V8SF_QI:
37491    case V8DF_FTYPE_V8DF_V2DF_INT_V8DF_QI:
37492    case V8DI_FTYPE_V8DI_V2DI_INT_V8DI_QI:
37493    case V8SI_FTYPE_V8SI_V8SI_INT_V8SI_QI:
37494    case V4DI_FTYPE_V4DI_V4DI_INT_V4DI_QI:
37495    case V4SI_FTYPE_V4SI_V4SI_INT_V4SI_QI:
37496    case V2DI_FTYPE_V2DI_V2DI_INT_V2DI_QI:
37497    case V32HI_FTYPE_V64QI_V64QI_INT_V32HI_SI:
37498    case V16HI_FTYPE_V32QI_V32QI_INT_V16HI_HI:
37499    case V8HI_FTYPE_V16QI_V16QI_INT_V8HI_QI:
37500    case V16SF_FTYPE_V16SF_V8SF_INT_V16SF_HI:
37501    case V16SI_FTYPE_V16SI_V8SI_INT_V16SI_HI:
37502    case V8SF_FTYPE_V8SF_V4SF_INT_V8SF_QI:
37503    case V8SI_FTYPE_V8SI_V4SI_INT_V8SI_QI:
37504    case V4DI_FTYPE_V4DI_V2DI_INT_V4DI_QI:
37505    case V4DF_FTYPE_V4DF_V2DF_INT_V4DF_QI:
37506      nargs = 5;
37507      mask_pos = 2;
37508      nargs_constant = 1;
37509      break;
37510    case V8DI_FTYPE_V8DI_V8DI_V8DI_INT_QI:
37511    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI:
37512    case V16SI_FTYPE_V16SI_V16SI_V16SI_INT_HI:
37513    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI:
37514    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI:
37515    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT_QI:
37516    case V8SI_FTYPE_V8SI_V8SI_V8SI_INT_QI:
37517    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT_QI:
37518    case V4DI_FTYPE_V4DI_V4DI_V4DI_INT_QI:
37519    case V4SI_FTYPE_V4SI_V4SI_V4SI_INT_QI:
37520    case V2DI_FTYPE_V2DI_V2DI_V2DI_INT_QI:
37522      nargs = 5;
37523      mask_pos = 1;
37524      nargs_constant = 1;
37525      break;
37526
37527    default:
37528      gcc_unreachable ();
37529    }
37530
37531  gcc_assert (nargs <= ARRAY_SIZE (args));
37532
37533  if (comparison != UNKNOWN)
37534    {
37535      gcc_assert (nargs == 2);
37536      return ix86_expand_sse_compare (d, exp, target, swap);
37537    }
37538
37539  if (rmode == VOIDmode || rmode == tmode)
37540    {
37541      if (optimize
37542	  || target == 0
37543	  || GET_MODE (target) != tmode
37544	  || !insn_p->operand[0].predicate (target, tmode))
37545	target = gen_reg_rtx (tmode);
37546      real_target = target;
37547    }
37548  else
37549    {
37550      real_target = gen_reg_rtx (tmode);
37551      target = simplify_gen_subreg (rmode, real_target, tmode, 0);
37552    }
37553
37554  for (i = 0; i < nargs; i++)
37555    {
37556      tree arg = CALL_EXPR_ARG (exp, i);
37557      rtx op = expand_normal (arg);
37558      machine_mode mode = insn_p->operand[i + 1].mode;
37559      bool match = insn_p->operand[i + 1].predicate (op, mode);
37560
37561      if (last_arg_count && (i + 1) == nargs)
37562	{
37563	  /* SIMD shift insns take either an 8-bit immediate or a register
37564	     as the count, but the builtin functions take an int.  If the
37565	     count doesn't match, we put it in a register.  */
37566	  if (!match)
37567	    {
37568	      op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
37569	      if (!insn_p->operand[i + 1].predicate (op, mode))
37570		op = copy_to_reg (op);
37571	    }
37572	}
37573      else if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37574	       (!mask_pos && (nargs - i) <= nargs_constant))
37575	{
37576	  if (!match)
37577	    switch (icode)
37578	      {
37579	      case CODE_FOR_avx_vinsertf128v4di:
37580	      case CODE_FOR_avx_vextractf128v4di:
37581		error ("the last argument must be a 1-bit immediate");
37582		return const0_rtx;
37583
37584	      case CODE_FOR_avx512f_cmpv8di3_mask:
37585	      case CODE_FOR_avx512f_cmpv16si3_mask:
37586	      case CODE_FOR_avx512f_ucmpv8di3_mask:
37587	      case CODE_FOR_avx512f_ucmpv16si3_mask:
37588	      case CODE_FOR_avx512vl_cmpv4di3_mask:
37589	      case CODE_FOR_avx512vl_cmpv8si3_mask:
37590	      case CODE_FOR_avx512vl_ucmpv4di3_mask:
37591	      case CODE_FOR_avx512vl_ucmpv8si3_mask:
37592	      case CODE_FOR_avx512vl_cmpv2di3_mask:
37593	      case CODE_FOR_avx512vl_cmpv4si3_mask:
37594	      case CODE_FOR_avx512vl_ucmpv2di3_mask:
37595	      case CODE_FOR_avx512vl_ucmpv4si3_mask:
37596		error ("the last argument must be a 3-bit immediate");
37597		return const0_rtx;
37598
37599	      case CODE_FOR_sse4_1_roundsd:
37600	      case CODE_FOR_sse4_1_roundss:
37601
37602	      case CODE_FOR_sse4_1_roundpd:
37603	      case CODE_FOR_sse4_1_roundps:
37604	      case CODE_FOR_avx_roundpd256:
37605	      case CODE_FOR_avx_roundps256:
37606
37607	      case CODE_FOR_sse4_1_roundpd_vec_pack_sfix:
37608	      case CODE_FOR_sse4_1_roundps_sfix:
37609	      case CODE_FOR_avx_roundpd_vec_pack_sfix256:
37610	      case CODE_FOR_avx_roundps_sfix256:
37611
37612	      case CODE_FOR_sse4_1_blendps:
37613	      case CODE_FOR_avx_blendpd256:
37614	      case CODE_FOR_avx_vpermilv4df:
37615	      case CODE_FOR_avx_vpermilv4df_mask:
37616	      case CODE_FOR_avx512f_getmantv8df_mask:
37617	      case CODE_FOR_avx512f_getmantv16sf_mask:
37618	      case CODE_FOR_avx512vl_getmantv8sf_mask:
37619	      case CODE_FOR_avx512vl_getmantv4df_mask:
37620	      case CODE_FOR_avx512vl_getmantv4sf_mask:
37621	      case CODE_FOR_avx512vl_getmantv2df_mask:
37622	      case CODE_FOR_avx512dq_rangepv8df_mask_round:
37623	      case CODE_FOR_avx512dq_rangepv16sf_mask_round:
37624	      case CODE_FOR_avx512dq_rangepv4df_mask:
37625	      case CODE_FOR_avx512dq_rangepv8sf_mask:
37626	      case CODE_FOR_avx512dq_rangepv2df_mask:
37627	      case CODE_FOR_avx512dq_rangepv4sf_mask:
37628	      case CODE_FOR_avx_shufpd256_mask:
37629		error ("the last argument must be a 4-bit immediate");
37630		return const0_rtx;
37631
37632	      case CODE_FOR_sha1rnds4:
37633	      case CODE_FOR_sse4_1_blendpd:
37634	      case CODE_FOR_avx_vpermilv2df:
37635	      case CODE_FOR_avx_vpermilv2df_mask:
37636	      case CODE_FOR_xop_vpermil2v2df3:
37637	      case CODE_FOR_xop_vpermil2v4sf3:
37638	      case CODE_FOR_xop_vpermil2v4df3:
37639	      case CODE_FOR_xop_vpermil2v8sf3:
37640	      case CODE_FOR_avx512f_vinsertf32x4_mask:
37641	      case CODE_FOR_avx512f_vinserti32x4_mask:
37642	      case CODE_FOR_avx512f_vextractf32x4_mask:
37643	      case CODE_FOR_avx512f_vextracti32x4_mask:
37644	      case CODE_FOR_sse2_shufpd:
37645	      case CODE_FOR_sse2_shufpd_mask:
37646	      case CODE_FOR_avx512dq_shuf_f64x2_mask:
37647	      case CODE_FOR_avx512dq_shuf_i64x2_mask:
37648	      case CODE_FOR_avx512vl_shuf_i32x4_mask:
37649	      case CODE_FOR_avx512vl_shuf_f32x4_mask:
37650		error ("the last argument must be a 2-bit immediate");
37651		return const0_rtx;
37652
37653	      case CODE_FOR_avx_vextractf128v4df:
37654	      case CODE_FOR_avx_vextractf128v8sf:
37655	      case CODE_FOR_avx_vextractf128v8si:
37656	      case CODE_FOR_avx_vinsertf128v4df:
37657	      case CODE_FOR_avx_vinsertf128v8sf:
37658	      case CODE_FOR_avx_vinsertf128v8si:
37659	      case CODE_FOR_avx512f_vinsertf64x4_mask:
37660	      case CODE_FOR_avx512f_vinserti64x4_mask:
37661	      case CODE_FOR_avx512f_vextractf64x4_mask:
37662	      case CODE_FOR_avx512f_vextracti64x4_mask:
37663	      case CODE_FOR_avx512dq_vinsertf32x8_mask:
37664	      case CODE_FOR_avx512dq_vinserti32x8_mask:
37665	      case CODE_FOR_avx512vl_vinsertv4df:
37666	      case CODE_FOR_avx512vl_vinsertv4di:
37667	      case CODE_FOR_avx512vl_vinsertv8sf:
37668	      case CODE_FOR_avx512vl_vinsertv8si:
37669		error ("the last argument must be a 1-bit immediate");
37670		return const0_rtx;
37671
37672	      case CODE_FOR_avx_vmcmpv2df3:
37673	      case CODE_FOR_avx_vmcmpv4sf3:
37674	      case CODE_FOR_avx_cmpv2df3:
37675	      case CODE_FOR_avx_cmpv4sf3:
37676	      case CODE_FOR_avx_cmpv4df3:
37677	      case CODE_FOR_avx_cmpv8sf3:
37678	      case CODE_FOR_avx512f_cmpv8df3_mask:
37679	      case CODE_FOR_avx512f_cmpv16sf3_mask:
37680	      case CODE_FOR_avx512f_vmcmpv2df3_mask:
37681	      case CODE_FOR_avx512f_vmcmpv4sf3_mask:
37682		error ("the last argument must be a 5-bit immediate");
37683		return const0_rtx;
37684
37685	      default:
37686		switch (nargs_constant)
37687		  {
37688		  case 2:
37689		    if ((mask_pos && (nargs - i - mask_pos) == nargs_constant) ||
37690			(!mask_pos && (nargs - i) == nargs_constant))
37691		      {
37692			error ("the next to last argument must be an 8-bit immediate");
37693			break;
37694		      }
37695		  case 1:
37696		    error ("the last argument must be an 8-bit immediate");
37697		    break;
37698		  default:
37699		    gcc_unreachable ();
37700		  }
37701		return const0_rtx;
37702	      }
37703	}
37704      else
37705	{
37706	  if (VECTOR_MODE_P (mode))
37707	    op = safe_vector_operand (op, mode);
37708
37709	  /* If we aren't optimizing, only allow one memory operand to
37710	     be generated.  */
37711	  if (memory_operand (op, mode))
37712	    num_memory++;
37713
37714	  op = fixup_modeless_constant (op, mode);
37715
37716	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
37717	    {
37718	      if (optimize || !match || num_memory > 1)
37719		op = copy_to_mode_reg (mode, op);
37720	    }
37721	  else
37722	    {
37723	      op = copy_to_reg (op);
37724	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
37725	    }
37726	}
37727
37728      args[i].op = op;
37729      args[i].mode = mode;
37730    }
37731
37732  switch (nargs)
37733    {
37734    case 1:
37735      pat = GEN_FCN (icode) (real_target, args[0].op);
37736      break;
37737    case 2:
37738      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
37739      break;
37740    case 3:
37741      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37742			     args[2].op);
37743      break;
37744    case 4:
37745      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37746			     args[2].op, args[3].op);
37747      break;
37748    case 5:
37749      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37750			     args[2].op, args[3].op, args[4].op);
      break;
37751    case 6:
37752      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
37753			     args[2].op, args[3].op, args[4].op,
37754			     args[5].op);
37755      break;
37756    default:
37757      gcc_unreachable ();
37758    }
37759
37760  if (! pat)
37761    return 0;
37762
37763  emit_insn (pat);
37764  return target;
37765}
37766
37767/* Transform a pattern of the following layout:
37768     (parallel [
37769       (set (A B))
37770       (unspec [C] UNSPEC_EMBEDDED_ROUNDING)])
37772   into:
37773     (set (A B))
37774
37775   Or:
37776     (parallel [ A B
37777     ...
37778     (unspec [C] UNSPEC_EMBEDDED_ROUNDING)
37779     ...
37780     ])
37781   into:
37782     (parallel [ A B ... ])  */
37783
37784static rtx
37785ix86_erase_embedded_rounding (rtx pat)
37786{
37787  if (GET_CODE (pat) == INSN)
37788    pat = PATTERN (pat);
37789
37790  gcc_assert (GET_CODE (pat) == PARALLEL);
37791
37792  if (XVECLEN (pat, 0) == 2)
37793    {
37794      rtx p0 = XVECEXP (pat, 0, 0);
37795      rtx p1 = XVECEXP (pat, 0, 1);
37796
37797      gcc_assert (GET_CODE (p0) == SET
37798		  && GET_CODE (p1) == UNSPEC
37799		  && XINT (p1, 1) == UNSPEC_EMBEDDED_ROUNDING);
37800
37801      return p0;
37802    }
37803  else
37804    {
37805      rtx *res = XALLOCAVEC (rtx, XVECLEN (pat, 0));
37806      int i = 0;
37807      int j = 0;
37808
37809      for (; i < XVECLEN (pat, 0); ++i)
37810	{
37811	  rtx elem = XVECEXP (pat, 0, i);
37812	  if (GET_CODE (elem) != UNSPEC
37813	      || XINT (elem, 1) != UNSPEC_EMBEDDED_ROUNDING)
37814	    res [j++] = elem;
37815	}
37816
37817      /* No more than one occurrence was removed.  */
37818      gcc_assert (j >= XVECLEN (pat, 0) - 1);
37819
37820      return gen_rtx_PARALLEL (GET_MODE (pat), gen_rtvec_v (j, res));
37821    }
37822}
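
/* As an illustration (a sketch, not copied from the machine description):
   a two-element body such as

     (parallel [(set (reg:V2DF 90) (plus:V2DF (reg:V2DF 91) (reg:V2DF 92)))
		(unspec [(const_int R)] UNSPEC_EMBEDDED_ROUNDING)])

   collapses to just its SET, while a longer PARALLEL merely has the
   UNSPEC_EMBEDDED_ROUNDING element removed and the remaining elements
   rebuilt into a new PARALLEL of the same mode.  */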
37823
37824/* Subroutine of ix86_expand_round_builtin to take care of comi insns
37825   with rounding.  */
37826static rtx
37827ix86_expand_sse_comi_round (const struct builtin_description *d,
37828			    tree exp, rtx target)
37829{
37830  rtx pat, set_dst;
37831  tree arg0 = CALL_EXPR_ARG (exp, 0);
37832  tree arg1 = CALL_EXPR_ARG (exp, 1);
37833  tree arg2 = CALL_EXPR_ARG (exp, 2);
37834  tree arg3 = CALL_EXPR_ARG (exp, 3);
37835  rtx op0 = expand_normal (arg0);
37836  rtx op1 = expand_normal (arg1);
37837  rtx op2 = expand_normal (arg2);
37838  rtx op3 = expand_normal (arg3);
37839  enum insn_code icode = d->icode;
37840  const struct insn_data_d *insn_p = &insn_data[icode];
37841  machine_mode mode0 = insn_p->operand[0].mode;
37842  machine_mode mode1 = insn_p->operand[1].mode;
37843  enum rtx_code comparison = UNEQ;
37844  bool need_ucomi = false;
37845
37846  /* See avxintrin.h for values.  */
37847  enum rtx_code comi_comparisons[32] =
37848    {
37849      UNEQ, GT, GE, UNORDERED, LTGT, UNLE, UNLT, ORDERED, UNEQ, UNLT,
37850      UNLE, LT, LTGT, GE, GT, LT, UNEQ, GT, GE, UNORDERED, LTGT, UNLE,
37851      UNLT, ORDERED, UNEQ, UNLT, UNLE, LT, LTGT, GE, GT, LT
37852    };
37853  bool need_ucomi_values[32] =
37854    {
37855      true,  false, false, true,  true,  false, false, true,
37856      true,  false, false, true,  true,  false, false, true,
37857      false, true,  true,  false, false, true,  true,  false,
37858      false, true,  true,  false, false, true,  true,  false
37859    };
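
  /* The predicate argument of the builtin (a _CMP_* value from
     avxintrin.h; for instance _CMP_EQ_OQ is 0) is used below to index
     both tables above: the first selects the RTL comparison code, the
     second whether the non-signaling (ucomi) form of the instruction
     is needed.  */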
37860
37861  if (!CONST_INT_P (op2))
37862    {
37863      error ("the third argument must be a comparison constant");
37864      return const0_rtx;
37865    }
37866  if (INTVAL (op2) < 0 || INTVAL (op2) >= 32)
37867    {
37868      error ("incorrect comparison mode");
37869      return const0_rtx;
37870    }
37871
37872  if (!insn_p->operand[2].predicate (op3, SImode))
37873    {
37874      error ("incorrect rounding operand");
37875      return const0_rtx;
37876    }
37877
37878  comparison = comi_comparisons[INTVAL (op2)];
37879  need_ucomi = need_ucomi_values[INTVAL (op2)];
37880
37881  if (VECTOR_MODE_P (mode0))
37882    op0 = safe_vector_operand (op0, mode0);
37883  if (VECTOR_MODE_P (mode1))
37884    op1 = safe_vector_operand (op1, mode1);
37885
37886  target = gen_reg_rtx (SImode);
37887  emit_move_insn (target, const0_rtx);
37888  target = gen_rtx_SUBREG (QImode, target, 0);
37889
37890  if ((optimize && !register_operand (op0, mode0))
37891      || !insn_p->operand[0].predicate (op0, mode0))
37892    op0 = copy_to_mode_reg (mode0, op0);
37893  if ((optimize && !register_operand (op1, mode1))
37894      || !insn_p->operand[1].predicate (op1, mode1))
37895    op1 = copy_to_mode_reg (mode1, op1);
37896
37897  if (need_ucomi)
37898    icode = icode == CODE_FOR_sse_comi_round
37899		     ? CODE_FOR_sse_ucomi_round
37900		     : CODE_FOR_sse2_ucomi_round;
37901
37902  pat = GEN_FCN (icode) (op0, op1, op3);
37903  if (! pat)
37904    return 0;
37905
37906  /* Rounding operand can be either NO_ROUND or ROUND_SAE at this point.  */
37907  if (INTVAL (op3) == NO_ROUND)
37908    {
37909      pat = ix86_erase_embedded_rounding (pat);
37910      if (! pat)
37911	return 0;
37912
37913      set_dst = SET_DEST (pat);
37914    }
37915  else
37916    {
37917      gcc_assert (GET_CODE (XVECEXP (pat, 0, 0)) == SET);
37918      set_dst = SET_DEST (XVECEXP (pat, 0, 0));
37919    }
37920
37921  emit_insn (pat);
37922  emit_insn (gen_rtx_SET (VOIDmode,
37923			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
37924			  gen_rtx_fmt_ee (comparison, QImode,
37925					  set_dst,
37926					  const0_rtx)));
37927
37928  return SUBREG_REG (target);
37929}
37930
37931static rtx
37932ix86_expand_round_builtin (const struct builtin_description *d,
37933			   tree exp, rtx target)
37934{
37935  rtx pat;
37936  unsigned int i, nargs;
37937  struct
37938    {
37939      rtx op;
37940      machine_mode mode;
37941    } args[6];
37942  enum insn_code icode = d->icode;
37943  const struct insn_data_d *insn_p = &insn_data[icode];
37944  machine_mode tmode = insn_p->operand[0].mode;
37945  unsigned int nargs_constant = 0;
37946  unsigned int redundant_embed_rnd = 0;
37947
37948  switch ((enum ix86_builtin_func_type) d->flag)
37949    {
37950    case UINT64_FTYPE_V2DF_INT:
37951    case UINT64_FTYPE_V4SF_INT:
37952    case UINT_FTYPE_V2DF_INT:
37953    case UINT_FTYPE_V4SF_INT:
37954    case INT64_FTYPE_V2DF_INT:
37955    case INT64_FTYPE_V4SF_INT:
37956    case INT_FTYPE_V2DF_INT:
37957    case INT_FTYPE_V4SF_INT:
37958      nargs = 2;
37959      break;
37960    case V4SF_FTYPE_V4SF_UINT_INT:
37961    case V4SF_FTYPE_V4SF_UINT64_INT:
37962    case V2DF_FTYPE_V2DF_UINT64_INT:
37963    case V4SF_FTYPE_V4SF_INT_INT:
37964    case V4SF_FTYPE_V4SF_INT64_INT:
37965    case V2DF_FTYPE_V2DF_INT64_INT:
37966    case V4SF_FTYPE_V4SF_V4SF_INT:
37967    case V2DF_FTYPE_V2DF_V2DF_INT:
37968    case V4SF_FTYPE_V4SF_V2DF_INT:
37969    case V2DF_FTYPE_V2DF_V4SF_INT:
37970      nargs = 3;
37971      break;
37972    case V8SF_FTYPE_V8DF_V8SF_QI_INT:
37973    case V8DF_FTYPE_V8DF_V8DF_QI_INT:
37974    case V8SI_FTYPE_V8DF_V8SI_QI_INT:
37975    case V8DI_FTYPE_V8DF_V8DI_QI_INT:
37976    case V8SF_FTYPE_V8DI_V8SF_QI_INT:
37977    case V8DF_FTYPE_V8DI_V8DF_QI_INT:
37978    case V16SF_FTYPE_V16SF_V16SF_HI_INT:
37979    case V8DI_FTYPE_V8SF_V8DI_QI_INT:
37980    case V16SF_FTYPE_V16SI_V16SF_HI_INT:
37981    case V16SI_FTYPE_V16SF_V16SI_HI_INT:
37982    case V8DF_FTYPE_V8SF_V8DF_QI_INT:
37983    case V16SF_FTYPE_V16HI_V16SF_HI_INT:
37984    case V2DF_FTYPE_V2DF_V2DF_V2DF_INT:
37985    case V4SF_FTYPE_V4SF_V4SF_V4SF_INT:
37986      nargs = 4;
37987      break;
37988    case V4SF_FTYPE_V4SF_V4SF_INT_INT:
37989    case V2DF_FTYPE_V2DF_V2DF_INT_INT:
37990      nargs_constant = 2;
37991      nargs = 4;
37992      break;
37993    case INT_FTYPE_V4SF_V4SF_INT_INT:
37994    case INT_FTYPE_V2DF_V2DF_INT_INT:
37995      return ix86_expand_sse_comi_round (d, exp, target);
37996    case V8DF_FTYPE_V8DF_V8DF_V8DF_QI_INT:
37997    case V16SF_FTYPE_V16SF_V16SF_V16SF_HI_INT:
37998    case V2DF_FTYPE_V2DF_V2DF_V2DF_QI_INT:
37999    case V2DF_FTYPE_V2DF_V4SF_V2DF_QI_INT:
38000    case V4SF_FTYPE_V4SF_V4SF_V4SF_QI_INT:
38001    case V4SF_FTYPE_V4SF_V2DF_V4SF_QI_INT:
38002      nargs = 5;
38003      break;
38004    case V16SF_FTYPE_V16SF_INT_V16SF_HI_INT:
38005    case V8DF_FTYPE_V8DF_INT_V8DF_QI_INT:
38006      nargs_constant = 4;
38007      nargs = 5;
38008      break;
38009    case QI_FTYPE_V8DF_V8DF_INT_QI_INT:
38010    case QI_FTYPE_V2DF_V2DF_INT_QI_INT:
38011    case HI_FTYPE_V16SF_V16SF_INT_HI_INT:
38012    case QI_FTYPE_V4SF_V4SF_INT_QI_INT:
38013      nargs_constant = 3;
38014      nargs = 5;
38015      break;
38016    case V16SF_FTYPE_V16SF_V16SF_INT_V16SF_HI_INT:
38017    case V8DF_FTYPE_V8DF_V8DF_INT_V8DF_QI_INT:
38018    case V4SF_FTYPE_V4SF_V4SF_INT_V4SF_QI_INT:
38019    case V2DF_FTYPE_V2DF_V2DF_INT_V2DF_QI_INT:
38020      nargs = 6;
38021      nargs_constant = 4;
38022      break;
38023    case V8DF_FTYPE_V8DF_V8DF_V8DI_INT_QI_INT:
38024    case V16SF_FTYPE_V16SF_V16SF_V16SI_INT_HI_INT:
38025    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT_QI_INT:
38026    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT_QI_INT:
38027      nargs = 6;
38028      nargs_constant = 3;
38029      break;
38030    default:
38031      gcc_unreachable ();
38032    }
38033  gcc_assert (nargs <= ARRAY_SIZE (args));
38034
38035  if (optimize
38036      || target == 0
38037      || GET_MODE (target) != tmode
38038      || !insn_p->operand[0].predicate (target, tmode))
38039    target = gen_reg_rtx (tmode);
38040
38041  for (i = 0; i < nargs; i++)
38042    {
38043      tree arg = CALL_EXPR_ARG (exp, i);
38044      rtx op = expand_normal (arg);
38045      machine_mode mode = insn_p->operand[i + 1].mode;
38046      bool match = insn_p->operand[i + 1].predicate (op, mode);
38047
38048      if (i == nargs - nargs_constant)
38049	{
38050	  if (!match)
38051	    {
38052	      switch (icode)
38053		{
38054		case CODE_FOR_avx512f_getmantv8df_mask_round:
38055		case CODE_FOR_avx512f_getmantv16sf_mask_round:
38056		case CODE_FOR_avx512f_vgetmantv2df_round:
38057		case CODE_FOR_avx512f_vgetmantv4sf_round:
38058		  error ("the immediate argument must be a 4-bit immediate");
38059		  return const0_rtx;
38060		case CODE_FOR_avx512f_cmpv8df3_mask_round:
38061		case CODE_FOR_avx512f_cmpv16sf3_mask_round:
38062		case CODE_FOR_avx512f_vmcmpv2df3_mask_round:
38063		case CODE_FOR_avx512f_vmcmpv4sf3_mask_round:
38064		  error ("the immediate argument must be a 5-bit immediate");
38065		  return const0_rtx;
38066		default:
38067		  error ("the immediate argument must be an 8-bit immediate");
38068		  return const0_rtx;
38069		}
38070	    }
38071	}
38072      else if (i == nargs - 1)
38073	{
38074	  if (!insn_p->operand[nargs].predicate (op, SImode))
38075	    {
38076	      error ("incorrect rounding operand");
38077	      return const0_rtx;
38078	    }
38079
38080	  /* If there is no rounding, use the normal version of the pattern.  */
38081	  if (INTVAL (op) == NO_ROUND)
38082	    redundant_embed_rnd = 1;
38083	}
38084      else
38085	{
38086	  if (VECTOR_MODE_P (mode))
38087	    op = safe_vector_operand (op, mode);
38088
38089	  op = fixup_modeless_constant (op, mode);
38090
38091	  if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38092	    {
38093	      if (optimize || !match)
38094		op = copy_to_mode_reg (mode, op);
38095	    }
38096	  else
38097	    {
38098	      op = copy_to_reg (op);
38099	      op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38100	    }
38101	}
38102
38103      args[i].op = op;
38104      args[i].mode = mode;
38105    }
38106
38107  switch (nargs)
38108    {
38109    case 1:
38110      pat = GEN_FCN (icode) (target, args[0].op);
38111      break;
38112    case 2:
38113      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38114      break;
38115    case 3:
38116      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38117			     args[2].op);
38118      break;
38119    case 4:
38120      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38121			     args[2].op, args[3].op);
38122      break;
38123    case 5:
38124      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38125			     args[2].op, args[3].op, args[4].op);
      break;
38126    case 6:
38127      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
38128			     args[2].op, args[3].op, args[4].op,
38129			     args[5].op);
38130      break;
38131    default:
38132      gcc_unreachable ();
38133    }
38134
38135  if (!pat)
38136    return 0;
38137
38138  if (redundant_embed_rnd)
38139    pat = ix86_erase_embedded_rounding (pat);
38140
38141  emit_insn (pat);
38142  return target;
38143}
38144
38145/* Subroutine of ix86_expand_builtin to take care of special insns
38146   with variable number of operands.  */
38147
38148static rtx
38149ix86_expand_special_args_builtin (const struct builtin_description *d,
38150				  tree exp, rtx target)
38151{
38152  tree arg;
38153  rtx pat, op;
38154  unsigned int i, nargs, arg_adjust, memory;
38155  bool aligned_mem = false;
38156  struct
38157    {
38158      rtx op;
38159      machine_mode mode;
38160    } args[3];
38161  enum insn_code icode = d->icode;
38162  bool last_arg_constant = false;
38163  const struct insn_data_d *insn_p = &insn_data[icode];
38164  machine_mode tmode = insn_p->operand[0].mode;
38165  enum { load, store } klass;
38166
38167  switch ((enum ix86_builtin_func_type) d->flag)
38168    {
38169    case VOID_FTYPE_VOID:
38170      emit_insn (GEN_FCN (icode) (target));
38171      return 0;
38172    case VOID_FTYPE_UINT64:
38173    case VOID_FTYPE_UNSIGNED:
38174      nargs = 0;
38175      klass = store;
38176      memory = 0;
38177      break;
38178
38179    case INT_FTYPE_VOID:
38180    case USHORT_FTYPE_VOID:
38181    case UINT64_FTYPE_VOID:
38182    case UNSIGNED_FTYPE_VOID:
38183      nargs = 0;
38184      klass = load;
38185      memory = 0;
38186      break;
38187    case UINT64_FTYPE_PUNSIGNED:
38188    case V2DI_FTYPE_PV2DI:
38189    case V4DI_FTYPE_PV4DI:
38190    case V32QI_FTYPE_PCCHAR:
38191    case V16QI_FTYPE_PCCHAR:
38192    case V8SF_FTYPE_PCV4SF:
38193    case V8SF_FTYPE_PCFLOAT:
38194    case V4SF_FTYPE_PCFLOAT:
38195    case V4DF_FTYPE_PCV2DF:
38196    case V4DF_FTYPE_PCDOUBLE:
38197    case V2DF_FTYPE_PCDOUBLE:
38198    case VOID_FTYPE_PVOID:
38199    case V16SI_FTYPE_PV4SI:
38200    case V16SF_FTYPE_PV4SF:
38201    case V8DI_FTYPE_PV4DI:
38202    case V8DI_FTYPE_PV8DI:
38203    case V8DF_FTYPE_PV4DF:
38204      nargs = 1;
38205      klass = load;
38206      memory = 0;
38207      switch (icode)
38208	{
38209	case CODE_FOR_sse4_1_movntdqa:
38210	case CODE_FOR_avx2_movntdqa:
38211	case CODE_FOR_avx512f_movntdqa:
38212	  aligned_mem = true;
38213	  break;
38214	default:
38215	  break;
38216	}
38217      break;
38218    case VOID_FTYPE_PV2SF_V4SF:
38219    case VOID_FTYPE_PV8DI_V8DI:
38220    case VOID_FTYPE_PV4DI_V4DI:
38221    case VOID_FTYPE_PV2DI_V2DI:
38222    case VOID_FTYPE_PCHAR_V32QI:
38223    case VOID_FTYPE_PCHAR_V16QI:
38224    case VOID_FTYPE_PFLOAT_V16SF:
38225    case VOID_FTYPE_PFLOAT_V8SF:
38226    case VOID_FTYPE_PFLOAT_V4SF:
38227    case VOID_FTYPE_PDOUBLE_V8DF:
38228    case VOID_FTYPE_PDOUBLE_V4DF:
38229    case VOID_FTYPE_PDOUBLE_V2DF:
38230    case VOID_FTYPE_PLONGLONG_LONGLONG:
38231    case VOID_FTYPE_PULONGLONG_ULONGLONG:
38232    case VOID_FTYPE_PINT_INT:
38233      nargs = 1;
38234      klass = store;
38235      /* Reserve memory operand for target.  */
38236      memory = ARRAY_SIZE (args);
38237      switch (icode)
38238	{
38239	/* These builtins and instructions require the memory
38240	   to be properly aligned.  */
38241	case CODE_FOR_avx_movntv4di:
38242	case CODE_FOR_sse2_movntv2di:
38243	case CODE_FOR_avx_movntv8sf:
38244	case CODE_FOR_sse_movntv4sf:
38245	case CODE_FOR_sse4a_vmmovntv4sf:
38246	case CODE_FOR_avx_movntv4df:
38247	case CODE_FOR_sse2_movntv2df:
38248	case CODE_FOR_sse4a_vmmovntv2df:
38249	case CODE_FOR_sse2_movntidi:
38250	case CODE_FOR_sse_movntq:
38251	case CODE_FOR_sse2_movntisi:
38252	case CODE_FOR_avx512f_movntv16sf:
38253	case CODE_FOR_avx512f_movntv8df:
38254	case CODE_FOR_avx512f_movntv8di:
38255	  aligned_mem = true;
38256	  break;
38257	default:
38258	  break;
38259	}
38260      break;
38261    case V4SF_FTYPE_V4SF_PCV2SF:
38262    case V2DF_FTYPE_V2DF_PCDOUBLE:
38263      nargs = 2;
38264      klass = load;
38265      memory = 1;
38266      break;
38267    case V8SF_FTYPE_PCV8SF_V8SI:
38268    case V4DF_FTYPE_PCV4DF_V4DI:
38269    case V4SF_FTYPE_PCV4SF_V4SI:
38270    case V2DF_FTYPE_PCV2DF_V2DI:
38271    case V8SI_FTYPE_PCV8SI_V8SI:
38272    case V4DI_FTYPE_PCV4DI_V4DI:
38273    case V4SI_FTYPE_PCV4SI_V4SI:
38274    case V2DI_FTYPE_PCV2DI_V2DI:
38275      nargs = 2;
38276      klass = load;
38277      memory = 0;
38278      break;
38279    case VOID_FTYPE_PV8DF_V8DF_QI:
38280    case VOID_FTYPE_PV4DF_V4DF_QI:
38281    case VOID_FTYPE_PV2DF_V2DF_QI:
38282    case VOID_FTYPE_PV16SF_V16SF_HI:
38283    case VOID_FTYPE_PV8SF_V8SF_QI:
38284    case VOID_FTYPE_PV4SF_V4SF_QI:
38285    case VOID_FTYPE_PV8DI_V8DI_QI:
38286    case VOID_FTYPE_PV4DI_V4DI_QI:
38287    case VOID_FTYPE_PV2DI_V2DI_QI:
38288    case VOID_FTYPE_PV16SI_V16SI_HI:
38289    case VOID_FTYPE_PV8SI_V8SI_QI:
38290    case VOID_FTYPE_PV4SI_V4SI_QI:
38291      switch (icode)
38292	{
38293	/* These builtins and instructions require the memory
38294	   to be properly aligned.  */
38295	case CODE_FOR_avx512f_storev16sf_mask:
38296	case CODE_FOR_avx512f_storev16si_mask:
38297	case CODE_FOR_avx512f_storev8df_mask:
38298	case CODE_FOR_avx512f_storev8di_mask:
38299	case CODE_FOR_avx512vl_storev8sf_mask:
38300	case CODE_FOR_avx512vl_storev8si_mask:
38301	case CODE_FOR_avx512vl_storev4df_mask:
38302	case CODE_FOR_avx512vl_storev4di_mask:
38303	case CODE_FOR_avx512vl_storev4sf_mask:
38304	case CODE_FOR_avx512vl_storev4si_mask:
38305	case CODE_FOR_avx512vl_storev2df_mask:
38306	case CODE_FOR_avx512vl_storev2di_mask:
38307	  aligned_mem = true;
38308	  break;
38309	default:
38310	  break;
38311	}
38312      /* FALLTHRU */
38313    case VOID_FTYPE_PV8SF_V8SI_V8SF:
38314    case VOID_FTYPE_PV4DF_V4DI_V4DF:
38315    case VOID_FTYPE_PV4SF_V4SI_V4SF:
38316    case VOID_FTYPE_PV2DF_V2DI_V2DF:
38317    case VOID_FTYPE_PV8SI_V8SI_V8SI:
38318    case VOID_FTYPE_PV4DI_V4DI_V4DI:
38319    case VOID_FTYPE_PV4SI_V4SI_V4SI:
38320    case VOID_FTYPE_PV2DI_V2DI_V2DI:
38321    case VOID_FTYPE_PDOUBLE_V2DF_QI:
38322    case VOID_FTYPE_PFLOAT_V4SF_QI:
38323    case VOID_FTYPE_PV8SI_V8DI_QI:
38324    case VOID_FTYPE_PV8HI_V8DI_QI:
38325    case VOID_FTYPE_PV16HI_V16SI_HI:
38326    case VOID_FTYPE_PV16QI_V8DI_QI:
38327    case VOID_FTYPE_PV16QI_V16SI_HI:
38328    case VOID_FTYPE_PV4SI_V4DI_QI:
38329    case VOID_FTYPE_PV4SI_V2DI_QI:
38330    case VOID_FTYPE_PV8HI_V4DI_QI:
38331    case VOID_FTYPE_PV8HI_V2DI_QI:
38332    case VOID_FTYPE_PV8HI_V8SI_QI:
38333    case VOID_FTYPE_PV8HI_V4SI_QI:
38334    case VOID_FTYPE_PV16QI_V4DI_QI:
38335    case VOID_FTYPE_PV16QI_V2DI_QI:
38336    case VOID_FTYPE_PV16QI_V8SI_QI:
38337    case VOID_FTYPE_PV16QI_V4SI_QI:
38338    case VOID_FTYPE_PV8HI_V8HI_QI:
38339    case VOID_FTYPE_PV16HI_V16HI_HI:
38340    case VOID_FTYPE_PV32HI_V32HI_SI:
38341    case VOID_FTYPE_PV16QI_V16QI_HI:
38342    case VOID_FTYPE_PV32QI_V32QI_SI:
38343    case VOID_FTYPE_PV64QI_V64QI_DI:
38344      nargs = 2;
38345      klass = store;
38346      /* Reserve memory operand for target.  */
38347      memory = ARRAY_SIZE (args);
38348      break;
38349    case V4SF_FTYPE_PCV4SF_V4SF_QI:
38350    case V8SF_FTYPE_PCV8SF_V8SF_QI:
38351    case V16SF_FTYPE_PCV16SF_V16SF_HI:
38352    case V4SI_FTYPE_PCV4SI_V4SI_QI:
38353    case V8SI_FTYPE_PCV8SI_V8SI_QI:
38354    case V16SI_FTYPE_PCV16SI_V16SI_HI:
38355    case V2DF_FTYPE_PCV2DF_V2DF_QI:
38356    case V4DF_FTYPE_PCV4DF_V4DF_QI:
38357    case V8DF_FTYPE_PCV8DF_V8DF_QI:
38358    case V2DI_FTYPE_PCV2DI_V2DI_QI:
38359    case V4DI_FTYPE_PCV4DI_V4DI_QI:
38360    case V8DI_FTYPE_PCV8DI_V8DI_QI:
38361    case V2DF_FTYPE_PCDOUBLE_V2DF_QI:
38362    case V4SF_FTYPE_PCFLOAT_V4SF_QI:
38363    case V8HI_FTYPE_PCV8HI_V8HI_QI:
38364    case V16HI_FTYPE_PCV16HI_V16HI_HI:
38365    case V32HI_FTYPE_PCV32HI_V32HI_SI:
38366    case V16QI_FTYPE_PCV16QI_V16QI_HI:
38367    case V32QI_FTYPE_PCV32QI_V32QI_SI:
38368    case V64QI_FTYPE_PCV64QI_V64QI_DI:
38369      nargs = 3;
38370      klass = load;
38371      memory = 0;
38372      switch (icode)
38373	{
38374	/* These builtins and instructions require the memory
38375	   to be properly aligned.  */
38376	case CODE_FOR_avx512f_loadv16sf_mask:
38377	case CODE_FOR_avx512f_loadv16si_mask:
38378	case CODE_FOR_avx512f_loadv8df_mask:
38379	case CODE_FOR_avx512f_loadv8di_mask:
38380	case CODE_FOR_avx512vl_loadv8sf_mask:
38381	case CODE_FOR_avx512vl_loadv8si_mask:
38382	case CODE_FOR_avx512vl_loadv4df_mask:
38383	case CODE_FOR_avx512vl_loadv4di_mask:
38384	case CODE_FOR_avx512vl_loadv4sf_mask:
38385	case CODE_FOR_avx512vl_loadv4si_mask:
38386	case CODE_FOR_avx512vl_loadv2df_mask:
38387	case CODE_FOR_avx512vl_loadv2di_mask:
38388	case CODE_FOR_avx512bw_loadv64qi_mask:
38389	case CODE_FOR_avx512vl_loadv32qi_mask:
38390	case CODE_FOR_avx512vl_loadv16qi_mask:
38391	case CODE_FOR_avx512bw_loadv32hi_mask:
38392	case CODE_FOR_avx512vl_loadv16hi_mask:
38393	case CODE_FOR_avx512vl_loadv8hi_mask:
38394	  aligned_mem = true;
38395	  break;
38396	default:
38397	  break;
38398	}
38399      break;
38400    case VOID_FTYPE_UINT_UINT_UINT:
38401    case VOID_FTYPE_UINT64_UINT_UINT:
38402    case UCHAR_FTYPE_UINT_UINT_UINT:
38403    case UCHAR_FTYPE_UINT64_UINT_UINT:
38404      nargs = 3;
38405      klass = load;
38406      memory = ARRAY_SIZE (args);
38407      last_arg_constant = true;
38408      break;
38409    default:
38410      gcc_unreachable ();
38411    }
38412
38413  gcc_assert (nargs <= ARRAY_SIZE (args));
38414
38415  if (klass == store)
38416    {
38417      arg = CALL_EXPR_ARG (exp, 0);
38418      op = expand_normal (arg);
38419      gcc_assert (target == 0);
38420      if (memory)
38421	{
38422	  op = ix86_zero_extend_to_Pmode (op);
38423	  target = gen_rtx_MEM (tmode, op);
38424	  /* target at this point has just BITS_PER_UNIT MEM_ALIGN
38425	     on it.  Try to improve it using get_pointer_alignment,
38426	     and if the special builtin is one that requires strict
38427	     mode alignment, also from its GET_MODE_ALIGNMENT.
38428	     Failure to do so could lead to ix86_legitimate_combined_insn
38429	     rejecting all changes to such insns.  */
38430	  unsigned int align = get_pointer_alignment (arg);
38431	  if (aligned_mem && align < GET_MODE_ALIGNMENT (tmode))
38432	    align = GET_MODE_ALIGNMENT (tmode);
38433	  if (MEM_ALIGN (target) < align)
38434	    set_mem_align (target, align);
38435	}
38436      else
38437	target = force_reg (tmode, op);
38438      arg_adjust = 1;
38439    }
38440  else
38441    {
38442      arg_adjust = 0;
38443      if (optimize
38444	  || target == 0
38445	  || !register_operand (target, tmode)
38446	  || GET_MODE (target) != tmode)
38447	target = gen_reg_rtx (tmode);
38448    }
38449
38450  for (i = 0; i < nargs; i++)
38451    {
38452      machine_mode mode = insn_p->operand[i + 1].mode;
38453      bool match;
38454
38455      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
38456      op = expand_normal (arg);
38457      match = insn_p->operand[i + 1].predicate (op, mode);
38458
38459      if (last_arg_constant && (i + 1) == nargs)
38460	{
38461	  if (!match)
38462	    {
38463	      if (icode == CODE_FOR_lwp_lwpvalsi3
38464		  || icode == CODE_FOR_lwp_lwpinssi3
38465		  || icode == CODE_FOR_lwp_lwpvaldi3
38466		  || icode == CODE_FOR_lwp_lwpinsdi3)
38467		error ("the last argument must be a 32-bit immediate");
38468	      else
38469		error ("the last argument must be an 8-bit immediate");
38470	      return const0_rtx;
38471	    }
38472	}
38473      else
38474	{
38475	  if (i == memory)
38476	    {
38477	      /* This must be the memory operand.  */
38478	      op = ix86_zero_extend_to_Pmode (op);
38479	      op = gen_rtx_MEM (mode, op);
38480	      /* op at this point has just BITS_PER_UNIT MEM_ALIGN
38481		 on it.  Try to improve it using get_pointer_alignment,
38482		 and if the special builtin is one that requires strict
38483		 mode alignment, also from its GET_MODE_ALIGNMENT.
38484		 Failure to do so could lead to ix86_legitimate_combined_insn
38485		 rejecting all changes to such insns.  */
38486	      unsigned int align = get_pointer_alignment (arg);
38487	      if (aligned_mem && align < GET_MODE_ALIGNMENT (mode))
38488		align = GET_MODE_ALIGNMENT (mode);
38489	      if (MEM_ALIGN (op) < align)
38490		set_mem_align (op, align);
38491	    }
38492	  else
38493	    {
38494	      /* This must be a register.  */
38495	      if (VECTOR_MODE_P (mode))
38496		op = safe_vector_operand (op, mode);
38497
38498	      op = fixup_modeless_constant (op, mode);
38499
38500	      if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
38501		op = copy_to_mode_reg (mode, op);
38502	      else
38503	        {
38504	          op = copy_to_reg (op);
38505	          op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
38506	        }
38507	    }
38508	}
38509
38510      args[i].op = op;
38511      args[i].mode = mode;
38512    }
38513
38514  switch (nargs)
38515    {
38516    case 0:
38517      pat = GEN_FCN (icode) (target);
38518      break;
38519    case 1:
38520      pat = GEN_FCN (icode) (target, args[0].op);
38521      break;
38522    case 2:
38523      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
38524      break;
38525    case 3:
38526      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
38527      break;
38528    default:
38529      gcc_unreachable ();
38530    }
38531
38532  if (! pat)
38533    return 0;
38534  emit_insn (pat);
38535  return klass == store ? 0 : target;
38536}
38537
38538/* Return the integer constant in ARG.  Constrain it to be in the range
38539   of the subparts of VEC_TYPE; issue an error if not.  */
38540
38541static int
38542get_element_number (tree vec_type, tree arg)
38543{
38544  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
38545
38546  if (!tree_fits_uhwi_p (arg)
38547      || (elt = tree_to_uhwi (arg), elt > max))
38548    {
38549      error ("selector must be an integer constant in the range 0..%wi", max);
38550      return 0;
38551    }
38552
38553  return elt;
38554}
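
/* For instance (a made-up call, purely for illustration): for a V4SF
   vector type TYPE_VECTOR_SUBPARTS is 4, so a selector argument of 7
   passed to one of the vec_extract wrappers below is rejected with the
   error above and element 0 is used instead.  */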
38555
38556/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38557   ix86_expand_vector_init.  We DO have language-level syntax for this, in
38558   the form of  (type){ init-list }.  Except that since we can't place emms
38559   instructions from inside the compiler, we can't allow the use of MMX
38560   registers unless the user explicitly asks for it.  So we do *not* define
38561   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
38562   we have builtins invoked by mmintrin.h that give us license to emit
38563   these sorts of instructions.  */
38564
38565static rtx
38566ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
38567{
38568  machine_mode tmode = TYPE_MODE (type);
38569  machine_mode inner_mode = GET_MODE_INNER (tmode);
38570  int i, n_elt = GET_MODE_NUNITS (tmode);
38571  rtvec v = rtvec_alloc (n_elt);
38572
38573  gcc_assert (VECTOR_MODE_P (tmode));
38574  gcc_assert (call_expr_nargs (exp) == n_elt);
38575
38576  for (i = 0; i < n_elt; ++i)
38577    {
38578      rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
38579      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
38580    }
38581
38582  if (!target || !register_operand (target, tmode))
38583    target = gen_reg_rtx (tmode);
38584
38585  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
38586  return target;
38587}
38588
38589/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38590   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
38591   had a language-level syntax for referencing vector elements.  */
38592
38593static rtx
38594ix86_expand_vec_ext_builtin (tree exp, rtx target)
38595{
38596  machine_mode tmode, mode0;
38597  tree arg0, arg1;
38598  int elt;
38599  rtx op0;
38600
38601  arg0 = CALL_EXPR_ARG (exp, 0);
38602  arg1 = CALL_EXPR_ARG (exp, 1);
38603
38604  op0 = expand_normal (arg0);
38605  elt = get_element_number (TREE_TYPE (arg0), arg1);
38606
38607  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38608  mode0 = TYPE_MODE (TREE_TYPE (arg0));
38609  gcc_assert (VECTOR_MODE_P (mode0));
38610
38611  op0 = force_reg (mode0, op0);
38612
38613  if (optimize || !target || !register_operand (target, tmode))
38614    target = gen_reg_rtx (tmode);
38615
38616  ix86_expand_vector_extract (true, target, op0, elt);
38617
38618  return target;
38619}
38620
38621/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
38622   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
38623   a language-level syntax for referencing vector elements.  */
38624
38625static rtx
38626ix86_expand_vec_set_builtin (tree exp)
38627{
38628  machine_mode tmode, mode1;
38629  tree arg0, arg1, arg2;
38630  int elt;
38631  rtx op0, op1, target;
38632
38633  arg0 = CALL_EXPR_ARG (exp, 0);
38634  arg1 = CALL_EXPR_ARG (exp, 1);
38635  arg2 = CALL_EXPR_ARG (exp, 2);
38636
38637  tmode = TYPE_MODE (TREE_TYPE (arg0));
38638  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
38639  gcc_assert (VECTOR_MODE_P (tmode));
38640
38641  op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
38642  op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
38643  elt = get_element_number (TREE_TYPE (arg0), arg2);
38644
38645  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
38646    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
38647
38648  op0 = force_reg (tmode, op0);
38649  op1 = force_reg (mode1, op1);
38650
38651  /* OP0 is the source of these builtin functions and shouldn't be
38652     modified.  Create a copy, use it and return it as target.  */
38653  target = gen_reg_rtx (tmode);
38654  emit_move_insn (target, op0);
38655  ix86_expand_vector_set (true, target, op1, elt);
38656
38657  return target;
38658}
38659
38660/* Emit conditional move of SRC to DST with condition
38661   OP1 CODE OP2.  */
38662static void
38663ix86_emit_cmove (rtx dst, rtx src, enum rtx_code code, rtx op1, rtx op2)
38664{
38665  rtx t;
38666
38667  if (TARGET_CMOVE)
38668    {
38669      t = ix86_expand_compare (code, op1, op2);
38670      emit_insn (gen_rtx_SET (VOIDmode, dst,
38671			      gen_rtx_IF_THEN_ELSE (GET_MODE (dst), t,
38672						    src, dst)));
38673    }
38674  else
38675    {
38676      rtx nomove = gen_label_rtx ();
38677      emit_cmp_and_jump_insns (op1, op2, reverse_condition (code),
38678			       const0_rtx, GET_MODE (op1), 1, nomove);
38679      emit_move_insn (dst, src);
38680      emit_label (nomove);
38681    }
38682}
38683
38684/* Choose max of DST and SRC and put it to DST.  */
38685static void
38686ix86_emit_move_max (rtx dst, rtx src)
38687{
38688  ix86_emit_cmove (dst, src, LTU, dst, src);
38689}
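
/* Spelled out, the cmove above implements DST = (DST < SRC) ? SRC : DST
   with an unsigned comparison, i.e. the unsigned maximum of DST and SRC;
   without TARGET_CMOVE the same effect comes from the compare-and-jump
   fallback in ix86_emit_cmove.  */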
38690
38691/* Expand an expression EXP that calls a built-in function,
38692   with result going to TARGET if that's convenient
38693   (and in mode MODE if that's convenient).
38694   SUBTARGET may be used as the target for computing one of EXP's operands.
38695   IGNORE is nonzero if the value is to be ignored.  */
38696
38697static rtx
38698ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
38699		     machine_mode mode, int ignore)
38700{
38701  const struct builtin_description *d;
38702  size_t i;
38703  enum insn_code icode;
38704  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
38705  tree arg0, arg1, arg2, arg3, arg4;
38706  rtx op0, op1, op2, op3, op4, pat, insn;
38707  machine_mode mode0, mode1, mode2, mode3, mode4;
38708  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
38709
38710  /* For CPU builtins that can be folded, fold first and expand the fold.  */
38711  switch (fcode)
38712    {
38713    case IX86_BUILTIN_CPU_INIT:
38714      {
38715	/* Make it call __cpu_indicator_init in libgcc. */
38716	tree call_expr, fndecl, type;
38717        type = build_function_type_list (integer_type_node, NULL_TREE);
38718	fndecl = build_fn_decl ("__cpu_indicator_init", type);
38719	call_expr = build_call_expr (fndecl, 0);
38720	return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
38721      }
38722    case IX86_BUILTIN_CPU_IS:
38723    case IX86_BUILTIN_CPU_SUPPORTS:
38724      {
38725	tree arg0 = CALL_EXPR_ARG (exp, 0);
38726	tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
38727	gcc_assert (fold_expr != NULL_TREE);
38728	return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
38729      }
38730    }
38731
38732  /* Determine whether the builtin function is available under the current ISA.
38733     Originally the builtin was not created if it wasn't applicable to the
38734     current ISA based on the command line switches.  With function specific
38735     options, we need to check in the context of the function making the call
38736     whether it is supported.  */
38737  if (ix86_builtins_isa[fcode].isa
38738      && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
38739    {
38740      char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
38741				       NULL, (enum fpmath_unit) 0, false);
38742
38743      if (!opts)
38744	error ("%qE needs unknown isa option", fndecl);
38745      else
38746	{
38747	  gcc_assert (opts != NULL);
38748	  error ("%qE needs isa option %s", fndecl, opts);
38749	  free (opts);
38750	}
38751      return const0_rtx;
38752    }
38753
38754  switch (fcode)
38755    {
38756    case IX86_BUILTIN_BNDMK:
38757      if (!target
38758	  || GET_MODE (target) != BNDmode
38759	  || !register_operand (target, BNDmode))
38760	target = gen_reg_rtx (BNDmode);
38761
38762      arg0 = CALL_EXPR_ARG (exp, 0);
38763      arg1 = CALL_EXPR_ARG (exp, 1);
38764
38765      op0 = expand_normal (arg0);
38766      op1 = expand_normal (arg1);
38767
38768      if (!register_operand (op0, Pmode))
38769	op0 = ix86_zero_extend_to_Pmode (op0);
38770      if (!register_operand (op1, Pmode))
38771	op1 = ix86_zero_extend_to_Pmode (op1);
38772
38773      /* Builtin arg1 is the size of the block, but instruction op1
38774	 should be (size - 1).  */
38775      op1 = expand_simple_binop (Pmode, PLUS, op1, constm1_rtx,
38776				 NULL_RTX, 1, OPTAB_DIRECT);
38777
38778      emit_insn (BNDmode == BND64mode
38779                 ? gen_bnd64_mk (target, op0, op1)
38780                 : gen_bnd32_mk (target, op0, op1));
38781      return target;
38782
38783    case IX86_BUILTIN_BNDSTX:
38784      arg0 = CALL_EXPR_ARG (exp, 0);
38785      arg1 = CALL_EXPR_ARG (exp, 1);
38786      arg2 = CALL_EXPR_ARG (exp, 2);
38787
38788      op0 = expand_normal (arg0);
38789      op1 = expand_normal (arg1);
38790      op2 = expand_normal (arg2);
38791
38792      if (!register_operand (op0, Pmode))
38793	op0 = ix86_zero_extend_to_Pmode (op0);
38794      if (!register_operand (op1, BNDmode))
38795	op1 = copy_to_mode_reg (BNDmode, op1);
38796      if (!register_operand (op2, Pmode))
38797	op2 = ix86_zero_extend_to_Pmode (op2);
38798
38799      emit_insn (BNDmode == BND64mode
38800                 ? gen_bnd64_stx (op2, op0, op1)
38801                 : gen_bnd32_stx (op2, op0, op1));
38802      return 0;
38803
38804    case IX86_BUILTIN_BNDLDX:
38805      if (!target
38806	  || GET_MODE (target) != BNDmode
38807	  || !register_operand (target, BNDmode))
38808	target = gen_reg_rtx (BNDmode);
38809
38810      arg0 = CALL_EXPR_ARG (exp, 0);
38811      arg1 = CALL_EXPR_ARG (exp, 1);
38812
38813      op0 = expand_normal (arg0);
38814      op1 = expand_normal (arg1);
38815
38816      if (!register_operand (op0, Pmode))
38817	op0 = ix86_zero_extend_to_Pmode (op0);
38818      if (!register_operand (op1, Pmode))
38819	op1 = ix86_zero_extend_to_Pmode (op1);
38820
38821      emit_insn (BNDmode == BND64mode
38822		 ? gen_bnd64_ldx (target, op0, op1)
38823		 : gen_bnd32_ldx (target, op0, op1));
38824      return target;
38825
38826    case IX86_BUILTIN_BNDCL:
38827      arg0 = CALL_EXPR_ARG (exp, 0);
38828      arg1 = CALL_EXPR_ARG (exp, 1);
38829
38830      op0 = expand_normal (arg0);
38831      op1 = expand_normal (arg1);
38832
38833      if (!register_operand (op0, Pmode))
38834	op0 = ix86_zero_extend_to_Pmode (op0);
38835      if (!register_operand (op1, BNDmode))
38836	op1 = copy_to_mode_reg (BNDmode, op1);
38837
38838      emit_insn (BNDmode == BND64mode
38839                 ? gen_bnd64_cl (op1, op0)
38840                 : gen_bnd32_cl (op1, op0));
38841      return 0;
38842
38843    case IX86_BUILTIN_BNDCU:
38844      arg0 = CALL_EXPR_ARG (exp, 0);
38845      arg1 = CALL_EXPR_ARG (exp, 1);
38846
38847      op0 = expand_normal (arg0);
38848      op1 = expand_normal (arg1);
38849
38850      if (!register_operand (op0, Pmode))
38851	op0 = ix86_zero_extend_to_Pmode (op0);
38852      if (!register_operand (op1, BNDmode))
38853	op1 = copy_to_mode_reg (BNDmode, op1);
38854
38855      emit_insn (BNDmode == BND64mode
38856                 ? gen_bnd64_cu (op1, op0)
38857                 : gen_bnd32_cu (op1, op0));
38858      return 0;
38859
38860    case IX86_BUILTIN_BNDRET:
38861      arg0 = CALL_EXPR_ARG (exp, 0);
38862      gcc_assert (TREE_CODE (arg0) == SSA_NAME);
38863      target = chkp_get_rtl_bounds (arg0);
38864
      /* If no bounds were specified for the returned value,
	 then use INIT bounds.  This usually happens when
	 some built-in function is expanded.  */
38868      if (!target)
38869	{
38870	  rtx t1 = gen_reg_rtx (Pmode);
38871	  rtx t2 = gen_reg_rtx (Pmode);
38872	  target = gen_reg_rtx (BNDmode);
38873	  emit_move_insn (t1, const0_rtx);
38874	  emit_move_insn (t2, constm1_rtx);
38875	  emit_insn (BNDmode == BND64mode
38876		     ? gen_bnd64_mk (target, t1, t2)
38877		     : gen_bnd32_mk (target, t1, t2));
38878	}
38879
38880      gcc_assert (target && REG_P (target));
38881      return target;
38882
38883    case IX86_BUILTIN_BNDNARROW:
38884      {
38885	rtx m1, m1h1, m1h2, lb, ub, t1;
38886
38887	/* Return value and lb.  */
38888	arg0 = CALL_EXPR_ARG (exp, 0);
38889	/* Bounds.  */
38890	arg1 = CALL_EXPR_ARG (exp, 1);
38891	/* Size.  */
38892	arg2 = CALL_EXPR_ARG (exp, 2);
38893
38894	lb = expand_normal (arg0);
38895	op1 = expand_normal (arg1);
38896	op2 = expand_normal (arg2);
38897
	/* The size was passed but we need to use (size - 1), as for bndmk.  */
38899	op2 = expand_simple_binop (Pmode, PLUS, op2, constm1_rtx,
38900				   NULL_RTX, 1, OPTAB_DIRECT);
38901
	/* Add LB to the size and invert to get UB.  */
38903	op2 = expand_simple_binop (Pmode, PLUS, op2, lb,
38904				   op2, 1, OPTAB_DIRECT);
38905	ub = expand_simple_unop (Pmode, NOT, op2, op2, 1);
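	/* Worked example (hypothetical values): with lb = 0x1000 and
	   size = 0x10, op2 becomes 0x100f and ub = ~0x100f, which is the
	   1's complement encoding used for the upper bound below.  */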
38906
38907	if (!register_operand (lb, Pmode))
38908	  lb = ix86_zero_extend_to_Pmode (lb);
38909	if (!register_operand (ub, Pmode))
38910	  ub = ix86_zero_extend_to_Pmode (ub);
38911
38912	/* We need to move bounds to memory before any computations.  */
38913	if (MEM_P (op1))
38914	  m1 = op1;
38915	else
38916	  {
38917	    m1 = assign_386_stack_local (BNDmode, SLOT_TEMP);
38918	    emit_move_insn (m1, op1);
38919	  }
38920
	/* Generate mem expressions to be used to access LB and UB.  */
38922	m1h1 = adjust_address (m1, Pmode, 0);
38923	m1h2 = adjust_address (m1, Pmode, GET_MODE_SIZE (Pmode));
38924
38925	t1 = gen_reg_rtx (Pmode);
38926
38927	/* Compute LB.  */
38928	emit_move_insn (t1, m1h1);
38929	ix86_emit_move_max (t1, lb);
38930	emit_move_insn (m1h1, t1);
38931
	/* Compute UB.  UB is stored in 1's complement form, so taking
	   the max of the stored values corresponds to taking the min of
	   the actual upper bounds; therefore we also use max here.  */
38934	emit_move_insn (t1, m1h2);
38935	ix86_emit_move_max (t1, ub);
38936	emit_move_insn (m1h2, t1);
38937
38938	op2 = gen_reg_rtx (BNDmode);
38939	emit_move_insn (op2, m1);
38940
38941	return chkp_join_splitted_slot (lb, op2);
38942      }
38943
38944    case IX86_BUILTIN_BNDINT:
38945      {
38946	rtx res, rh1, rh2, lb1, lb2, ub1, ub2;
38947
38948	if (!target
38949	    || GET_MODE (target) != BNDmode
38950	    || !register_operand (target, BNDmode))
38951	  target = gen_reg_rtx (BNDmode);
38952
38953	arg0 = CALL_EXPR_ARG (exp, 0);
38954	arg1 = CALL_EXPR_ARG (exp, 1);
38955
38956	op0 = expand_normal (arg0);
38957	op1 = expand_normal (arg1);
38958
38959	res = assign_386_stack_local (BNDmode, SLOT_TEMP);
38960	rh1 = adjust_address (res, Pmode, 0);
38961	rh2 = adjust_address (res, Pmode, GET_MODE_SIZE (Pmode));
38962
	/* Put the first bounds into temporaries.  */
38964	lb1 = gen_reg_rtx (Pmode);
38965	ub1 = gen_reg_rtx (Pmode);
38966	if (MEM_P (op0))
38967	  {
38968	    emit_move_insn (lb1, adjust_address (op0, Pmode, 0));
38969	    emit_move_insn (ub1, adjust_address (op0, Pmode,
38970						 GET_MODE_SIZE (Pmode)));
38971	  }
38972	else
38973	  {
38974	    emit_move_insn (res, op0);
38975	    emit_move_insn (lb1, rh1);
38976	    emit_move_insn (ub1, rh2);
38977	  }
38978
	/* Put the second bounds into temporaries.  */
38980	lb2 = gen_reg_rtx (Pmode);
38981	ub2 = gen_reg_rtx (Pmode);
38982	if (MEM_P (op1))
38983	  {
38984	    emit_move_insn (lb2, adjust_address (op1, Pmode, 0));
38985	    emit_move_insn (ub2, adjust_address (op1, Pmode,
38986						 GET_MODE_SIZE (Pmode)));
38987	  }
38988	else
38989	  {
38990	    emit_move_insn (res, op1);
38991	    emit_move_insn (lb2, rh1);
38992	    emit_move_insn (ub2, rh2);
38993	  }
38994
38995	/* Compute LB.  */
38996	ix86_emit_move_max (lb1, lb2);
38997	emit_move_insn (rh1, lb1);
38998
	/* Compute UB.  UB is stored in 1's complement form, so taking
	   the max of the stored values corresponds to taking the min of
	   the actual upper bounds; therefore we also use max here.  */
39001	ix86_emit_move_max (ub1, ub2);
39002	emit_move_insn (rh2, ub1);
39003
39004	emit_move_insn (target, res);
39005
39006	return target;
39007      }
39008
39009    case IX86_BUILTIN_SIZEOF:
39010      {
39011	tree name;
39012	rtx symbol;
39013
39014	if (!target
39015	    || GET_MODE (target) != Pmode
39016	    || !register_operand (target, Pmode))
39017	  target = gen_reg_rtx (Pmode);
39018
39019	arg0 = CALL_EXPR_ARG (exp, 0);
39020	gcc_assert (TREE_CODE (arg0) == VAR_DECL);
39021
39022	name = DECL_ASSEMBLER_NAME (arg0);
39023	symbol = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (name));
39024
39025	emit_insn (Pmode == SImode
39026		   ? gen_move_size_reloc_si (target, symbol)
39027		   : gen_move_size_reloc_di (target, symbol));
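	/* The move above uses a size relocation against the object's
	   symbol, so the value produced is (presumably) the final object
	   size as seen by the linker rather than a compile-time
	   constant.  */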
39028
39029	return target;
39030      }
39031
39032    case IX86_BUILTIN_BNDLOWER:
39033      {
39034	rtx mem, hmem;
39035
39036	if (!target
39037	    || GET_MODE (target) != Pmode
39038	    || !register_operand (target, Pmode))
39039	  target = gen_reg_rtx (Pmode);
39040
39041	arg0 = CALL_EXPR_ARG (exp, 0);
39042	op0 = expand_normal (arg0);
39043
39044	/* We need to move bounds to memory first.  */
39045	if (MEM_P (op0))
39046	  mem = op0;
39047	else
39048	  {
39049	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39050	    emit_move_insn (mem, op0);
39051	  }
39052
39053	/* Generate mem expression to access LB and load it.  */
39054	hmem = adjust_address (mem, Pmode, 0);
39055	emit_move_insn (target, hmem);
39056
39057	return target;
39058      }
39059
39060    case IX86_BUILTIN_BNDUPPER:
39061      {
39062	rtx mem, hmem, res;
39063
39064	if (!target
39065	    || GET_MODE (target) != Pmode
39066	    || !register_operand (target, Pmode))
39067	  target = gen_reg_rtx (Pmode);
39068
39069	arg0 = CALL_EXPR_ARG (exp, 0);
39070	op0 = expand_normal (arg0);
39071
39072	/* We need to move bounds to memory first.  */
39073	if (MEM_P (op0))
39074	  mem = op0;
39075	else
39076	  {
39077	    mem = assign_386_stack_local (BNDmode, SLOT_TEMP);
39078	    emit_move_insn (mem, op0);
39079	  }
39080
39081	/* Generate mem expression to access UB.  */
39082	hmem = adjust_address (mem, Pmode, GET_MODE_SIZE (Pmode));
39083
	/* We need to invert all bits of UB.  */
39085	res = expand_simple_unop (Pmode, NOT, hmem, target, 1);
39086
39087	if (res != target)
39088	  emit_move_insn (target, res);
39089
39090	return target;
39091      }
39092
39093    case IX86_BUILTIN_MASKMOVQ:
39094    case IX86_BUILTIN_MASKMOVDQU:
39095      icode = (fcode == IX86_BUILTIN_MASKMOVQ
39096	       ? CODE_FOR_mmx_maskmovq
39097	       : CODE_FOR_sse2_maskmovdqu);
39098      /* Note the arg order is different from the operand order.  */
39099      arg1 = CALL_EXPR_ARG (exp, 0);
39100      arg2 = CALL_EXPR_ARG (exp, 1);
39101      arg0 = CALL_EXPR_ARG (exp, 2);
39102      op0 = expand_normal (arg0);
39103      op1 = expand_normal (arg1);
39104      op2 = expand_normal (arg2);
39105      mode0 = insn_data[icode].operand[0].mode;
39106      mode1 = insn_data[icode].operand[1].mode;
39107      mode2 = insn_data[icode].operand[2].mode;
39108
39109      op0 = ix86_zero_extend_to_Pmode (op0);
39110      op0 = gen_rtx_MEM (mode1, op0);
39111
39112      if (!insn_data[icode].operand[0].predicate (op0, mode0))
39113	op0 = copy_to_mode_reg (mode0, op0);
39114      if (!insn_data[icode].operand[1].predicate (op1, mode1))
39115	op1 = copy_to_mode_reg (mode1, op1);
39116      if (!insn_data[icode].operand[2].predicate (op2, mode2))
39117	op2 = copy_to_mode_reg (mode2, op2);
39118      pat = GEN_FCN (icode) (op0, op1, op2);
39119      if (! pat)
39120	return 0;
39121      emit_insn (pat);
39122      return 0;
39123
39124    case IX86_BUILTIN_LDMXCSR:
39125      op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
39126      target = assign_386_stack_local (SImode, SLOT_TEMP);
39127      emit_move_insn (target, op0);
39128      emit_insn (gen_sse_ldmxcsr (target));
39129      return 0;
39130
39131    case IX86_BUILTIN_STMXCSR:
39132      target = assign_386_stack_local (SImode, SLOT_TEMP);
39133      emit_insn (gen_sse_stmxcsr (target));
39134      return copy_to_mode_reg (SImode, target);
39135
39136    case IX86_BUILTIN_CLFLUSH:
39137	arg0 = CALL_EXPR_ARG (exp, 0);
39138	op0 = expand_normal (arg0);
39139	icode = CODE_FOR_sse2_clflush;
39140	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39141	  op0 = ix86_zero_extend_to_Pmode (op0);
39142
39143	emit_insn (gen_sse2_clflush (op0));
39144	return 0;
39145
39146    case IX86_BUILTIN_CLWB:
39147	arg0 = CALL_EXPR_ARG (exp, 0);
39148	op0 = expand_normal (arg0);
39149	icode = CODE_FOR_clwb;
39150	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39151	  op0 = ix86_zero_extend_to_Pmode (op0);
39152
39153	emit_insn (gen_clwb (op0));
39154	return 0;
39155
39156    case IX86_BUILTIN_CLFLUSHOPT:
39157	arg0 = CALL_EXPR_ARG (exp, 0);
39158	op0 = expand_normal (arg0);
39159	icode = CODE_FOR_clflushopt;
39160	if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39161	  op0 = ix86_zero_extend_to_Pmode (op0);
39162
39163	emit_insn (gen_clflushopt (op0));
39164	return 0;
39165
39166    case IX86_BUILTIN_MONITOR:
39167    case IX86_BUILTIN_MONITORX:
39168      arg0 = CALL_EXPR_ARG (exp, 0);
39169      arg1 = CALL_EXPR_ARG (exp, 1);
39170      arg2 = CALL_EXPR_ARG (exp, 2);
39171      op0 = expand_normal (arg0);
39172      op1 = expand_normal (arg1);
39173      op2 = expand_normal (arg2);
39174      if (!REG_P (op0))
39175	op0 = ix86_zero_extend_to_Pmode (op0);
39176      if (!REG_P (op1))
39177	op1 = copy_to_mode_reg (SImode, op1);
39178      if (!REG_P (op2))
39179	op2 = copy_to_mode_reg (SImode, op2);
39180
39181      emit_insn (fcode == IX86_BUILTIN_MONITOR
39182		 ? ix86_gen_monitor (op0, op1, op2)
39183		 : ix86_gen_monitorx (op0, op1, op2));
39184      return 0;
39185
39186    case IX86_BUILTIN_MWAIT:
39187      arg0 = CALL_EXPR_ARG (exp, 0);
39188      arg1 = CALL_EXPR_ARG (exp, 1);
39189      op0 = expand_normal (arg0);
39190      op1 = expand_normal (arg1);
39191      if (!REG_P (op0))
39192	op0 = copy_to_mode_reg (SImode, op0);
39193      if (!REG_P (op1))
39194	op1 = copy_to_mode_reg (SImode, op1);
39195      emit_insn (gen_sse3_mwait (op0, op1));
39196      return 0;
39197
39198    case IX86_BUILTIN_MWAITX:
39199      arg0 = CALL_EXPR_ARG (exp, 0);
39200      arg1 = CALL_EXPR_ARG (exp, 1);
39201      arg2 = CALL_EXPR_ARG (exp, 2);
39202      op0 = expand_normal (arg0);
39203      op1 = expand_normal (arg1);
39204      op2 = expand_normal (arg2);
39205      if (!REG_P (op0))
39206	op0 = copy_to_mode_reg (SImode, op0);
39207      if (!REG_P (op1))
39208	op1 = copy_to_mode_reg (SImode, op1);
39209      if (!REG_P (op2))
39210	op2 = copy_to_mode_reg (SImode, op2);
39211      emit_insn (gen_mwaitx (op0, op1, op2));
39212      return 0;
39213
39214    case IX86_BUILTIN_VEC_INIT_V2SI:
39215    case IX86_BUILTIN_VEC_INIT_V4HI:
39216    case IX86_BUILTIN_VEC_INIT_V8QI:
39217      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
39218
39219    case IX86_BUILTIN_VEC_EXT_V2DF:
39220    case IX86_BUILTIN_VEC_EXT_V2DI:
39221    case IX86_BUILTIN_VEC_EXT_V4SF:
39222    case IX86_BUILTIN_VEC_EXT_V4SI:
39223    case IX86_BUILTIN_VEC_EXT_V8HI:
39224    case IX86_BUILTIN_VEC_EXT_V2SI:
39225    case IX86_BUILTIN_VEC_EXT_V4HI:
39226    case IX86_BUILTIN_VEC_EXT_V16QI:
39227      return ix86_expand_vec_ext_builtin (exp, target);
39228
39229    case IX86_BUILTIN_VEC_SET_V2DI:
39230    case IX86_BUILTIN_VEC_SET_V4SF:
39231    case IX86_BUILTIN_VEC_SET_V4SI:
39232    case IX86_BUILTIN_VEC_SET_V8HI:
39233    case IX86_BUILTIN_VEC_SET_V4HI:
39234    case IX86_BUILTIN_VEC_SET_V16QI:
39235      return ix86_expand_vec_set_builtin (exp);
39236
39237    case IX86_BUILTIN_INFQ:
39238    case IX86_BUILTIN_HUGE_VALQ:
39239      {
39240	REAL_VALUE_TYPE inf;
39241	rtx tmp;
39242
39243	real_inf (&inf);
39244	tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
39245
39246	tmp = validize_mem (force_const_mem (mode, tmp));
39247
39248	if (target == 0)
39249	  target = gen_reg_rtx (mode);
39250
39251	emit_move_insn (target, tmp);
39252	return target;
39253      }
39254
39255    case IX86_BUILTIN_RDPMC:
39256    case IX86_BUILTIN_RDTSC:
39257    case IX86_BUILTIN_RDTSCP:
39258
39259      op0 = gen_reg_rtx (DImode);
39260      op1 = gen_reg_rtx (DImode);
39261
39262      if (fcode == IX86_BUILTIN_RDPMC)
39263	{
39264	  arg0 = CALL_EXPR_ARG (exp, 0);
39265	  op2 = expand_normal (arg0);
39266	  if (!register_operand (op2, SImode))
39267	    op2 = copy_to_mode_reg (SImode, op2);
39268
39269	  insn = (TARGET_64BIT
39270		  ? gen_rdpmc_rex64 (op0, op1, op2)
39271		  : gen_rdpmc (op0, op2));
39272	  emit_insn (insn);
39273	}
39274      else if (fcode == IX86_BUILTIN_RDTSC)
39275	{
39276	  insn = (TARGET_64BIT
39277		  ? gen_rdtsc_rex64 (op0, op1)
39278		  : gen_rdtsc (op0));
39279	  emit_insn (insn);
39280	}
39281      else
39282	{
39283	  op2 = gen_reg_rtx (SImode);
39284
39285	  insn = (TARGET_64BIT
39286		  ? gen_rdtscp_rex64 (op0, op1, op2)
39287		  : gen_rdtscp (op0, op2));
39288	  emit_insn (insn);
39289
39290	  arg0 = CALL_EXPR_ARG (exp, 0);
39291	  op4 = expand_normal (arg0);
39292	  if (!address_operand (op4, VOIDmode))
39293	    {
39294	      op4 = convert_memory_address (Pmode, op4);
39295	      op4 = copy_addr_to_reg (op4);
39296	    }
39297	  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
39298	}
39299
39300      if (target == 0)
39301	{
	  /* MODE is VOIDmode if __builtin_rd* has been called
	     without an lhs.  */
39304	  if (mode == VOIDmode)
39305	    return target;
39306	  target = gen_reg_rtx (mode);
39307	}
39308
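      /* RDTSC/RDPMC/RDTSCP return the counter in EDX:EAX; on 64-bit
	 targets the high half (op1) is shifted up and OR-ed into op0
	 below to form the full 64-bit result.  */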
39309      if (TARGET_64BIT)
39310	{
39311	  op1 = expand_simple_binop (DImode, ASHIFT, op1, GEN_INT (32),
39312				     op1, 1, OPTAB_DIRECT);
39313	  op0 = expand_simple_binop (DImode, IOR, op0, op1,
39314				     op0, 1, OPTAB_DIRECT);
39315	}
39316
39317      emit_move_insn (target, op0);
39318      return target;
39319
39320    case IX86_BUILTIN_FXSAVE:
39321    case IX86_BUILTIN_FXRSTOR:
39322    case IX86_BUILTIN_FXSAVE64:
39323    case IX86_BUILTIN_FXRSTOR64:
39324    case IX86_BUILTIN_FNSTENV:
39325    case IX86_BUILTIN_FLDENV:
39326      mode0 = BLKmode;
39327      switch (fcode)
39328	{
39329	case IX86_BUILTIN_FXSAVE:
39330	  icode = CODE_FOR_fxsave;
39331	  break;
39332	case IX86_BUILTIN_FXRSTOR:
39333	  icode = CODE_FOR_fxrstor;
39334	  break;
39335	case IX86_BUILTIN_FXSAVE64:
39336	  icode = CODE_FOR_fxsave64;
39337	  break;
39338	case IX86_BUILTIN_FXRSTOR64:
39339	  icode = CODE_FOR_fxrstor64;
39340	  break;
39341	case IX86_BUILTIN_FNSTENV:
39342	  icode = CODE_FOR_fnstenv;
39343	  break;
39344	case IX86_BUILTIN_FLDENV:
39345	  icode = CODE_FOR_fldenv;
39346	  break;
39347	default:
39348	  gcc_unreachable ();
39349	}
39350
39351      arg0 = CALL_EXPR_ARG (exp, 0);
39352      op0 = expand_normal (arg0);
39353
39354      if (!address_operand (op0, VOIDmode))
39355	{
39356	  op0 = convert_memory_address (Pmode, op0);
39357	  op0 = copy_addr_to_reg (op0);
39358	}
39359      op0 = gen_rtx_MEM (mode0, op0);
39360
39361      pat = GEN_FCN (icode) (op0);
39362      if (pat)
39363	emit_insn (pat);
39364      return 0;
39365
39366    case IX86_BUILTIN_XSAVE:
39367    case IX86_BUILTIN_XRSTOR:
39368    case IX86_BUILTIN_XSAVE64:
39369    case IX86_BUILTIN_XRSTOR64:
39370    case IX86_BUILTIN_XSAVEOPT:
39371    case IX86_BUILTIN_XSAVEOPT64:
39372    case IX86_BUILTIN_XSAVES:
39373    case IX86_BUILTIN_XRSTORS:
39374    case IX86_BUILTIN_XSAVES64:
39375    case IX86_BUILTIN_XRSTORS64:
39376    case IX86_BUILTIN_XSAVEC:
39377    case IX86_BUILTIN_XSAVEC64:
39378      arg0 = CALL_EXPR_ARG (exp, 0);
39379      arg1 = CALL_EXPR_ARG (exp, 1);
39380      op0 = expand_normal (arg0);
39381      op1 = expand_normal (arg1);
39382
39383      if (!address_operand (op0, VOIDmode))
39384	{
39385	  op0 = convert_memory_address (Pmode, op0);
39386	  op0 = copy_addr_to_reg (op0);
39387	}
39388      op0 = gen_rtx_MEM (BLKmode, op0);
39389
39390      op1 = force_reg (DImode, op1);
39391
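      /* The XSAVE family takes its 64-bit feature mask in EDX:EAX, so on
	 64-bit targets the mask in op1 is split into its two 32-bit
	 halves below.  */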
39392      if (TARGET_64BIT)
39393	{
39394	  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
39395				     NULL, 1, OPTAB_DIRECT);
39396	  switch (fcode)
39397	    {
39398	    case IX86_BUILTIN_XSAVE:
39399	      icode = CODE_FOR_xsave_rex64;
39400	      break;
39401	    case IX86_BUILTIN_XRSTOR:
39402	      icode = CODE_FOR_xrstor_rex64;
39403	      break;
39404	    case IX86_BUILTIN_XSAVE64:
39405	      icode = CODE_FOR_xsave64;
39406	      break;
39407	    case IX86_BUILTIN_XRSTOR64:
39408	      icode = CODE_FOR_xrstor64;
39409	      break;
39410	    case IX86_BUILTIN_XSAVEOPT:
39411	      icode = CODE_FOR_xsaveopt_rex64;
39412	      break;
39413	    case IX86_BUILTIN_XSAVEOPT64:
39414	      icode = CODE_FOR_xsaveopt64;
39415	      break;
39416	    case IX86_BUILTIN_XSAVES:
39417	      icode = CODE_FOR_xsaves_rex64;
39418	      break;
39419	    case IX86_BUILTIN_XRSTORS:
39420	      icode = CODE_FOR_xrstors_rex64;
39421	      break;
39422	    case IX86_BUILTIN_XSAVES64:
39423	      icode = CODE_FOR_xsaves64;
39424	      break;
39425	    case IX86_BUILTIN_XRSTORS64:
39426	      icode = CODE_FOR_xrstors64;
39427	      break;
39428	    case IX86_BUILTIN_XSAVEC:
39429	      icode = CODE_FOR_xsavec_rex64;
39430	      break;
39431	    case IX86_BUILTIN_XSAVEC64:
39432	      icode = CODE_FOR_xsavec64;
39433	      break;
39434	    default:
39435	      gcc_unreachable ();
39436	    }
39437
39438	  op2 = gen_lowpart (SImode, op2);
39439	  op1 = gen_lowpart (SImode, op1);
39440	  pat = GEN_FCN (icode) (op0, op1, op2);
39441	}
39442      else
39443	{
39444	  switch (fcode)
39445	    {
39446	    case IX86_BUILTIN_XSAVE:
39447	      icode = CODE_FOR_xsave;
39448	      break;
39449	    case IX86_BUILTIN_XRSTOR:
39450	      icode = CODE_FOR_xrstor;
39451	      break;
39452	    case IX86_BUILTIN_XSAVEOPT:
39453	      icode = CODE_FOR_xsaveopt;
39454	      break;
39455	    case IX86_BUILTIN_XSAVES:
39456	      icode = CODE_FOR_xsaves;
39457	      break;
39458	    case IX86_BUILTIN_XRSTORS:
39459	      icode = CODE_FOR_xrstors;
39460	      break;
39461	    case IX86_BUILTIN_XSAVEC:
39462	      icode = CODE_FOR_xsavec;
39463	      break;
39464	    default:
39465	      gcc_unreachable ();
39466	    }
39467	  pat = GEN_FCN (icode) (op0, op1);
39468	}
39469
39470      if (pat)
39471	emit_insn (pat);
39472      return 0;
39473
39474    case IX86_BUILTIN_LLWPCB:
39475      arg0 = CALL_EXPR_ARG (exp, 0);
39476      op0 = expand_normal (arg0);
39477      icode = CODE_FOR_lwp_llwpcb;
39478      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
39479	op0 = ix86_zero_extend_to_Pmode (op0);
39480      emit_insn (gen_lwp_llwpcb (op0));
39481      return 0;
39482
39483    case IX86_BUILTIN_SLWPCB:
39484      icode = CODE_FOR_lwp_slwpcb;
39485      if (!target
39486	  || !insn_data[icode].operand[0].predicate (target, Pmode))
39487	target = gen_reg_rtx (Pmode);
39488      emit_insn (gen_lwp_slwpcb (target));
39489      return target;
39490
39491    case IX86_BUILTIN_BEXTRI32:
39492    case IX86_BUILTIN_BEXTRI64:
39493      arg0 = CALL_EXPR_ARG (exp, 0);
39494      arg1 = CALL_EXPR_ARG (exp, 1);
39495      op0 = expand_normal (arg0);
39496      op1 = expand_normal (arg1);
39497      icode = (fcode == IX86_BUILTIN_BEXTRI32
39498	  ? CODE_FOR_tbm_bextri_si
39499	  : CODE_FOR_tbm_bextri_di);
39500      if (!CONST_INT_P (op1))
39501        {
39502          error ("last argument must be an immediate");
39503          return const0_rtx;
39504        }
39505      else
39506        {
39507          unsigned char length = (INTVAL (op1) >> 8) & 0xFF;
39508          unsigned char lsb_index = INTVAL (op1) & 0xFF;
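          /* For instance, an immediate of 0x0408 selects length = 4 and
	     lsb_index = 8, i.e. a 4-bit field starting at bit 8 of the
	     source operand.  */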
39509          op1 = GEN_INT (length);
39510          op2 = GEN_INT (lsb_index);
39511          pat = GEN_FCN (icode) (target, op0, op1, op2);
39512          if (pat)
39513            emit_insn (pat);
39514          return target;
39515        }
39516
39517    case IX86_BUILTIN_RDRAND16_STEP:
39518      icode = CODE_FOR_rdrandhi_1;
39519      mode0 = HImode;
39520      goto rdrand_step;
39521
39522    case IX86_BUILTIN_RDRAND32_STEP:
39523      icode = CODE_FOR_rdrandsi_1;
39524      mode0 = SImode;
39525      goto rdrand_step;
39526
39527    case IX86_BUILTIN_RDRAND64_STEP:
39528      icode = CODE_FOR_rdranddi_1;
39529      mode0 = DImode;
39530
39531rdrand_step:
39532      op0 = gen_reg_rtx (mode0);
39533      emit_insn (GEN_FCN (icode) (op0));
39534
39535      arg0 = CALL_EXPR_ARG (exp, 0);
39536      op1 = expand_normal (arg0);
39537      if (!address_operand (op1, VOIDmode))
39538	{
39539	  op1 = convert_memory_address (Pmode, op1);
39540	  op1 = copy_addr_to_reg (op1);
39541	}
39542      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39543
39544      op1 = gen_reg_rtx (SImode);
39545      emit_move_insn (op1, CONST1_RTX (SImode));
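      /* A sketch of the expected use of these step builtins:
	   unsigned int v;
	   if (__builtin_ia32_rdrand32_step (&v)) use (v);
	 The random value is stored through the pointer argument above,
	 and the conditional move emitted below yields 1 when the
	 hardware reported success (CF set) and 0 otherwise.  */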
39546
39547      /* Emit SImode conditional move.  */
39548      if (mode0 == HImode)
39549	{
39550	  op2 = gen_reg_rtx (SImode);
39551	  emit_insn (gen_zero_extendhisi2 (op2, op0));
39552	}
39553      else if (mode0 == SImode)
39554	op2 = op0;
39555      else
39556	op2 = gen_rtx_SUBREG (SImode, op0, 0);
39557
39558      if (target == 0
39559	  || !register_operand (target, SImode))
39560	target = gen_reg_rtx (SImode);
39561
39562      pat = gen_rtx_GEU (VOIDmode, gen_rtx_REG (CCCmode, FLAGS_REG),
39563			 const0_rtx);
39564      emit_insn (gen_rtx_SET (VOIDmode, target,
39565			      gen_rtx_IF_THEN_ELSE (SImode, pat, op2, op1)));
39566      return target;
39567
39568    case IX86_BUILTIN_RDSEED16_STEP:
39569      icode = CODE_FOR_rdseedhi_1;
39570      mode0 = HImode;
39571      goto rdseed_step;
39572
39573    case IX86_BUILTIN_RDSEED32_STEP:
39574      icode = CODE_FOR_rdseedsi_1;
39575      mode0 = SImode;
39576      goto rdseed_step;
39577
39578    case IX86_BUILTIN_RDSEED64_STEP:
39579      icode = CODE_FOR_rdseeddi_1;
39580      mode0 = DImode;
39581
39582rdseed_step:
39583      op0 = gen_reg_rtx (mode0);
39584      emit_insn (GEN_FCN (icode) (op0));
39585
39586      arg0 = CALL_EXPR_ARG (exp, 0);
39587      op1 = expand_normal (arg0);
39588      if (!address_operand (op1, VOIDmode))
39589	{
39590	  op1 = convert_memory_address (Pmode, op1);
39591	  op1 = copy_addr_to_reg (op1);
39592	}
39593      emit_move_insn (gen_rtx_MEM (mode0, op1), op0);
39594
39595      op2 = gen_reg_rtx (QImode);
39596
39597      pat = gen_rtx_LTU (QImode, gen_rtx_REG (CCCmode, FLAGS_REG),
39598                         const0_rtx);
39599      emit_insn (gen_rtx_SET (VOIDmode, op2, pat));
39600
39601      if (target == 0
39602	  || !register_operand (target, SImode))
39603        target = gen_reg_rtx (SImode);
39604
39605      emit_insn (gen_zero_extendqisi2 (target, op2));
39606      return target;
39607
39608    case IX86_BUILTIN_SBB32:
39609      icode = CODE_FOR_subborrowsi;
39610      mode0 = SImode;
39611      goto handlecarry;
39612
39613    case IX86_BUILTIN_SBB64:
39614      icode = CODE_FOR_subborrowdi;
39615      mode0 = DImode;
39616      goto handlecarry;
39617
39618    case IX86_BUILTIN_ADDCARRYX32:
39619      icode = CODE_FOR_addcarrysi;
39620      mode0 = SImode;
39621      goto handlecarry;
39622
39623    case IX86_BUILTIN_ADDCARRYX64:
39624      icode = CODE_FOR_addcarrydi;
39625      mode0 = DImode;
39626
39627    handlecarry:
39628      arg0 = CALL_EXPR_ARG (exp, 0); /* unsigned char c_in.  */
39629      arg1 = CALL_EXPR_ARG (exp, 1); /* unsigned int src1.  */
39630      arg2 = CALL_EXPR_ARG (exp, 2); /* unsigned int src2.  */
39631      arg3 = CALL_EXPR_ARG (exp, 3); /* unsigned int *sum_out.  */
39632
39633      op1 = expand_normal (arg0);
39634      op1 = copy_to_mode_reg (QImode, convert_to_mode (QImode, op1, 1));
39635
39636      op2 = expand_normal (arg1);
39637      if (!register_operand (op2, mode0))
39638	op2 = copy_to_mode_reg (mode0, op2);
39639
39640      op3 = expand_normal (arg2);
39641      if (!register_operand (op3, mode0))
39642	op3 = copy_to_mode_reg (mode0, op3);
39643
39644      op4 = expand_normal (arg3);
39645      if (!address_operand (op4, VOIDmode))
39646	{
39647	  op4 = convert_memory_address (Pmode, op4);
39648	  op4 = copy_addr_to_reg (op4);
39649	}
39650
39651      /* Generate CF from input operand.  */
39652      emit_insn (gen_addqi3_cconly_overflow (op1, constm1_rtx));
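      /* constm1_rtx is 0xff in QImode, so the addition op1 + 0xff carries
	 out of 8 bits exactly when the incoming carry operand is nonzero,
	 leaving CF equal to the truth value of c_in.  */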
39653
39654      /* Generate instruction that consumes CF.  */
39655      op0 = gen_reg_rtx (mode0);
39656
39657      op1 = gen_rtx_REG (CCCmode, FLAGS_REG);
39658      pat = gen_rtx_LTU (mode0, op1, const0_rtx);
39659      emit_insn (GEN_FCN (icode) (op0, op2, op3, op1, pat));
39660
39661      /* Return current CF value.  */
39662      if (target == 0)
39663        target = gen_reg_rtx (QImode);
39664
39665      PUT_MODE (pat, QImode);
39666      emit_insn (gen_rtx_SET (VOIDmode, target, pat));
39667
39668      /* Store the result.  */
39669      emit_move_insn (gen_rtx_MEM (mode0, op4), op0);
39670
39671      return target;
39672
39673    case IX86_BUILTIN_READ_FLAGS:
39674      emit_insn (gen_push (gen_rtx_REG (word_mode, FLAGS_REG)));
39675
39676      if (optimize
39677	  || target == NULL_RTX
39678	  || !nonimmediate_operand (target, word_mode)
39679	  || GET_MODE (target) != word_mode)
39680	target = gen_reg_rtx (word_mode);
39681
39682      emit_insn (gen_pop (target));
39683      return target;
39684
39685    case IX86_BUILTIN_WRITE_FLAGS:
39686
39687      arg0 = CALL_EXPR_ARG (exp, 0);
39688      op0 = expand_normal (arg0);
39689      if (!general_no_elim_operand (op0, word_mode))
39690	op0 = copy_to_mode_reg (word_mode, op0);
39691
39692      emit_insn (gen_push (op0));
39693      emit_insn (gen_pop (gen_rtx_REG (word_mode, FLAGS_REG)));
39694      return 0;
39695
39696    case IX86_BUILTIN_KORTESTC16:
39697      icode = CODE_FOR_kortestchi;
39698      mode0 = HImode;
39699      mode1 = CCCmode;
39700      goto kortest;
39701
39702    case IX86_BUILTIN_KORTESTZ16:
39703      icode = CODE_FOR_kortestzhi;
39704      mode0 = HImode;
39705      mode1 = CCZmode;
39706
39707    kortest:
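      /* KORTEST ORs the two mask operands and sets CF when the result is
	 all ones and ZF when it is zero; the setcc at the end converts
	 the flag selected above (CCCmode or CCZmode) into a 0/1 value.  */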
39708      arg0 = CALL_EXPR_ARG (exp, 0); /* Mask reg src1.  */
39709      arg1 = CALL_EXPR_ARG (exp, 1); /* Mask reg src2.  */
39710      op0 = expand_normal (arg0);
39711      op1 = expand_normal (arg1);
39712
39713      op0 = copy_to_reg (op0);
39714      op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
39715      op1 = copy_to_reg (op1);
39716      op1 = simplify_gen_subreg (mode0, op1, GET_MODE (op1), 0);
39717
39718      target = gen_reg_rtx (QImode);
      emit_insn (gen_rtx_SET (VOIDmode, target, const0_rtx));
39720
39721      /* Emit kortest.  */
39722      emit_insn (GEN_FCN (icode) (op0, op1));
39723      /* And use setcc to return result from flags.  */
39724      ix86_expand_setcc (target, EQ,
39725			 gen_rtx_REG (mode1, FLAGS_REG), const0_rtx);
39726      return target;
39727
39728    case IX86_BUILTIN_GATHERSIV2DF:
39729      icode = CODE_FOR_avx2_gathersiv2df;
39730      goto gather_gen;
39731    case IX86_BUILTIN_GATHERSIV4DF:
39732      icode = CODE_FOR_avx2_gathersiv4df;
39733      goto gather_gen;
39734    case IX86_BUILTIN_GATHERDIV2DF:
39735      icode = CODE_FOR_avx2_gatherdiv2df;
39736      goto gather_gen;
39737    case IX86_BUILTIN_GATHERDIV4DF:
39738      icode = CODE_FOR_avx2_gatherdiv4df;
39739      goto gather_gen;
39740    case IX86_BUILTIN_GATHERSIV4SF:
39741      icode = CODE_FOR_avx2_gathersiv4sf;
39742      goto gather_gen;
39743    case IX86_BUILTIN_GATHERSIV8SF:
39744      icode = CODE_FOR_avx2_gathersiv8sf;
39745      goto gather_gen;
39746    case IX86_BUILTIN_GATHERDIV4SF:
39747      icode = CODE_FOR_avx2_gatherdiv4sf;
39748      goto gather_gen;
39749    case IX86_BUILTIN_GATHERDIV8SF:
39750      icode = CODE_FOR_avx2_gatherdiv8sf;
39751      goto gather_gen;
39752    case IX86_BUILTIN_GATHERSIV2DI:
39753      icode = CODE_FOR_avx2_gathersiv2di;
39754      goto gather_gen;
39755    case IX86_BUILTIN_GATHERSIV4DI:
39756      icode = CODE_FOR_avx2_gathersiv4di;
39757      goto gather_gen;
39758    case IX86_BUILTIN_GATHERDIV2DI:
39759      icode = CODE_FOR_avx2_gatherdiv2di;
39760      goto gather_gen;
39761    case IX86_BUILTIN_GATHERDIV4DI:
39762      icode = CODE_FOR_avx2_gatherdiv4di;
39763      goto gather_gen;
39764    case IX86_BUILTIN_GATHERSIV4SI:
39765      icode = CODE_FOR_avx2_gathersiv4si;
39766      goto gather_gen;
39767    case IX86_BUILTIN_GATHERSIV8SI:
39768      icode = CODE_FOR_avx2_gathersiv8si;
39769      goto gather_gen;
39770    case IX86_BUILTIN_GATHERDIV4SI:
39771      icode = CODE_FOR_avx2_gatherdiv4si;
39772      goto gather_gen;
39773    case IX86_BUILTIN_GATHERDIV8SI:
39774      icode = CODE_FOR_avx2_gatherdiv8si;
39775      goto gather_gen;
39776    case IX86_BUILTIN_GATHERALTSIV4DF:
39777      icode = CODE_FOR_avx2_gathersiv4df;
39778      goto gather_gen;
39779    case IX86_BUILTIN_GATHERALTDIV8SF:
39780      icode = CODE_FOR_avx2_gatherdiv8sf;
39781      goto gather_gen;
39782    case IX86_BUILTIN_GATHERALTSIV4DI:
39783      icode = CODE_FOR_avx2_gathersiv4di;
39784      goto gather_gen;
39785    case IX86_BUILTIN_GATHERALTDIV8SI:
39786      icode = CODE_FOR_avx2_gatherdiv8si;
39787      goto gather_gen;
39788    case IX86_BUILTIN_GATHER3SIV16SF:
39789      icode = CODE_FOR_avx512f_gathersiv16sf;
39790      goto gather_gen;
39791    case IX86_BUILTIN_GATHER3SIV8DF:
39792      icode = CODE_FOR_avx512f_gathersiv8df;
39793      goto gather_gen;
39794    case IX86_BUILTIN_GATHER3DIV16SF:
39795      icode = CODE_FOR_avx512f_gatherdiv16sf;
39796      goto gather_gen;
39797    case IX86_BUILTIN_GATHER3DIV8DF:
39798      icode = CODE_FOR_avx512f_gatherdiv8df;
39799      goto gather_gen;
39800    case IX86_BUILTIN_GATHER3SIV16SI:
39801      icode = CODE_FOR_avx512f_gathersiv16si;
39802      goto gather_gen;
39803    case IX86_BUILTIN_GATHER3SIV8DI:
39804      icode = CODE_FOR_avx512f_gathersiv8di;
39805      goto gather_gen;
39806    case IX86_BUILTIN_GATHER3DIV16SI:
39807      icode = CODE_FOR_avx512f_gatherdiv16si;
39808      goto gather_gen;
39809    case IX86_BUILTIN_GATHER3DIV8DI:
39810      icode = CODE_FOR_avx512f_gatherdiv8di;
39811      goto gather_gen;
39812    case IX86_BUILTIN_GATHER3ALTSIV8DF:
39813      icode = CODE_FOR_avx512f_gathersiv8df;
39814      goto gather_gen;
39815    case IX86_BUILTIN_GATHER3ALTDIV16SF:
39816      icode = CODE_FOR_avx512f_gatherdiv16sf;
39817      goto gather_gen;
39818    case IX86_BUILTIN_GATHER3ALTSIV8DI:
39819      icode = CODE_FOR_avx512f_gathersiv8di;
39820      goto gather_gen;
39821    case IX86_BUILTIN_GATHER3ALTDIV16SI:
39822      icode = CODE_FOR_avx512f_gatherdiv16si;
39823      goto gather_gen;
39824    case IX86_BUILTIN_GATHER3SIV2DF:
39825      icode = CODE_FOR_avx512vl_gathersiv2df;
39826      goto gather_gen;
39827    case IX86_BUILTIN_GATHER3SIV4DF:
39828      icode = CODE_FOR_avx512vl_gathersiv4df;
39829      goto gather_gen;
39830    case IX86_BUILTIN_GATHER3DIV2DF:
39831      icode = CODE_FOR_avx512vl_gatherdiv2df;
39832      goto gather_gen;
39833    case IX86_BUILTIN_GATHER3DIV4DF:
39834      icode = CODE_FOR_avx512vl_gatherdiv4df;
39835      goto gather_gen;
39836    case IX86_BUILTIN_GATHER3SIV4SF:
39837      icode = CODE_FOR_avx512vl_gathersiv4sf;
39838      goto gather_gen;
39839    case IX86_BUILTIN_GATHER3SIV8SF:
39840      icode = CODE_FOR_avx512vl_gathersiv8sf;
39841      goto gather_gen;
39842    case IX86_BUILTIN_GATHER3DIV4SF:
39843      icode = CODE_FOR_avx512vl_gatherdiv4sf;
39844      goto gather_gen;
39845    case IX86_BUILTIN_GATHER3DIV8SF:
39846      icode = CODE_FOR_avx512vl_gatherdiv8sf;
39847      goto gather_gen;
39848    case IX86_BUILTIN_GATHER3SIV2DI:
39849      icode = CODE_FOR_avx512vl_gathersiv2di;
39850      goto gather_gen;
39851    case IX86_BUILTIN_GATHER3SIV4DI:
39852      icode = CODE_FOR_avx512vl_gathersiv4di;
39853      goto gather_gen;
39854    case IX86_BUILTIN_GATHER3DIV2DI:
39855      icode = CODE_FOR_avx512vl_gatherdiv2di;
39856      goto gather_gen;
39857    case IX86_BUILTIN_GATHER3DIV4DI:
39858      icode = CODE_FOR_avx512vl_gatherdiv4di;
39859      goto gather_gen;
39860    case IX86_BUILTIN_GATHER3SIV4SI:
39861      icode = CODE_FOR_avx512vl_gathersiv4si;
39862      goto gather_gen;
39863    case IX86_BUILTIN_GATHER3SIV8SI:
39864      icode = CODE_FOR_avx512vl_gathersiv8si;
39865      goto gather_gen;
39866    case IX86_BUILTIN_GATHER3DIV4SI:
39867      icode = CODE_FOR_avx512vl_gatherdiv4si;
39868      goto gather_gen;
39869    case IX86_BUILTIN_GATHER3DIV8SI:
39870      icode = CODE_FOR_avx512vl_gatherdiv8si;
39871      goto gather_gen;
39872    case IX86_BUILTIN_GATHER3ALTSIV4DF:
39873      icode = CODE_FOR_avx512vl_gathersiv4df;
39874      goto gather_gen;
39875    case IX86_BUILTIN_GATHER3ALTDIV8SF:
39876      icode = CODE_FOR_avx512vl_gatherdiv8sf;
39877      goto gather_gen;
39878    case IX86_BUILTIN_GATHER3ALTSIV4DI:
39879      icode = CODE_FOR_avx512vl_gathersiv4di;
39880      goto gather_gen;
39881    case IX86_BUILTIN_GATHER3ALTDIV8SI:
39882      icode = CODE_FOR_avx512vl_gatherdiv8si;
39883      goto gather_gen;
39884    case IX86_BUILTIN_SCATTERSIV16SF:
39885      icode = CODE_FOR_avx512f_scattersiv16sf;
39886      goto scatter_gen;
39887    case IX86_BUILTIN_SCATTERSIV8DF:
39888      icode = CODE_FOR_avx512f_scattersiv8df;
39889      goto scatter_gen;
39890    case IX86_BUILTIN_SCATTERDIV16SF:
39891      icode = CODE_FOR_avx512f_scatterdiv16sf;
39892      goto scatter_gen;
39893    case IX86_BUILTIN_SCATTERDIV8DF:
39894      icode = CODE_FOR_avx512f_scatterdiv8df;
39895      goto scatter_gen;
39896    case IX86_BUILTIN_SCATTERSIV16SI:
39897      icode = CODE_FOR_avx512f_scattersiv16si;
39898      goto scatter_gen;
39899    case IX86_BUILTIN_SCATTERSIV8DI:
39900      icode = CODE_FOR_avx512f_scattersiv8di;
39901      goto scatter_gen;
39902    case IX86_BUILTIN_SCATTERDIV16SI:
39903      icode = CODE_FOR_avx512f_scatterdiv16si;
39904      goto scatter_gen;
39905    case IX86_BUILTIN_SCATTERDIV8DI:
39906      icode = CODE_FOR_avx512f_scatterdiv8di;
39907      goto scatter_gen;
39908    case IX86_BUILTIN_SCATTERSIV8SF:
39909      icode = CODE_FOR_avx512vl_scattersiv8sf;
39910      goto scatter_gen;
39911    case IX86_BUILTIN_SCATTERSIV4SF:
39912      icode = CODE_FOR_avx512vl_scattersiv4sf;
39913      goto scatter_gen;
39914    case IX86_BUILTIN_SCATTERSIV4DF:
39915      icode = CODE_FOR_avx512vl_scattersiv4df;
39916      goto scatter_gen;
39917    case IX86_BUILTIN_SCATTERSIV2DF:
39918      icode = CODE_FOR_avx512vl_scattersiv2df;
39919      goto scatter_gen;
39920    case IX86_BUILTIN_SCATTERDIV8SF:
39921      icode = CODE_FOR_avx512vl_scatterdiv8sf;
39922      goto scatter_gen;
39923    case IX86_BUILTIN_SCATTERDIV4SF:
39924      icode = CODE_FOR_avx512vl_scatterdiv4sf;
39925      goto scatter_gen;
39926    case IX86_BUILTIN_SCATTERDIV4DF:
39927      icode = CODE_FOR_avx512vl_scatterdiv4df;
39928      goto scatter_gen;
39929    case IX86_BUILTIN_SCATTERDIV2DF:
39930      icode = CODE_FOR_avx512vl_scatterdiv2df;
39931      goto scatter_gen;
39932    case IX86_BUILTIN_SCATTERSIV8SI:
39933      icode = CODE_FOR_avx512vl_scattersiv8si;
39934      goto scatter_gen;
39935    case IX86_BUILTIN_SCATTERSIV4SI:
39936      icode = CODE_FOR_avx512vl_scattersiv4si;
39937      goto scatter_gen;
39938    case IX86_BUILTIN_SCATTERSIV4DI:
39939      icode = CODE_FOR_avx512vl_scattersiv4di;
39940      goto scatter_gen;
39941    case IX86_BUILTIN_SCATTERSIV2DI:
39942      icode = CODE_FOR_avx512vl_scattersiv2di;
39943      goto scatter_gen;
39944    case IX86_BUILTIN_SCATTERDIV8SI:
39945      icode = CODE_FOR_avx512vl_scatterdiv8si;
39946      goto scatter_gen;
39947    case IX86_BUILTIN_SCATTERDIV4SI:
39948      icode = CODE_FOR_avx512vl_scatterdiv4si;
39949      goto scatter_gen;
39950    case IX86_BUILTIN_SCATTERDIV4DI:
39951      icode = CODE_FOR_avx512vl_scatterdiv4di;
39952      goto scatter_gen;
39953    case IX86_BUILTIN_SCATTERDIV2DI:
39954      icode = CODE_FOR_avx512vl_scatterdiv2di;
39955      goto scatter_gen;
39956    case IX86_BUILTIN_GATHERPFDPD:
39957      icode = CODE_FOR_avx512pf_gatherpfv8sidf;
39958      goto vec_prefetch_gen;
39959    case IX86_BUILTIN_GATHERPFDPS:
39960      icode = CODE_FOR_avx512pf_gatherpfv16sisf;
39961      goto vec_prefetch_gen;
39962    case IX86_BUILTIN_GATHERPFQPD:
39963      icode = CODE_FOR_avx512pf_gatherpfv8didf;
39964      goto vec_prefetch_gen;
39965    case IX86_BUILTIN_GATHERPFQPS:
39966      icode = CODE_FOR_avx512pf_gatherpfv8disf;
39967      goto vec_prefetch_gen;
39968    case IX86_BUILTIN_SCATTERPFDPD:
39969      icode = CODE_FOR_avx512pf_scatterpfv8sidf;
39970      goto vec_prefetch_gen;
39971    case IX86_BUILTIN_SCATTERPFDPS:
39972      icode = CODE_FOR_avx512pf_scatterpfv16sisf;
39973      goto vec_prefetch_gen;
39974    case IX86_BUILTIN_SCATTERPFQPD:
39975      icode = CODE_FOR_avx512pf_scatterpfv8didf;
39976      goto vec_prefetch_gen;
39977    case IX86_BUILTIN_SCATTERPFQPS:
39978      icode = CODE_FOR_avx512pf_scatterpfv8disf;
39979      goto vec_prefetch_gen;
39980
39981    gather_gen:
39982      rtx half;
39983      rtx (*gen) (rtx, rtx);
39984
39985      arg0 = CALL_EXPR_ARG (exp, 0);
39986      arg1 = CALL_EXPR_ARG (exp, 1);
39987      arg2 = CALL_EXPR_ARG (exp, 2);
39988      arg3 = CALL_EXPR_ARG (exp, 3);
39989      arg4 = CALL_EXPR_ARG (exp, 4);
39990      op0 = expand_normal (arg0);
39991      op1 = expand_normal (arg1);
39992      op2 = expand_normal (arg2);
39993      op3 = expand_normal (arg3);
39994      op4 = expand_normal (arg4);
39995      /* Note the arg order is different from the operand order.  */
39996      mode0 = insn_data[icode].operand[1].mode;
39997      mode2 = insn_data[icode].operand[3].mode;
39998      mode3 = insn_data[icode].operand[4].mode;
39999      mode4 = insn_data[icode].operand[5].mode;
40000
40001      if (target == NULL_RTX
40002	  || GET_MODE (target) != insn_data[icode].operand[0].mode
40003	  || !insn_data[icode].operand[0].predicate (target,
40004						     GET_MODE (target)))
40005	subtarget = gen_reg_rtx (insn_data[icode].operand[0].mode);
40006      else
40007	subtarget = target;
40008
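      /* The *ALT* gather variants are expanded with an instruction whose
	 vector width is half that of some of the builtin operands: the
	 SIV forms halve the index vector (op2), while the DIV forms halve
	 the source (op0) and, when present, the mask (op3).  */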
40009      switch (fcode)
40010	{
40011	case IX86_BUILTIN_GATHER3ALTSIV8DF:
40012	case IX86_BUILTIN_GATHER3ALTSIV8DI:
40013	  half = gen_reg_rtx (V8SImode);
40014	  if (!nonimmediate_operand (op2, V16SImode))
40015	    op2 = copy_to_mode_reg (V16SImode, op2);
40016	  emit_insn (gen_vec_extract_lo_v16si (half, op2));
40017	  op2 = half;
40018	  break;
40019	case IX86_BUILTIN_GATHER3ALTSIV4DF:
40020	case IX86_BUILTIN_GATHER3ALTSIV4DI:
40021	case IX86_BUILTIN_GATHERALTSIV4DF:
40022	case IX86_BUILTIN_GATHERALTSIV4DI:
40023	  half = gen_reg_rtx (V4SImode);
40024	  if (!nonimmediate_operand (op2, V8SImode))
40025	    op2 = copy_to_mode_reg (V8SImode, op2);
40026	  emit_insn (gen_vec_extract_lo_v8si (half, op2));
40027	  op2 = half;
40028	  break;
40029	case IX86_BUILTIN_GATHER3ALTDIV16SF:
40030	case IX86_BUILTIN_GATHER3ALTDIV16SI:
40031	  half = gen_reg_rtx (mode0);
40032	  if (mode0 == V8SFmode)
40033	    gen = gen_vec_extract_lo_v16sf;
40034	  else
40035	    gen = gen_vec_extract_lo_v16si;
40036	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
40037	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40038	  emit_insn (gen (half, op0));
40039	  op0 = half;
40040	  if (GET_MODE (op3) != VOIDmode)
40041	    {
40042	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
40043		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40044	      emit_insn (gen (half, op3));
40045	      op3 = half;
40046	    }
40047	  break;
40048	case IX86_BUILTIN_GATHER3ALTDIV8SF:
40049	case IX86_BUILTIN_GATHER3ALTDIV8SI:
40050	case IX86_BUILTIN_GATHERALTDIV8SF:
40051	case IX86_BUILTIN_GATHERALTDIV8SI:
40052	  half = gen_reg_rtx (mode0);
40053	  if (mode0 == V4SFmode)
40054	    gen = gen_vec_extract_lo_v8sf;
40055	  else
40056	    gen = gen_vec_extract_lo_v8si;
40057	  if (!nonimmediate_operand (op0, GET_MODE (op0)))
40058	    op0 = copy_to_mode_reg (GET_MODE (op0), op0);
40059	  emit_insn (gen (half, op0));
40060	  op0 = half;
40061	  if (GET_MODE (op3) != VOIDmode)
40062	    {
40063	      if (!nonimmediate_operand (op3, GET_MODE (op3)))
40064		op3 = copy_to_mode_reg (GET_MODE (op3), op3);
40065	      emit_insn (gen (half, op3));
40066	      op3 = half;
40067	    }
40068	  break;
40069	default:
40070	  break;
40071	}
40072
      /* Force the memory operand to contain only a base register here.
	 We don't want to do this for the memory operands of other
	 builtin functions.  */
40076      op1 = ix86_zero_extend_to_Pmode (op1);
40077
40078      if (!insn_data[icode].operand[1].predicate (op0, mode0))
40079	op0 = copy_to_mode_reg (mode0, op0);
40080      if (!insn_data[icode].operand[2].predicate (op1, Pmode))
40081	op1 = copy_to_mode_reg (Pmode, op1);
40082      if (!insn_data[icode].operand[3].predicate (op2, mode2))
40083	op2 = copy_to_mode_reg (mode2, op2);
40084
40085      op3 = fixup_modeless_constant (op3, mode3);
40086
40087      if (GET_MODE (op3) == mode3 || GET_MODE (op3) == VOIDmode)
40088	{
40089	  if (!insn_data[icode].operand[4].predicate (op3, mode3))
40090	    op3 = copy_to_mode_reg (mode3, op3);
40091	}
40092      else
40093	{
40094	  op3 = copy_to_reg (op3);
40095	  op3 = simplify_gen_subreg (mode3, op3, GET_MODE (op3), 0);
40096	}
40097      if (!insn_data[icode].operand[5].predicate (op4, mode4))
40098	{
40099          error ("the last argument must be scale 1, 2, 4, 8");
40100          return const0_rtx;
40101	}
40102
40103      /* Optimize.  If mask is known to have all high bits set,
40104	 replace op0 with pc_rtx to signal that the instruction
40105	 overwrites the whole destination and doesn't use its
40106	 previous contents.  */
40107      if (optimize)
40108	{
40109	  if (TREE_CODE (arg3) == INTEGER_CST)
40110	    {
40111	      if (integer_all_onesp (arg3))
40112		op0 = pc_rtx;
40113	    }
40114	  else if (TREE_CODE (arg3) == VECTOR_CST)
40115	    {
40116	      unsigned int negative = 0;
40117	      for (i = 0; i < VECTOR_CST_NELTS (arg3); ++i)
40118		{
40119		  tree cst = VECTOR_CST_ELT (arg3, i);
40120		  if (TREE_CODE (cst) == INTEGER_CST
40121		      && tree_int_cst_sign_bit (cst))
40122		    negative++;
40123		  else if (TREE_CODE (cst) == REAL_CST
40124			   && REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
40125		    negative++;
40126		}
40127	      if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
40128		op0 = pc_rtx;
40129	    }
40130	  else if (TREE_CODE (arg3) == SSA_NAME
40131		   && TREE_CODE (TREE_TYPE (arg3)) == VECTOR_TYPE)
40132	    {
40133	      /* Recognize also when mask is like:
40134		 __v2df src = _mm_setzero_pd ();
40135		 __v2df mask = _mm_cmpeq_pd (src, src);
40136		 or
40137		 __v8sf src = _mm256_setzero_ps ();
40138		 __v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
40139		 as that is a cheaper way to load all ones into
40140		 a register than having to load a constant from
40141		 memory.  */
40142	      gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
40143	      if (is_gimple_call (def_stmt))
40144		{
40145		  tree fndecl = gimple_call_fndecl (def_stmt);
40146		  if (fndecl
40147		      && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
40148		    switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
40149		      {
40150		      case IX86_BUILTIN_CMPPD:
40151		      case IX86_BUILTIN_CMPPS:
40152		      case IX86_BUILTIN_CMPPD256:
40153		      case IX86_BUILTIN_CMPPS256:
40154			if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
40155			  break;
40156			/* FALLTHRU */
40157		      case IX86_BUILTIN_CMPEQPD:
40158		      case IX86_BUILTIN_CMPEQPS:
40159			if (initializer_zerop (gimple_call_arg (def_stmt, 0))
40160			    && initializer_zerop (gimple_call_arg (def_stmt,
40161								   1)))
40162			  op0 = pc_rtx;
40163			break;
40164		      default:
40165			break;
40166		      }
40167		}
40168	    }
40169	}
40170
40171      pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
40172      if (! pat)
40173	return const0_rtx;
40174      emit_insn (pat);
40175
40176      switch (fcode)
40177	{
40178	case IX86_BUILTIN_GATHER3DIV16SF:
40179	  if (target == NULL_RTX)
40180	    target = gen_reg_rtx (V8SFmode);
40181	  emit_insn (gen_vec_extract_lo_v16sf (target, subtarget));
40182	  break;
40183	case IX86_BUILTIN_GATHER3DIV16SI:
40184	  if (target == NULL_RTX)
40185	    target = gen_reg_rtx (V8SImode);
40186	  emit_insn (gen_vec_extract_lo_v16si (target, subtarget));
40187	  break;
40188	case IX86_BUILTIN_GATHER3DIV8SF:
40189	case IX86_BUILTIN_GATHERDIV8SF:
40190	  if (target == NULL_RTX)
40191	    target = gen_reg_rtx (V4SFmode);
40192	  emit_insn (gen_vec_extract_lo_v8sf (target, subtarget));
40193	  break;
40194	case IX86_BUILTIN_GATHER3DIV8SI:
40195	case IX86_BUILTIN_GATHERDIV8SI:
40196	  if (target == NULL_RTX)
40197	    target = gen_reg_rtx (V4SImode);
40198	  emit_insn (gen_vec_extract_lo_v8si (target, subtarget));
40199	  break;
40200	default:
40201	  target = subtarget;
40202	  break;
40203	}
40204      return target;
40205
40206    scatter_gen:
40207      arg0 = CALL_EXPR_ARG (exp, 0);
40208      arg1 = CALL_EXPR_ARG (exp, 1);
40209      arg2 = CALL_EXPR_ARG (exp, 2);
40210      arg3 = CALL_EXPR_ARG (exp, 3);
40211      arg4 = CALL_EXPR_ARG (exp, 4);
40212      op0 = expand_normal (arg0);
40213      op1 = expand_normal (arg1);
40214      op2 = expand_normal (arg2);
40215      op3 = expand_normal (arg3);
40216      op4 = expand_normal (arg4);
40217      mode1 = insn_data[icode].operand[1].mode;
40218      mode2 = insn_data[icode].operand[2].mode;
40219      mode3 = insn_data[icode].operand[3].mode;
40220      mode4 = insn_data[icode].operand[4].mode;
40221
      /* Force the memory operand to contain only a base register here.
	 We don't want to do this for the memory operands of other
	 builtin functions.  */
40225      op0 = force_reg (Pmode, convert_to_mode (Pmode, op0, 1));
40226
40227      if (!insn_data[icode].operand[0].predicate (op0, Pmode))
40228	op0 = copy_to_mode_reg (Pmode, op0);
40229
40230      op1 = fixup_modeless_constant (op1, mode1);
40231
40232      if (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode)
40233	{
40234	  if (!insn_data[icode].operand[1].predicate (op1, mode1))
40235	    op1 = copy_to_mode_reg (mode1, op1);
40236	}
40237      else
40238	{
40239	  op1 = copy_to_reg (op1);
40240	  op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
40241	}
40242
40243      if (!insn_data[icode].operand[2].predicate (op2, mode2))
40244	op2 = copy_to_mode_reg (mode2, op2);
40245
40246      if (!insn_data[icode].operand[3].predicate (op3, mode3))
40247	op3 = copy_to_mode_reg (mode3, op3);
40248
40249      if (!insn_data[icode].operand[4].predicate (op4, mode4))
40250	{
40251	  error ("the last argument must be scale 1, 2, 4, 8");
40252	  return const0_rtx;
40253	}
40254
40255      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40256      if (! pat)
40257	return const0_rtx;
40258
40259      emit_insn (pat);
40260      return 0;
40261
40262    vec_prefetch_gen:
40263      arg0 = CALL_EXPR_ARG (exp, 0);
40264      arg1 = CALL_EXPR_ARG (exp, 1);
40265      arg2 = CALL_EXPR_ARG (exp, 2);
40266      arg3 = CALL_EXPR_ARG (exp, 3);
40267      arg4 = CALL_EXPR_ARG (exp, 4);
40268      op0 = expand_normal (arg0);
40269      op1 = expand_normal (arg1);
40270      op2 = expand_normal (arg2);
40271      op3 = expand_normal (arg3);
40272      op4 = expand_normal (arg4);
40273      mode0 = insn_data[icode].operand[0].mode;
40274      mode1 = insn_data[icode].operand[1].mode;
40275      mode3 = insn_data[icode].operand[3].mode;
40276      mode4 = insn_data[icode].operand[4].mode;
40277
40278      op0 = fixup_modeless_constant (op0, mode0);
40279
40280      if (GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
40281	{
40282	  if (!insn_data[icode].operand[0].predicate (op0, mode0))
40283	    op0 = copy_to_mode_reg (mode0, op0);
40284	}
40285      else
40286	{
40287	  op0 = copy_to_reg (op0);
40288	  op0 = simplify_gen_subreg (mode0, op0, GET_MODE (op0), 0);
40289	}
40290
40291      if (!insn_data[icode].operand[1].predicate (op1, mode1))
40292	op1 = copy_to_mode_reg (mode1, op1);
40293
      /* Force the memory operand to contain only a base register here.
	 We don't want to do this for the memory operands of other
	 builtin functions.  */
40297      op2 = force_reg (Pmode, convert_to_mode (Pmode, op2, 1));
40298
40299      if (!insn_data[icode].operand[2].predicate (op2, Pmode))
40300	op2 = copy_to_mode_reg (Pmode, op2);
40301
40302      if (!insn_data[icode].operand[3].predicate (op3, mode3))
40303	{
	  error ("the fourth argument must be scale 1, 2, 4, 8");
40305	  return const0_rtx;
40306	}
40307
40308      if (!insn_data[icode].operand[4].predicate (op4, mode4))
40309	{
40310	  error ("incorrect hint operand");
40311	  return const0_rtx;
40312	}
40313
40314      pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
40315      if (! pat)
40316	return const0_rtx;
40317
40318      emit_insn (pat);
40319
40320      return 0;
40321
40322    case IX86_BUILTIN_XABORT:
40323      icode = CODE_FOR_xabort;
40324      arg0 = CALL_EXPR_ARG (exp, 0);
40325      op0 = expand_normal (arg0);
40326      mode0 = insn_data[icode].operand[0].mode;
40327      if (!insn_data[icode].operand[0].predicate (op0, mode0))
40328	{
40329	  error ("the xabort's argument must be an 8-bit immediate");
40330	  return const0_rtx;
40331	}
40332      emit_insn (gen_xabort (op0));
40333      return 0;
40334
40335    default:
40336      break;
40337    }
40338
40339  for (i = 0, d = bdesc_special_args;
40340       i < ARRAY_SIZE (bdesc_special_args);
40341       i++, d++)
40342    if (d->code == fcode)
40343      return ix86_expand_special_args_builtin (d, exp, target);
40344
40345  for (i = 0, d = bdesc_args;
40346       i < ARRAY_SIZE (bdesc_args);
40347       i++, d++)
40348    if (d->code == fcode)
40349      switch (fcode)
40350	{
40351	case IX86_BUILTIN_FABSQ:
40352	case IX86_BUILTIN_COPYSIGNQ:
40353	  if (!TARGET_SSE)
40354	    /* Emit a normal call if SSE isn't available.  */
40355	    return expand_call (exp, target, ignore);
40356	default:
40357	  return ix86_expand_args_builtin (d, exp, target);
40358	}
40359
40360  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
40361    if (d->code == fcode)
40362      return ix86_expand_sse_comi (d, exp, target);
40363
40364  for (i = 0, d = bdesc_round_args; i < ARRAY_SIZE (bdesc_round_args); i++, d++)
40365    if (d->code == fcode)
40366      return ix86_expand_round_builtin (d, exp, target);
40367
40368  for (i = 0, d = bdesc_pcmpestr;
40369       i < ARRAY_SIZE (bdesc_pcmpestr);
40370       i++, d++)
40371    if (d->code == fcode)
40372      return ix86_expand_sse_pcmpestr (d, exp, target);
40373
40374  for (i = 0, d = bdesc_pcmpistr;
40375       i < ARRAY_SIZE (bdesc_pcmpistr);
40376       i++, d++)
40377    if (d->code == fcode)
40378      return ix86_expand_sse_pcmpistr (d, exp, target);
40379
40380  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
40381    if (d->code == fcode)
40382      return ix86_expand_multi_arg_builtin (d->icode, exp, target,
40383					    (enum ix86_builtin_func_type)
40384					    d->flag, d->comparison);
40385
40386  gcc_unreachable ();
40387}
40388
40389/* This returns the target-specific builtin with code CODE if
40390   current_function_decl has visibility on this builtin, which is checked
40391   using isa flags.  Returns NULL_TREE otherwise.  */
40392
static tree
ix86_get_builtin (enum ix86_builtins code)
40394{
40395  struct cl_target_option *opts;
40396  tree target_tree = NULL_TREE;
40397
40398  /* Determine the isa flags of current_function_decl.  */
40399
40400  if (current_function_decl)
40401    target_tree = DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl);
40402
40403  if (target_tree == NULL)
40404    target_tree = target_option_default_node;
40405
40406  opts = TREE_TARGET_OPTION (target_tree);
40407
40408  if (ix86_builtins_isa[(int) code].isa & opts->x_ix86_isa_flags)
40409    return ix86_builtin_decl (code, true);
40410  else
40411    return NULL_TREE;
40412}
40413
/* Return the function decl for the target-specific builtin
   corresponding to the MPX builtin passed in FCODE.  */
40416static tree
40417ix86_builtin_mpx_function (unsigned fcode)
40418{
40419  switch (fcode)
40420    {
40421    case BUILT_IN_CHKP_BNDMK:
40422      return ix86_builtins[IX86_BUILTIN_BNDMK];
40423
40424    case BUILT_IN_CHKP_BNDSTX:
40425      return ix86_builtins[IX86_BUILTIN_BNDSTX];
40426
40427    case BUILT_IN_CHKP_BNDLDX:
40428      return ix86_builtins[IX86_BUILTIN_BNDLDX];
40429
40430    case BUILT_IN_CHKP_BNDCL:
40431      return ix86_builtins[IX86_BUILTIN_BNDCL];
40432
40433    case BUILT_IN_CHKP_BNDCU:
40434      return ix86_builtins[IX86_BUILTIN_BNDCU];
40435
40436    case BUILT_IN_CHKP_BNDRET:
40437      return ix86_builtins[IX86_BUILTIN_BNDRET];
40438
40439    case BUILT_IN_CHKP_INTERSECT:
40440      return ix86_builtins[IX86_BUILTIN_BNDINT];
40441
40442    case BUILT_IN_CHKP_NARROW:
40443      return ix86_builtins[IX86_BUILTIN_BNDNARROW];
40444
40445    case BUILT_IN_CHKP_SIZEOF:
40446      return ix86_builtins[IX86_BUILTIN_SIZEOF];
40447
40448    case BUILT_IN_CHKP_EXTRACT_LOWER:
40449      return ix86_builtins[IX86_BUILTIN_BNDLOWER];
40450
40451    case BUILT_IN_CHKP_EXTRACT_UPPER:
40452      return ix86_builtins[IX86_BUILTIN_BNDUPPER];
40453
40454    default:
40455      return NULL_TREE;
40456    }
40457
40458  gcc_unreachable ();
40459}
40460
40461/* Helper function for ix86_load_bounds and ix86_store_bounds.
40462
40463   Return an address to be used to load/store bounds for pointer
40464   passed in SLOT.
40465
   SLOT_NO is an integer constant holding the number of a
   target-dependent special slot to be used in case SLOT is not a memory.
40468
40469   SPECIAL_BASE is a pointer to be used as a base of fake address
40470   to access special slots in Bounds Table.  SPECIAL_BASE[-1],
40471   SPECIAL_BASE[-2] etc. will be used as fake pointer locations.  */
40472
40473static rtx
40474ix86_get_arg_address_for_bt (rtx slot, rtx slot_no, rtx special_base)
40475{
40476  rtx addr = NULL;
40477
  /* A NULL slot means we pass bounds for a pointer not passed to the
     function at all.  A register slot means we pass the pointer in a
     register.  In both these cases bounds are passed via the Bounds
     Table.  Since we do not have the actual pointer stored in memory,
     we have to use fake addresses to access the Bounds Table.  We
     start with (special_base - sizeof (void *)) and decrease this
     address by the pointer size to get addresses for other slots.  */
40485  if (!slot || REG_P (slot))
40486    {
40487      gcc_assert (CONST_INT_P (slot_no));
40488      addr = plus_constant (Pmode, special_base,
40489			    -(INTVAL (slot_no) + 1) * GET_MODE_SIZE (Pmode));
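      /* E.g. with 64-bit pointers, slot_no 0 maps to special_base - 8
	 and slot_no 1 to special_base - 16.  */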
40490    }
  /* If the pointer is passed in memory then its address is used to
     access the Bounds Table.  */
40493  else if (MEM_P (slot))
40494    {
40495      addr = XEXP (slot, 0);
40496      if (!register_operand (addr, Pmode))
40497	addr = copy_addr_to_reg (addr);
40498    }
40499  else
40500    gcc_unreachable ();
40501
40502  return addr;
40503}
40504
/* The expand pass uses this hook to load bounds for function parameter
   PTR passed in SLOT in case its bounds are not passed in a register.

   If SLOT is a memory reference, then bounds are loaded as for a regular
   pointer loaded from memory.  PTR may be NULL in case SLOT is a memory
   reference; in that case the value of PTR (if required) may be loaded
   from SLOT.

   If SLOT is NULL or a register, then SLOT_NO is an integer constant
   holding the number of the target dependent special slot which should
   be used to obtain the bounds.

   Return the loaded bounds.  */
40517
40518static rtx
40519ix86_load_bounds (rtx slot, rtx ptr, rtx slot_no)
40520{
40521  rtx reg = gen_reg_rtx (BNDmode);
40522  rtx addr;
40523
40524  /* Get address to be used to access Bounds Table.  Special slots start
40525     at the location of return address of the current function.  */
40526  addr = ix86_get_arg_address_for_bt (slot, slot_no, arg_pointer_rtx);
40527
40528  /* Load pointer value from a memory if we don't have it.  */
40529  if (!ptr)
40530    {
40531      gcc_assert (MEM_P (slot));
40532      ptr = copy_addr_to_reg (slot);
40533    }
40534
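  /* BNDLDX takes both the Bounds Table address and the pointer value;
     emit the 64-bit or 32-bit variant according to BNDmode.  */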
40535  emit_insn (BNDmode == BND64mode
40536	     ? gen_bnd64_ldx (reg, addr, ptr)
40537	     : gen_bnd32_ldx (reg, addr, ptr));
40538
40539  return reg;
40540}
40541
/* The expand pass uses this hook to store BOUNDS for call argument PTR
   passed in SLOT in case BOUNDS are not passed in a register.

   If SLOT is a memory reference, then BOUNDS are stored as for a regular
   pointer stored in memory.  PTR may be NULL in case SLOT is a memory
   reference; in that case the value of PTR (if required) may be loaded
   from SLOT.

   If SLOT is NULL or a register, then SLOT_NO is an integer constant
   holding the number of the target dependent special slot which should
   be used to store BOUNDS.  */
40552
40553static void
40554ix86_store_bounds (rtx ptr, rtx slot, rtx bounds, rtx slot_no)
40555{
40556  rtx addr;
40557
40558  /* Get address to be used to access Bounds Table.  Special slots start
40559     at the location of return address of a called function.  */
40560  addr = ix86_get_arg_address_for_bt (slot, slot_no, stack_pointer_rtx);
40561
40562  /* Load pointer value from a memory if we don't have it.  */
40563  if (!ptr)
40564    {
40565      gcc_assert (MEM_P (slot));
40566      ptr = copy_addr_to_reg (slot);
40567    }
40568
40569  gcc_assert (POINTER_BOUNDS_MODE_P (GET_MODE (bounds)));
40570  if (!register_operand (bounds, BNDmode))
40571    bounds = copy_to_mode_reg (BNDmode, bounds);
40572
40573  emit_insn (BNDmode == BND64mode
40574	     ? gen_bnd64_stx (addr, ptr, bounds)
40575	     : gen_bnd32_stx (addr, ptr, bounds));
40576}
40577
40578/* Load and return bounds returned by function in SLOT.  */
40579
40580static rtx
40581ix86_load_returned_bounds (rtx slot)
40582{
40583  rtx res;
40584
40585  gcc_assert (REG_P (slot));
40586  res = gen_reg_rtx (BNDmode);
40587  emit_move_insn (res, slot);
40588
40589  return res;
40590}
40591
40592/* Store BOUNDS returned by function into SLOT.  */
40593
40594static void
40595ix86_store_returned_bounds (rtx slot, rtx bounds)
40596{
40597  gcc_assert (REG_P (slot));
40598  emit_move_insn (slot, bounds);
40599}
40600
/* Returns a function decl for a vectorized version of the builtin function
   with builtin function code FN and the result vector type TYPE_OUT, or
   NULL_TREE if it is not available.  */
40604
40605static tree
40606ix86_builtin_vectorized_function (tree fndecl, tree type_out,
40607				  tree type_in)
40608{
40609  machine_mode in_mode, out_mode;
40610  int in_n, out_n;
40611  enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
40612
40613  if (TREE_CODE (type_out) != VECTOR_TYPE
40614      || TREE_CODE (type_in) != VECTOR_TYPE
40615      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
40616    return NULL_TREE;
40617
40618  out_mode = TYPE_MODE (TREE_TYPE (type_out));
40619  out_n = TYPE_VECTOR_SUBPARTS (type_out);
40620  in_mode = TYPE_MODE (TREE_TYPE (type_in));
40621  in_n = TYPE_VECTOR_SUBPARTS (type_in);
40622
40623  switch (fn)
40624    {
40625    case BUILT_IN_SQRT:
40626      if (out_mode == DFmode && in_mode == DFmode)
40627	{
40628	  if (out_n == 2 && in_n == 2)
40629	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD);
40630	  else if (out_n == 4 && in_n == 4)
40631	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD256);
40632	  else if (out_n == 8 && in_n == 8)
40633	    return ix86_get_builtin (IX86_BUILTIN_SQRTPD512);
40634	}
40635      break;
40636
40637    case BUILT_IN_EXP2F:
40638      if (out_mode == SFmode && in_mode == SFmode)
40639	{
40640	  if (out_n == 16 && in_n == 16)
40641	    return ix86_get_builtin (IX86_BUILTIN_EXP2PS);
40642	}
40643      break;
40644
40645    case BUILT_IN_SQRTF:
40646      if (out_mode == SFmode && in_mode == SFmode)
40647	{
40648	  if (out_n == 4 && in_n == 4)
40649	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR);
40650	  else if (out_n == 8 && in_n == 8)
40651	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR256);
40652	  else if (out_n == 16 && in_n == 16)
40653	    return ix86_get_builtin (IX86_BUILTIN_SQRTPS_NR512);
40654	}
40655      break;
40656
40657    case BUILT_IN_IFLOOR:
40658    case BUILT_IN_LFLOOR:
40659    case BUILT_IN_LLFLOOR:
40660      /* The round insn does not trap on denormals.  */
40661      if (flag_trapping_math || !TARGET_ROUND)
40662	break;
40663
40664      if (out_mode == SImode && in_mode == DFmode)
40665	{
40666	  if (out_n == 4 && in_n == 2)
40667	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX);
40668	  else if (out_n == 8 && in_n == 4)
40669	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX256);
40670	  else if (out_n == 16 && in_n == 8)
40671	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512);
40672	}
40673      break;
40674
40675    case BUILT_IN_IFLOORF:
40676    case BUILT_IN_LFLOORF:
40677    case BUILT_IN_LLFLOORF:
40678      /* The round insn does not trap on denormals.  */
40679      if (flag_trapping_math || !TARGET_ROUND)
40680	break;
40681
40682      if (out_mode == SImode && in_mode == SFmode)
40683	{
40684	  if (out_n == 4 && in_n == 4)
40685	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
40686	  else if (out_n == 8 && in_n == 8)
40687	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
40688	}
40689      break;
40690
40691    case BUILT_IN_ICEIL:
40692    case BUILT_IN_LCEIL:
40693    case BUILT_IN_LLCEIL:
40694      /* The round insn does not trap on denormals.  */
40695      if (flag_trapping_math || !TARGET_ROUND)
40696	break;
40697
40698      if (out_mode == SImode && in_mode == DFmode)
40699	{
40700	  if (out_n == 4 && in_n == 2)
40701	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX);
40702	  else if (out_n == 8 && in_n == 4)
40703	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX256);
40704	  else if (out_n == 16 && in_n == 8)
40705	    return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512);
40706	}
40707      break;
40708
40709    case BUILT_IN_ICEILF:
40710    case BUILT_IN_LCEILF:
40711    case BUILT_IN_LLCEILF:
40712      /* The round insn does not trap on denormals.  */
40713      if (flag_trapping_math || !TARGET_ROUND)
40714	break;
40715
40716      if (out_mode == SImode && in_mode == SFmode)
40717	{
40718	  if (out_n == 4 && in_n == 4)
40719	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
40720	  else if (out_n == 8 && in_n == 8)
40721	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
40722	}
40723      break;
40724
40725    case BUILT_IN_IRINT:
40726    case BUILT_IN_LRINT:
40727    case BUILT_IN_LLRINT:
40728      if (out_mode == SImode && in_mode == DFmode)
40729	{
40730	  if (out_n == 4 && in_n == 2)
40731	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
40732	  else if (out_n == 8 && in_n == 4)
40733	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
40734	}
40735      break;
40736
40737    case BUILT_IN_IRINTF:
40738    case BUILT_IN_LRINTF:
40739    case BUILT_IN_LLRINTF:
40740      if (out_mode == SImode && in_mode == SFmode)
40741	{
40742	  if (out_n == 4 && in_n == 4)
40743	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
40744	  else if (out_n == 8 && in_n == 8)
40745	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
40746	}
40747      break;
40748
40749    case BUILT_IN_IROUND:
40750    case BUILT_IN_LROUND:
40751    case BUILT_IN_LLROUND:
40752      /* The round insn does not trap on denormals.  */
40753      if (flag_trapping_math || !TARGET_ROUND)
40754	break;
40755
40756      if (out_mode == SImode && in_mode == DFmode)
40757	{
40758	  if (out_n == 4 && in_n == 2)
40759	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX);
40760	  else if (out_n == 8 && in_n == 4)
40761	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX256);
40762	  else if (out_n == 16 && in_n == 8)
40763	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512);
40764	}
40765      break;
40766
40767    case BUILT_IN_IROUNDF:
40768    case BUILT_IN_LROUNDF:
40769    case BUILT_IN_LLROUNDF:
40770      /* The round insn does not trap on denormals.  */
40771      if (flag_trapping_math || !TARGET_ROUND)
40772	break;
40773
40774      if (out_mode == SImode && in_mode == SFmode)
40775	{
40776	  if (out_n == 4 && in_n == 4)
40777	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
40778	  else if (out_n == 8 && in_n == 8)
40779	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
40780	}
40781      break;
40782
40783    case BUILT_IN_COPYSIGN:
40784      if (out_mode == DFmode && in_mode == DFmode)
40785	{
40786	  if (out_n == 2 && in_n == 2)
40787	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD);
40788	  else if (out_n == 4 && in_n == 4)
40789	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD256);
40790	  else if (out_n == 8 && in_n == 8)
40791	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPD512);
40792	}
40793      break;
40794
40795    case BUILT_IN_COPYSIGNF:
40796      if (out_mode == SFmode && in_mode == SFmode)
40797	{
40798	  if (out_n == 4 && in_n == 4)
40799	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS);
40800	  else if (out_n == 8 && in_n == 8)
40801	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS256);
40802	  else if (out_n == 16 && in_n == 16)
40803	    return ix86_get_builtin (IX86_BUILTIN_CPYSGNPS512);
40804	}
40805      break;
40806
40807    case BUILT_IN_FLOOR:
40808      /* The round insn does not trap on denormals.  */
40809      if (flag_trapping_math || !TARGET_ROUND)
40810	break;
40811
40812      if (out_mode == DFmode && in_mode == DFmode)
40813	{
40814	  if (out_n == 2 && in_n == 2)
40815	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
40816	  else if (out_n == 4 && in_n == 4)
40817	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
40818	}
40819      break;
40820
40821    case BUILT_IN_FLOORF:
40822      /* The round insn does not trap on denormals.  */
40823      if (flag_trapping_math || !TARGET_ROUND)
40824	break;
40825
40826      if (out_mode == SFmode && in_mode == SFmode)
40827	{
40828	  if (out_n == 4 && in_n == 4)
40829	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
40830	  else if (out_n == 8 && in_n == 8)
40831	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
40832	}
40833      break;
40834
40835    case BUILT_IN_CEIL:
40836      /* The round insn does not trap on denormals.  */
40837      if (flag_trapping_math || !TARGET_ROUND)
40838	break;
40839
40840      if (out_mode == DFmode && in_mode == DFmode)
40841	{
40842	  if (out_n == 2 && in_n == 2)
40843	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
40844	  else if (out_n == 4 && in_n == 4)
40845	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
40846	}
40847      break;
40848
40849    case BUILT_IN_CEILF:
40850      /* The round insn does not trap on denormals.  */
40851      if (flag_trapping_math || !TARGET_ROUND)
40852	break;
40853
40854      if (out_mode == SFmode && in_mode == SFmode)
40855	{
40856	  if (out_n == 4 && in_n == 4)
40857	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
40858	  else if (out_n == 8 && in_n == 8)
40859	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
40860	}
40861      break;
40862
40863    case BUILT_IN_TRUNC:
40864      /* The round insn does not trap on denormals.  */
40865      if (flag_trapping_math || !TARGET_ROUND)
40866	break;
40867
40868      if (out_mode == DFmode && in_mode == DFmode)
40869	{
40870	  if (out_n == 2 && in_n == 2)
40871	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
40872	  else if (out_n == 4 && in_n == 4)
40873	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
40874	}
40875      break;
40876
40877    case BUILT_IN_TRUNCF:
40878      /* The round insn does not trap on denormals.  */
40879      if (flag_trapping_math || !TARGET_ROUND)
40880	break;
40881
40882      if (out_mode == SFmode && in_mode == SFmode)
40883	{
40884	  if (out_n == 4 && in_n == 4)
40885	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
40886	  else if (out_n == 8 && in_n == 8)
40887	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
40888	}
40889      break;
40890
40891    case BUILT_IN_RINT:
40892      /* The round insn does not trap on denormals.  */
40893      if (flag_trapping_math || !TARGET_ROUND)
40894	break;
40895
40896      if (out_mode == DFmode && in_mode == DFmode)
40897	{
40898	  if (out_n == 2 && in_n == 2)
40899	    return ix86_get_builtin (IX86_BUILTIN_RINTPD);
40900	  else if (out_n == 4 && in_n == 4)
40901	    return ix86_get_builtin (IX86_BUILTIN_RINTPD256);
40902	}
40903      break;
40904
40905    case BUILT_IN_RINTF:
40906      /* The round insn does not trap on denormals.  */
40907      if (flag_trapping_math || !TARGET_ROUND)
40908	break;
40909
40910      if (out_mode == SFmode && in_mode == SFmode)
40911	{
40912	  if (out_n == 4 && in_n == 4)
40913	    return ix86_get_builtin (IX86_BUILTIN_RINTPS);
40914	  else if (out_n == 8 && in_n == 8)
40915	    return ix86_get_builtin (IX86_BUILTIN_RINTPS256);
40916	}
40917      break;
40918
40919    case BUILT_IN_ROUND:
40920      /* The round insn does not trap on denormals.  */
40921      if (flag_trapping_math || !TARGET_ROUND)
40922	break;
40923
40924      if (out_mode == DFmode && in_mode == DFmode)
40925	{
40926	  if (out_n == 2 && in_n == 2)
40927	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ);
40928	  else if (out_n == 4 && in_n == 4)
40929	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ256);
40930	}
40931      break;
40932
40933    case BUILT_IN_ROUNDF:
40934      /* The round insn does not trap on denormals.  */
40935      if (flag_trapping_math || !TARGET_ROUND)
40936	break;
40937
40938      if (out_mode == SFmode && in_mode == SFmode)
40939	{
40940	  if (out_n == 4 && in_n == 4)
40941	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ);
40942	  else if (out_n == 8 && in_n == 8)
40943	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ256);
40944	}
40945      break;
40946
40947    case BUILT_IN_FMA:
40948      if (out_mode == DFmode && in_mode == DFmode)
40949	{
40950	  if (out_n == 2 && in_n == 2)
40951	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD);
40952	  if (out_n == 4 && in_n == 4)
40953	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256);
40954	}
40955      break;
40956
40957    case BUILT_IN_FMAF:
40958      if (out_mode == SFmode && in_mode == SFmode)
40959	{
40960	  if (out_n == 4 && in_n == 4)
40961	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS);
40962	  if (out_n == 8 && in_n == 8)
40963	    return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256);
40964	}
40965      break;
40966
40967    default:
40968      break;
40969    }
40970
40971  /* Dispatch to a handler for a vectorization library.  */
40972  if (ix86_veclib_handler)
40973    return ix86_veclib_handler ((enum built_in_function) fn, type_out,
40974				type_in);
40975
40976  return NULL_TREE;
40977}
40978
40979/* Handler for an SVML-style interface to
40980   a library with vectorized intrinsics.  */
40981
40982static tree
40983ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
40984{
40985  char name[20];
40986  tree fntype, new_fndecl, args;
40987  unsigned arity;
40988  const char *bname;
40989  machine_mode el_mode, in_mode;
40990  int n, in_n;
40991
40992  /* The SVML is suitable for unsafe math only.  */
40993  if (!flag_unsafe_math_optimizations)
40994    return NULL_TREE;
40995
40996  el_mode = TYPE_MODE (TREE_TYPE (type_out));
40997  n = TYPE_VECTOR_SUBPARTS (type_out);
40998  in_mode = TYPE_MODE (TREE_TYPE (type_in));
40999  in_n = TYPE_VECTOR_SUBPARTS (type_in);
41000  if (el_mode != in_mode
41001      || n != in_n)
41002    return NULL_TREE;
41003
41004  switch (fn)
41005    {
41006    case BUILT_IN_EXP:
41007    case BUILT_IN_LOG:
41008    case BUILT_IN_LOG10:
41009    case BUILT_IN_POW:
41010    case BUILT_IN_TANH:
41011    case BUILT_IN_TAN:
41012    case BUILT_IN_ATAN:
41013    case BUILT_IN_ATAN2:
41014    case BUILT_IN_ATANH:
41015    case BUILT_IN_CBRT:
41016    case BUILT_IN_SINH:
41017    case BUILT_IN_SIN:
41018    case BUILT_IN_ASINH:
41019    case BUILT_IN_ASIN:
41020    case BUILT_IN_COSH:
41021    case BUILT_IN_COS:
41022    case BUILT_IN_ACOSH:
41023    case BUILT_IN_ACOS:
41024      if (el_mode != DFmode || n != 2)
41025	return NULL_TREE;
41026      break;
41027
41028    case BUILT_IN_EXPF:
41029    case BUILT_IN_LOGF:
41030    case BUILT_IN_LOG10F:
41031    case BUILT_IN_POWF:
41032    case BUILT_IN_TANHF:
41033    case BUILT_IN_TANF:
41034    case BUILT_IN_ATANF:
41035    case BUILT_IN_ATAN2F:
41036    case BUILT_IN_ATANHF:
41037    case BUILT_IN_CBRTF:
41038    case BUILT_IN_SINHF:
41039    case BUILT_IN_SINF:
41040    case BUILT_IN_ASINHF:
41041    case BUILT_IN_ASINF:
41042    case BUILT_IN_COSHF:
41043    case BUILT_IN_COSF:
41044    case BUILT_IN_ACOSHF:
41045    case BUILT_IN_ACOSF:
41046      if (el_mode != SFmode || n != 4)
41047	return NULL_TREE;
41048      break;
41049
41050    default:
41051      return NULL_TREE;
41052    }
41053
41054  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41055
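  /* SVML entry points are named "vmls<Func>4" for the 4-element float
     variants and "vmld<Func>2" for the 2-element double variants.
     BNAME + 10 skips the "__builtin_" prefix of the scalar builtin's
     name; in the float case the trailing 'f' is overwritten with the
     vector length, and name[4] is upcased below.  */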
41056  if (fn == BUILT_IN_LOGF)
41057    strcpy (name, "vmlsLn4");
41058  else if (fn == BUILT_IN_LOG)
41059    strcpy (name, "vmldLn2");
41060  else if (n == 4)
41061    {
41062      sprintf (name, "vmls%s", bname+10);
41063      name[strlen (name)-1] = '4';
41064    }
41065  else
41066    sprintf (name, "vmld%s2", bname+10);
41067
41068  /* Convert to uppercase. */
41069  name[4] &= ~0x20;
41070
41071  arity = 0;
41072  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41073       args;
41074       args = TREE_CHAIN (args))
41075    arity++;
41076
41077  if (arity == 1)
41078    fntype = build_function_type_list (type_out, type_in, NULL);
41079  else
41080    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41081
41082  /* Build a function declaration for the vectorized function.  */
41083  new_fndecl = build_decl (BUILTINS_LOCATION,
41084			   FUNCTION_DECL, get_identifier (name), fntype);
41085  TREE_PUBLIC (new_fndecl) = 1;
41086  DECL_EXTERNAL (new_fndecl) = 1;
41087  DECL_IS_NOVOPS (new_fndecl) = 1;
41088  TREE_READONLY (new_fndecl) = 1;
41089
41090  return new_fndecl;
41091}
41092
41093/* Handler for an ACML-style interface to
41094   a library with vectorized intrinsics.  */
41095
41096static tree
41097ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
41098{
41099  char name[20] = "__vr.._";
41100  tree fntype, new_fndecl, args;
41101  unsigned arity;
41102  const char *bname;
41103  machine_mode el_mode, in_mode;
41104  int n, in_n;
41105
  /* ACML is 64-bit only and suitable for unsafe math only, as it does
     not correctly support parts of IEEE semantics with the required
     precision, such as denormals.  */
41109  if (!TARGET_64BIT
41110      || !flag_unsafe_math_optimizations)
41111    return NULL_TREE;
41112
41113  el_mode = TYPE_MODE (TREE_TYPE (type_out));
41114  n = TYPE_VECTOR_SUBPARTS (type_out);
41115  in_mode = TYPE_MODE (TREE_TYPE (type_in));
41116  in_n = TYPE_VECTOR_SUBPARTS (type_in);
41117  if (el_mode != in_mode
41118      || n != in_n)
41119    return NULL_TREE;
41120
41121  switch (fn)
41122    {
41123    case BUILT_IN_SIN:
41124    case BUILT_IN_COS:
41125    case BUILT_IN_EXP:
41126    case BUILT_IN_LOG:
41127    case BUILT_IN_LOG2:
41128    case BUILT_IN_LOG10:
41129      name[4] = 'd';
41130      name[5] = '2';
41131      if (el_mode != DFmode
41132	  || n != 2)
41133	return NULL_TREE;
41134      break;
41135
41136    case BUILT_IN_SINF:
41137    case BUILT_IN_COSF:
41138    case BUILT_IN_EXPF:
41139    case BUILT_IN_POWF:
41140    case BUILT_IN_LOGF:
41141    case BUILT_IN_LOG2F:
41142    case BUILT_IN_LOG10F:
41143      name[4] = 's';
41144      name[5] = '4';
41145      if (el_mode != SFmode
41146	  || n != 4)
41147	return NULL_TREE;
41148      break;
41149
41150    default:
41151      return NULL_TREE;
41152    }
41153
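  /* The resulting ACML entry point is "__vrd2_<func>" for the 2-element
     double variants or "__vrs4_<func>" for the 4-element float variants;
     BNAME + 10 skips the "__builtin_" prefix of the scalar builtin's
     name.  */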
41154  bname = IDENTIFIER_POINTER (DECL_NAME (builtin_decl_implicit (fn)));
41155  sprintf (name + 7, "%s", bname+10);
41156
41157  arity = 0;
41158  for (args = DECL_ARGUMENTS (builtin_decl_implicit (fn));
41159       args;
41160       args = TREE_CHAIN (args))
41161    arity++;
41162
41163  if (arity == 1)
41164    fntype = build_function_type_list (type_out, type_in, NULL);
41165  else
41166    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
41167
41168  /* Build a function declaration for the vectorized function.  */
41169  new_fndecl = build_decl (BUILTINS_LOCATION,
41170			   FUNCTION_DECL, get_identifier (name), fntype);
41171  TREE_PUBLIC (new_fndecl) = 1;
41172  DECL_EXTERNAL (new_fndecl) = 1;
41173  DECL_IS_NOVOPS (new_fndecl) = 1;
41174  TREE_READONLY (new_fndecl) = 1;
41175
41176  return new_fndecl;
41177}
41178
/* Returns a decl of a function that implements a gather load with
   memory vector type MEM_VECTYPE, index type INDEX_TYPE and scale SCALE.
   Return NULL_TREE if it is not available.  */
41182
41183static tree
41184ix86_vectorize_builtin_gather (const_tree mem_vectype,
41185			       const_tree index_type, int scale)
41186{
41187  bool si;
41188  enum ix86_builtins code;
41189
41190  if (! TARGET_AVX2)
41191    return NULL_TREE;
41192
41193  if ((TREE_CODE (index_type) != INTEGER_TYPE
41194       && !POINTER_TYPE_P (index_type))
41195      || (TYPE_MODE (index_type) != SImode
41196	  && TYPE_MODE (index_type) != DImode))
41197    return NULL_TREE;
41198
41199  if (TYPE_PRECISION (index_type) > POINTER_SIZE)
41200    return NULL_TREE;
41201
41202  /* v*gather* insn sign extends index to pointer mode.  */
41203  if (TYPE_PRECISION (index_type) < POINTER_SIZE
41204      && TYPE_UNSIGNED (index_type))
41205    return NULL_TREE;
41206
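  /* Hardware gather insns only support scales of 1, 2, 4 and 8.  */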
41207  if (scale <= 0
41208      || scale > 8
41209      || (scale & (scale - 1)) != 0)
41210    return NULL_TREE;
41211
41212  si = TYPE_MODE (index_type) == SImode;
41213  switch (TYPE_MODE (mem_vectype))
41214    {
41215    case V2DFmode:
41216      if (TARGET_AVX512VL)
41217	code = si ? IX86_BUILTIN_GATHER3SIV2DF : IX86_BUILTIN_GATHER3DIV2DF;
41218      else
41219	code = si ? IX86_BUILTIN_GATHERSIV2DF : IX86_BUILTIN_GATHERDIV2DF;
41220      break;
41221    case V4DFmode:
41222      if (TARGET_AVX512VL)
41223	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DF : IX86_BUILTIN_GATHER3DIV4DF;
41224      else
41225	code = si ? IX86_BUILTIN_GATHERALTSIV4DF : IX86_BUILTIN_GATHERDIV4DF;
41226      break;
41227    case V2DImode:
41228      if (TARGET_AVX512VL)
41229	code = si ? IX86_BUILTIN_GATHER3SIV2DI : IX86_BUILTIN_GATHER3DIV2DI;
41230      else
41231	code = si ? IX86_BUILTIN_GATHERSIV2DI : IX86_BUILTIN_GATHERDIV2DI;
41232      break;
41233    case V4DImode:
41234      if (TARGET_AVX512VL)
41235	code = si ? IX86_BUILTIN_GATHER3ALTSIV4DI : IX86_BUILTIN_GATHER3DIV4DI;
41236      else
41237	code = si ? IX86_BUILTIN_GATHERALTSIV4DI : IX86_BUILTIN_GATHERDIV4DI;
41238      break;
41239    case V4SFmode:
41240      if (TARGET_AVX512VL)
41241	code = si ? IX86_BUILTIN_GATHER3SIV4SF : IX86_BUILTIN_GATHER3DIV4SF;
41242      else
41243	code = si ? IX86_BUILTIN_GATHERSIV4SF : IX86_BUILTIN_GATHERDIV4SF;
41244      break;
41245    case V8SFmode:
41246      if (TARGET_AVX512VL)
41247	code = si ? IX86_BUILTIN_GATHER3SIV8SF : IX86_BUILTIN_GATHER3ALTDIV8SF;
41248      else
41249	code = si ? IX86_BUILTIN_GATHERSIV8SF : IX86_BUILTIN_GATHERALTDIV8SF;
41250      break;
41251    case V4SImode:
41252      if (TARGET_AVX512VL)
41253	code = si ? IX86_BUILTIN_GATHER3SIV4SI : IX86_BUILTIN_GATHER3DIV4SI;
41254      else
41255	code = si ? IX86_BUILTIN_GATHERSIV4SI : IX86_BUILTIN_GATHERDIV4SI;
41256      break;
41257    case V8SImode:
41258      if (TARGET_AVX512VL)
41259	code = si ? IX86_BUILTIN_GATHER3SIV8SI : IX86_BUILTIN_GATHER3ALTDIV8SI;
41260      else
41261	code = si ? IX86_BUILTIN_GATHERSIV8SI : IX86_BUILTIN_GATHERALTDIV8SI;
41262      break;
41263    case V8DFmode:
41264      if (TARGET_AVX512F)
41265	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DF : IX86_BUILTIN_GATHER3DIV8DF;
41266      else
41267	return NULL_TREE;
41268      break;
41269    case V8DImode:
41270      if (TARGET_AVX512F)
41271	code = si ? IX86_BUILTIN_GATHER3ALTSIV8DI : IX86_BUILTIN_GATHER3DIV8DI;
41272      else
41273	return NULL_TREE;
41274      break;
41275    case V16SFmode:
41276      if (TARGET_AVX512F)
41277	code = si ? IX86_BUILTIN_GATHER3SIV16SF : IX86_BUILTIN_GATHER3ALTDIV16SF;
41278      else
41279	return NULL_TREE;
41280      break;
41281    case V16SImode:
41282      if (TARGET_AVX512F)
41283	code = si ? IX86_BUILTIN_GATHER3SIV16SI : IX86_BUILTIN_GATHER3ALTDIV16SI;
41284      else
41285	return NULL_TREE;
41286      break;
41287    default:
41288      return NULL_TREE;
41289    }
41290
41291  return ix86_get_builtin (code);
41292}
41293
/* Returns the decl of a target-specific builtin that implements the
   reciprocal of the function FN, or NULL_TREE if not available.  */
41296
41297static tree
41298ix86_builtin_reciprocal (unsigned int fn, bool md_fn, bool)
41299{
41300  if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
41301	 && flag_finite_math_only && !flag_trapping_math
41302	 && flag_unsafe_math_optimizations))
41303    return NULL_TREE;
41304
41305  if (md_fn)
41306    /* Machine dependent builtins.  */
41307    switch (fn)
41308      {
41309	/* Vectorized version of sqrt to rsqrt conversion.  */
41310      case IX86_BUILTIN_SQRTPS_NR:
41311	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR);
41312
41313      case IX86_BUILTIN_SQRTPS_NR256:
41314	return ix86_get_builtin (IX86_BUILTIN_RSQRTPS_NR256);
41315
41316      default:
41317	return NULL_TREE;
41318      }
41319  else
41320    /* Normal builtins.  */
41321    switch (fn)
41322      {
41323	/* Sqrt to rsqrt conversion.  */
41324      case BUILT_IN_SQRTF:
41325	return ix86_get_builtin (IX86_BUILTIN_RSQRTF);
41326
41327      default:
41328	return NULL_TREE;
41329      }
41330}
41331
41332/* Helper for avx_vpermilps256_operand et al.  This is also used by
41333   the expansion functions to turn the parallel back into a mask.
41334   The return value is 0 for no match and the imm8+1 for a match.  */
41335
41336int
41337avx_vpermilp_parallel (rtx par, machine_mode mode)
41338{
41339  unsigned i, nelt = GET_MODE_NUNITS (mode);
41340  unsigned mask = 0;
41341  unsigned char ipar[16] = {};  /* Silence -Wuninitialized warning.  */
41342
41343  if (XVECLEN (par, 0) != (int) nelt)
41344    return 0;
41345
41346  /* Validate that all of the elements are constants, and not totally
41347     out of range.  Copy the data into an integral array to make the
41348     subsequent checks easier.  */
41349  for (i = 0; i < nelt; ++i)
41350    {
41351      rtx er = XVECEXP (par, 0, i);
41352      unsigned HOST_WIDE_INT ei;
41353
41354      if (!CONST_INT_P (er))
41355	return 0;
41356      ei = INTVAL (er);
41357      if (ei >= nelt)
41358	return 0;
41359      ipar[i] = ei;
41360    }
41361
41362  switch (mode)
41363    {
41364    case V8DFmode:
41365      /* In the 512-bit DFmode case, we can only move elements within
41366         a 128-bit lane.  First fill the second part of the mask,
41367	 then fallthru.  */
41368      for (i = 4; i < 6; ++i)
41369	{
41370	  if (ipar[i] < 4 || ipar[i] >= 6)
41371	    return 0;
41372	  mask |= (ipar[i] - 4) << i;
41373	}
41374      for (i = 6; i < 8; ++i)
41375	{
41376	  if (ipar[i] < 6)
41377	    return 0;
41378	  mask |= (ipar[i] - 6) << i;
41379	}
41380      /* FALLTHRU */
41381
41382    case V4DFmode:
41383      /* In the 256-bit DFmode case, we can only move elements within
41384         a 128-bit lane.  */
41385      for (i = 0; i < 2; ++i)
41386	{
41387	  if (ipar[i] >= 2)
41388	    return 0;
41389	  mask |= ipar[i] << i;
41390	}
41391      for (i = 2; i < 4; ++i)
41392	{
41393	  if (ipar[i] < 2)
41394	    return 0;
41395	  mask |= (ipar[i] - 2) << i;
41396	}
41397      break;
41398
41399    case V16SFmode:
      /* In the 512-bit SFmode case, the permutation in the upper 256 bits
	 must mirror the permutation in the lower 256 bits.  */
41402      for (i = 0; i < 8; ++i)
41403	if (ipar[i] + 8 != ipar[i + 8])
41404	  return 0;
41405      /* FALLTHRU */
41406
41407    case V8SFmode:
      /* In the 256-bit SFmode case, we have full freedom of
         movement within the low 128-bit lane, but the high 128-bit
         lane must mirror the exact same pattern.  */
41411      for (i = 0; i < 4; ++i)
41412	if (ipar[i] + 4 != ipar[i + 4])
41413	  return 0;
41414      nelt = 4;
41415      /* FALLTHRU */
41416
41417    case V2DFmode:
41418    case V4SFmode:
      /* In the 128-bit case, we have full freedom in the placement of
	 the elements from the source operand.  */
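      /* Each element selector occupies nelt / 2 bits of the immediate:
	 two bits per element for vpermilps, one bit per element for
	 vpermilpd.  */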
41421      for (i = 0; i < nelt; ++i)
41422	mask |= ipar[i] << (i * (nelt / 2));
41423      break;
41424
41425    default:
41426      gcc_unreachable ();
41427    }
41428
41429  /* Make sure success has a non-zero value by adding one.  */
41430  return mask + 1;
41431}
41432
41433/* Helper for avx_vperm2f128_v4df_operand et al.  This is also used by
41434   the expansion functions to turn the parallel back into a mask.
41435   The return value is 0 for no match and the imm8+1 for a match.  */
41436
41437int
41438avx_vperm2f128_parallel (rtx par, machine_mode mode)
41439{
41440  unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
41441  unsigned mask = 0;
41442  unsigned char ipar[8] = {};  /* Silence -Wuninitialized warning.  */
41443
41444  if (XVECLEN (par, 0) != (int) nelt)
41445    return 0;
41446
41447  /* Validate that all of the elements are constants, and not totally
41448     out of range.  Copy the data into an integral array to make the
41449     subsequent checks easier.  */
41450  for (i = 0; i < nelt; ++i)
41451    {
41452      rtx er = XVECEXP (par, 0, i);
41453      unsigned HOST_WIDE_INT ei;
41454
41455      if (!CONST_INT_P (er))
41456	return 0;
41457      ei = INTVAL (er);
41458      if (ei >= 2 * nelt)
41459	return 0;
41460      ipar[i] = ei;
41461    }
41462
  /* Validate that each half of the permute selects consecutive elements,
     i.e. each half is one contiguous 128-bit group.  */
41464  for (i = 0; i < nelt2 - 1; ++i)
41465    if (ipar[i] + 1 != ipar[i + 1])
41466      return 0;
41467  for (i = nelt2; i < nelt - 1; ++i)
41468    if (ipar[i] + 1 != ipar[i + 1])
41469      return 0;
41470
41471  /* Reconstruct the mask.  */
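  /* In the vperm2f128 immediate, bits 0-1 select the source for the low
     128 bits of the result and bits 4-5 select the source for the high
     128 bits, hence the shift by i * 4.  */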
41472  for (i = 0; i < 2; ++i)
41473    {
41474      unsigned e = ipar[i * nelt2];
41475      if (e % nelt2)
41476	return 0;
41477      e /= nelt2;
41478      mask |= e << (i * 4);
41479    }
41480
41481  /* Make sure success has a non-zero value by adding one.  */
41482  return mask + 1;
41483}
41484
/* Return a register priority for hard register HARD_REGNO.  */
41486static int
41487ix86_register_priority (int hard_regno)
41488{
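  /* The larger the returned priority, the more the register allocator
     prefers HARD_REGNO.  */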
  /* ebp and r13 as the base always want a displacement, and r12 as the
     base always wants an index.  So discourage their use in an
     address.  */
41492  if (hard_regno == R12_REG || hard_regno == R13_REG)
41493    return 0;
41494  if (hard_regno == BP_REG)
41495    return 1;
41496  /* New x86-64 int registers result in bigger code size.  Discourage
41497     them.  */
41498  if (FIRST_REX_INT_REG <= hard_regno && hard_regno <= LAST_REX_INT_REG)
41499    return 2;
41500  /* New x86-64 SSE registers result in bigger code size.  Discourage
41501     them.  */
41502  if (FIRST_REX_SSE_REG <= hard_regno && hard_regno <= LAST_REX_SSE_REG)
41503    return 2;
41504  /* Usage of AX register results in smaller code.  Prefer it.  */
41505  if (hard_regno == AX_REG)
41506    return 4;
41507  return 3;
41508}
41509
41510/* Implement TARGET_PREFERRED_RELOAD_CLASS.
41511
41512   Put float CONST_DOUBLE in the constant pool instead of fp regs.
41513   QImode must go into class Q_REGS.
41514   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
41515   movdf to do mem-to-mem moves through integer regs.  */
41516
41517static reg_class_t
41518ix86_preferred_reload_class (rtx x, reg_class_t regclass)
41519{
41520  machine_mode mode = GET_MODE (x);
41521
41522  /* We're only allowed to return a subclass of CLASS.  Many of the
41523     following checks fail for NO_REGS, so eliminate that early.  */
41524  if (regclass == NO_REGS)
41525    return NO_REGS;
41526
41527  /* All classes can load zeros.  */
41528  if (x == CONST0_RTX (mode))
41529    return regclass;
41530
41531  /* Force constants into memory if we are loading a (nonzero) constant into
41532     an MMX, SSE or MASK register.  This is because there are no MMX/SSE/MASK
41533     instructions to load from a constant.  */
41534  if (CONSTANT_P (x)
41535      && (MAYBE_MMX_CLASS_P (regclass)
41536	  || MAYBE_SSE_CLASS_P (regclass)
41537	  || MAYBE_MASK_CLASS_P (regclass)))
41538    return NO_REGS;
41539
41540  /* Prefer SSE regs only, if we can use them for math.  */
41541  if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
41542    return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
41543
41544  /* Floating-point constants need more complex checks.  */
41545  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
41546    {
41547      /* General regs can load everything.  */
41548      if (reg_class_subset_p (regclass, GENERAL_REGS))
41549        return regclass;
41550
41551      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
41552	 zero above.  We only want to wind up preferring 80387 registers if
41553	 we plan on doing computation with them.  */
41554      if (TARGET_80387
41555	  && standard_80387_constant_p (x) > 0)
41556	{
41557	  /* Limit class to non-sse.  */
41558	  if (regclass == FLOAT_SSE_REGS)
41559	    return FLOAT_REGS;
41560	  if (regclass == FP_TOP_SSE_REGS)
41561	    return FP_TOP_REG;
41562	  if (regclass == FP_SECOND_SSE_REGS)
41563	    return FP_SECOND_REG;
41564	  if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
41565	    return regclass;
41566	}
41567
41568      return NO_REGS;
41569    }
41570
  /* Generally when we see PLUS here, it's the function invariant
     (plus soft-fp const_int), which can only be computed into general
     regs.  */
41574  if (GET_CODE (x) == PLUS)
41575    return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
41576
41577  /* QImode constants are easy to load, but non-constant QImode data
41578     must go into Q_REGS.  */
41579  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
41580    {
41581      if (reg_class_subset_p (regclass, Q_REGS))
41582	return regclass;
41583      if (reg_class_subset_p (Q_REGS, regclass))
41584	return Q_REGS;
41585      return NO_REGS;
41586    }
41587
41588  return regclass;
41589}
41590
41591/* Discourage putting floating-point values in SSE registers unless
41592   SSE math is being used, and likewise for the 387 registers.  */
41593static reg_class_t
41594ix86_preferred_output_reload_class (rtx x, reg_class_t regclass)
41595{
41596  machine_mode mode = GET_MODE (x);
41597
41598  /* Restrict the output reload class to the register bank that we are doing
41599     math on.  If we would like not to return a subset of CLASS, reject this
41600     alternative: if reload cannot do this, it will still use its choice.  */
41602  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
41603    return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS;
41604
41605  if (X87_FLOAT_MODE_P (mode))
41606    {
41607      if (regclass == FP_TOP_SSE_REGS)
41608	return FP_TOP_REG;
41609      else if (regclass == FP_SECOND_SSE_REGS)
41610	return FP_SECOND_REG;
41611      else
41612	return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
41613    }
41614
41615  return regclass;
41616}
41617
41618static reg_class_t
41619ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
41620		       machine_mode mode, secondary_reload_info *sri)
41621{
41622  /* Double-word spills from general registers to non-offsettable memory
41623     references (zero-extended addresses) require special handling.  */
41624  if (TARGET_64BIT
41625      && MEM_P (x)
41626      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
41627      && INTEGER_CLASS_P (rclass)
41628      && !offsettable_memref_p (x))
41629    {
41630      sri->icode = (in_p
41631		    ? CODE_FOR_reload_noff_load
41632		    : CODE_FOR_reload_noff_store);
41633      /* Add the cost of moving address to a temporary.  */
41634      sri->extra_cost = 1;
41635
41636      return NO_REGS;
41637    }
41638
  /* QImode spills from non-QI registers require
     an intermediate register on 32-bit targets.  */
41641  if (mode == QImode
41642      && (MAYBE_MASK_CLASS_P (rclass)
41643	  || (!TARGET_64BIT && !in_p
41644	      && INTEGER_CLASS_P (rclass)
41645	      && MAYBE_NON_Q_CLASS_P (rclass))))
41646    {
41647      int regno;
41648
41649      if (REG_P (x))
41650	regno = REGNO (x);
41651      else
41652	regno = -1;
41653
41654      if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
41655	regno = true_regnum (x);
41656
41657      /* Return Q_REGS if the operand is in memory.  */
41658      if (regno == -1)
41659	return Q_REGS;
41660    }
41661
  /* This condition handles the corner case where an expression involving
41663     pointers gets vectorized.  We're trying to use the address of a
41664     stack slot as a vector initializer.
41665
41666     (set (reg:V2DI 74 [ vect_cst_.2 ])
41667          (vec_duplicate:V2DI (reg/f:DI 20 frame)))
41668
41669     Eventually frame gets turned into sp+offset like this:
41670
41671     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41672          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41673	                               (const_int 392 [0x188]))))
41674
41675     That later gets turned into:
41676
41677     (set (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41678          (vec_duplicate:V2DI (plus:DI (reg/f:DI 7 sp)
41679	    (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))))
41680
41681     We'll have the following reload recorded:
41682
41683     Reload 0: reload_in (DI) =
41684           (plus:DI (reg/f:DI 7 sp)
41685            (mem/u/c/i:DI (symbol_ref/u:DI ("*.LC0") [flags 0x2]) [0 S8 A64]))
41686     reload_out (V2DI) = (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41687     SSE_REGS, RELOAD_OTHER (opnum = 0), can't combine
41688     reload_in_reg: (plus:DI (reg/f:DI 7 sp) (const_int 392 [0x188]))
41689     reload_out_reg: (reg:V2DI 21 xmm0 [orig:74 vect_cst_.2 ] [74])
41690     reload_reg_rtx: (reg:V2DI 22 xmm1)
41691
41692     Which isn't going to work since SSE instructions can't handle scalar
     additions.  Returning GENERAL_REGS forces the addition into an integer
     register, and reload can handle subsequent reloads without problems.  */
41695
41696  if (in_p && GET_CODE (x) == PLUS
41697      && SSE_CLASS_P (rclass)
41698      && SCALAR_INT_MODE_P (mode))
41699    return GENERAL_REGS;
41700
41701  return NO_REGS;
41702}
41703
41704/* Implement TARGET_CLASS_LIKELY_SPILLED_P.  */
41705
41706static bool
41707ix86_class_likely_spilled_p (reg_class_t rclass)
41708{
41709  switch (rclass)
41710    {
41711      case AREG:
41712      case DREG:
41713      case CREG:
41714      case BREG:
41715      case AD_REGS:
41716      case SIREG:
41717      case DIREG:
41718      case SSE_FIRST_REG:
41719      case FP_TOP_REG:
41720      case FP_SECOND_REG:
41721      case BND_REGS:
41722	return true;
41723
41724      default:
41725	break;
41726    }
41727
41728  return false;
41729}
41730
41731/* If we are copying between general and FP registers, we need a memory
41732   location. The same is true for SSE and MMX registers.
41733
41734   To optimize register_move_cost performance, allow inline variant.
41735
   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We avoid
   this by never combining those units in a single alternative in the
   machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.
41740
41741   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
41742   enforce these sanity checks.  */
41743
41744static inline bool
41745inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41746				machine_mode mode, int strict)
41747{
41748  if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
41749    return false;
41750  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
41751      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
41752      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
41753      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
41754      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
41755      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
41756    {
41757      gcc_assert (!strict || lra_in_progress);
41758      return true;
41759    }
41760
41761  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
41762    return true;
41763
41764  /* Between mask and general, we have moves no larger than word size.  */
41765  if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
41766      && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
    return true;
41768
41769  /* ??? This is a lie.  We do have moves between mmx/general, and for
41770     mmx/sse2.  But by saying we need secondary memory we discourage the
41771     register allocator from using the mmx registers unless needed.  */
41772  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
41773    return true;
41774
41775  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
41776    {
41777      /* SSE1 doesn't have any direct moves from other classes.  */
41778      if (!TARGET_SSE2)
41779	return true;
41780
41781      /* If the target says that inter-unit moves are more expensive
41782	 than moving through memory, then don't generate them.  */
41783      if ((SSE_CLASS_P (class1) && !TARGET_INTER_UNIT_MOVES_FROM_VEC)
41784	  || (SSE_CLASS_P (class2) && !TARGET_INTER_UNIT_MOVES_TO_VEC))
41785	return true;
41786
41787      /* Between SSE and general, we have moves no larger than word size.  */
41788      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
41789	return true;
41790    }
41791
41792  return false;
41793}
41794
41795bool
41796ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
41797			      machine_mode mode, int strict)
41798{
41799  return inline_secondary_memory_needed (class1, class2, mode, strict);
41800}
41801
41802/* Implement the TARGET_CLASS_MAX_NREGS hook.
41803
41804   On the 80386, this is the size of MODE in words,
41805   except in the FP regs, where a single reg is always enough.  */
41806
41807static unsigned char
41808ix86_class_max_nregs (reg_class_t rclass, machine_mode mode)
41809{
41810  if (MAYBE_INTEGER_CLASS_P (rclass))
41811    {
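      /* An XFmode value occupies 3 word-sized registers on ia32 and 2 on
	 x86-64, regardless of its padded in-memory size; XCmode needs
	 twice as many.  */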
41812      if (mode == XFmode)
41813	return (TARGET_64BIT ? 2 : 3);
41814      else if (mode == XCmode)
41815	return (TARGET_64BIT ? 4 : 6);
41816      else
41817	return ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD);
41818    }
41819  else
41820    {
41821      if (COMPLEX_MODE_P (mode))
41822	return 2;
41823      else
41824	return 1;
41825    }
41826}
41827
41828/* Return true if the registers in CLASS cannot represent the change from
41829   modes FROM to TO.  */
41830
41831bool
41832ix86_cannot_change_mode_class (machine_mode from, machine_mode to,
41833			       enum reg_class regclass)
41834{
41835  if (from == to)
41836    return false;
41837
41838  /* x87 registers can't do subreg at all, as all values are reformatted
41839     to extended precision.  */
41840  if (MAYBE_FLOAT_CLASS_P (regclass))
41841    return true;
41842
41843  if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
41844    {
41845      int from_size = GET_MODE_SIZE (from);
41846      int to_size = GET_MODE_SIZE (to);
41847
41848      /* Vector registers do not support QI or HImode loads.  If we don't
41849	 disallow a change to these modes, reload will assume it's ok to
41850	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
41851	 the vec_dupv4hi pattern.  */
41852      if (from_size < 4)
41853	return true;
41854
41855      /* Further, we cannot allow word_mode subregs of full vector modes.
41856         Otherwise the middle-end will assume it's ok to store to
41857         (subreg:DI (reg:TI 100) 0) in order to modify only the low 64 bits
41858         of the 128-bit register.  However, after reload the subreg will
41859         be dropped leaving a plain DImode store.  This is indistinguishable
41860         from a "normal" DImode move, and so we're justified to use movsd,
41861         which modifies the entire 128-bit register.  */
41862      if (to_size == UNITS_PER_WORD && from_size > UNITS_PER_WORD)
41863	return true;
41864    }
41865
41866  return false;
41867}
41868
41869/* Return the cost of moving data of mode M between a
41870   register and memory.  A value of 2 is the default; this cost is
41871   relative to those in `REGISTER_MOVE_COST'.
41872
   This function is used extensively by register_move_cost, which is used to
   build tables at startup.  Make it inline in this case.
   When IN is 2, return the maximum of the in and out move costs.
41876
41877   If moving between registers and memory is more expensive than
41878   between two registers, you should define this macro to express the
41879   relative cost.
41880
   Also model the increased cost of moving QImode registers in
   non-Q_REGS classes.
41883 */
41884static inline int
41885inline_memory_move_cost (machine_mode mode, enum reg_class regclass,
41886			 int in)
41887{
41888  int cost;
41889  if (FLOAT_CLASS_P (regclass))
41890    {
41891      int index;
41892      switch (mode)
41893	{
41894	  case SFmode:
41895	    index = 0;
41896	    break;
41897	  case DFmode:
41898	    index = 1;
41899	    break;
41900	  case XFmode:
41901	    index = 2;
41902	    break;
41903	  default:
41904	    return 100;
41905	}
41906      if (in == 2)
41907        return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
41908      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
41909    }
41910  if (SSE_CLASS_P (regclass))
41911    {
41912      int index;
41913      switch (GET_MODE_SIZE (mode))
41914	{
41915	  case 4:
41916	    index = 0;
41917	    break;
41918	  case 8:
41919	    index = 1;
41920	    break;
41921	  case 16:
41922	    index = 2;
41923	    break;
41924	  default:
41925	    return 100;
41926	}
41927      if (in == 2)
41928        return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
41929      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
41930    }
41931  if (MMX_CLASS_P (regclass))
41932    {
41933      int index;
41934      switch (GET_MODE_SIZE (mode))
41935	{
41936	  case 4:
41937	    index = 0;
41938	    break;
41939	  case 8:
41940	    index = 1;
41941	    break;
41942	  default:
41943	    return 100;
41944	}
      if (in == 2)
41946        return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
41947      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
41948    }
41949  switch (GET_MODE_SIZE (mode))
41950    {
41951      case 1:
41952	if (Q_CLASS_P (regclass) || TARGET_64BIT)
41953	  {
41954	    if (!in)
41955	      return ix86_cost->int_store[0];
41956	    if (TARGET_PARTIAL_REG_DEPENDENCY
41957	        && optimize_function_for_speed_p (cfun))
41958	      cost = ix86_cost->movzbl_load;
41959	    else
41960	      cost = ix86_cost->int_load[0];
41961	    if (in == 2)
41962	      return MAX (cost, ix86_cost->int_store[0]);
41963	    return cost;
41964	  }
41965	else
41966	  {
41967	   if (in == 2)
41968	     return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
41969	   if (in)
41970	     return ix86_cost->movzbl_load;
41971	   else
41972	     return ix86_cost->int_store[0] + 4;
41973	  }
41974	break;
41975      case 2:
41976	if (in == 2)
41977	  return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
41978	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
41979      default:
41980	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
41981	if (mode == TFmode)
41982	  mode = XFmode;
41983	if (in == 2)
41984	  cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
41985	else if (in)
41986	  cost = ix86_cost->int_load[2];
41987	else
41988	  cost = ix86_cost->int_store[2];
41989	return (cost * (((int) GET_MODE_SIZE (mode)
41990		        + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
41991    }
41992}
41993
41994static int
41995ix86_memory_move_cost (machine_mode mode, reg_class_t regclass,
41996		       bool in)
41997{
41998  return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
41999}
42000
42001
42002/* Return the cost of moving data from a register in class CLASS1 to
42003   one in class CLASS2.
42004
   It is not required that the cost always equal 2 when CLASS1 equals CLASS2;
42006   on some machines it is expensive to move between registers if they are not
42007   general registers.  */
42008
42009static int
42010ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
42011			 reg_class_t class2_i)
42012{
42013  enum reg_class class1 = (enum reg_class) class1_i;
42014  enum reg_class class2 = (enum reg_class) class2_i;
42015
  /* In case we require secondary memory, compute the cost of the store
     followed by a load.  To avoid bad register allocation choices, we need
42018     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
42019
42020  if (inline_secondary_memory_needed (class1, class2, mode, 0))
42021    {
42022      int cost = 1;
42023
42024      cost += inline_memory_move_cost (mode, class1, 2);
42025      cost += inline_memory_move_cost (mode, class2, 2);
42026
      /* In the case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
42030      if (targetm.class_max_nregs (class1, mode)
42031	  > targetm.class_max_nregs (class2, mode))
42032	cost += 20;
42033
42034      /* In the case of FP/MMX moves, the registers actually overlap, and we
42035	 have to switch modes in order to treat them differently.  */
42036      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
42037          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
42038	cost += 20;
42039
42040      return cost;
42041    }
42042
42043  /* Moves between SSE/MMX and integer unit are expensive.  */
42044  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
42045      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
42046
    /* ??? By keeping the returned value relatively high, we limit the number
       of moves between integer and MMX/SSE registers for all targets.
       Additionally, a high value prevents a problem with x86_modes_tieable_p(),
42050       where integer modes in MMX/SSE registers are not tieable
42051       because of missing QImode and HImode moves to, from or between
42052       MMX/SSE registers.  */
42053    return MAX (8, ix86_cost->mmxsse_to_integer);
42054
42055  if (MAYBE_FLOAT_CLASS_P (class1))
42056    return ix86_cost->fp_move;
42057  if (MAYBE_SSE_CLASS_P (class1))
42058    return ix86_cost->sse_move;
42059  if (MAYBE_MMX_CLASS_P (class1))
42060    return ix86_cost->mmx_move;
42061  return 2;
42062}
42063
42064/* Return TRUE if hard register REGNO can hold a value of machine-mode
42065   MODE.  */
42066
42067bool
42068ix86_hard_regno_mode_ok (int regno, machine_mode mode)
42069{
  /* Flags, and only flags, can hold CCmode values.  */
42071  if (CC_REGNO_P (regno))
42072    return GET_MODE_CLASS (mode) == MODE_CC;
42073  if (GET_MODE_CLASS (mode) == MODE_CC
42074      || GET_MODE_CLASS (mode) == MODE_RANDOM
42075      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
42076    return false;
42077  if (STACK_REGNO_P (regno))
42078    return VALID_FP_MODE_P (mode);
42079  if (MASK_REGNO_P (regno))
42080    return (VALID_MASK_REG_MODE (mode)
42081	    || (TARGET_AVX512BW
42082		&& VALID_MASK_AVX512BW_MODE (mode)));
42083  if (BND_REGNO_P (regno))
42084    return VALID_BND_REG_MODE (mode);
42085  if (SSE_REGNO_P (regno))
42086    {
42087      /* We implement the move patterns for all vector modes into and
42088	 out of SSE registers, even when no operation instructions
42089	 are available.  */
42090
42091      /* For AVX-512 we allow, regardless of regno:
42092	  - XI mode
	  - any 512-bit wide vector mode
42094	  - any scalar mode.  */
42095      if (TARGET_AVX512F
42096	  && (mode == XImode
42097	      || VALID_AVX512F_REG_MODE (mode)
42098	      || VALID_AVX512F_SCALAR_MODE (mode)))
42099	return true;
42100
42101      /* TODO check for QI/HI scalars.  */
      /* AVX512VL allows SSE regs 16+ for 128/256-bit modes.  */
42103      if (TARGET_AVX512VL
42104	  && (mode == OImode
42105	      || mode == TImode
42106	      || VALID_AVX256_REG_MODE (mode)
42107	      || VALID_AVX512VL_128_REG_MODE (mode)))
42108	return true;
42109
42110      /* xmm16-xmm31 are only available for AVX-512.  */
42111      if (EXT_REX_SSE_REGNO_P (regno))
42112	return false;
42113
42114      /* OImode and AVX modes are available only when AVX is enabled.  */
42115      return ((TARGET_AVX
42116	       && VALID_AVX256_REG_OR_OI_MODE (mode))
42117	      || VALID_SSE_REG_MODE (mode)
42118	      || VALID_SSE2_REG_MODE (mode)
42119	      || VALID_MMX_REG_MODE (mode)
42120	      || VALID_MMX_REG_MODE_3DNOW (mode));
42121    }
42122  if (MMX_REGNO_P (regno))
42123    {
42124      /* We implement the move patterns for 3DNOW modes even in MMX mode,
42125	 so if the register is available at all, then we can move data of
42126	 the given mode into or out of it.  */
42127      return (VALID_MMX_REG_MODE (mode)
42128	      || VALID_MMX_REG_MODE_3DNOW (mode));
42129    }
42130
42131  if (mode == QImode)
42132    {
      /* Take care with QImode values - they can be in non-QI regs,
	 but then they do cause partial register stalls.  */
42135      if (ANY_QI_REGNO_P (regno))
42136	return true;
42137      if (!TARGET_PARTIAL_REG_STALL)
42138	return true;
42139      /* LRA checks if the hard register is OK for the given mode.
42140	 QImode values can live in non-QI regs, so we allow all
42141	 registers here.  */
42142      if (lra_in_progress)
	return true;
42144      return !can_create_pseudo_p ();
42145    }
42146  /* We handle both integer and floats in the general purpose registers.  */
42147  else if (VALID_INT_MODE_P (mode))
42148    return true;
42149  else if (VALID_FP_MODE_P (mode))
42150    return true;
42151  else if (VALID_DFP_MODE_P (mode))
42152    return true;
42153  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
42154     on to use that value in smaller contexts, this can easily force a
42155     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
42156     supporting DImode, allow it.  */
42157  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
42158    return true;
42159
42160  return false;
42161}
42162
42163/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
42164   tieable integer mode.  */
42165
42166static bool
42167ix86_tieable_integer_mode_p (machine_mode mode)
42168{
42169  switch (mode)
42170    {
42171    case HImode:
42172    case SImode:
42173      return true;
42174
42175    case QImode:
42176      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
42177
42178    case DImode:
42179      return TARGET_64BIT;
42180
42181    default:
42182      return false;
42183    }
42184}
42185
42186/* Return true if MODE1 is accessible in a register that can hold MODE2
42187   without copying.  That is, all register classes that can hold MODE2
42188   can also hold MODE1.  */
42189
42190bool
42191ix86_modes_tieable_p (machine_mode mode1, machine_mode mode2)
42192{
42193  if (mode1 == mode2)
42194    return true;
42195
42196  if (ix86_tieable_integer_mode_p (mode1)
42197      && ix86_tieable_integer_mode_p (mode2))
42198    return true;
42199
42200  /* MODE2 being XFmode implies fp stack or general regs, which means we
42201     can tie any smaller floating point modes to it.  Note that we do not
42202     tie this with TFmode.  */
42203  if (mode2 == XFmode)
42204    return mode1 == SFmode || mode1 == DFmode;
42205
42206  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
42207     that we can tie it with SFmode.  */
42208  if (mode2 == DFmode)
42209    return mode1 == SFmode;
42210
42211  /* If MODE2 is only appropriate for an SSE register, then tie with
42212     any other mode acceptable to SSE registers.  */
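  /* For example, V2DImode and V4SFmode (both 16 bytes and both valid in
     SSE registers) tie with each other, as do V4DImode and V8SFmode
     (both 32 bytes) when AVX is enabled.  */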
42213  if (GET_MODE_SIZE (mode2) == 32
42214      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42215    return (GET_MODE_SIZE (mode1) == 32
42216	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42217  if (GET_MODE_SIZE (mode2) == 16
42218      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
42219    return (GET_MODE_SIZE (mode1) == 16
42220	    && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
42221
42222  /* If MODE2 is appropriate for an MMX register, then tie
42223     with any other mode acceptable to MMX registers.  */
42224  if (GET_MODE_SIZE (mode2) == 8
42225      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
42226    return (GET_MODE_SIZE (mode1) == 8
42227	    && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
42228
42229  return false;
42230}
42231
42232/* Return the cost of moving between two registers of mode MODE.  */
42233
42234static int
42235ix86_set_reg_reg_cost (machine_mode mode)
42236{
42237  unsigned int units = UNITS_PER_WORD;
42238
42239  switch (GET_MODE_CLASS (mode))
42240    {
42241    default:
42242      break;
42243
42244    case MODE_CC:
42245      units = GET_MODE_SIZE (CCmode);
42246      break;
42247
42248    case MODE_FLOAT:
42249      if ((TARGET_SSE && mode == TFmode)
42250	  || (TARGET_80387 && mode == XFmode)
42251	  || ((TARGET_80387 || TARGET_SSE2) && mode == DFmode)
42252	  || ((TARGET_80387 || TARGET_SSE) && mode == SFmode))
42253	units = GET_MODE_SIZE (mode);
42254      break;
42255
42256    case MODE_COMPLEX_FLOAT:
42257      if ((TARGET_SSE && mode == TCmode)
42258	  || (TARGET_80387 && mode == XCmode)
42259	  || ((TARGET_80387 || TARGET_SSE2) && mode == DCmode)
42260	  || ((TARGET_80387 || TARGET_SSE) && mode == SCmode))
42261	units = GET_MODE_SIZE (mode);
42262      break;
42263
42264    case MODE_VECTOR_INT:
42265    case MODE_VECTOR_FLOAT:
42266      if ((TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
42267	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
42268	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
42269	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
42270	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
42271	units = GET_MODE_SIZE (mode);
42272    }
42273
42274  /* Return the cost of moving between two registers of mode MODE,
42275     assuming that the move will be in pieces of at most UNITS bytes.  */
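  /* For example, with TARGET_AVX a 32-byte V8SFmode move has UNITS == 32
     and costs COSTS_N_INSNS (1); without AVX, UNITS stays at UNITS_PER_WORD
     and the same move is costed as several word-sized pieces.  */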
42276  return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units);
42277}
42278
42279/* Compute a (partial) cost for rtx X.  Return true if the complete
42280   cost has been computed, and false if subexpressions should be
42281   scanned.  In either case, *TOTAL contains the cost result.  */
42282
42283static bool
42284ix86_rtx_costs (rtx x, int code_i, int outer_code_i, int opno, int *total,
42285		bool speed)
42286{
42287  rtx mask;
42288  enum rtx_code code = (enum rtx_code) code_i;
42289  enum rtx_code outer_code = (enum rtx_code) outer_code_i;
42290  machine_mode mode = GET_MODE (x);
42291  const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
42292
42293  switch (code)
42294    {
42295    case SET:
42296      if (register_operand (SET_DEST (x), VOIDmode)
42297	  && reg_or_0_operand (SET_SRC (x), VOIDmode))
42298	{
42299	  *total = ix86_set_reg_reg_cost (GET_MODE (SET_DEST (x)));
42300	  return true;
42301	}
42302      return false;
42303
42304    case CONST_INT:
42305    case CONST:
42306    case LABEL_REF:
42307    case SYMBOL_REF:
42308      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
42309	*total = 3;
42310      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
42311	*total = 2;
42312      else if (flag_pic && SYMBOLIC_CONST (x)
42313	       && !(TARGET_64BIT
42314		    && (GET_CODE (x) == LABEL_REF
42315			|| (GET_CODE (x) == SYMBOL_REF
42316			    && SYMBOL_REF_LOCAL_P (x)))))
42317	*total = 1;
42318      else
42319	*total = 0;
42320      return true;
42321
42322    case CONST_DOUBLE:
42323      if (mode == VOIDmode)
42324	{
42325	  *total = 0;
42326	  return true;
42327	}
42328      switch (standard_80387_constant_p (x))
42329	{
42330	case 1: /* 0.0 */
42331	  *total = 1;
42332	  return true;
42333	default: /* Other constants */
42334	  *total = 2;
42335	  return true;
42336	case 0:
42337	case -1:
42338	  break;
42339	}
42340      if (SSE_FLOAT_MODE_P (mode))
42341	{
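	  /* Note that a CONST_VECTOR enters here directly through the case
	     label below; the SSE_FLOAT_MODE_P test above only guards the
	     CONST_DOUBLE path.  */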
42342    case CONST_VECTOR:
42343	  switch (standard_sse_constant_p (x))
42344	    {
42345	    case 0:
42346	      break;
42347	    case 1:  /* 0: xor eliminates false dependency */
42348	      *total = 0;
42349	      return true;
42350	    default: /* -1: cmp contains false dependency */
42351	      *total = 1;
42352	      return true;
42353	    }
42354	}
42355      /* Fall back to (MEM (SYMBOL_REF)), since that's where
42356	 it'll probably end up.  Add a penalty for size.  */
42357      *total = (COSTS_N_INSNS (1)
42358		+ (flag_pic != 0 && !TARGET_64BIT)
42359		+ (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
42360      return true;
42361
42362    case ZERO_EXTEND:
42363      /* Zero extension is often completely free on x86_64, so make
42364	 it as cheap as possible.  */
42365      if (TARGET_64BIT && mode == DImode
42366	  && GET_MODE (XEXP (x, 0)) == SImode)
42367	*total = 1;
42368      else if (TARGET_ZERO_EXTEND_WITH_AND)
42369	*total = cost->add;
42370      else
42371	*total = cost->movzx;
42372      return false;
42373
42374    case SIGN_EXTEND:
42375      *total = cost->movsx;
42376      return false;
42377
42378    case ASHIFT:
42379      if (SCALAR_INT_MODE_P (mode)
42380	  && GET_MODE_SIZE (mode) < UNITS_PER_WORD
42381	  && CONST_INT_P (XEXP (x, 1)))
42382	{
42383	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
42384	  if (value == 1)
42385	    {
42386	      *total = cost->add;
42387	      return false;
42388	    }
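	  /* A shift left by 2 or 3 can also be done with an lea using a
	     scale of 4 or 8, so prefer that when it is no more expensive
	     than a constant shift.  */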
42389	  if ((value == 2 || value == 3)
42390	      && cost->lea <= cost->shift_const)
42391	    {
42392	      *total = cost->lea;
42393	      return false;
42394	    }
42395	}
42396      /* FALLTHRU */
42397
42398    case ROTATE:
42399    case ASHIFTRT:
42400    case LSHIFTRT:
42401    case ROTATERT:
42402      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42403	{
42404	  /* ??? Should be SSE vector operation cost.  */
42405	  /* At least for published AMD latencies, this really is the same
42406	     as the latency for a simple fpu operation like fabs.  */
42407	  /* V*QImode is emulated with 1-11 insns.  */
42408	  if (mode == V16QImode || mode == V32QImode)
42409	    {
42410	      int count = 11;
42411	      if (TARGET_XOP && mode == V16QImode)
42412		{
42413		  /* For XOP we use vpshab, which requires a broadcast of the
42414		     value to the variable shift insn.  For constants this
42415		     means a V16QImode constant in memory; even when we can
42416		     perform the shift with one insn, set the cost to prefer paddb.  */
42417		  if (CONSTANT_P (XEXP (x, 1)))
42418		    {
42419		      *total = (cost->fabs
42420				+ rtx_cost (XEXP (x, 0), code, 0, speed)
42421				+ (speed ? 2 : COSTS_N_BYTES (16)));
42422		      return true;
42423		    }
42424		  count = 3;
42425		}
42426	      else if (TARGET_SSSE3)
42427		count = 7;
42428	      *total = cost->fabs * count;
42429	    }
42430	  else
42431	    *total = cost->fabs;
42432	}
42433      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42434	{
42435	  if (CONST_INT_P (XEXP (x, 1)))
42436	    {
42437	      if (INTVAL (XEXP (x, 1)) > 32)
42438		*total = cost->shift_const + COSTS_N_INSNS (2);
42439	      else
42440		*total = cost->shift_const * 2;
42441	    }
42442	  else
42443	    {
42444	      if (GET_CODE (XEXP (x, 1)) == AND)
42445		*total = cost->shift_var * 2;
42446	      else
42447		*total = cost->shift_var * 6 + COSTS_N_INSNS (2);
42448	    }
42449	}
42450      else
42451	{
42452	  if (CONST_INT_P (XEXP (x, 1)))
42453	    *total = cost->shift_const;
42454	  else if (GET_CODE (XEXP (x, 1)) == SUBREG
42455		   && GET_CODE (XEXP (XEXP (x, 1), 0)) == AND)
42456	    {
42457	      /* Return the cost after shift-and truncation.  */
42458	      *total = cost->shift_var;
42459	      return true;
42460	    }
42461	  else
42462	    *total = cost->shift_var;
42463	}
42464      return false;
42465
42466    case FMA:
42467      {
42468	rtx sub;
42469
42470        gcc_assert (FLOAT_MODE_P (mode));
42471        gcc_assert (TARGET_FMA || TARGET_FMA4 || TARGET_AVX512F);
42472
42473        /* ??? SSE scalar/vector cost should be used here.  */
42474        /* ??? Bald assumption that fma has the same cost as fmul.  */
42475        *total = cost->fmul;
42476	*total += rtx_cost (XEXP (x, 1), FMA, 1, speed);
42477
42478        /* Negate in op0 or op2 is free: FMS, FNMA, FNMS.  */
42479	sub = XEXP (x, 0);
42480	if (GET_CODE (sub) == NEG)
42481	  sub = XEXP (sub, 0);
42482	*total += rtx_cost (sub, FMA, 0, speed);
42483
42484	sub = XEXP (x, 2);
42485	if (GET_CODE (sub) == NEG)
42486	  sub = XEXP (sub, 0);
42487	*total += rtx_cost (sub, FMA, 2, speed);
42488	return true;
42489      }
42490
42491    case MULT:
42492      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42493	{
42494	  /* ??? SSE scalar cost should be used here.  */
42495	  *total = cost->fmul;
42496	  return false;
42497	}
42498      else if (X87_FLOAT_MODE_P (mode))
42499	{
42500	  *total = cost->fmul;
42501	  return false;
42502	}
42503      else if (FLOAT_MODE_P (mode))
42504	{
42505	  /* ??? SSE vector cost should be used here.  */
42506	  *total = cost->fmul;
42507	  return false;
42508	}
42509      else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42510	{
42511	  /* V*QImode is emulated with 7-13 insns.  */
42512	  if (mode == V16QImode || mode == V32QImode)
42513	    {
42514	      int extra = 11;
42515	      if (TARGET_XOP && mode == V16QImode)
42516		extra = 5;
42517	      else if (TARGET_SSSE3)
42518		extra = 6;
42519	      *total = cost->fmul * 2 + cost->fabs * extra;
42520	    }
42521	  /* V*DImode is emulated with 5-8 insns.  */
42522	  else if (mode == V2DImode || mode == V4DImode)
42523	    {
42524	      if (TARGET_XOP && mode == V2DImode)
42525		*total = cost->fmul * 2 + cost->fabs * 3;
42526	      else
42527		*total = cost->fmul * 3 + cost->fabs * 5;
42528	    }
42529	  /* Without sse4.1, we don't have PMULLD; it's emulated with 7
42530	     insns, including two PMULUDQ.  */
42531	  else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
42532	    *total = cost->fmul * 2 + cost->fabs * 5;
42533	  else
42534	    *total = cost->fmul;
42535	  return false;
42536	}
42537      else
42538	{
42539	  rtx op0 = XEXP (x, 0);
42540	  rtx op1 = XEXP (x, 1);
42541	  int nbits;
42542	  if (CONST_INT_P (XEXP (x, 1)))
42543	    {
42544	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
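	      /* Count the set bits in the constant; each iteration clears
		 the lowest set bit (value &= value - 1).  */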
42545	      for (nbits = 0; value != 0; value &= value - 1)
42546	        nbits++;
42547	    }
42548	  else
42549	    /* This is arbitrary.  */
42550	    nbits = 7;
42551
42552	  /* Compute costs correctly for widening multiplication.  */
42553	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
42554	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
42555	         == GET_MODE_SIZE (mode))
42556	    {
42557	      int is_mulwiden = 0;
42558	      machine_mode inner_mode = GET_MODE (op0);
42559
42560	      if (GET_CODE (op0) == GET_CODE (op1))
42561		is_mulwiden = 1, op1 = XEXP (op1, 0);
42562	      else if (CONST_INT_P (op1))
42563		{
42564		  if (GET_CODE (op0) == SIGN_EXTEND)
42565		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
42566			          == INTVAL (op1);
42567		  else
42568		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
42569	        }
42570
42571	      if (is_mulwiden)
42572	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
42573	    }
42574
42575  	  *total = (cost->mult_init[MODE_INDEX (mode)]
42576		    + nbits * cost->mult_bit
42577	            + rtx_cost (op0, outer_code, opno, speed)
42578		    + rtx_cost (op1, outer_code, opno, speed));
42579
42580          return true;
42581	}
42582
42583    case DIV:
42584    case UDIV:
42585    case MOD:
42586    case UMOD:
42587      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42588	/* ??? SSE cost should be used here.  */
42589	*total = cost->fdiv;
42590      else if (X87_FLOAT_MODE_P (mode))
42591	*total = cost->fdiv;
42592      else if (FLOAT_MODE_P (mode))
42593	/* ??? SSE vector cost should be used here.  */
42594	*total = cost->fdiv;
42595      else
42596	*total = cost->divide[MODE_INDEX (mode)];
42597      return false;
42598
42599    case PLUS:
42600      if (GET_MODE_CLASS (mode) == MODE_INT
42601	  && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
42602	{
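	  /* The three shapes handled below all correspond to a single lea:
	     (reg * scale + reg) + disp, reg * scale + disp, and
	     (reg + reg) + disp.  */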
42603	  if (GET_CODE (XEXP (x, 0)) == PLUS
42604	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
42605	      && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
42606	      && CONSTANT_P (XEXP (x, 1)))
42607	    {
42608	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
42609	      if (val == 2 || val == 4 || val == 8)
42610		{
42611		  *total = cost->lea;
42612		  *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42613				      outer_code, opno, speed);
42614		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
42615				      outer_code, opno, speed);
42616		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42617		  return true;
42618		}
42619	    }
42620	  else if (GET_CODE (XEXP (x, 0)) == MULT
42621		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
42622	    {
42623	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
42624	      if (val == 2 || val == 4 || val == 8)
42625		{
42626		  *total = cost->lea;
42627		  *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42628				      outer_code, opno, speed);
42629		  *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42630		  return true;
42631		}
42632	    }
42633	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
42634	    {
42635	      *total = cost->lea;
42636	      *total += rtx_cost (XEXP (XEXP (x, 0), 0),
42637				  outer_code, opno, speed);
42638	      *total += rtx_cost (XEXP (XEXP (x, 0), 1),
42639				  outer_code, opno, speed);
42640	      *total += rtx_cost (XEXP (x, 1), outer_code, opno, speed);
42641	      return true;
42642	    }
42643	}
42644      /* FALLTHRU */
42645
42646    case MINUS:
42647      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42648	{
42649	  /* ??? SSE cost should be used here.  */
42650	  *total = cost->fadd;
42651	  return false;
42652	}
42653      else if (X87_FLOAT_MODE_P (mode))
42654	{
42655	  *total = cost->fadd;
42656	  return false;
42657	}
42658      else if (FLOAT_MODE_P (mode))
42659	{
42660	  /* ??? SSE vector cost should be used here.  */
42661	  *total = cost->fadd;
42662	  return false;
42663	}
42664      /* FALLTHRU */
42665
42666    case AND:
42667    case IOR:
42668    case XOR:
42669      if (GET_MODE_CLASS (mode) == MODE_INT
42670	  && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42671	{
42672	  *total = (cost->add * 2
42673		    + (rtx_cost (XEXP (x, 0), outer_code, opno, speed)
42674		       << (GET_MODE (XEXP (x, 0)) != DImode))
42675		    + (rtx_cost (XEXP (x, 1), outer_code, opno, speed)
42676	               << (GET_MODE (XEXP (x, 1)) != DImode)));
42677	  return true;
42678	}
42679      /* FALLTHRU */
42680
42681    case NEG:
42682      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42683	{
42684	  /* ??? SSE cost should be used here.  */
42685	  *total = cost->fchs;
42686	  return false;
42687	}
42688      else if (X87_FLOAT_MODE_P (mode))
42689	{
42690	  *total = cost->fchs;
42691	  return false;
42692	}
42693      else if (FLOAT_MODE_P (mode))
42694	{
42695	  /* ??? SSE vector cost should be used here.  */
42696	  *total = cost->fchs;
42697	  return false;
42698	}
42699      /* FALLTHRU */
42700
42701    case NOT:
42702      if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
42703	{
42704	  /* ??? Should be SSE vector operation cost.  */
42705	  /* At least for published AMD latencies, this really is the same
42706	     as the latency for a simple fpu operation like fabs.  */
42707	  *total = cost->fabs;
42708	}
42709      else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
42710	*total = cost->add * 2;
42711      else
42712	*total = cost->add;
42713      return false;
42714
42715    case COMPARE:
42716      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
42717	  && XEXP (XEXP (x, 0), 1) == const1_rtx
42718	  && CONST_INT_P (XEXP (XEXP (x, 0), 2))
42719	  && XEXP (x, 1) == const0_rtx)
42720	{
42721	  /* This kind of construct is implemented using test[bwl].
42722	     Treat it as if we had an AND.  */
42723	  *total = (cost->add
42724		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, opno, speed)
42725		    + rtx_cost (const1_rtx, outer_code, opno, speed));
42726	  return true;
42727	}
42728      return false;
42729
42730    case FLOAT_EXTEND:
42731      if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
42732	*total = 0;
42733      return false;
42734
42735    case ABS:
42736      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42737	/* ??? SSE cost should be used here.  */
42738	*total = cost->fabs;
42739      else if (X87_FLOAT_MODE_P (mode))
42740	*total = cost->fabs;
42741      else if (FLOAT_MODE_P (mode))
42742	/* ??? SSE vector cost should be used here.  */
42743	*total = cost->fabs;
42744      return false;
42745
42746    case SQRT:
42747      if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
42748	/* ??? SSE cost should be used here.  */
42749	*total = cost->fsqrt;
42750      else if (X87_FLOAT_MODE_P (mode))
42751	*total = cost->fsqrt;
42752      else if (FLOAT_MODE_P (mode))
42753	/* ??? SSE vector cost should be used here.  */
42754	*total = cost->fsqrt;
42755      return false;
42756
42757    case UNSPEC:
42758      if (XINT (x, 1) == UNSPEC_TP)
42759	*total = 0;
42760      return false;
42761
42762    case VEC_SELECT:
42763    case VEC_CONCAT:
42764    case VEC_DUPLICATE:
42765      /* ??? Assume all of these vector manipulation patterns are
42766	 recognizable, in which case they all pretty much have the
42767	 same cost.  */
42768      *total = cost->fabs;
42769      return true;
42770    case VEC_MERGE:
42771      mask = XEXP (x, 2);
42772      /* This is a masked instruction; assume the same cost
42773	 as the non-masked variant.  */
42774      if (TARGET_AVX512F && register_operand (mask, GET_MODE (mask)))
42775	*total = rtx_cost (XEXP (x, 0), outer_code, opno, speed);
42776      else
42777	*total = cost->fabs;
42778      return true;
42779
42780    default:
42781      return false;
42782    }
42783}
42784
42785#if TARGET_MACHO
42786
42787static int current_machopic_label_num;
42788
42789/* Given a symbol name and its associated stub, write out the
42790   definition of the stub.  */
42791
42792void
42793machopic_output_stub (FILE *file, const char *symb, const char *stub)
42794{
42795  unsigned int length;
42796  char *binder_name, *symbol_name, lazy_ptr_name[32];
42797  int label = ++current_machopic_label_num;
42798
42799  /* For 64-bit we shouldn't get here.  */
42800  gcc_assert (!TARGET_64BIT);
42801
42802  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
42803  symb = targetm.strip_name_encoding (symb);
42804
42805  length = strlen (stub);
42806  binder_name = XALLOCAVEC (char, length + 32);
42807  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
42808
42809  length = strlen (symb);
42810  symbol_name = XALLOCAVEC (char, length + 32);
42811  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
42812
42813  sprintf (lazy_ptr_name, "L%d$lz", label);
42814
42815  if (MACHOPIC_ATT_STUB)
42816    switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]);
42817  else if (MACHOPIC_PURE)
42818    switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]);
42819  else
42820    switch_to_section (darwin_sections[machopic_symbol_stub_section]);
42821
42822  fprintf (file, "%s:\n", stub);
42823  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42824
42825  if (MACHOPIC_ATT_STUB)
42826    {
42827      fprintf (file, "\thlt ; hlt ; hlt ; hlt ; hlt\n");
42828    }
42829  else if (MACHOPIC_PURE)
42830    {
42831      /* PIC stub.  */
42832      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42833      rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */);
42834      output_set_got (tmp, NULL_RTX);	/* "CALL ___<cpu>.get_pc_thunk.cx".  */
42835      fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n",
42836	       label, lazy_ptr_name, label);
42837      fprintf (file, "\tjmp\t*%%ecx\n");
42838    }
42839  else
42840    fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
42841
42842  /* The AT&T-style ("self-modifying") stub is not lazily bound, thus
42843     it needs no stub-binding-helper.  */
42844  if (MACHOPIC_ATT_STUB)
42845    return;
42846
42847  fprintf (file, "%s:\n", binder_name);
42848
42849  if (MACHOPIC_PURE)
42850    {
42851      fprintf (file, "\tlea\t%s-%s(%%ecx),%%ecx\n", lazy_ptr_name, binder_name);
42852      fprintf (file, "\tpushl\t%%ecx\n");
42853    }
42854  else
42855    fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
42856
42857  fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
42858
42859  /* N.B. Keep the correspondence of these
42860     'symbol_ptr/symbol_ptr2/symbol_ptr3' sections consistent with the
42861     old-pic/new-pic/non-pic stubs; altering this will break
42862     compatibility with existing dylibs.  */
42863  if (MACHOPIC_PURE)
42864    {
42865      /* 25-byte PIC stub using "CALL get_pc_thunk".  */
42866      switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]);
42867    }
42868  else
42869    /* 16-byte -mdynamic-no-pic stub.  */
42870    switch_to_section (darwin_sections[machopic_lazy_symbol_ptr3_section]);
42871
42872  fprintf (file, "%s:\n", lazy_ptr_name);
42873  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
42874  fprintf (file, ASM_LONG "%s\n", binder_name);
42875}
42876#endif /* TARGET_MACHO */
42877
42878/* Order the registers for the register allocator.  */
42879
42880void
42881x86_order_regs_for_local_alloc (void)
42882{
42883   int pos = 0;
42884   int i;
42885
42886   /* First allocate the call-used ("local") general purpose registers.  */
42887   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42888     if (GENERAL_REGNO_P (i) && call_used_regs[i])
42889	reg_alloc_order [pos++] = i;
42890
42891   /* Then the call-saved ("global") general purpose registers.  */
42892   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
42893     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
42894	reg_alloc_order [pos++] = i;
42895
42896   /* x87 registers come first in case we are doing FP math
42897      using them.  */
42898   if (!TARGET_SSE_MATH)
42899     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42900       reg_alloc_order [pos++] = i;
42901
42902   /* SSE registers.  */
42903   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
42904     reg_alloc_order [pos++] = i;
42905   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
42906     reg_alloc_order [pos++] = i;
42907
42908   /* Extended REX SSE registers.  */
42909   for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++)
42910     reg_alloc_order [pos++] = i;
42911
42912   /* Mask registers.  */
42913   for (i = FIRST_MASK_REG; i <= LAST_MASK_REG; i++)
42914     reg_alloc_order [pos++] = i;
42915
42916   /* MPX bound registers.  */
42917   for (i = FIRST_BND_REG; i <= LAST_BND_REG; i++)
42918     reg_alloc_order [pos++] = i;
42919
42920   /* x87 registers.  */
42921   if (TARGET_SSE_MATH)
42922     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
42923       reg_alloc_order [pos++] = i;
42924
42925   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
42926     reg_alloc_order [pos++] = i;
42927
42928   /* Initialize the rest of array as we do not allocate some registers
42929      at all.  */
42930   while (pos < FIRST_PSEUDO_REGISTER)
42931     reg_alloc_order [pos++] = 0;
42932}
42933
42934/* Handle a "callee_pop_aggregate_return" attribute; arguments as
42935   in struct attribute_spec.handler.  */
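/* For example, on a 32-bit target a declaration such as
     void f (void) __attribute__ ((callee_pop_aggregate_return (1)));
   is accepted; the handler requires the argument to be the integer
   constant 0 or 1.  */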
42936static tree
42937ix86_handle_callee_pop_aggregate_return (tree *node, tree name,
42938					      tree args,
42939					      int,
42940					      bool *no_add_attrs)
42941{
42942  if (TREE_CODE (*node) != FUNCTION_TYPE
42943      && TREE_CODE (*node) != METHOD_TYPE
42944      && TREE_CODE (*node) != FIELD_DECL
42945      && TREE_CODE (*node) != TYPE_DECL)
42946    {
42947      warning (OPT_Wattributes, "%qE attribute only applies to functions",
42948	       name);
42949      *no_add_attrs = true;
42950      return NULL_TREE;
42951    }
42952  if (TARGET_64BIT)
42953    {
42954      warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
42955	       name);
42956      *no_add_attrs = true;
42957      return NULL_TREE;
42958    }
42959  if (is_attribute_p ("callee_pop_aggregate_return", name))
42960    {
42961      tree cst;
42962
42963      cst = TREE_VALUE (args);
42964      if (TREE_CODE (cst) != INTEGER_CST)
42965	{
42966	  warning (OPT_Wattributes,
42967		   "%qE attribute requires an integer constant argument",
42968		   name);
42969	  *no_add_attrs = true;
42970	}
42971      else if (compare_tree_int (cst, 0) != 0
42972	       && compare_tree_int (cst, 1) != 0)
42973	{
42974	  warning (OPT_Wattributes,
42975		   "argument to %qE attribute is neither zero, nor one",
42976		   name);
42977	  *no_add_attrs = true;
42978	}
42979
42980      return NULL_TREE;
42981    }
42982
42983  return NULL_TREE;
42984}
42985
42986/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
42987   struct attribute_spec.handler.  */
42988static tree
42989ix86_handle_abi_attribute (tree *node, tree name, tree, int,
42990			   bool *no_add_attrs)
42991{
42992  if (TREE_CODE (*node) != FUNCTION_TYPE
42993      && TREE_CODE (*node) != METHOD_TYPE
42994      && TREE_CODE (*node) != FIELD_DECL
42995      && TREE_CODE (*node) != TYPE_DECL)
42996    {
42997      warning (OPT_Wattributes, "%qE attribute only applies to functions",
42998	       name);
42999      *no_add_attrs = true;
43000      return NULL_TREE;
43001    }
43002
43003  /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
43004  if (is_attribute_p ("ms_abi", name))
43005    {
43006      if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
43007        {
43008	  error ("ms_abi and sysv_abi attributes are not compatible");
43009	}
43010
43011      return NULL_TREE;
43012    }
43013  else if (is_attribute_p ("sysv_abi", name))
43014    {
43015      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
43016        {
43017	  error ("ms_abi and sysv_abi attributes are not compatible");
43018	}
43019
43020      return NULL_TREE;
43021    }
43022
43023  return NULL_TREE;
43024}
43025
43026/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
43027   struct attribute_spec.handler.  */
43028static tree
43029ix86_handle_struct_attribute (tree *node, tree name, tree, int,
43030			      bool *no_add_attrs)
43031{
43032  tree *type = NULL;
43033  if (DECL_P (*node))
43034    {
43035      if (TREE_CODE (*node) == TYPE_DECL)
43036	type = &TREE_TYPE (*node);
43037    }
43038  else
43039    type = node;
43040
43041  if (!(type && RECORD_OR_UNION_TYPE_P (*type)))
43042    {
43043      warning (OPT_Wattributes, "%qE attribute ignored",
43044	       name);
43045      *no_add_attrs = true;
43046    }
43047
43048  else if ((is_attribute_p ("ms_struct", name)
43049	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
43050	   || ((is_attribute_p ("gcc_struct", name)
43051		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
43052    {
43053      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
43054               name);
43055      *no_add_attrs = true;
43056    }
43057
43058  return NULL_TREE;
43059}
43060
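/* Handle an attribute that applies only to function declarations;
   arguments as in struct attribute_spec.handler.  */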
43061static tree
43062ix86_handle_fndecl_attribute (tree *node, tree name, tree, int,
43063			      bool *no_add_attrs)
43064{
43065  if (TREE_CODE (*node) != FUNCTION_DECL)
43066    {
43067      warning (OPT_Wattributes, "%qE attribute only applies to functions",
43068               name);
43069      *no_add_attrs = true;
43070    }
43071  return NULL_TREE;
43072}
43073
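/* Return true if bit-fields in RECORD_TYPE should be laid out following
   the MS bit-field layout rules.  */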
43074static bool
43075ix86_ms_bitfield_layout_p (const_tree record_type)
43076{
43077  return ((TARGET_MS_BITFIELD_LAYOUT
43078	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
43079          || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
43080}
43081
43082/* Return an expression indicating where the "this" parameter is
43083   located on entry to FUNCTION.  */
43084
43085static rtx
43086x86_this_parameter (tree function)
43087{
43088  tree type = TREE_TYPE (function);
43089  bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
43090  int nregs;
43091
43092  if (TARGET_64BIT)
43093    {
43094      const int *parm_regs;
43095
43096      if (ix86_function_type_abi (type) == MS_ABI)
43097        parm_regs = x86_64_ms_abi_int_parameter_registers;
43098      else
43099        parm_regs = x86_64_int_parameter_registers;
43100      return gen_rtx_REG (Pmode, parm_regs[aggr]);
43101    }
43102
43103  nregs = ix86_function_regparm (type, function);
43104
43105  if (nregs > 0 && !stdarg_p (type))
43106    {
43107      int regno;
43108      unsigned int ccvt = ix86_get_callcvt (type);
43109
43110      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43111	regno = aggr ? DX_REG : CX_REG;
43112      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43113        {
43114	  regno = CX_REG;
43115	  if (aggr)
43116	    return gen_rtx_MEM (SImode,
43117				plus_constant (Pmode, stack_pointer_rtx, 4));
43118	}
43119      else
43120        {
43121	  regno = AX_REG;
43122	  if (aggr)
43123	    {
43124	      regno = DX_REG;
43125	      if (nregs == 1)
43126		return gen_rtx_MEM (SImode,
43127				    plus_constant (Pmode,
43128						   stack_pointer_rtx, 4));
43129	    }
43130	}
43131      return gen_rtx_REG (SImode, regno);
43132    }
43133
43134  return gen_rtx_MEM (SImode, plus_constant (Pmode, stack_pointer_rtx,
43135					     aggr ? 8 : 4));
43136}
43137
43138/* Determine whether x86_output_mi_thunk can succeed.  */
43139
43140static bool
43141x86_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
43142			 const_tree function)
43143{
43144  /* 64-bit can handle anything.  */
43145  if (TARGET_64BIT)
43146    return true;
43147
43148  /* For 32-bit, everything's fine if we have one free register.  */
43149  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
43150    return true;
43151
43152  /* Need a free register for vcall_offset.  */
43153  if (vcall_offset)
43154    return false;
43155
43156  /* Need a free register for GOT references.  */
43157  if (flag_pic && !targetm.binds_local_p (function))
43158    return false;
43159
43160  /* Otherwise ok.  */
43161  return true;
43162}
43163
43164/* Output the assembler code for a thunk function.  THUNK_DECL is the
43165   declaration for the thunk function itself, FUNCTION is the decl for
43166   the target function.  DELTA is an immediate constant offset to be
43167   added to THIS.  If VCALL_OFFSET is nonzero, the word at
43168   *(*this + vcall_offset) should be added to THIS.  */
43169
43170static void
43171x86_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
43172		     HOST_WIDE_INT vcall_offset, tree function)
43173{
43174  rtx this_param = x86_this_parameter (function);
43175  rtx this_reg, tmp, fnaddr;
43176  unsigned int tmp_regno;
43177  rtx_insn *insn;
43178
43179  if (TARGET_64BIT)
43180    tmp_regno = R10_REG;
43181  else
43182    {
43183      unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (function));
43184      if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
43185	tmp_regno = AX_REG;
43186      else if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
43187	tmp_regno = DX_REG;
43188      else
43189	tmp_regno = CX_REG;
43190    }
43191
43192  emit_note (NOTE_INSN_PROLOGUE_END);
43193
43194  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
43195     pull it in now and let DELTA benefit.  */
43196  if (REG_P (this_param))
43197    this_reg = this_param;
43198  else if (vcall_offset)
43199    {
43200      /* Put the this parameter into %eax.  */
43201      this_reg = gen_rtx_REG (Pmode, AX_REG);
43202      emit_move_insn (this_reg, this_param);
43203    }
43204  else
43205    this_reg = NULL_RTX;
43206
43207  /* Adjust the this parameter by a fixed constant.  */
43208  if (delta)
43209    {
43210      rtx delta_rtx = GEN_INT (delta);
43211      rtx delta_dst = this_reg ? this_reg : this_param;
43212
43213      if (TARGET_64BIT)
43214	{
43215	  if (!x86_64_general_operand (delta_rtx, Pmode))
43216	    {
43217	      tmp = gen_rtx_REG (Pmode, tmp_regno);
43218	      emit_move_insn (tmp, delta_rtx);
43219	      delta_rtx = tmp;
43220	    }
43221	}
43222
43223      ix86_emit_binop (PLUS, Pmode, delta_dst, delta_rtx);
43224    }
43225
43226  /* Adjust the this parameter by a value stored in the vtable.  */
43227  if (vcall_offset)
43228    {
43229      rtx vcall_addr, vcall_mem, this_mem;
43230
43231      tmp = gen_rtx_REG (Pmode, tmp_regno);
43232
43233      this_mem = gen_rtx_MEM (ptr_mode, this_reg);
43234      if (Pmode != ptr_mode)
43235	this_mem = gen_rtx_ZERO_EXTEND (Pmode, this_mem);
43236      emit_move_insn (tmp, this_mem);
43237
43238      /* Adjust the this parameter.  */
43239      vcall_addr = plus_constant (Pmode, tmp, vcall_offset);
43240      if (TARGET_64BIT
43241	  && !ix86_legitimate_address_p (ptr_mode, vcall_addr, true))
43242	{
43243	  rtx tmp2 = gen_rtx_REG (Pmode, R11_REG);
43244	  emit_move_insn (tmp2, GEN_INT (vcall_offset));
43245	  vcall_addr = gen_rtx_PLUS (Pmode, tmp, tmp2);
43246	}
43247
43248      vcall_mem = gen_rtx_MEM (ptr_mode, vcall_addr);
43249      if (Pmode != ptr_mode)
43250	emit_insn (gen_addsi_1_zext (this_reg,
43251				     gen_rtx_REG (ptr_mode,
43252						  REGNO (this_reg)),
43253				     vcall_mem));
43254      else
43255	ix86_emit_binop (PLUS, Pmode, this_reg, vcall_mem);
43256    }
43257
43258  /* If necessary, drop THIS back to its stack slot.  */
43259  if (this_reg && this_reg != this_param)
43260    emit_move_insn (this_param, this_reg);
43261
43262  fnaddr = XEXP (DECL_RTL (function), 0);
43263  if (TARGET_64BIT)
43264    {
43265      if (!flag_pic || targetm.binds_local_p (function)
43266	  || TARGET_PECOFF)
43267	;
43268      else
43269	{
43270	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOTPCREL);
43271	  tmp = gen_rtx_CONST (Pmode, tmp);
43272	  fnaddr = gen_const_mem (Pmode, tmp);
43273	}
43274    }
43275  else
43276    {
43277      if (!flag_pic || targetm.binds_local_p (function))
43278	;
43279#if TARGET_MACHO
43280      else if (TARGET_MACHO)
43281	{
43282	  fnaddr = machopic_indirect_call_target (DECL_RTL (function));
43283	  fnaddr = XEXP (fnaddr, 0);
43284	}
43285#endif /* TARGET_MACHO */
43286      else
43287	{
43288	  tmp = gen_rtx_REG (Pmode, CX_REG);
43289	  output_set_got (tmp, NULL_RTX);
43290
43291	  fnaddr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, fnaddr), UNSPEC_GOT);
43292	  fnaddr = gen_rtx_CONST (Pmode, fnaddr);
43293	  fnaddr = gen_rtx_PLUS (Pmode, tmp, fnaddr);
43294	  fnaddr = gen_const_mem (Pmode, fnaddr);
43295	}
43296    }
43297
43298  /* Our sibling call patterns do not allow memories, because we have no
43299     predicate that can distinguish between frame and non-frame memory.
43300     For our purposes here, we can get away with (ab)using a jump pattern,
43301     because we're going to do no optimization.  */
43302  if (MEM_P (fnaddr))
43303    {
43304      if (sibcall_insn_operand (fnaddr, word_mode))
43305	{
43306	  fnaddr = XEXP (DECL_RTL (function), 0);
43307	  tmp = gen_rtx_MEM (QImode, fnaddr);
43308	  tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43309	  tmp = emit_call_insn (tmp);
43310	  SIBLING_CALL_P (tmp) = 1;
43311	}
43312      else
43313	emit_jump_insn (gen_indirect_jump (fnaddr));
43314    }
43315  else
43316    {
43317      if (ix86_cmodel == CM_LARGE_PIC && SYMBOLIC_CONST (fnaddr))
43318	{
43319	  /* CM_LARGE_PIC always uses pseudo PIC register which is
43320	     uninitialized.  Since FUNCTION is local and calling it
43321	     doesn't go through PLT, we use scratch register %r11 as
43322	     PIC register and initialize it here.  */
43323	  pic_offset_table_rtx = gen_rtx_REG (Pmode, R11_REG);
43324	  ix86_init_large_pic_reg (tmp_regno);
43325	  fnaddr = legitimize_pic_address (fnaddr,
43326					   gen_rtx_REG (Pmode, tmp_regno));
43327	}
43328
43329      if (!sibcall_insn_operand (fnaddr, word_mode))
43330	{
43331	  tmp = gen_rtx_REG (word_mode, tmp_regno);
43332	  if (GET_MODE (fnaddr) != word_mode)
43333	    fnaddr = gen_rtx_ZERO_EXTEND (word_mode, fnaddr);
43334	  emit_move_insn (tmp, fnaddr);
43335	  fnaddr = tmp;
43336	}
43337
43338      tmp = gen_rtx_MEM (QImode, fnaddr);
43339      tmp = gen_rtx_CALL (VOIDmode, tmp, const0_rtx);
43340      tmp = emit_call_insn (tmp);
43341      SIBLING_CALL_P (tmp) = 1;
43342    }
43343  emit_barrier ();
43344
43345  /* Emit just enough of rest_of_compilation to get the insns emitted.
43346     Note that use_thunk calls assemble_start_function et al.  */
43347  insn = get_insns ();
43348  shorten_branches (insn);
43349  final_start_function (insn, file, 1);
43350  final (insn, file, 1);
43351  final_end_function ();
43352}
43353
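/* Output anything needed at the start of an assembler file.  */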
43354static void
43355x86_file_start (void)
43356{
43357  default_file_start ();
43358  if (TARGET_16BIT)
43359    fputs ("\t.code16gcc\n", asm_out_file);
43360#if TARGET_MACHO
43361  darwin_file_start ();
43362#endif
43363  if (X86_FILE_START_VERSION_DIRECTIVE)
43364    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
43365  if (X86_FILE_START_FLTUSED)
43366    fputs ("\t.global\t__fltused\n", asm_out_file);
43367  if (ix86_asm_dialect == ASM_INTEL)
43368    fputs ("\t.intel_syntax noprefix\n", asm_out_file);
43369}
43370
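/* Compute the alignment for FIELD, given the alignment COMPUTED by the
   front end.  On 32-bit targets without -malign-double, cap the alignment
   of DFmode, DCmode and integer fields at 32 bits.  */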
43371int
43372x86_field_alignment (tree field, int computed)
43373{
43374  machine_mode mode;
43375  tree type = TREE_TYPE (field);
43376
43377  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
43378    return computed;
43379  mode = TYPE_MODE (strip_array_types (type));
43380  if (mode == DFmode || mode == DCmode
43381      || GET_MODE_CLASS (mode) == MODE_INT
43382      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
43383    return MIN (32, computed);
43384  return computed;
43385}
43386
43387/* Print call to TARGET to FILE.  */
43388
43389static void
43390x86_print_call_or_nop (FILE *file, const char *target)
43391{
43392  if (flag_nop_mcount)
43393    fprintf (file, "1:\tnopl 0x00(%%eax,%%eax,1)\n"); /* 5 byte nop.  */
43394  else
43395    fprintf (file, "1:\tcall\t%s\n", target);
43396}
43397
43398/* Output assembler code to FILE to increment profiler label # LABELNO
43399   for profiling a function entry.  */
43400void
43401x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
43402{
43403  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
43404					 : MCOUNT_NAME);
43405  if (TARGET_64BIT)
43406    {
43407#ifndef NO_PROFILE_COUNTERS
43408      fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
43409#endif
43410
43411      if (!TARGET_PECOFF && flag_pic)
43412	fprintf (file, "1:\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
43413      else
43414	x86_print_call_or_nop (file, mcount_name);
43415    }
43416  else if (flag_pic)
43417    {
43418#ifndef NO_PROFILE_COUNTERS
43419      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
43420	       LPREFIX, labelno);
43421#endif
43422      fprintf (file, "1:\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
43423    }
43424  else
43425    {
43426#ifndef NO_PROFILE_COUNTERS
43427      fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
43428	       LPREFIX, labelno);
43429#endif
43430      x86_print_call_or_nop (file, mcount_name);
43431    }
43432
43433  if (flag_record_mcount)
43434    {
43435      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
43436      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
43437      fprintf (file, "\t.previous\n");
43438    }
43439}
43440
43441/* We don't have exact information about insn sizes, but we may quite
43442   safely assume that we know about all 1-byte insns and memory
43443   address sizes.  This is enough to eliminate unnecessary padding in
43444   99% of cases.  */
43445
43446static int
43447min_insn_size (rtx_insn *insn)
43448{
43449  int l = 0, len;
43450
43451  if (!INSN_P (insn) || !active_insn_p (insn))
43452    return 0;
43453
43454  /* Discard alignments we've emitted.  */
43455  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
43456      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
43457    return 0;
43458
43459  /* Important case - calls are always 5 bytes.
43460     It is common to have many calls in a row.  */
43461  if (CALL_P (insn)
43462      && symbolic_reference_mentioned_p (PATTERN (insn))
43463      && !SIBLING_CALL_P (insn))
43464    return 5;
43465  len = get_attr_length (insn);
43466  if (len <= 1)
43467    return 1;
43468
43469  /* For normal instructions we rely on get_attr_length being exact,
43470     with a few exceptions.  */
43471  if (!JUMP_P (insn))
43472    {
43473      enum attr_type type = get_attr_type (insn);
43474
43475      switch (type)
43476	{
43477	case TYPE_MULTI:
43478	  if (GET_CODE (PATTERN (insn)) == ASM_INPUT
43479	      || asm_noperands (PATTERN (insn)) >= 0)
43480	    return 0;
43481	  break;
43482	case TYPE_OTHER:
43483	case TYPE_FCMP:
43484	  break;
43485	default:
43486	  /* Otherwise trust get_attr_length.  */
43487	  return len;
43488	}
43489
43490      l = get_attr_length_address (insn);
43491      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
43492	l = 4;
43493    }
43494  if (l)
43495    return 1+l;
43496  else
43497    return 2;
43498}
43499
43500#ifdef ASM_OUTPUT_MAX_SKIP_PAD
43501
43502/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
43503   window.  */
43504
43505static void
43506ix86_avoid_jump_mispredicts (void)
43507{
43508  rtx_insn *insn, *start = get_insns ();
43509  int nbytes = 0, njumps = 0;
43510  bool isjump = false;
43511
43512  /* Look for all minimal intervals of instructions containing 4 jumps.
43513     The intervals are bounded by START and INSN.  NBYTES is the total
43514     size of instructions in the interval including INSN and not including
43515     START.  When the NBYTES is smaller than 16 bytes, it is possible
43516     that the end of START and INSN ends up in the same 16byte page.
43517
43518     The smallest offset in the page INSN can start is the case where START
43519     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
43520     We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
43521
43522     Don't consider an asm goto as a jump; while it can contain a jump, it
43523     doesn't have to - control can transfer to its label(s) by other means,
43524     and we also estimate the minimum length of all asm stmts as 0.  */
43525  for (insn = start; insn; insn = NEXT_INSN (insn))
43526    {
43527      int min_size;
43528
43529      if (LABEL_P (insn))
43530	{
43531	  int align = label_to_alignment (insn);
43532	  int max_skip = label_to_max_skip (insn);
43533
43534	  if (max_skip > 15)
43535	    max_skip = 15;
43536	  /* If align > 3, only up to 16 - max_skip - 1 bytes can be
43537	     already in the current 16 byte page, because otherwise
43538	     ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
43539	     bytes to reach 16 byte boundary.  */
43540	  if (align <= 0
43541	      || (align <= 3 && max_skip != (1 << align) - 1))
43542	    max_skip = 0;
43543	  if (dump_file)
43544	    fprintf (dump_file, "Label %i with max_skip %i\n",
43545		     INSN_UID (insn), max_skip);
43546	  if (max_skip)
43547	    {
43548	      while (nbytes + max_skip >= 16)
43549		{
43550		  start = NEXT_INSN (start);
43551		  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43552		      || CALL_P (start))
43553		    njumps--, isjump = true;
43554		  else
43555		    isjump = false;
43556		  nbytes -= min_insn_size (start);
43557		}
43558	    }
43559	  continue;
43560	}
43561
43562      min_size = min_insn_size (insn);
43563      nbytes += min_size;
43564      if (dump_file)
43565	fprintf (dump_file, "Insn %i estimated to %i bytes\n",
43566		 INSN_UID (insn), min_size);
43567      if ((JUMP_P (insn) && asm_noperands (PATTERN (insn)) < 0)
43568	  || CALL_P (insn))
43569	njumps++;
43570      else
43571	continue;
43572
43573      while (njumps > 3)
43574	{
43575	  start = NEXT_INSN (start);
43576	  if ((JUMP_P (start) && asm_noperands (PATTERN (start)) < 0)
43577	      || CALL_P (start))
43578	    njumps--, isjump = true;
43579	  else
43580	    isjump = false;
43581	  nbytes -= min_insn_size (start);
43582	}
43583      gcc_assert (njumps >= 0);
43584      if (dump_file)
43585        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
43586		 INSN_UID (start), INSN_UID (insn), nbytes);
43587
43588      if (njumps == 3 && isjump && nbytes < 16)
43589	{
43590	  int padsize = 15 - nbytes + min_insn_size (insn);
43591
43592	  if (dump_file)
43593	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
43594		     INSN_UID (insn), padsize);
43595          emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
43596	}
43597    }
43598}
43599#endif
43600
43601/* AMD Athlon works faster
43602   when RET is not the destination of a conditional jump or directly preceded
43603   by another jump instruction.  We avoid the penalty by inserting a NOP just
43604   before the RET instructions in such cases.  */
43605static void
43606ix86_pad_returns (void)
43607{
43608  edge e;
43609  edge_iterator ei;
43610
43611  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43612    {
43613      basic_block bb = e->src;
43614      rtx_insn *ret = BB_END (bb);
43615      rtx_insn *prev;
43616      bool replace = false;
43617
43618      if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
43619	  || optimize_bb_for_size_p (bb))
43620	continue;
43621      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
43622	if (active_insn_p (prev) || LABEL_P (prev))
43623	  break;
43624      if (prev && LABEL_P (prev))
43625	{
43626	  edge e;
43627	  edge_iterator ei;
43628
43629	  FOR_EACH_EDGE (e, ei, bb->preds)
43630	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
43631		&& !(e->flags & EDGE_FALLTHRU))
43632	      {
43633		replace = true;
43634		break;
43635	      }
43636	}
43637      if (!replace)
43638	{
43639	  prev = prev_active_insn (ret);
43640	  if (prev
43641	      && ((JUMP_P (prev) && any_condjump_p (prev))
43642		  || CALL_P (prev)))
43643	    replace = true;
43644	  /* Empty functions get branch mispredict even when
43645	     the jump destination is not visible to us.  */
43646	  if (!prev && !optimize_function_for_size_p (cfun))
43647	    replace = true;
43648	}
43649      if (replace)
43650	{
43651	  emit_jump_insn_before (gen_simple_return_internal_long (), ret);
43652	  delete_insn (ret);
43653	}
43654    }
43655}
43656
43657/* Count the minimum number of instructions in BB.  Return 4 if the
43658   number of instructions >= 4.  */
43659
43660static int
43661ix86_count_insn_bb (basic_block bb)
43662{
43663  rtx_insn *insn;
43664  int insn_count = 0;
43665
43666  /* Count number of instructions in this block.  Return 4 if the number
43667     of instructions >= 4.  */
43668  FOR_BB_INSNS (bb, insn)
43669    {
43670      /* Return insns only happen in exit blocks.  */
43671      if (JUMP_P (insn)
43672	  && ANY_RETURN_P (PATTERN (insn)))
43673	break;
43674
43675      if (NONDEBUG_INSN_P (insn)
43676	  && GET_CODE (PATTERN (insn)) != USE
43677	  && GET_CODE (PATTERN (insn)) != CLOBBER)
43678	{
43679	  insn_count++;
43680	  if (insn_count >= 4)
43681	    return insn_count;
43682	}
43683    }
43684
43685  return insn_count;
43686}
43687
43688
43689/* Count the minimum number of instructions in code path in BB.
43690   Return 4 if the number of instructions >= 4.  */
43691
43692static int
43693ix86_count_insn (basic_block bb)
43694{
43695  edge e;
43696  edge_iterator ei;
43697  int min_prev_count;
43698
43699  /* Only bother counting instructions along paths with no
43700     more than 2 basic blocks between entry and exit.  Given
43701     that BB has an edge to exit, determine if a predecessor
43702     of BB has an edge from entry.  If so, compute the number
43703     of instructions in the predecessor block.  If there
43704     happen to be multiple such blocks, compute the minimum.  */
43705  min_prev_count = 4;
43706  FOR_EACH_EDGE (e, ei, bb->preds)
43707    {
43708      edge prev_e;
43709      edge_iterator prev_ei;
43710
43711      if (e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43712	{
43713	  min_prev_count = 0;
43714	  break;
43715	}
43716      FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds)
43717	{
43718	  if (prev_e->src == ENTRY_BLOCK_PTR_FOR_FN (cfun))
43719	    {
43720	      int count = ix86_count_insn_bb (e->src);
43721	      if (count < min_prev_count)
43722		min_prev_count = count;
43723	      break;
43724	    }
43725	}
43726    }
43727
43728  if (min_prev_count < 4)
43729    min_prev_count += ix86_count_insn_bb (bb);
43730
43731  return min_prev_count;
43732}
43733
43734/* Pad short functions to 4 instructions.  */
43735
43736static void
43737ix86_pad_short_function (void)
43738{
43739  edge e;
43740  edge_iterator ei;
43741
43742  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43743    {
43744      rtx_insn *ret = BB_END (e->src);
43745      if (JUMP_P (ret) && ANY_RETURN_P (PATTERN (ret)))
43746	{
43747	  int insn_count = ix86_count_insn (e->src);
43748
43749	  /* Pad short function.  */
43750	  if (insn_count < 4)
43751	    {
43752	      rtx_insn *insn = ret;
43753
43754	      /* Find epilogue.  */
43755	      while (insn
43756		     && (!NOTE_P (insn)
43757			 || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG))
43758		insn = PREV_INSN (insn);
43759
43760	      if (!insn)
43761		insn = ret;
43762
43763	      /* Two NOPs count as one instruction.  */
43764	      insn_count = 2 * (4 - insn_count);
43765	      emit_insn_before (gen_nops (GEN_INT (insn_count)), insn);
43766	    }
43767	}
43768    }
43769}
43770
43771/* Fix up a Windows system unwinder issue.  If an EH region falls through into
43772   the epilogue, the Windows system unwinder will apply epilogue logic and
43773   produce incorrect offsets.  This can be avoided by adding a nop between
43774   the last insn that can throw and the first insn of the epilogue.  */
43775
43776static void
43777ix86_seh_fixup_eh_fallthru (void)
43778{
43779  edge e;
43780  edge_iterator ei;
43781
43782  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
43783    {
43784      rtx_insn *insn, *next;
43785
43786      /* Find the beginning of the epilogue.  */
43787      for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
43788	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
43789	  break;
43790      if (insn == NULL)
43791	continue;
43792
43793      /* We only care about preceding insns that can throw.  */
43794      insn = prev_active_insn (insn);
43795      if (insn == NULL || !can_throw_internal (insn))
43796	continue;
43797
43798      /* Do not separate calls from their debug information.  */
43799      for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
43800	if (NOTE_P (next)
43801            && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
43802                || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
43803	  insn = next;
43804	else
43805	  break;
43806
43807      emit_insn_after (gen_nops (const1_rtx), insn);
43808    }
43809}
43810
43811/* Implement machine specific optimizations.  We implement padding of returns
43812   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
43813static void
43814ix86_reorg (void)
43815{
43816  /* We are freeing block_for_insn in the toplev to keep compatibility
43817     with old MDEP_REORGS that are not CFG based.  Recompute it now.  */
43818  compute_bb_for_insn ();
43819
43820  if (TARGET_SEH && current_function_has_exception_handlers ())
43821    ix86_seh_fixup_eh_fallthru ();
43822
43823  if (optimize && optimize_function_for_speed_p (cfun))
43824    {
43825      if (TARGET_PAD_SHORT_FUNCTION)
43826	ix86_pad_short_function ();
43827      else if (TARGET_PAD_RETURNS)
43828	ix86_pad_returns ();
43829#ifdef ASM_OUTPUT_MAX_SKIP_PAD
43830      if (TARGET_FOUR_JUMP_LIMIT)
43831	ix86_avoid_jump_mispredicts ();
43832#endif
43833    }
43834}
43835
43836/* Return nonzero when a QImode register that must be represented via a REX
43837   prefix is used.  */
43838bool
43839x86_extended_QIreg_mentioned_p (rtx_insn *insn)
43840{
43841  int i;
43842  extract_insn_cached (insn);
43843  for (i = 0; i < recog_data.n_operands; i++)
43844    if (GENERAL_REG_P (recog_data.operand[i])
43845	&& !QI_REGNO_P (REGNO (recog_data.operand[i])))
43846       return true;
43847  return false;
43848}
43849
43850/* Return true when INSN mentions register that must be encoded using REX
43851   prefix.  */
43852bool
43853x86_extended_reg_mentioned_p (rtx insn)
43854{
43855  subrtx_iterator::array_type array;
43856  FOR_EACH_SUBRTX (iter, array, INSN_P (insn) ? PATTERN (insn) : insn, NONCONST)
43857    {
43858      const_rtx x = *iter;
43859      if (REG_P (x)
43860	  && (REX_INT_REGNO_P (REGNO (x)) || REX_SSE_REGNO_P (REGNO (x))))
43861	return true;
43862    }
43863  return false;
43864}
43865
43866/* If profitable, negate (without causing overflow) integer constant
43867   of mode MODE at location LOC.  Return true in this case.  */
43868bool
43869x86_maybe_negate_const_int (rtx *loc, machine_mode mode)
43870{
43871  HOST_WIDE_INT val;
43872
43873  if (!CONST_INT_P (*loc))
43874    return false;
43875
43876  switch (mode)
43877    {
43878    case DImode:
43879      /* DImode x86_64 constants must fit in 32 bits.  */
43880      gcc_assert (x86_64_immediate_operand (*loc, mode));
43881
43882      mode = SImode;
43883      break;
43884
43885    case SImode:
43886    case HImode:
43887    case QImode:
43888      break;
43889
43890    default:
43891      gcc_unreachable ();
43892    }
43893
43894  /* Avoid overflows.  */
43895  if (mode_signbit_p (mode, *loc))
43896    return false;
43897
43898  val = INTVAL (*loc);
43899
43900  /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
43901     Exceptions: -128 encodes smaller than 128, so swap sign and op.  */
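  /* For example, an add of 128 is rewritten as a subtract of -128, since
     -128 fits in a sign-extended 8-bit immediate while +128 does not.  */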
43902  if ((val < 0 && val != -128)
43903      || val == 128)
43904    {
43905      *loc = GEN_INT (-val);
43906      return true;
43907    }
43908
43909  return false;
43910}
43911
43912/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
43913   optabs would emit if we didn't have TFmode patterns.  */
43914
43915void
43916x86_emit_floatuns (rtx operands[2])
43917{
43918  rtx_code_label *neglab, *donelab;
43919  rtx i0, i1, f0, in, out;
43920  machine_mode mode, inmode;
43921
43922  inmode = GET_MODE (operands[1]);
43923  gcc_assert (inmode == SImode || inmode == DImode);
43924
43925  out = operands[0];
43926  in = force_reg (inmode, operands[1]);
43927  mode = GET_MODE (out);
43928  neglab = gen_label_rtx ();
43929  donelab = gen_label_rtx ();
43930  f0 = gen_reg_rtx (mode);
43931
43932  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
43933
43934  expand_float (out, in, 0);
43935
43936  emit_jump_insn (gen_jump (donelab));
43937  emit_barrier ();
43938
43939  emit_label (neglab);
43940
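  /* IN has its sign bit set, so it cannot be converted directly as a
     signed value.  Convert IN >> 1 instead, with the dropped low bit
     ORed back in as a sticky bit so the final doubling rounds correctly,
     and then double the result.  */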
43941  i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
43942			    1, OPTAB_DIRECT);
43943  i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
43944			    1, OPTAB_DIRECT);
43945  i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
43946
43947  expand_float (f0, i0, 0);
43948
43949  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
43950
43951  emit_label (donelab);
43952}
43953
43954static bool canonicalize_perm (struct expand_vec_perm_d *d);
43955static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
43956static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
43957static bool expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool);
43958
43959/* Get a vector mode of the same size as the original but with elements
43960   twice as wide.  This is only guaranteed to apply to integral vectors.  */
43961
43962static inline machine_mode
43963get_mode_wider_vector (machine_mode o)
43964{
43965  /* ??? Rely on the ordering that genmodes.c gives to vectors.  */
43966  machine_mode n = GET_MODE_WIDER_MODE (o);
43967  gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
43968  gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
43969  return n;
43970}
43971
43972/* A subroutine of ix86_expand_vector_init_duplicate.  Tries to
43973   fill target with val via vec_duplicate.  */
43974
43975static bool
43976ix86_vector_duplicate_value (machine_mode mode, rtx target, rtx val)
43977{
43978  bool ok;
43979  rtx_insn *insn;
43980  rtx dup;
43981
43982  /* First attempt to recognize VAL as-is.  */
43983  dup = gen_rtx_VEC_DUPLICATE (mode, val);
43984  insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
43985  if (recog_memoized (insn) < 0)
43986    {
43987      rtx_insn *seq;
43988      /* If that fails, force VAL into a register.  */
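      /* The SET emitted above still refers to DUP, so overwriting
	 XEXP (dup, 0) below redirects that insn to the new register;
	 the register-load sequence is then emitted just before it.  */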
43989
43990      start_sequence ();
43991      XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
43992      seq = get_insns ();
43993      end_sequence ();
43994      if (seq)
43995	emit_insn_before (seq, insn);
43996
43997      ok = recog_memoized (insn) >= 0;
43998      gcc_assert (ok);
43999    }
44000  return true;
44001}
44002
44003/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44004   with all elements equal to VAL.  Return true if successful.  */
44005
44006static bool
44007ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
44008				   rtx target, rtx val)
44009{
44010  bool ok;
44011
44012  switch (mode)
44013    {
44014    case V2SImode:
44015    case V2SFmode:
44016      if (!mmx_ok)
44017	return false;
44018      /* FALLTHRU */
44019
44020    case V4DFmode:
44021    case V4DImode:
44022    case V8SFmode:
44023    case V8SImode:
44024    case V2DFmode:
44025    case V2DImode:
44026    case V4SFmode:
44027    case V4SImode:
44028    case V16SImode:
44029    case V8DImode:
44030    case V16SFmode:
44031    case V8DFmode:
44032      return ix86_vector_duplicate_value (mode, target, val);
44033
44034    case V4HImode:
44035      if (!mmx_ok)
44036	return false;
44037      if (TARGET_SSE || TARGET_3DNOW_A)
44038	{
44039	  rtx x;
44040
44041	  val = gen_lowpart (SImode, val);
44042	  x = gen_rtx_TRUNCATE (HImode, val);
44043	  x = gen_rtx_VEC_DUPLICATE (mode, x);
44044	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44045	  return true;
44046	}
44047      goto widen;
44048
44049    case V8QImode:
44050      if (!mmx_ok)
44051	return false;
44052      goto widen;
44053
44054    case V8HImode:
44055      if (TARGET_AVX2)
44056	return ix86_vector_duplicate_value (mode, target, val);
44057
44058      if (TARGET_SSE2)
44059	{
44060	  struct expand_vec_perm_d dperm;
44061	  rtx tmp1, tmp2;
44062
44063	permute:
44064	  memset (&dperm, 0, sizeof (dperm));
44065	  dperm.target = target;
44066	  dperm.vmode = mode;
44067	  dperm.nelt = GET_MODE_NUNITS (mode);
44068	  dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
44069	  dperm.one_operand_p = true;
44070
44071	  /* Extend to SImode using a paradoxical SUBREG.  */
44072	  tmp1 = gen_reg_rtx (SImode);
44073	  emit_move_insn (tmp1, gen_lowpart (SImode, val));
44074
44075	  /* Insert the SImode value as the low element of a V4SImode vector.  */
44076	  tmp2 = gen_reg_rtx (V4SImode);
44077	  emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
44078	  emit_move_insn (dperm.op0, gen_lowpart (mode, tmp2));
44079
44080	  ok = (expand_vec_perm_1 (&dperm)
44081		|| expand_vec_perm_broadcast_1 (&dperm));
44082	  gcc_assert (ok);
44083	  return ok;
44084	}
44085      goto widen;
44086
44087    case V16QImode:
44088      if (TARGET_AVX2)
44089	return ix86_vector_duplicate_value (mode, target, val);
44090
44091      if (TARGET_SSE2)
44092	goto permute;
44093      goto widen;
44094
44095    widen:
44096      /* Replicate the value once into the next wider mode and recurse.  */
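      /* For example, broadcasting the QImode value V across V8QImode becomes
	 broadcasting (V << 8) | V across V4HImode.  */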
44097      {
44098	machine_mode smode, wsmode, wvmode;
44099	rtx x;
44100
44101	smode = GET_MODE_INNER (mode);
44102	wvmode = get_mode_wider_vector (mode);
44103	wsmode = GET_MODE_INNER (wvmode);
44104
44105	val = convert_modes (wsmode, smode, val, true);
44106	x = expand_simple_binop (wsmode, ASHIFT, val,
44107				 GEN_INT (GET_MODE_BITSIZE (smode)),
44108				 NULL_RTX, 1, OPTAB_LIB_WIDEN);
44109	val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
44110
44111	x = gen_reg_rtx (wvmode);
44112	ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
44113	gcc_assert (ok);
44114	emit_move_insn (target, gen_lowpart (GET_MODE (target), x));
44115	return ok;
44116      }
44117
44118    case V16HImode:
44119    case V32QImode:
44120      if (TARGET_AVX2)
44121	return ix86_vector_duplicate_value (mode, target, val);
44122      else
44123	{
44124	  machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
44125	  rtx x = gen_reg_rtx (hvmode);
44126
44127	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44128	  gcc_assert (ok);
44129
44130	  x = gen_rtx_VEC_CONCAT (mode, x, x);
44131	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44132	}
44133      return true;
44134
44135    case V64QImode:
44136    case V32HImode:
44137      if (TARGET_AVX512BW)
44138	return ix86_vector_duplicate_value (mode, target, val);
44139      else
44140	{
44141	  machine_mode hvmode = (mode == V32HImode ? V16HImode : V32QImode);
44142	  rtx x = gen_reg_rtx (hvmode);
44143
44144	  ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
44145	  gcc_assert (ok);
44146
44147	  x = gen_rtx_VEC_CONCAT (mode, x, x);
44148	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
44149	}
44150      return true;
44151
44152    default:
44153      return false;
44154    }
44155}
44156
44157/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44158   whose ONE_VAR element is VAR, and other elements are zero.  Return true
44159   if successful.  */
44160
44161static bool
44162ix86_expand_vector_init_one_nonzero (bool mmx_ok, machine_mode mode,
44163				     rtx target, rtx var, int one_var)
44164{
44165  machine_mode vsimode;
44166  rtx new_target;
44167  rtx x, tmp;
44168  bool use_vector_set = false;
44169
44170  switch (mode)
44171    {
44172    case V2DImode:
44173      /* For SSE4.1, we normally use vector set.  But if the second
44174	 element is zero and inter-unit moves are OK, we use movq
44175	 instead.  */
44176      use_vector_set = (TARGET_64BIT && TARGET_SSE4_1
44177			&& !(TARGET_INTER_UNIT_MOVES_TO_VEC
44178			     && one_var == 0));
44179      break;
44180    case V16QImode:
44181    case V4SImode:
44182    case V4SFmode:
44183      use_vector_set = TARGET_SSE4_1;
44184      break;
44185    case V8HImode:
44186      use_vector_set = TARGET_SSE2;
44187      break;
44188    case V4HImode:
44189      use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
44190      break;
44191    case V32QImode:
44192    case V16HImode:
44193    case V8SImode:
44194    case V8SFmode:
44195    case V4DFmode:
44196      use_vector_set = TARGET_AVX;
44197      break;
44198    case V4DImode:
44199      /* Use ix86_expand_vector_set in 64bit mode only.  */
44200      use_vector_set = TARGET_AVX && TARGET_64BIT;
44201      break;
44202    default:
44203      break;
44204    }
44205
44206  if (use_vector_set)
44207    {
44208      emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
44209      var = force_reg (GET_MODE_INNER (mode), var);
44210      ix86_expand_vector_set (mmx_ok, target, var, one_var);
44211      return true;
44212    }
44213
44214  switch (mode)
44215    {
44216    case V2SFmode:
44217    case V2SImode:
44218      if (!mmx_ok)
44219	return false;
44220      /* FALLTHRU */
44221
44222    case V2DFmode:
44223    case V2DImode:
44224      if (one_var != 0)
44225	return false;
44226      var = force_reg (GET_MODE_INNER (mode), var);
44227      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
44228      emit_insn (gen_rtx_SET (VOIDmode, target, x));
44229      return true;
44230
44231    case V4SFmode:
44232    case V4SImode:
44233      if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
44234	new_target = gen_reg_rtx (mode);
44235      else
44236	new_target = target;
44237      var = force_reg (GET_MODE_INNER (mode), var);
44238      x = gen_rtx_VEC_DUPLICATE (mode, var);
44239      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
44240      emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
44241      if (one_var != 0)
44242	{
44243	  /* We need to shuffle the value to the correct position, so
44244	     create a new pseudo to store the intermediate result.  */
44245
44246	  /* With SSE2, we can use the integer shuffle insns.  */
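	  /* At this point the vector holds (X 0 0 0); the shuffle selects
	     element 0 (X) for lane ONE_VAR and element 1 (zero) for every
	     other lane.  */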
44247	  if (mode != V4SFmode && TARGET_SSE2)
44248	    {
44249	      emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
44250					    const1_rtx,
44251					    GEN_INT (one_var == 1 ? 0 : 1),
44252					    GEN_INT (one_var == 2 ? 0 : 1),
44253					    GEN_INT (one_var == 3 ? 0 : 1)));
44254	      if (target != new_target)
44255		emit_move_insn (target, new_target);
44256	      return true;
44257	    }
44258
44259	  /* Otherwise convert the intermediate result to V4SFmode and
44260	     use the SSE1 shuffle instructions.  */
44261	  if (mode != V4SFmode)
44262	    {
44263	      tmp = gen_reg_rtx (V4SFmode);
44264	      emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
44265	    }
44266	  else
44267	    tmp = new_target;
44268
44269	  emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
44270				       const1_rtx,
44271				       GEN_INT (one_var == 1 ? 0 : 1),
44272				       GEN_INT (one_var == 2 ? 0+4 : 1+4),
44273				       GEN_INT (one_var == 3 ? 0+4 : 1+4)));
44274
44275	  if (mode != V4SFmode)
44276	    emit_move_insn (target, gen_lowpart (V4SImode, tmp));
44277	  else if (tmp != target)
44278	    emit_move_insn (target, tmp);
44279	}
44280      else if (target != new_target)
44281	emit_move_insn (target, new_target);
44282      return true;
44283
44284    case V8HImode:
44285    case V16QImode:
44286      vsimode = V4SImode;
44287      goto widen;
44288    case V4HImode:
44289    case V8QImode:
44290      if (!mmx_ok)
44291	return false;
44292      vsimode = V2SImode;
44293      goto widen;
44294    widen:
44295      if (one_var != 0)
44296	return false;
44297
44298      /* Zero extend the variable element to SImode and recurse.  */
44299      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
44300
44301      x = gen_reg_rtx (vsimode);
44302      if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
44303						var, one_var))
44304	gcc_unreachable ();
44305
44306      emit_move_insn (target, gen_lowpart (mode, x));
44307      return true;
44308
44309    default:
44310      return false;
44311    }
44312}
44313
44314/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
44315   consisting of the values in VALS.  It is known that all elements
44316   except ONE_VAR are constants.  Return true if successful.  */
44317
44318static bool
44319ix86_expand_vector_init_one_var (bool mmx_ok, machine_mode mode,
44320				 rtx target, rtx vals, int one_var)
44321{
44322  rtx var = XVECEXP (vals, 0, one_var);
44323  machine_mode wmode;
44324  rtx const_vec, x;
44325
44326  const_vec = copy_rtx (vals);
44327  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
44328  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
44329
44330  switch (mode)
44331    {
44332    case V2DFmode:
44333    case V2DImode:
44334    case V2SFmode:
44335    case V2SImode:
44336      /* For the two element vectors, it's just as easy to use
44337	 the general case.  */
44338      return false;
44339
44340    case V4DImode:
44341      /* Use ix86_expand_vector_set in 64bit mode only.  */
44342      if (!TARGET_64BIT)
44343	return false;
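      /* FALLTHRU */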
44344    case V4DFmode:
44345    case V8SFmode:
44346    case V8SImode:
44347    case V16HImode:
44348    case V32QImode:
44349    case V4SFmode:
44350    case V4SImode:
44351    case V8HImode:
44352    case V4HImode:
44353      break;
44354
44355    case V16QImode:
44356      if (TARGET_SSE4_1)
44357	break;
44358      wmode = V8HImode;
44359      goto widen;
44360    case V8QImode:
44361      wmode = V4HImode;
44362      goto widen;
44363    widen:
44364      /* There's no way to set one QImode entry easily.  Combine
44365	 the variable value with its adjacent constant value, and
44366	 promote to an HImode set.  */
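      /* ONE_VAR ^ 1 is the index of the paired constant element; the
	 variable byte is shifted into the high half of the HImode lane when
	 ONE_VAR is odd, otherwise the constant is shifted there.  */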
44367      x = XVECEXP (vals, 0, one_var ^ 1);
44368      if (one_var & 1)
44369	{
44370	  var = convert_modes (HImode, QImode, var, true);
44371	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
44372				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
44373	  x = GEN_INT (INTVAL (x) & 0xff);
44374	}
44375      else
44376	{
44377	  var = convert_modes (HImode, QImode, var, true);
44378	  x = gen_int_mode (INTVAL (x) << 8, HImode);
44379	}
44380      if (x != const0_rtx)
44381	var = expand_simple_binop (HImode, IOR, var, x, var,
44382				   1, OPTAB_LIB_WIDEN);
44383
44384      x = gen_reg_rtx (wmode);
44385      emit_move_insn (x, gen_lowpart (wmode, const_vec));
44386      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
44387
44388      emit_move_insn (target, gen_lowpart (mode, x));
44389      return true;
44390
44391    default:
44392      return false;
44393    }
44394
44395  emit_move_insn (target, const_vec);
44396  ix86_expand_vector_set (mmx_ok, target, var, one_var);
44397  return true;
44398}
44399
44400/* A subroutine of ix86_expand_vector_init_general.  Use vector
44401   concatenate to handle the most general case: all values variable,
44402   and none identical.  */
44403
44404static void
44405ix86_expand_vector_init_concat (machine_mode mode,
44406				rtx target, rtx *ops, int n)
44407{
44408  machine_mode cmode, hmode = VOIDmode, gmode = VOIDmode;
44409  rtx first[16], second[8], third[4];
44410  rtvec v;
44411  int i, j;
44412
44413  switch (n)
44414    {
44415    case 2:
44416      switch (mode)
44417	{
44418	case V16SImode:
44419	  cmode = V8SImode;
44420	  break;
44421	case V16SFmode:
44422	  cmode = V8SFmode;
44423	  break;
44424	case V8DImode:
44425	  cmode = V4DImode;
44426	  break;
44427	case V8DFmode:
44428	  cmode = V4DFmode;
44429	  break;
44430	case V8SImode:
44431	  cmode = V4SImode;
44432	  break;
44433	case V8SFmode:
44434	  cmode = V4SFmode;
44435	  break;
44436	case V4DImode:
44437	  cmode = V2DImode;
44438	  break;
44439	case V4DFmode:
44440	  cmode = V2DFmode;
44441	  break;
44442	case V4SImode:
44443	  cmode = V2SImode;
44444	  break;
44445	case V4SFmode:
44446	  cmode = V2SFmode;
44447	  break;
44448	case V2DImode:
44449	  cmode = DImode;
44450	  break;
44451	case V2SImode:
44452	  cmode = SImode;
44453	  break;
44454	case V2DFmode:
44455	  cmode = DFmode;
44456	  break;
44457	case V2SFmode:
44458	  cmode = SFmode;
44459	  break;
44460	default:
44461	  gcc_unreachable ();
44462	}
44463
44464      if (!register_operand (ops[1], cmode))
44465	ops[1] = force_reg (cmode, ops[1]);
44466      if (!register_operand (ops[0], cmode))
44467	ops[0] = force_reg (cmode, ops[0]);
44468      emit_insn (gen_rtx_SET (VOIDmode, target,
44469			      gen_rtx_VEC_CONCAT (mode, ops[0],
44470						  ops[1])));
44471      break;
44472
44473    case 4:
44474      switch (mode)
44475	{
44476	case V4DImode:
44477	  cmode = V2DImode;
44478	  break;
44479	case V4DFmode:
44480	  cmode = V2DFmode;
44481	  break;
44482	case V4SImode:
44483	  cmode = V2SImode;
44484	  break;
44485	case V4SFmode:
44486	  cmode = V2SFmode;
44487	  break;
44488	default:
44489	  gcc_unreachable ();
44490	}
44491      goto half;
44492
44493    case 8:
44494      switch (mode)
44495	{
44496	case V8DImode:
44497	  cmode = V2DImode;
44498	  hmode = V4DImode;
44499	  break;
44500	case V8DFmode:
44501	  cmode = V2DFmode;
44502	  hmode = V4DFmode;
44503	  break;
44504	case V8SImode:
44505	  cmode = V2SImode;
44506	  hmode = V4SImode;
44507	  break;
44508	case V8SFmode:
44509	  cmode = V2SFmode;
44510	  hmode = V4SFmode;
44511	  break;
44512	default:
44513	  gcc_unreachable ();
44514	}
44515      goto half;
44516
44517    case 16:
44518      switch (mode)
44519	{
44520	case V16SImode:
44521	  cmode = V2SImode;
44522	  hmode = V4SImode;
44523	  gmode = V8SImode;
44524	  break;
44525	case V16SFmode:
44526	  cmode = V2SFmode;
44527	  hmode = V4SFmode;
44528	  gmode = V8SFmode;
44529	  break;
44530	default:
44531	  gcc_unreachable ();
44532	}
44533      goto half;
44534
44535half:
44536      /* FIXME: We process inputs backward to help RA.  PR 36222.  */
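      /* FIRST collects N/2 CMODE vectors built from adjacent pairs of OPS;
	 for larger N those are paired again into HMODE vectors in SECOND
	 and GMODE vectors in THIRD before the final concatenation.  */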
44537      i = n - 1;
44538      j = (n >> 1) - 1;
44539      for (; i > 0; i -= 2, j--)
44540	{
44541	  first[j] = gen_reg_rtx (cmode);
44542	  v = gen_rtvec (2, ops[i - 1], ops[i]);
44543	  ix86_expand_vector_init (false, first[j],
44544				   gen_rtx_PARALLEL (cmode, v));
44545	}
44546
44547      n >>= 1;
44548      if (n > 4)
44549	{
44550	  gcc_assert (hmode != VOIDmode);
44551	  gcc_assert (gmode != VOIDmode);
44552	  for (i = j = 0; i < n; i += 2, j++)
44553	    {
44554	      second[j] = gen_reg_rtx (hmode);
44555	      ix86_expand_vector_init_concat (hmode, second [j],
44556					      &first [i], 2);
44557	    }
44558	  n >>= 1;
44559	  for (i = j = 0; i < n; i += 2, j++)
44560	    {
44561	      third[j] = gen_reg_rtx (gmode);
44562	      ix86_expand_vector_init_concat (gmode, third[j],
44563					      &second[i], 2);
44564	    }
44565	  n >>= 1;
44566	  ix86_expand_vector_init_concat (mode, target, third, n);
44567	}
44568      else if (n > 2)
44569	{
44570	  gcc_assert (hmode != VOIDmode);
44571	  for (i = j = 0; i < n; i += 2, j++)
44572	    {
44573	      second[j] = gen_reg_rtx (hmode);
44574	      ix86_expand_vector_init_concat (hmode, second [j],
44575					      &first [i], 2);
44576	    }
44577	  n >>= 1;
44578	  ix86_expand_vector_init_concat (mode, target, second, n);
44579	}
44580      else
44581	ix86_expand_vector_init_concat (mode, target, first, n);
44582      break;
44583
44584    default:
44585      gcc_unreachable ();
44586    }
44587}
44588
44589/* A subroutine of ix86_expand_vector_init_general.  Use vector
44590   interleave to handle the most general case: all values variable,
44591   and none identical.  */
44592
44593static void
44594ix86_expand_vector_init_interleave (machine_mode mode,
44595				    rtx target, rtx *ops, int n)
44596{
44597  machine_mode first_imode, second_imode, third_imode, inner_mode;
44598  int i, j;
44599  rtx op0, op1;
44600  rtx (*gen_load_even) (rtx, rtx, rtx);
44601  rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
44602  rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
44603
44604  switch (mode)
44605    {
44606    case V8HImode:
44607      gen_load_even = gen_vec_setv8hi;
44608      gen_interleave_first_low = gen_vec_interleave_lowv4si;
44609      gen_interleave_second_low = gen_vec_interleave_lowv2di;
44610      inner_mode = HImode;
44611      first_imode = V4SImode;
44612      second_imode = V2DImode;
44613      third_imode = VOIDmode;
44614      break;
44615    case V16QImode:
44616      gen_load_even = gen_vec_setv16qi;
44617      gen_interleave_first_low = gen_vec_interleave_lowv8hi;
44618      gen_interleave_second_low = gen_vec_interleave_lowv4si;
44619      inner_mode = QImode;
44620      first_imode = V8HImode;
44621      second_imode = V4SImode;
44622      third_imode = V2DImode;
44623      break;
44624    default:
44625      gcc_unreachable ();
44626    }
44627
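  /* Pack each pair ops[2 * i], ops[2 * i + 1] into the two low INNER_MODE
     lanes of a vector; the interleave steps below then merge pairs of those
     vectors until a single full-width vector remains.  */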
44628  for (i = 0; i < n; i++)
44629    {
44630      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
44631      op0 = gen_reg_rtx (SImode);
44632      emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
44633
44634      /* Insert the SImode value as the low element of a V4SImode vector.  */
44635      op1 = gen_reg_rtx (V4SImode);
44636      op0 = gen_rtx_VEC_MERGE (V4SImode,
44637			       gen_rtx_VEC_DUPLICATE (V4SImode,
44638						      op0),
44639			       CONST0_RTX (V4SImode),
44640			       const1_rtx);
44641      emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
44642
44643      /* Cast the V4SImode vector back to a vector in the original mode.  */
44644      op0 = gen_reg_rtx (mode);
44645      emit_move_insn (op0, gen_lowpart (mode, op1));
44646
44647      /* Load even elements into the second position.  */
44648      emit_insn (gen_load_even (op0,
44649				force_reg (inner_mode,
44650					   ops [i + i + 1]),
44651				const1_rtx));
44652
44653      /* Cast vector to FIRST_IMODE vector.  */
44654      ops[i] = gen_reg_rtx (first_imode);
44655      emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
44656    }
44657
44658  /* Interleave low FIRST_IMODE vectors.  */
44659  for (i = j = 0; i < n; i += 2, j++)
44660    {
44661      op0 = gen_reg_rtx (first_imode);
44662      emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
44663
44664      /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
44665      ops[j] = gen_reg_rtx (second_imode);
44666      emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
44667    }
44668
44669  /* Interleave low SECOND_IMODE vectors.  */
44670  switch (second_imode)
44671    {
44672    case V4SImode:
44673      for (i = j = 0; i < n / 2; i += 2, j++)
44674	{
44675	  op0 = gen_reg_rtx (second_imode);
44676	  emit_insn (gen_interleave_second_low (op0, ops[i],
44677						ops[i + 1]));
44678
44679	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
44680	     vector.  */
44681	  ops[j] = gen_reg_rtx (third_imode);
44682	  emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
44683	}
44684      second_imode = V2DImode;
44685      gen_interleave_second_low = gen_vec_interleave_lowv2di;
44686      /* FALLTHRU */
44687
44688    case V2DImode:
44689      op0 = gen_reg_rtx (second_imode);
44690      emit_insn (gen_interleave_second_low (op0, ops[0],
44691					    ops[1]));
44692
44693      /* Cast the SECOND_IMODE vector back to a vector in the original
44694	 mode.  */
44695      emit_insn (gen_rtx_SET (VOIDmode, target,
44696			      gen_lowpart (mode, op0)));
44697      break;
44698
44699    default:
44700      gcc_unreachable ();
44701    }
44702}
44703
44704/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
44705   all values variable, and none identical.  */
44706
44707static void
44708ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
44709				 rtx target, rtx vals)
44710{
44711  rtx ops[64], op0, op1, op2, op3, op4, op5;
44712  machine_mode half_mode = VOIDmode;
44713  machine_mode quarter_mode = VOIDmode;
44714  int n, i;
44715
44716  switch (mode)
44717    {
44718    case V2SFmode:
44719    case V2SImode:
44720      if (!mmx_ok && !TARGET_SSE)
44721	break;
44722      /* FALLTHRU */
44723
44724    case V16SImode:
44725    case V16SFmode:
44726    case V8DFmode:
44727    case V8DImode:
44728    case V8SFmode:
44729    case V8SImode:
44730    case V4DFmode:
44731    case V4DImode:
44732    case V4SFmode:
44733    case V4SImode:
44734    case V2DFmode:
44735    case V2DImode:
44736      n = GET_MODE_NUNITS (mode);
44737      for (i = 0; i < n; i++)
44738	ops[i] = XVECEXP (vals, 0, i);
44739      ix86_expand_vector_init_concat (mode, target, ops, n);
44740      return;
44741
44742    case V32QImode:
44743      half_mode = V16QImode;
44744      goto half;
44745
44746    case V16HImode:
44747      half_mode = V8HImode;
44748      goto half;
44749
44750half:
44751      n = GET_MODE_NUNITS (mode);
44752      for (i = 0; i < n; i++)
44753	ops[i] = XVECEXP (vals, 0, i);
44754      op0 = gen_reg_rtx (half_mode);
44755      op1 = gen_reg_rtx (half_mode);
44756      ix86_expand_vector_init_interleave (half_mode, op0, ops,
44757					  n >> 2);
44758      ix86_expand_vector_init_interleave (half_mode, op1,
44759					  &ops [n >> 1], n >> 2);
44760      emit_insn (gen_rtx_SET (VOIDmode, target,
44761			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
44762      return;
44763
44764    case V64QImode:
44765      quarter_mode = V16QImode;
44766      half_mode = V32QImode;
44767      goto quarter;
44768
44769    case V32HImode:
44770      quarter_mode = V8HImode;
44771      half_mode = V16HImode;
44772      goto quarter;
44773
44774quarter:
44775      n = GET_MODE_NUNITS (mode);
44776      for (i = 0; i < n; i++)
44777	ops[i] = XVECEXP (vals, 0, i);
44778      op0 = gen_reg_rtx (quarter_mode);
44779      op1 = gen_reg_rtx (quarter_mode);
44780      op2 = gen_reg_rtx (quarter_mode);
44781      op3 = gen_reg_rtx (quarter_mode);
44782      op4 = gen_reg_rtx (half_mode);
44783      op5 = gen_reg_rtx (half_mode);
44784      ix86_expand_vector_init_interleave (quarter_mode, op0, ops,
44785					  n >> 3);
44786      ix86_expand_vector_init_interleave (quarter_mode, op1,
44787					  &ops [n >> 2], n >> 3);
44788      ix86_expand_vector_init_interleave (quarter_mode, op2,
44789					  &ops [n >> 1], n >> 3);
44790      ix86_expand_vector_init_interleave (quarter_mode, op3,
44791					  &ops [(n >> 1) | (n >> 2)], n >> 3);
44792      emit_insn (gen_rtx_SET (VOIDmode, op4,
44793			      gen_rtx_VEC_CONCAT (half_mode, op0, op1)));
44794      emit_insn (gen_rtx_SET (VOIDmode, op5,
44795			      gen_rtx_VEC_CONCAT (half_mode, op2, op3)));
44796      emit_insn (gen_rtx_SET (VOIDmode, target,
44797			      gen_rtx_VEC_CONCAT (mode, op4, op5)));
44798      return;
44799
44800    case V16QImode:
44801      if (!TARGET_SSE4_1)
44802	break;
44803      /* FALLTHRU */
44804
44805    case V8HImode:
44806      if (!TARGET_SSE2)
44807	break;
44808
44809      /* Don't use ix86_expand_vector_init_interleave if we can't
44810	 move from GPR to SSE register directly.  */
44811      if (!TARGET_INTER_UNIT_MOVES_TO_VEC)
44812	break;
44813
44814      n = GET_MODE_NUNITS (mode);
44815      for (i = 0; i < n; i++)
44816	ops[i] = XVECEXP (vals, 0, i);
44817      ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
44818      return;
44819
44820    case V4HImode:
44821    case V8QImode:
44822      break;
44823
44824    default:
44825      gcc_unreachable ();
44826    }
44827
44828    {
44829      int i, j, n_elts, n_words, n_elt_per_word;
44830      machine_mode inner_mode;
44831      rtx words[4], shift;
44832
44833      inner_mode = GET_MODE_INNER (mode);
44834      n_elts = GET_MODE_NUNITS (mode);
44835      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
44836      n_elt_per_word = n_elts / n_words;
44837      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
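      /* Build each word starting from its highest-numbered element; after
	 the final shift-and-IOR the lowest-numbered element ends up in the
	 least significant bits, matching the little-endian lane layout.  */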
44838
44839      for (i = 0; i < n_words; ++i)
44840	{
44841	  rtx word = NULL_RTX;
44842
44843	  for (j = 0; j < n_elt_per_word; ++j)
44844	    {
44845	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
44846	      elt = convert_modes (word_mode, inner_mode, elt, true);
44847
44848	      if (j == 0)
44849		word = elt;
44850	      else
44851		{
44852		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
44853					      word, 1, OPTAB_LIB_WIDEN);
44854		  word = expand_simple_binop (word_mode, IOR, word, elt,
44855					      word, 1, OPTAB_LIB_WIDEN);
44856		}
44857	    }
44858
44859	  words[i] = word;
44860	}
44861
44862      if (n_words == 1)
44863	emit_move_insn (target, gen_lowpart (mode, words[0]));
44864      else if (n_words == 2)
44865	{
44866	  rtx tmp = gen_reg_rtx (mode);
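	  /* Clobber TMP first so that the two word-sized writes below are
	     not treated as partial updates of an uninitialized register.  */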
44867	  emit_clobber (tmp);
44868	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
44869	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
44870	  emit_move_insn (target, tmp);
44871	}
44872      else if (n_words == 4)
44873	{
44874	  rtx tmp = gen_reg_rtx (V4SImode);
44875	  gcc_assert (word_mode == SImode);
44876	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
44877	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
44878	  emit_move_insn (target, gen_lowpart (mode, tmp));
44879	}
44880      else
44881	gcc_unreachable ();
44882    }
44883}
44884
44885/* Initialize vector TARGET via VALS.  Suppress the use of MMX
44886   instructions unless MMX_OK is true.  */
44887
44888void
44889ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
44890{
44891  machine_mode mode = GET_MODE (target);
44892  machine_mode inner_mode = GET_MODE_INNER (mode);
44893  int n_elts = GET_MODE_NUNITS (mode);
44894  int n_var = 0, one_var = -1;
44895  bool all_same = true, all_const_zero = true;
44896  int i;
44897  rtx x;
44898
44899  for (i = 0; i < n_elts; ++i)
44900    {
44901      x = XVECEXP (vals, 0, i);
44902      if (!(CONST_INT_P (x)
44903	    || GET_CODE (x) == CONST_DOUBLE
44904	    || GET_CODE (x) == CONST_FIXED))
44905	n_var++, one_var = i;
44906      else if (x != CONST0_RTX (inner_mode))
44907	all_const_zero = false;
44908      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
44909	all_same = false;
44910    }
44911
44912  /* Constants are best loaded from the constant pool.  */
44913  if (n_var == 0)
44914    {
44915      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
44916      return;
44917    }
44918
44919  /* If all values are identical, broadcast the value.  */
44920  if (all_same
44921      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
44922					    XVECEXP (vals, 0, 0)))
44923    return;
44924
44925  /* Values where only one field is non-constant are best loaded from
44926     the pool and overwritten via move later.  */
44927  if (n_var == 1)
44928    {
44929      if (all_const_zero
44930	  && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
44931						  XVECEXP (vals, 0, one_var),
44932						  one_var))
44933	return;
44934
44935      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
44936	return;
44937    }
44938
44939  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
44940}
44941
44942void
44943ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
44944{
44945  machine_mode mode = GET_MODE (target);
44946  machine_mode inner_mode = GET_MODE_INNER (mode);
44947  machine_mode half_mode;
44948  bool use_vec_merge = false;
44949  rtx tmp;
44950  static rtx (*gen_extract[6][2]) (rtx, rtx)
44951    = {
44952	{ gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
44953	{ gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
44954	{ gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
44955	{ gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
44956	{ gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
44957	{ gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
44958      };
44959  static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
44960    = {
44961	{ gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
44962	{ gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
44963	{ gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
44964	{ gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
44965	{ gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
44966	{ gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
44967      };
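  /* In the 256-bit cases below, J selects the row of the tables above and
     I selects the low (0) or high (1) 128-bit half.  */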
44968  int i, j, n;
44969  machine_mode mmode = VOIDmode;
44970  rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
44971
44972  switch (mode)
44973    {
44974    case V2SFmode:
44975    case V2SImode:
44976      if (mmx_ok)
44977	{
44978	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44979	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
44980	  if (elt == 0)
44981	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44982	  else
44983	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
44984	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
44985	  return;
44986	}
44987      break;
44988
44989    case V2DImode:
44990      use_vec_merge = TARGET_SSE4_1 && TARGET_64BIT;
44991      if (use_vec_merge)
44992	break;
44993
44994      tmp = gen_reg_rtx (GET_MODE_INNER (mode));
44995      ix86_expand_vector_extract (false, tmp, target, 1 - elt);
44996      if (elt == 0)
44997	tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
44998      else
44999	tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
45000      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45001      return;
45002
45003    case V2DFmode:
45004      {
45005	rtx op0, op1;
45006
45007	/* For the two element vectors, we implement a VEC_CONCAT with
45008	   the extraction of the other element.  */
45009
45010	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
45011	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
45012
45013	if (elt == 0)
45014	  op0 = val, op1 = tmp;
45015	else
45016	  op0 = tmp, op1 = val;
45017
45018	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
45019	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45020      }
45021      return;
45022
45023    case V4SFmode:
45024      use_vec_merge = TARGET_SSE4_1;
45025      if (use_vec_merge)
45026	break;
45027
45028      switch (elt)
45029	{
45030	case 0:
45031	  use_vec_merge = true;
45032	  break;
45033
45034	case 1:
45035	  /* tmp = target = A B C D */
45036	  tmp = copy_to_reg (target);
45037	  /* target = A A B B */
45038	  emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
45039	  /* target = X A B B */
45040	  ix86_expand_vector_set (false, target, val, 0);
45041	  /* target = A X C D  */
45042	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45043					  const1_rtx, const0_rtx,
45044					  GEN_INT (2+4), GEN_INT (3+4)));
45045	  return;
45046
45047	case 2:
45048	  /* tmp = target = A B C D */
45049	  tmp = copy_to_reg (target);
45050	  /* tmp = X B C D */
45051	  ix86_expand_vector_set (false, tmp, val, 0);
45052	  /* target = A B X D */
45053	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45054					  const0_rtx, const1_rtx,
45055					  GEN_INT (0+4), GEN_INT (3+4)));
45056	  return;
45057
45058	case 3:
45059	  /* tmp = target = A B C D */
45060	  tmp = copy_to_reg (target);
45061	  /* tmp = X B C D */
45062	  ix86_expand_vector_set (false, tmp, val, 0);
45063	  /* target = A B C X */
45064	  emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
45065					  const0_rtx, const1_rtx,
45066					  GEN_INT (2+4), GEN_INT (0+4)));
45067	  return;
45068
45069	default:
45070	  gcc_unreachable ();
45071	}
45072      break;
45073
45074    case V4SImode:
45075      use_vec_merge = TARGET_SSE4_1;
45076      if (use_vec_merge)
45077	break;
45078
45079      /* Element 0 handled by vec_merge below.  */
45080      if (elt == 0)
45081	{
45082	  use_vec_merge = true;
45083	  break;
45084	}
45085
45086      if (TARGET_SSE2)
45087	{
45088	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
45089	     store into element 0, then shuffle them back.  */
45090
45091	  rtx order[4];
45092
45093	  order[0] = GEN_INT (elt);
45094	  order[1] = const1_rtx;
45095	  order[2] = const2_rtx;
45096	  order[3] = GEN_INT (3);
45097	  order[elt] = const0_rtx;
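	  /* ORDER swaps lanes 0 and ELT and is its own inverse, so applying
	     the same pshufd before and after the element-0 store leaves the
	     other lanes unchanged with VAL now in lane ELT.  */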
45098
45099	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45100					order[1], order[2], order[3]));
45101
45102	  ix86_expand_vector_set (false, target, val, 0);
45103
45104	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
45105					order[1], order[2], order[3]));
45106	}
45107      else
45108	{
45109	  /* For SSE1, we have to reuse the V4SF code.  */
45110	  rtx t = gen_reg_rtx (V4SFmode);
45111	  emit_move_insn (t, gen_lowpart (V4SFmode, target));
45112	  ix86_expand_vector_set (false, t, gen_lowpart (SFmode, val), elt);
45113	  emit_move_insn (target, gen_lowpart (mode, t));
45114	}
45115      return;
45116
45117    case V8HImode:
45118      use_vec_merge = TARGET_SSE2;
45119      break;
45120    case V4HImode:
45121      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45122      break;
45123
45124    case V16QImode:
45125      use_vec_merge = TARGET_SSE4_1;
45126      break;
45127
45128    case V8QImode:
45129      break;
45130
45131    case V32QImode:
45132      half_mode = V16QImode;
45133      j = 0;
45134      n = 16;
45135      goto half;
45136
45137    case V16HImode:
45138      half_mode = V8HImode;
45139      j = 1;
45140      n = 8;
45141      goto half;
45142
45143    case V8SImode:
45144      half_mode = V4SImode;
45145      j = 2;
45146      n = 4;
45147      goto half;
45148
45149    case V4DImode:
45150      half_mode = V2DImode;
45151      j = 3;
45152      n = 2;
45153      goto half;
45154
45155    case V8SFmode:
45156      half_mode = V4SFmode;
45157      j = 4;
45158      n = 4;
45159      goto half;
45160
45161    case V4DFmode:
45162      half_mode = V2DFmode;
45163      j = 5;
45164      n = 2;
45165      goto half;
45166
45167half:
45168      /* Compute offset.  */
45169      i = elt / n;
45170      elt %= n;
45171
45172      gcc_assert (i <= 1);
45173
45174      /* Extract the half.  */
45175      tmp = gen_reg_rtx (half_mode);
45176      emit_insn (gen_extract[j][i] (tmp, target));
45177
45178      /* Put val in tmp at elt.  */
45179      ix86_expand_vector_set (false, tmp, val, elt);
45180
45181      /* Put it back.  */
45182      emit_insn (gen_insert[j][i] (target, target, tmp));
45183      return;
45184
45185    case V8DFmode:
45186      if (TARGET_AVX512F)
45187	{
45188	  mmode = QImode;
45189	  gen_blendm = gen_avx512f_blendmv8df;
45190	}
45191      break;
45192
45193    case V8DImode:
45194      if (TARGET_AVX512F)
45195	{
45196	  mmode = QImode;
45197	  gen_blendm = gen_avx512f_blendmv8di;
45198	}
45199      break;
45200
45201    case V16SFmode:
45202      if (TARGET_AVX512F)
45203	{
45204	  mmode = HImode;
45205	  gen_blendm = gen_avx512f_blendmv16sf;
45206	}
45207      break;
45208
45209    case V16SImode:
45210      if (TARGET_AVX512F)
45211	{
45212	  mmode = HImode;
45213	  gen_blendm = gen_avx512f_blendmv16si;
45214	}
45215      break;
45216
45217    case V32HImode:
45218      if (TARGET_AVX512F && TARGET_AVX512BW)
45219	{
45220	  mmode = SImode;
45221	  gen_blendm = gen_avx512bw_blendmv32hi;
45222	}
45223      break;
45224
45225    case V64QImode:
45226      if (TARGET_AVX512F && TARGET_AVX512BW)
45227	{
45228	  mmode = DImode;
45229	  gen_blendm = gen_avx512bw_blendmv64qi;
45230	}
45231      break;
45232
45233    default:
45234      break;
45235    }
45236
45237  if (mmode != VOIDmode)
45238    {
45239      tmp = gen_reg_rtx (mode);
45240      emit_insn (gen_rtx_SET (VOIDmode, tmp,
45241			      gen_rtx_VEC_DUPLICATE (mode, val)));
45242      /* The avx512*_blendm<mode> expanders have different operand order
45243	 from VEC_MERGE.  In VEC_MERGE, the first input operand is used for
45244	 elements where the mask is set and second input operand otherwise,
45245	 in {sse,avx}*_*blend* the first input operand is used for elements
45246	 where the mask is clear and second input operand otherwise.  */
45247      emit_insn (gen_blendm (target, target, tmp,
45248			     force_reg (mmode,
45249					gen_int_mode (1 << elt, mmode))));
45250    }
45251  else if (use_vec_merge)
45252    {
45253      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
45254      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
45255      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45256    }
45257  else
45258    {
45259      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45260
45261      emit_move_insn (mem, target);
45262
45263      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45264      emit_move_insn (tmp, val);
45265
45266      emit_move_insn (target, mem);
45267    }
45268}
45269
45270void
45271ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
45272{
45273  machine_mode mode = GET_MODE (vec);
45274  machine_mode inner_mode = GET_MODE_INNER (mode);
45275  bool use_vec_extr = false;
45276  rtx tmp;
45277
45278  switch (mode)
45279    {
45280    case V2SImode:
45281    case V2SFmode:
45282      if (!mmx_ok)
45283	break;
45284      /* FALLTHRU */
45285
45286    case V2DFmode:
45287    case V2DImode:
45288      use_vec_extr = true;
45289      break;
45290
45291    case V4SFmode:
45292      use_vec_extr = TARGET_SSE4_1;
45293      if (use_vec_extr)
45294	break;
45295
45296      switch (elt)
45297	{
45298	case 0:
45299	  tmp = vec;
45300	  break;
45301
45302	case 1:
45303	case 3:
45304	  tmp = gen_reg_rtx (mode);
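	  /* Broadcast lane ELT to all four lanes; the value is then
	     extracted from lane 0 below.  */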
45305	  emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
45306				       GEN_INT (elt), GEN_INT (elt),
45307				       GEN_INT (elt+4), GEN_INT (elt+4)));
45308	  break;
45309
45310	case 2:
45311	  tmp = gen_reg_rtx (mode);
45312	  emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
45313	  break;
45314
45315	default:
45316	  gcc_unreachable ();
45317	}
45318      vec = tmp;
45319      use_vec_extr = true;
45320      elt = 0;
45321      break;
45322
45323    case V4SImode:
45324      use_vec_extr = TARGET_SSE4_1;
45325      if (use_vec_extr)
45326	break;
45327
45328      if (TARGET_SSE2)
45329	{
45330	  switch (elt)
45331	    {
45332	    case 0:
45333	      tmp = vec;
45334	      break;
45335
45336	    case 1:
45337	    case 3:
45338	      tmp = gen_reg_rtx (mode);
45339	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
45340					    GEN_INT (elt), GEN_INT (elt),
45341					    GEN_INT (elt), GEN_INT (elt)));
45342	      break;
45343
45344	    case 2:
45345	      tmp = gen_reg_rtx (mode);
45346	      emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
45347	      break;
45348
45349	    default:
45350	      gcc_unreachable ();
45351	    }
45352	  vec = tmp;
45353	  use_vec_extr = true;
45354	  elt = 0;
45355	}
45356      else
45357	{
45358	  /* For SSE1, we have to reuse the V4SF code.  */
45359	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
45360				      gen_lowpart (V4SFmode, vec), elt);
45361	  return;
45362	}
45363      break;
45364
45365    case V8HImode:
45366      use_vec_extr = TARGET_SSE2;
45367      break;
45368    case V4HImode:
45369      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
45370      break;
45371
45372    case V16QImode:
45373      use_vec_extr = TARGET_SSE4_1;
45374      break;
45375
45376    case V8SFmode:
45377      if (TARGET_AVX)
45378	{
45379	  tmp = gen_reg_rtx (V4SFmode);
45380	  if (elt < 4)
45381	    emit_insn (gen_vec_extract_lo_v8sf (tmp, vec));
45382	  else
45383	    emit_insn (gen_vec_extract_hi_v8sf (tmp, vec));
45384	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
45385	  return;
45386	}
45387      break;
45388
45389    case V4DFmode:
45390      if (TARGET_AVX)
45391	{
45392	  tmp = gen_reg_rtx (V2DFmode);
45393	  if (elt < 2)
45394	    emit_insn (gen_vec_extract_lo_v4df (tmp, vec));
45395	  else
45396	    emit_insn (gen_vec_extract_hi_v4df (tmp, vec));
45397	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
45398	  return;
45399	}
45400      break;
45401
45402    case V32QImode:
45403      if (TARGET_AVX)
45404	{
45405	  tmp = gen_reg_rtx (V16QImode);
45406	  if (elt < 16)
45407	    emit_insn (gen_vec_extract_lo_v32qi (tmp, vec));
45408	  else
45409	    emit_insn (gen_vec_extract_hi_v32qi (tmp, vec));
45410	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
45411	  return;
45412	}
45413      break;
45414
45415    case V16HImode:
45416      if (TARGET_AVX)
45417	{
45418	  tmp = gen_reg_rtx (V8HImode);
45419	  if (elt < 8)
45420	    emit_insn (gen_vec_extract_lo_v16hi (tmp, vec));
45421	  else
45422	    emit_insn (gen_vec_extract_hi_v16hi (tmp, vec));
45423	  ix86_expand_vector_extract (false, target, tmp, elt & 7);
45424	  return;
45425	}
45426      break;
45427
45428    case V8SImode:
45429      if (TARGET_AVX)
45430	{
45431	  tmp = gen_reg_rtx (V4SImode);
45432	  if (elt < 4)
45433	    emit_insn (gen_vec_extract_lo_v8si (tmp, vec));
45434	  else
45435	    emit_insn (gen_vec_extract_hi_v8si (tmp, vec));
45436	  ix86_expand_vector_extract (false, target, tmp, elt & 3);
45437	  return;
45438	}
45439      break;
45440
45441    case V4DImode:
45442      if (TARGET_AVX)
45443	{
45444	  tmp = gen_reg_rtx (V2DImode);
45445	  if (elt < 2)
45446	    emit_insn (gen_vec_extract_lo_v4di (tmp, vec));
45447	  else
45448	    emit_insn (gen_vec_extract_hi_v4di (tmp, vec));
45449	  ix86_expand_vector_extract (false, target, tmp, elt & 1);
45450	  return;
45451	}
45452      break;
45453
45454    case V32HImode:
45455      if (TARGET_AVX512BW)
45456	{
45457	  tmp = gen_reg_rtx (V16HImode);
45458	  if (elt < 16)
45459	    emit_insn (gen_vec_extract_lo_v32hi (tmp, vec));
45460	  else
45461	    emit_insn (gen_vec_extract_hi_v32hi (tmp, vec));
45462	  ix86_expand_vector_extract (false, target, tmp, elt & 15);
45463	  return;
45464	}
45465      break;
45466
45467    case V64QImode:
45468      if (TARGET_AVX512BW)
45469	{
45470	  tmp = gen_reg_rtx (V32QImode);
45471	  if (elt < 32)
45472	    emit_insn (gen_vec_extract_lo_v64qi (tmp, vec));
45473	  else
45474	    emit_insn (gen_vec_extract_hi_v64qi (tmp, vec));
45475	  ix86_expand_vector_extract (false, target, tmp, elt & 31);
45476	  return;
45477	}
45478      break;
45479
45480    case V16SFmode:
45481      tmp = gen_reg_rtx (V8SFmode);
45482      if (elt < 8)
45483	emit_insn (gen_vec_extract_lo_v16sf (tmp, vec));
45484      else
45485	emit_insn (gen_vec_extract_hi_v16sf (tmp, vec));
45486      ix86_expand_vector_extract (false, target, tmp, elt & 7);
45487      return;
45488
45489    case V8DFmode:
45490      tmp = gen_reg_rtx (V4DFmode);
45491      if (elt < 4)
45492	emit_insn (gen_vec_extract_lo_v8df (tmp, vec));
45493      else
45494	emit_insn (gen_vec_extract_hi_v8df (tmp, vec));
45495      ix86_expand_vector_extract (false, target, tmp, elt & 3);
45496      return;
45497
45498    case V16SImode:
45499      tmp = gen_reg_rtx (V8SImode);
45500      if (elt < 8)
45501	emit_insn (gen_vec_extract_lo_v16si (tmp, vec));
45502      else
45503	emit_insn (gen_vec_extract_hi_v16si (tmp, vec));
45504      ix86_expand_vector_extract (false, target, tmp, elt & 7);
45505      return;
45506
45507    case V8DImode:
45508      tmp = gen_reg_rtx (V4DImode);
45509      if (elt < 4)
45510	emit_insn (gen_vec_extract_lo_v8di (tmp, vec));
45511      else
45512	emit_insn (gen_vec_extract_hi_v8di (tmp, vec));
45513      ix86_expand_vector_extract (false, target, tmp, elt & 3);
45514      return;
45515
45516    case V8QImode:
45517      /* ??? Could extract the appropriate HImode element and shift.  */
45518    default:
45519      break;
45520    }
45521
45522  if (use_vec_extr)
45523    {
45524      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
45525      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
45526
45527      /* Let the rtl optimizers know about the zero extension performed.  */
45528      if (inner_mode == QImode || inner_mode == HImode)
45529	{
45530	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
45531	  target = gen_lowpart (SImode, target);
45532	}
45533
45534      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
45535    }
45536  else
45537    {
45538      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
45539
45540      emit_move_insn (mem, vec);
45541
45542      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
45543      emit_move_insn (target, tmp);
45544    }
45545}
45546
45547/* Generate code to copy vector bits i / 2 ... i - 1 from vector SRC
45548   to bits 0 ... i / 2 - 1 of vector DEST, which has the same mode.
45549   The upper bits of DEST are undefined, though they shouldn't cause
45550   exceptions (some bits from src or all zeros are ok).  */
45551
45552static void
45553emit_reduc_half (rtx dest, rtx src, int i)
45554{
45555  rtx tem, d = dest;
45556  switch (GET_MODE (src))
45557    {
45558    case V4SFmode:
45559      if (i == 128)
45560	tem = gen_sse_movhlps (dest, src, src);
45561      else
45562	tem = gen_sse_shufps_v4sf (dest, src, src, const1_rtx, const1_rtx,
45563				   GEN_INT (1 + 4), GEN_INT (1 + 4));
45564      break;
45565    case V2DFmode:
45566      tem = gen_vec_interleave_highv2df (dest, src, src);
45567      break;
45568    case V16QImode:
45569    case V8HImode:
45570    case V4SImode:
45571    case V2DImode:
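      /* Shift the whole 128-bit value right by I/2 bits so that the upper
	 half of the lanes being reduced lands in the lower half.  */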
45572      d = gen_reg_rtx (V1TImode);
45573      tem = gen_sse2_lshrv1ti3 (d, gen_lowpart (V1TImode, src),
45574				GEN_INT (i / 2));
45575      break;
45576    case V8SFmode:
45577      if (i == 256)
45578	tem = gen_avx_vperm2f128v8sf3 (dest, src, src, const1_rtx);
45579      else
45580	tem = gen_avx_shufps256 (dest, src, src,
45581				 GEN_INT (i == 128 ? 2 + (3 << 2) : 1));
45582      break;
45583    case V4DFmode:
45584      if (i == 256)
45585	tem = gen_avx_vperm2f128v4df3 (dest, src, src, const1_rtx);
45586      else
45587	tem = gen_avx_shufpd256 (dest, src, src, const1_rtx);
45588      break;
45589    case V32QImode:
45590    case V16HImode:
45591    case V8SImode:
45592    case V4DImode:
45593      if (i == 256)
45594	{
45595	  if (GET_MODE (dest) != V4DImode)
45596	    d = gen_reg_rtx (V4DImode);
45597	  tem = gen_avx2_permv2ti (d, gen_lowpart (V4DImode, src),
45598				   gen_lowpart (V4DImode, src),
45599				   const1_rtx);
45600	}
45601      else
45602	{
45603	  d = gen_reg_rtx (V2TImode);
45604	  tem = gen_avx2_lshrv2ti3 (d, gen_lowpart (V2TImode, src),
45605				    GEN_INT (i / 2));
45606	}
45607      break;
45608    case V64QImode:
45609    case V32HImode:
45610    case V16SImode:
45611    case V16SFmode:
45612    case V8DImode:
45613    case V8DFmode:
45614      if (i > 128)
45615	tem = gen_avx512f_shuf_i32x4_1 (gen_lowpart (V16SImode, dest),
45616				      gen_lowpart (V16SImode, src),
45617				      gen_lowpart (V16SImode, src),
45618				      GEN_INT (0x4 + (i == 512 ? 4 : 0)),
45619				      GEN_INT (0x5 + (i == 512 ? 4 : 0)),
45620				      GEN_INT (0x6 + (i == 512 ? 4 : 0)),
45621				      GEN_INT (0x7 + (i == 512 ? 4 : 0)),
45622				      GEN_INT (0xC), GEN_INT (0xD),
45623				      GEN_INT (0xE), GEN_INT (0xF),
45624				      GEN_INT (0x10), GEN_INT (0x11),
45625				      GEN_INT (0x12), GEN_INT (0x13),
45626				      GEN_INT (0x14), GEN_INT (0x15),
45627				      GEN_INT (0x16), GEN_INT (0x17));
45628      else
45629	tem = gen_avx512f_pshufd_1 (gen_lowpart (V16SImode, dest),
45630				   gen_lowpart (V16SImode, src),
45631				   GEN_INT (i == 128 ? 0x2 : 0x1),
45632				   GEN_INT (0x3),
45633				   GEN_INT (0x3),
45634				   GEN_INT (0x3),
45635				   GEN_INT (i == 128 ? 0x6 : 0x5),
45636				   GEN_INT (0x7),
45637				   GEN_INT (0x7),
45638				   GEN_INT (0x7),
45639				   GEN_INT (i == 128 ? 0xA : 0x9),
45640				   GEN_INT (0xB),
45641				   GEN_INT (0xB),
45642				   GEN_INT (0xB),
45643				   GEN_INT (i == 128 ? 0xE : 0xD),
45644				   GEN_INT (0xF),
45645				   GEN_INT (0xF),
45646				   GEN_INT (0xF));
45647      break;
45648    default:
45649      gcc_unreachable ();
45650    }
45651  emit_insn (tem);
45652  if (d != dest)
45653    emit_move_insn (dest, gen_lowpart (GET_MODE (dest), d));
45654}
45655
45656/* Expand a vector reduction.  FN is the binary pattern to reduce;
45657   DEST is the destination; IN is the input vector.  */
45658
45659void
45660ix86_expand_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
45661{
45662  rtx half, dst, vec = in;
45663  machine_mode mode = GET_MODE (in);
45664  int i;
45665
45666  /* SSE4.1 has a special instruction for V8HImode UMIN reduction.  */
45667  if (TARGET_SSE4_1
45668      && mode == V8HImode
45669      && fn == gen_uminv8hi3)
45670    {
45671      emit_insn (gen_sse4_1_phminposuw (dest, in));
45672      return;
45673    }
45674
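  /* Otherwise repeatedly fold the upper half of the remaining I bits onto
     the lower half, halving I each time; the final fold writes straight
     into DEST.  */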
45675  for (i = GET_MODE_BITSIZE (mode);
45676       i > GET_MODE_BITSIZE (GET_MODE_INNER (mode));
45677       i >>= 1)
45678    {
45679      half = gen_reg_rtx (mode);
45680      emit_reduc_half (half, vec, i);
45681      if (i == GET_MODE_BITSIZE (GET_MODE_INNER (mode)) * 2)
45682	dst = dest;
45683      else
45684	dst = gen_reg_rtx (mode);
45685      emit_insn (fn (dst, half, vec));
45686      vec = dst;
45687    }
45688}
45689
45690/* Target hook for scalar_mode_supported_p.  */
45691static bool
45692ix86_scalar_mode_supported_p (machine_mode mode)
45693{
45694  if (DECIMAL_FLOAT_MODE_P (mode))
45695    return default_decimal_float_supported_p ();
45696  else if (mode == TFmode)
45697    return true;
45698  else
45699    return default_scalar_mode_supported_p (mode);
45700}
45701
45702/* Implements target hook vector_mode_supported_p.  */
45703static bool
45704ix86_vector_mode_supported_p (machine_mode mode)
45705{
45706  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
45707    return true;
45708  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
45709    return true;
45710  if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
45711    return true;
45712  if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
45713    return true;
45714  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
45715    return true;
45716  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
45717    return true;
45718  return false;
45719}
45720
45721/* Implement target hook libgcc_floating_mode_supported_p.  */
45722static bool
45723ix86_libgcc_floating_mode_supported_p (machine_mode mode)
45724{
45725  switch (mode)
45726    {
45727    case SFmode:
45728    case DFmode:
45729    case XFmode:
45730      return true;
45731
45732    case TFmode:
45733#ifdef IX86_NO_LIBGCC_TFMODE
45734      return false;
45735#elif defined IX86_MAYBE_NO_LIBGCC_TFMODE
45736      return TARGET_LONG_DOUBLE_128;
45737#else
45738      return true;
45739#endif
45740
45741    default:
45742      return false;
45743    }
45744}
45745
45746/* Target hook for c_mode_for_suffix.  */
45747static machine_mode
45748ix86_c_mode_for_suffix (char suffix)
45749{
45750  if (suffix == 'q')
45751    return TFmode;
45752  if (suffix == 'w')
45753    return XFmode;
45754
45755  return VOIDmode;
45756}
45757
45758/* Worker function for TARGET_MD_ASM_CLOBBERS.
45759
45760   We do this in the new i386 backend to maintain source compatibility
45761   with the old cc0-based compiler.  */
45762
45763static tree
45764ix86_md_asm_clobbers (tree, tree, tree clobbers)
45765{
45766  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
45767			clobbers);
45768  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
45769			clobbers);
45770  return clobbers;
45771}
45772
45773/* Implement the targetm.asm.encode_section_info target hook.  */
45774
45775static void ATTRIBUTE_UNUSED
45776ix86_encode_section_info (tree decl, rtx rtl, int first)
45777{
45778  default_encode_section_info (decl, rtl, first);
45779
45780  if (ix86_in_large_data_p (decl))
45781    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
45782}
45783
45784/* Worker function for REVERSE_CONDITION.  */
45785
45786enum rtx_code
45787ix86_reverse_condition (enum rtx_code code, machine_mode mode)
45788{
45789  return (mode != CCFPmode && mode != CCFPUmode
45790	  ? reverse_condition (code)
45791	  : reverse_condition_maybe_unordered (code));
45792}
45793
45794/* Output code to perform an x87 FP register move, from OPERANDS[1]
45795   to OPERANDS[0].  */
45796
45797const char *
45798output_387_reg_move (rtx insn, rtx *operands)
45799{
45800  if (REG_P (operands[0]))
45801    {
45802      if (REG_P (operands[1])
45803	  && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45804	{
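	  /* The source register dies here, so a popping form can be used.  */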
45805	  if (REGNO (operands[0]) == FIRST_STACK_REG)
45806	    return output_387_ffreep (operands, 0);
45807	  return "fstp\t%y0";
45808	}
45809      if (STACK_TOP_P (operands[0]))
45810	return "fld%Z1\t%y1";
45811      return "fst\t%y0";
45812    }
45813  else if (MEM_P (operands[0]))
45814    {
45815      gcc_assert (REG_P (operands[1]));
45816      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
45817	return "fstp%Z0\t%y0";
45818      else
45819	{
45820	  /* There is no non-popping store to memory for XFmode.
45821	     So if we need one, follow the store with a load.  */
45822	  if (GET_MODE (operands[0]) == XFmode)
45823	    return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
45824	  else
45825	    return "fst%Z0\t%y0";
45826	}
45827    }
45828  else
45829    gcc_unreachable ();
45830}
45831
45832/* Output code to perform a conditional jump to LABEL, if C2 flag in
45833   FP status register is set.  */
45834
45835void
45836ix86_emit_fp_unordered_jump (rtx label)
45837{
45838  rtx reg = gen_reg_rtx (HImode);
45839  rtx temp;
45840
45841  emit_insn (gen_x86_fnstsw_1 (reg));
45842
45843  if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
45844    {
45845      emit_insn (gen_x86_sahf_1 (reg));
45846
45847      temp = gen_rtx_REG (CCmode, FLAGS_REG);
45848      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
45849    }
45850  else
45851    {
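      /* Mask 0x04 tests bit 2 of the upper status-word byte, i.e. the C2
	 condition flag (bit 10 of the FPU status word).  */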
45852      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
45853
45854      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
45855      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
45856    }
45857
45858  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
45859			      gen_rtx_LABEL_REF (VOIDmode, label),
45860			      pc_rtx);
45861  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
45862
45863  emit_jump_insn (temp);
45864  predict_jump (REG_BR_PROB_BASE * 10 / 100);
45865}
45866
45867/* Output code to perform a log1p XFmode calculation.  */
45868
45869void ix86_emit_i387_log1p (rtx op0, rtx op1)
45870{
45871  rtx_code_label *label1 = gen_label_rtx ();
45872  rtx_code_label *label2 = gen_label_rtx ();
45873
45874  rtx tmp = gen_reg_rtx (XFmode);
45875  rtx tmp2 = gen_reg_rtx (XFmode);
45876  rtx test;
45877
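  /* fyl2xp1 is only specified for operands with |op1| smaller than
     1 - sqrt(2)/2 (~0.29289, the constant tested below); larger inputs
     branch to label1 and compute log1p via fyl2x on 1 + op1 instead.  */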
45878  emit_insn (gen_absxf2 (tmp, op1));
45879  test = gen_rtx_GE (VOIDmode, tmp,
45880    CONST_DOUBLE_FROM_REAL_VALUE (
45881       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
45882       XFmode));
45883  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
45884
45885  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45886  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
45887  emit_jump (label2);
45888
45889  emit_label (label1);
45890  emit_move_insn (tmp, CONST1_RTX (XFmode));
45891  emit_insn (gen_addxf3 (tmp, op1, tmp));
45892  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
45893  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
45894
45895  emit_label (label2);
45896}
45897
/* Output x87 code to round OP1 to the nearest integer and store the
   result in OP0.  */
45899void ix86_emit_i387_round (rtx op0, rtx op1)
45900{
45901  machine_mode inmode = GET_MODE (op1);
45902  machine_mode outmode = GET_MODE (op0);
45903  rtx e1, e2, res, tmp, tmp1, half;
45904  rtx scratch = gen_reg_rtx (HImode);
45905  rtx flags = gen_rtx_REG (CCNOmode, FLAGS_REG);
45906  rtx_code_label *jump_label = gen_label_rtx ();
45907  rtx insn;
45908  rtx (*gen_abs) (rtx, rtx);
45909  rtx (*gen_neg) (rtx, rtx);
45910
45911  switch (inmode)
45912    {
45913    case SFmode:
45914      gen_abs = gen_abssf2;
45915      break;
45916    case DFmode:
45917      gen_abs = gen_absdf2;
45918      break;
45919    case XFmode:
45920      gen_abs = gen_absxf2;
45921      break;
45922    default:
45923      gcc_unreachable ();
45924    }
45925
45926  switch (outmode)
45927    {
45928    case SFmode:
45929      gen_neg = gen_negsf2;
45930      break;
45931    case DFmode:
45932      gen_neg = gen_negdf2;
45933      break;
45934    case XFmode:
45935      gen_neg = gen_negxf2;
45936      break;
45937    case HImode:
45938      gen_neg = gen_neghi2;
45939      break;
45940    case SImode:
45941      gen_neg = gen_negsi2;
45942      break;
45943    case DImode:
45944      gen_neg = gen_negdi2;
45945      break;
45946    default:
45947      gcc_unreachable ();
45948    }
45949
45950  e1 = gen_reg_rtx (inmode);
45951  e2 = gen_reg_rtx (inmode);
45952  res = gen_reg_rtx (outmode);
45953
45954  half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, inmode);
45955
45956  /* round(a) = sgn(a) * floor(fabs(a) + 0.5) */
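  /* Rounding half-way cases away from zero, as the C round function
     requires, is what distinguishes this from rint/nearbyint, which honor
     the current rounding mode.  */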
45957
45958  /* scratch = fxam(op1) */
45959  emit_insn (gen_rtx_SET (VOIDmode, scratch,
45960			  gen_rtx_UNSPEC (HImode, gen_rtvec (1, op1),
45961					  UNSPEC_FXAM)));
45962  /* e1 = fabs(op1) */
45963  emit_insn (gen_abs (e1, op1));
45964
45965  /* e2 = e1 + 0.5 */
45966  half = force_reg (inmode, half);
45967  emit_insn (gen_rtx_SET (VOIDmode, e2,
45968			  gen_rtx_PLUS (inmode, e1, half)));
45969
45970  /* res = floor(e2) */
45971  if (inmode != XFmode)
45972    {
45973      tmp1 = gen_reg_rtx (XFmode);
45974
45975      emit_insn (gen_rtx_SET (VOIDmode, tmp1,
45976			      gen_rtx_FLOAT_EXTEND (XFmode, e2)));
45977    }
45978  else
45979    tmp1 = e2;
45980
45981  switch (outmode)
45982    {
45983    case SFmode:
45984    case DFmode:
45985      {
45986	rtx tmp0 = gen_reg_rtx (XFmode);
45987
45988	emit_insn (gen_frndintxf2_floor (tmp0, tmp1));
45989
45990	emit_insn (gen_rtx_SET (VOIDmode, res,
45991				gen_rtx_UNSPEC (outmode, gen_rtvec (1, tmp0),
45992						UNSPEC_TRUNC_NOOP)));
45993      }
45994      break;
45995    case XFmode:
45996      emit_insn (gen_frndintxf2_floor (res, tmp1));
45997      break;
45998    case HImode:
45999      emit_insn (gen_lfloorxfhi2 (res, tmp1));
46000      break;
46001    case SImode:
46002      emit_insn (gen_lfloorxfsi2 (res, tmp1));
46003      break;
46004    case DImode:
46005      emit_insn (gen_lfloorxfdi2 (res, tmp1));
      break;
46007    default:
46008      gcc_unreachable ();
46009    }
46010
46011  /* flags = signbit(a) */
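  /* fxam records the sign of its operand in condition flag C1, which is
     bit 9 of the FP status word, i.e. bit 0x02 of the word's high byte,
     hence the testqi_ext pattern below.  */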
46012  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x02)));
46013
46014  /* if (flags) then res = -res */
46015  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode,
46016			      gen_rtx_EQ (VOIDmode, flags, const0_rtx),
46017			      gen_rtx_LABEL_REF (VOIDmode, jump_label),
46018			      pc_rtx);
46019  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46020  predict_jump (REG_BR_PROB_BASE * 50 / 100);
46021  JUMP_LABEL (insn) = jump_label;
46022
46023  emit_insn (gen_neg (res, res));
46024
46025  emit_label (jump_label);
46026  LABEL_NUSES (jump_label) = 1;
46027
46028  emit_move_insn (op0, res);
46029}
46030
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
46033
46034void ix86_emit_swdivsf (rtx res, rtx a, rtx b, machine_mode mode)
46035{
46036  rtx x0, x1, e0, e1;
46037
46038  x0 = gen_reg_rtx (mode);
46039  e0 = gen_reg_rtx (mode);
46040  e1 = gen_reg_rtx (mode);
46041  x1 = gen_reg_rtx (mode);
46042
46043  /* a / b = a * ((rcp(b) + rcp(b)) - (b * rcp(b) * rcp (b))) */
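  /* This is one Newton-Raphson refinement of the reciprocal estimate,
     x1 = x0 * (2 - b * x0), which roughly doubles the precision of the
     initial rcpss/rcp14 approximation.  */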
46044
46045  b = force_reg (mode, b);
46046
46047  /* x0 = rcp(b) estimate */
46048  if (mode == V16SFmode || mode == V8DFmode)
46049    emit_insn (gen_rtx_SET (VOIDmode, x0,
46050			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46051					    UNSPEC_RCP14)));
46052  else
46053    emit_insn (gen_rtx_SET (VOIDmode, x0,
46054			    gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
46055					    UNSPEC_RCP)));
46056
46057  /* e0 = x0 * b */
46058  emit_insn (gen_rtx_SET (VOIDmode, e0,
46059			  gen_rtx_MULT (mode, x0, b)));
46060
46061  /* e0 = x0 * e0 */
46062  emit_insn (gen_rtx_SET (VOIDmode, e0,
46063			  gen_rtx_MULT (mode, x0, e0)));
46064
46065  /* e1 = x0 + x0 */
46066  emit_insn (gen_rtx_SET (VOIDmode, e1,
46067			  gen_rtx_PLUS (mode, x0, x0)));
46068
46069  /* x1 = e1 - e0 */
46070  emit_insn (gen_rtx_SET (VOIDmode, x1,
46071			  gen_rtx_MINUS (mode, e1, e0)));
46072
46073  /* res = a * x1 */
46074  emit_insn (gen_rtx_SET (VOIDmode, res,
46075			  gen_rtx_MULT (mode, a, x1)));
46076}
46077
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */
46080
46081void ix86_emit_swsqrtsf (rtx res, rtx a, machine_mode mode,
46082			 bool recip)
46083{
46084  rtx x0, e0, e1, e2, e3, mthree, mhalf;
46085  REAL_VALUE_TYPE r;
46086  int unspec;
46087
46088  x0 = gen_reg_rtx (mode);
46089  e0 = gen_reg_rtx (mode);
46090  e1 = gen_reg_rtx (mode);
46091  e2 = gen_reg_rtx (mode);
46092  e3 = gen_reg_rtx (mode);
46093
46094  real_from_integer (&r, VOIDmode, -3, SIGNED);
46095  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46096
46097  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
46098  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
46099  unspec = UNSPEC_RSQRT;
46100
46101  if (VECTOR_MODE_P (mode))
46102    {
46103      mthree = ix86_build_const_vector (mode, true, mthree);
46104      mhalf = ix86_build_const_vector (mode, true, mhalf);
46105      /* There is no 512-bit rsqrt.  There is however rsqrt14.  */
46106      if (GET_MODE_SIZE (mode) == 64)
46107	unspec = UNSPEC_RSQRT14;
46108    }
46109
46110  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
46111     rsqrt(a) = -0.5     * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
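  /* This is the standard Newton-Raphson step for 1/sqrt(a),
     x1 = 0.5 * x0 * (3 - a * x0 * x0), written with the sign folded into
     the -0.5 constant; one extra multiply by a turns it into sqrt(a).  */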
46112
46113  a = force_reg (mode, a);
46114
46115  /* x0 = rsqrt(a) estimate */
46116  emit_insn (gen_rtx_SET (VOIDmode, x0,
46117			  gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
46118					  unspec)));
46119
  /* If a == 0.0, filter out the infinite rsqrt estimate so that sqrt (0.0)
     does not produce a NaN.  */
46121  if (!recip)
46122    {
46123      rtx zero, mask;
46124
46125      zero = gen_reg_rtx (mode);
46126      mask = gen_reg_rtx (mode);
46127
46128      zero = force_reg (mode, CONST0_RTX(mode));
46129
46130      /* Handle masked compare.  */
46131      if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 64)
46132	{
46133	  mask = gen_reg_rtx (HImode);
46134	  /* Imm value 0x4 corresponds to not-equal comparison.  */
46135	  emit_insn (gen_avx512f_cmpv16sf3 (mask, zero, a, GEN_INT (0x4)));
46136	  emit_insn (gen_avx512f_blendmv16sf (x0, zero, x0, mask));
46137	}
46138      else
46139	{
46140	  emit_insn (gen_rtx_SET (VOIDmode, mask,
46141				  gen_rtx_NE (mode, zero, a)));
46142
46143	  emit_insn (gen_rtx_SET (VOIDmode, x0,
46144				  gen_rtx_AND (mode, x0, mask)));
46145	}
46146    }
46147
46148  /* e0 = x0 * a */
46149  emit_insn (gen_rtx_SET (VOIDmode, e0,
46150			  gen_rtx_MULT (mode, x0, a)));
46151  /* e1 = e0 * x0 */
46152  emit_insn (gen_rtx_SET (VOIDmode, e1,
46153			  gen_rtx_MULT (mode, e0, x0)));
46154
46155  /* e2 = e1 - 3. */
46156  mthree = force_reg (mode, mthree);
46157  emit_insn (gen_rtx_SET (VOIDmode, e2,
46158			  gen_rtx_PLUS (mode, e1, mthree)));
46159
46160  mhalf = force_reg (mode, mhalf);
46161  if (recip)
46162    /* e3 = -.5 * x0 */
46163    emit_insn (gen_rtx_SET (VOIDmode, e3,
46164			    gen_rtx_MULT (mode, x0, mhalf)));
46165  else
46166    /* e3 = -.5 * e0 */
46167    emit_insn (gen_rtx_SET (VOIDmode, e3,
46168			    gen_rtx_MULT (mode, e0, mhalf)));
46169  /* ret = e2 * e3 */
46170  emit_insn (gen_rtx_SET (VOIDmode, res,
46171			  gen_rtx_MULT (mode, e2, e3)));
46172}
46173
46174#ifdef TARGET_SOLARIS
46175/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */
46176
46177static void
46178i386_solaris_elf_named_section (const char *name, unsigned int flags,
46179				tree decl)
46180{
46181  /* With Binutils 2.15, the "@unwind" marker must be specified on
46182     every occurrence of the ".eh_frame" section, not just the first
46183     one.  */
46184  if (TARGET_64BIT
46185      && strcmp (name, ".eh_frame") == 0)
46186    {
46187      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
46188	       flags & SECTION_WRITE ? "aw" : "a");
46189      return;
46190    }
46191
46192#ifndef USE_GAS
46193  if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
46194    {
46195      solaris_elf_asm_comdat_section (name, flags, decl);
46196      return;
46197    }
46198#endif
46199
46200  default_elf_asm_named_section (name, flags, decl);
46201}
46202#endif /* TARGET_SOLARIS */
46203
46204/* Return the mangling of TYPE if it is an extended fundamental type.  */
46205
46206static const char *
46207ix86_mangle_type (const_tree type)
46208{
46209  type = TYPE_MAIN_VARIANT (type);
46210
46211  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
46212      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
46213    return NULL;
46214
46215  switch (TYPE_MODE (type))
46216    {
46217    case TFmode:
46218      /* __float128 is "g".  */
46219      return "g";
46220    case XFmode:
46221      /* "long double" or __float80 is "e".  */
46222      return "e";
46223    default:
46224      return NULL;
46225    }
46226}
46227
46228/* For 32-bit code we can save PIC register setup by using
46229   __stack_chk_fail_local hidden function instead of calling
46230   __stack_chk_fail directly.  64-bit code doesn't need to setup any PIC
46231   register, so it is better to call __stack_chk_fail directly.  */
46232
46233static tree ATTRIBUTE_UNUSED
46234ix86_stack_protect_fail (void)
46235{
46236  return TARGET_64BIT
46237	 ? default_external_stack_protect_fail ()
46238	 : default_hidden_stack_protect_fail ();
46239}
46240
46241/* Select a format to encode pointers in exception handling data.  CODE
46242   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
46243   true if the symbol may be affected by dynamic relocations.
46244
46245   ??? All x86 object file formats are capable of representing this.
46246   After all, the relocation needed is the same as for the call insn.
46247   Whether or not a particular assembler allows us to enter such, I
46248   guess we'll have to see.  */
46249int
46250asm_preferred_eh_data_format (int code, int global)
46251{
46252  if (flag_pic)
46253    {
46254      int type = DW_EH_PE_sdata8;
46255      if (!TARGET_64BIT
46256	  || ix86_cmodel == CM_SMALL_PIC
46257	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
46258	type = DW_EH_PE_sdata4;
46259      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
46260    }
46261  if (ix86_cmodel == CM_SMALL
46262      || (ix86_cmodel == CM_MEDIUM && code))
46263    return DW_EH_PE_udata4;
46264  return DW_EH_PE_absptr;
46265}
46266
/* Copy the sign of SIGN onto the positive value ABS_VALUE and store the
   result in RESULT.  If MASK is non-null, it is the mask used to clear the
   sign bit (the fabs mask); its complement selects the sign bit.  */
46270static void
46271ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
46272{
46273  machine_mode mode = GET_MODE (sign);
46274  rtx sgn = gen_reg_rtx (mode);
46275  if (mask == NULL_RTX)
46276    {
46277      machine_mode vmode;
46278
46279      if (mode == SFmode)
46280	vmode = V4SFmode;
46281      else if (mode == DFmode)
46282	vmode = V2DFmode;
46283      else
46284	vmode = mode;
46285
46286      mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), false);
46287      if (!VECTOR_MODE_P (mode))
46288	{
46289	  /* We need to generate a scalar mode mask in this case.  */
46290	  rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46291	  tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46292	  mask = gen_reg_rtx (mode);
46293	  emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46294	}
46295    }
46296  else
46297    mask = gen_rtx_NOT (mode, mask);
46298  emit_insn (gen_rtx_SET (VOIDmode, sgn,
46299			  gen_rtx_AND (mode, mask, sign)));
46300  emit_insn (gen_rtx_SET (VOIDmode, result,
46301			  gen_rtx_IOR (mode, abs_value, sgn)));
46302}
46303
46304/* Expand fabs (OP0) and return a new rtx that holds the result.  The
46305   mask for masking out the sign-bit is stored in *SMASK, if that is
46306   non-null.  */
46307static rtx
46308ix86_expand_sse_fabs (rtx op0, rtx *smask)
46309{
46310  machine_mode vmode, mode = GET_MODE (op0);
46311  rtx xa, mask;
46312
46313  xa = gen_reg_rtx (mode);
46314  if (mode == SFmode)
46315    vmode = V4SFmode;
46316  else if (mode == DFmode)
46317    vmode = V2DFmode;
46318  else
46319    vmode = mode;
46320  mask = ix86_build_signbit_mask (vmode, VECTOR_MODE_P (mode), true);
46321  if (!VECTOR_MODE_P (mode))
46322    {
46323      /* We need to generate a scalar mode mask in this case.  */
46324      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
46325      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
46326      mask = gen_reg_rtx (mode);
46327      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
46328    }
46329  emit_insn (gen_rtx_SET (VOIDmode, xa,
46330			  gen_rtx_AND (mode, op0, mask)));
46331
46332  if (smask)
46333    *smask = mask;
46334
46335  return xa;
46336}
46337
46338/* Expands a comparison of OP0 with OP1 using comparison code CODE,
46339   swapping the operands if SWAP_OPERANDS is true.  The expanded
46340   code is a forward jump to a newly created label in case the
46341   comparison is true.  The generated label rtx is returned.  */
46342static rtx_code_label *
46343ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
46344                                  bool swap_operands)
46345{
46346  machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
46347  rtx_code_label *label;
46348  rtx tmp;
46349
46350  if (swap_operands)
46351    std::swap (op0, op1);
46352
46353  label = gen_label_rtx ();
46354  tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
46355  emit_insn (gen_rtx_SET (VOIDmode, tmp,
46356			  gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
46357  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
46358  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
46359			      gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
46360  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
46361  JUMP_LABEL (tmp) = label;
46362
46363  return label;
46364}
46365
/* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns an rtx for the generated mask.  */
46369static rtx
46370ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
46371			      bool swap_operands)
46372{
46373  rtx (*insn)(rtx, rtx, rtx, rtx);
46374  machine_mode mode = GET_MODE (op0);
46375  rtx mask = gen_reg_rtx (mode);
46376
46377  if (swap_operands)
46378    std::swap (op0, op1);
46379
46380  insn = mode == DFmode ? gen_setcc_df_sse : gen_setcc_sf_sse;
46381
46382  emit_insn (insn (mask, op0, op1,
46383		   gen_rtx_fmt_ee (code, mode, op0, op1)));
46384  return mask;
46385}
46386
46387/* Generate and return a rtx of mode MODE for 2**n where n is the number
46388   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
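/* Above this magnitude every representable value of MODE is already an
   integer, which is why the rounding expanders below compare fabs of the
   input against TWO52 and bypass the computation for larger values.  */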
46389static rtx
46390ix86_gen_TWO52 (machine_mode mode)
46391{
46392  REAL_VALUE_TYPE TWO52r;
46393  rtx TWO52;
46394
46395  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
46396  TWO52 = const_double_from_real_value (TWO52r, mode);
46397  TWO52 = force_reg (mode, TWO52);
46398
46399  return TWO52;
46400}
46401
46402/* Expand SSE sequence for computing lround from OP1 storing
46403   into OP0.  */
46404void
46405ix86_expand_lround (rtx op0, rtx op1)
46406{
46407  /* C code for the stuff we're doing below:
46408       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
46409       return (long)tmp;
46410   */
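  /* Using nextafter (0.5, 0.0) instead of 0.5 keeps inputs just below a
     half-way point (e.g. the largest double smaller than 0.5) from being
     rounded up by the addition below, while exact half-way cases still
     round away from zero as lround requires.  */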
46411  machine_mode mode = GET_MODE (op1);
46412  const struct real_format *fmt;
46413  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46414  rtx adj;
46415
46416  /* load nextafter (0.5, 0.0) */
46417  fmt = REAL_MODE_FORMAT (mode);
46418  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46419  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46420
46421  /* adj = copysign (0.5, op1) */
46422  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
46423  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
46424
46425  /* adj = op1 + adj */
46426  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
46427
46428  /* op0 = (imode)adj */
46429  expand_fix (op0, adj, 0);
46430}
46431
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
46434void
46435ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
46436{
46437  /* C code for the stuff we're doing below (for do_floor):
46438	xi = (long)op1;
46439        xi -= (double)xi > op1 ? 1 : 0;
46440        return xi;
46441   */
46442  machine_mode fmode = GET_MODE (op1);
46443  machine_mode imode = GET_MODE (op0);
46444  rtx ireg, freg, tmp;
46445  rtx_code_label *label;
46446
46447  /* reg = (long)op1 */
46448  ireg = gen_reg_rtx (imode);
46449  expand_fix (ireg, op1, 0);
46450
46451  /* freg = (double)reg */
46452  freg = gen_reg_rtx (fmode);
46453  expand_float (freg, ireg, 0);
46454
46455  /* ireg = (freg > op1) ? ireg - 1 : ireg */
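  /* For the ceiling case the comparison operands are swapped below, so the
     fall-through (adjustment) path is taken when freg < op1 and 1 is added
     instead of subtracted.  */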
46456  label = ix86_expand_sse_compare_and_jump (UNLE,
46457					    freg, op1, !do_floor);
46458  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
46459			     ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
46460  emit_move_insn (ireg, tmp);
46461
46462  emit_label (label);
46463  LABEL_NUSES (label) = 1;
46464
46465  emit_move_insn (op0, ireg);
46466}
46467
46468/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
46469   result in OPERAND0.  */
46470void
46471ix86_expand_rint (rtx operand0, rtx operand1)
46472{
46473  /* C code for the stuff we're doing below:
46474	xa = fabs (operand1);
46475        if (!isless (xa, 2**52))
46476	  return operand1;
46477        xa = xa + 2**52 - 2**52;
46478        return copysign (xa, operand1);
46479   */
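  /* Adding and then subtracting 2**52 (2**23 for SFmode) rounds the value
     to an integer in the current rounding mode: once the magnitude reaches
     2**52 the format has no fractional bits left.  Values at or above that
     threshold are already integral, which is why the branch below skips the
     computation for them.  */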
46480  machine_mode mode = GET_MODE (operand0);
46481  rtx res, xa, TWO52, mask;
46482  rtx_code_label *label;
46483
46484  res = gen_reg_rtx (mode);
46485  emit_move_insn (res, operand1);
46486
46487  /* xa = abs (operand1) */
46488  xa = ix86_expand_sse_fabs (res, &mask);
46489
46490  /* if (!isless (xa, TWO52)) goto label; */
46491  TWO52 = ix86_gen_TWO52 (mode);
46492  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46493
46494  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46495  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46496
46497  ix86_sse_copysign_to_positive (res, xa, res, mask);
46498
46499  emit_label (label);
46500  LABEL_NUSES (label) = 1;
46501
46502  emit_move_insn (operand0, res);
46503}
46504
46505/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46506   into OPERAND0.  */
46507void
46508ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
46509{
46510  /* C code for the stuff we expand below.
46511        double xa = fabs (x), x2;
46512        if (!isless (xa, TWO52))
46513          return x;
46514        xa = xa + TWO52 - TWO52;
46515        x2 = copysign (xa, x);
46516     Compensate.  Floor:
46517        if (x2 > x)
46518          x2 -= 1;
46519     Compensate.  Ceil:
46520        if (x2 < x)
46521          x2 -= -1;
46522        return x2;
46523   */
46524  machine_mode mode = GET_MODE (operand0);
46525  rtx xa, TWO52, tmp, one, res, mask;
46526  rtx_code_label *label;
46527
46528  TWO52 = ix86_gen_TWO52 (mode);
46529
46530  /* Temporary for holding the result, initialized to the input
46531     operand to ease control flow.  */
46532  res = gen_reg_rtx (mode);
46533  emit_move_insn (res, operand1);
46534
46535  /* xa = abs (operand1) */
46536  xa = ix86_expand_sse_fabs (res, &mask);
46537
46538  /* if (!isless (xa, TWO52)) goto label; */
46539  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46540
46541  /* xa = xa + TWO52 - TWO52; */
46542  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46543  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
46544
46545  /* xa = copysign (xa, operand1) */
46546  ix86_sse_copysign_to_positive (xa, xa, res, mask);
46547
46548  /* generate 1.0 or -1.0 */
46549  one = force_reg (mode,
46550	           const_double_from_real_value (do_floor
46551						 ? dconst1 : dconstm1, mode));
46552
46553  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46554  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46555  emit_insn (gen_rtx_SET (VOIDmode, tmp,
46556                          gen_rtx_AND (mode, one, tmp)));
46557  /* We always need to subtract here to preserve signed zero.  */
46558  tmp = expand_simple_binop (mode, MINUS,
46559			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46560  emit_move_insn (res, tmp);
46561
46562  emit_label (label);
46563  LABEL_NUSES (label) = 1;
46564
46565  emit_move_insn (operand0, res);
46566}
46567
46568/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
46569   into OPERAND0.  */
46570void
46571ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
46572{
46573  /* C code for the stuff we expand below.
46574	double xa = fabs (x), x2;
46575        if (!isless (xa, TWO52))
46576          return x;
46577	x2 = (double)(long)x;
46578     Compensate.  Floor:
46579	if (x2 > x)
46580	  x2 -= 1;
46581     Compensate.  Ceil:
46582	if (x2 < x)
46583	  x2 += 1;
46584	if (HONOR_SIGNED_ZEROS (mode))
46585	  return copysign (x2, x);
46586	return x2;
46587   */
46588  machine_mode mode = GET_MODE (operand0);
46589  rtx xa, xi, TWO52, tmp, one, res, mask;
46590  rtx_code_label *label;
46591
46592  TWO52 = ix86_gen_TWO52 (mode);
46593
46594  /* Temporary for holding the result, initialized to the input
46595     operand to ease control flow.  */
46596  res = gen_reg_rtx (mode);
46597  emit_move_insn (res, operand1);
46598
46599  /* xa = abs (operand1) */
46600  xa = ix86_expand_sse_fabs (res, &mask);
46601
46602  /* if (!isless (xa, TWO52)) goto label; */
46603  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46604
46605  /* xa = (double)(long)x */
46606  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46607  expand_fix (xi, res, 0);
46608  expand_float (xa, xi, 0);
46609
46610  /* generate 1.0 */
46611  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46612
46613  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
46614  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
46615  emit_insn (gen_rtx_SET (VOIDmode, tmp,
46616                          gen_rtx_AND (mode, one, tmp)));
46617  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
46618			     xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46619  emit_move_insn (res, tmp);
46620
46621  if (HONOR_SIGNED_ZEROS (mode))
46622    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46623
46624  emit_label (label);
46625  LABEL_NUSES (label) = 1;
46626
46627  emit_move_insn (operand0, res);
46628}
46629
46630/* Expand SSE sequence for computing round from OPERAND1 storing
46631   into OPERAND0.  Sequence that works without relying on DImode truncation
46632   via cvttsd2siq that is only available on 64bit targets.  */
46633void
46634ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
46635{
46636  /* C code for the stuff we expand below.
46637        double xa = fabs (x), xa2, x2;
46638        if (!isless (xa, TWO52))
46639          return x;
46640     Using the absolute value and copying back sign makes
46641     -0.0 -> -0.0 correct.
46642        xa2 = xa + TWO52 - TWO52;
46643     Compensate.
46644	dxa = xa2 - xa;
46645        if (dxa <= -0.5)
46646          xa2 += 1;
46647        else if (dxa > 0.5)
46648          xa2 -= 1;
46649        x2 = copysign (xa2, x);
46650        return x2;
46651   */
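  /* xa2 differs from xa by the rounding error of the 2**52 trick, so dxa
     lies in [-0.5, 0.5] under round-to-nearest.  A dxa of exactly -0.5
     means a half-way case was rounded down (to even), so 1 is added to get
     the round-half-away-from-zero result; the dxa > 0.5 case similarly
     compensates if the addition overshoots (e.g. under a directed rounding
     mode).  */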
46652  machine_mode mode = GET_MODE (operand0);
46653  rtx xa, xa2, dxa, TWO52, tmp, half, mhalf, one, res, mask;
46654  rtx_code_label *label;
46655
46656  TWO52 = ix86_gen_TWO52 (mode);
46657
46658  /* Temporary for holding the result, initialized to the input
46659     operand to ease control flow.  */
46660  res = gen_reg_rtx (mode);
46661  emit_move_insn (res, operand1);
46662
46663  /* xa = abs (operand1) */
46664  xa = ix86_expand_sse_fabs (res, &mask);
46665
46666  /* if (!isless (xa, TWO52)) goto label; */
46667  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46668
46669  /* xa2 = xa + TWO52 - TWO52; */
46670  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46671  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
46672
46673  /* dxa = xa2 - xa; */
46674  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
46675
46676  /* generate 0.5, 1.0 and -0.5 */
46677  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
46678  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
46679  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
46680			       0, OPTAB_DIRECT);
46681
46682  /* Compensate.  */
46683  tmp = gen_reg_rtx (mode);
46684  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
46685  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
46686  emit_insn (gen_rtx_SET (VOIDmode, tmp,
46687                          gen_rtx_AND (mode, one, tmp)));
46688  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46689  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
46690  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
46691  emit_insn (gen_rtx_SET (VOIDmode, tmp,
46692                          gen_rtx_AND (mode, one, tmp)));
46693  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
46694
46695  /* res = copysign (xa2, operand1) */
46696  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
46697
46698  emit_label (label);
46699  LABEL_NUSES (label) = 1;
46700
46701  emit_move_insn (operand0, res);
46702}
46703
46704/* Expand SSE sequence for computing trunc from OPERAND1 storing
46705   into OPERAND0.  */
46706void
46707ix86_expand_trunc (rtx operand0, rtx operand1)
46708{
46709  /* C code for SSE variant we expand below.
46710        double xa = fabs (x), x2;
46711        if (!isless (xa, TWO52))
46712          return x;
46713        x2 = (double)(long)x;
46714	if (HONOR_SIGNED_ZEROS (mode))
46715	  return copysign (x2, x);
46716	return x2;
46717   */
46718  machine_mode mode = GET_MODE (operand0);
46719  rtx xa, xi, TWO52, res, mask;
46720  rtx_code_label *label;
46721
46722  TWO52 = ix86_gen_TWO52 (mode);
46723
46724  /* Temporary for holding the result, initialized to the input
46725     operand to ease control flow.  */
46726  res = gen_reg_rtx (mode);
46727  emit_move_insn (res, operand1);
46728
46729  /* xa = abs (operand1) */
46730  xa = ix86_expand_sse_fabs (res, &mask);
46731
46732  /* if (!isless (xa, TWO52)) goto label; */
46733  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46734
46735  /* x = (double)(long)x */
46736  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46737  expand_fix (xi, res, 0);
46738  expand_float (res, xi, 0);
46739
46740  if (HONOR_SIGNED_ZEROS (mode))
46741    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
46742
46743  emit_label (label);
46744  LABEL_NUSES (label) = 1;
46745
46746  emit_move_insn (operand0, res);
46747}
46748
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
46751void
46752ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
46753{
46754  machine_mode mode = GET_MODE (operand0);
46755  rtx xa, mask, TWO52, one, res, smask, tmp;
46756  rtx_code_label *label;
46757
46758  /* C code for SSE variant we expand below.
46759        double xa = fabs (x), x2;
46760        if (!isless (xa, TWO52))
46761          return x;
46762        xa2 = xa + TWO52 - TWO52;
46763     Compensate:
46764        if (xa2 > xa)
46765          xa2 -= 1.0;
46766        x2 = copysign (xa2, x);
46767        return x2;
46768   */
46769
46770  TWO52 = ix86_gen_TWO52 (mode);
46771
46772  /* Temporary for holding the result, initialized to the input
46773     operand to ease control flow.  */
46774  res = gen_reg_rtx (mode);
46775  emit_move_insn (res, operand1);
46776
46777  /* xa = abs (operand1) */
46778  xa = ix86_expand_sse_fabs (res, &smask);
46779
46780  /* if (!isless (xa, TWO52)) goto label; */
46781  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46782
46783  /* res = xa + TWO52 - TWO52; */
46784  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
46785  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
46786  emit_move_insn (res, tmp);
46787
46788  /* generate 1.0 */
46789  one = force_reg (mode, const_double_from_real_value (dconst1, mode));
46790
46791  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
46792  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
46793  emit_insn (gen_rtx_SET (VOIDmode, mask,
46794                          gen_rtx_AND (mode, mask, one)));
46795  tmp = expand_simple_binop (mode, MINUS,
46796			     res, mask, NULL_RTX, 0, OPTAB_DIRECT);
46797  emit_move_insn (res, tmp);
46798
46799  /* res = copysign (res, operand1) */
46800  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
46801
46802  emit_label (label);
46803  LABEL_NUSES (label) = 1;
46804
46805  emit_move_insn (operand0, res);
46806}
46807
46808/* Expand SSE sequence for computing round from OPERAND1 storing
46809   into OPERAND0.  */
46810void
46811ix86_expand_round (rtx operand0, rtx operand1)
46812{
46813  /* C code for the stuff we're doing below:
46814        double xa = fabs (x);
46815        if (!isless (xa, TWO52))
46816          return x;
46817        xa = (double)(long)(xa + nextafter (0.5, 0.0));
46818        return copysign (xa, x);
46819   */
46820  machine_mode mode = GET_MODE (operand0);
46821  rtx res, TWO52, xa, xi, half, mask;
46822  rtx_code_label *label;
46823  const struct real_format *fmt;
46824  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46825
46826  /* Temporary for holding the result, initialized to the input
46827     operand to ease control flow.  */
46828  res = gen_reg_rtx (mode);
46829  emit_move_insn (res, operand1);
46830
46831  TWO52 = ix86_gen_TWO52 (mode);
46832  xa = ix86_expand_sse_fabs (res, &mask);
46833  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
46834
46835  /* load nextafter (0.5, 0.0) */
46836  fmt = REAL_MODE_FORMAT (mode);
46837  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46838  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46839
46840  /* xa = xa + 0.5 */
46841  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
46842  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
46843
46844  /* xa = (double)(int64_t)xa */
46845  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
46846  expand_fix (xi, xa, 0);
46847  expand_float (xa, xi, 0);
46848
46849  /* res = copysign (xa, operand1) */
46850  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
46851
46852  emit_label (label);
46853  LABEL_NUSES (label) = 1;
46854
46855  emit_move_insn (operand0, res);
46856}
46857
46858/* Expand SSE sequence for computing round
46859   from OP1 storing into OP0 using sse4 round insn.  */
46860void
46861ix86_expand_round_sse4 (rtx op0, rtx op1)
46862{
46863  machine_mode mode = GET_MODE (op0);
46864  rtx e1, e2, res, half;
46865  const struct real_format *fmt;
46866  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
46867  rtx (*gen_copysign) (rtx, rtx, rtx);
46868  rtx (*gen_round) (rtx, rtx, rtx);
46869
46870  switch (mode)
46871    {
46872    case SFmode:
46873      gen_copysign = gen_copysignsf3;
46874      gen_round = gen_sse4_1_roundsf2;
46875      break;
46876    case DFmode:
46877      gen_copysign = gen_copysigndf3;
46878      gen_round = gen_sse4_1_rounddf2;
46879      break;
46880    default:
46881      gcc_unreachable ();
46882    }
46883
46884  /* round (a) = trunc (a + copysign (0.5, a)) */
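  /* The SSE4.1 round insn itself rounds half-way cases to even, which is
     why the expansion adds copysign (0.5, a) first and then truncates,
     rather than using the round-to-nearest immediate directly.  */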
46885
46886  /* load nextafter (0.5, 0.0) */
46887  fmt = REAL_MODE_FORMAT (mode);
46888  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
46889  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
46890  half = const_double_from_real_value (pred_half, mode);
46891
46892  /* e1 = copysign (0.5, op1) */
46893  e1 = gen_reg_rtx (mode);
46894  emit_insn (gen_copysign (e1, half, op1));
46895
46896  /* e2 = op1 + e1 */
46897  e2 = expand_simple_binop (mode, PLUS, op1, e1, NULL_RTX, 0, OPTAB_DIRECT);
46898
46899  /* res = trunc (e2) */
46900  res = gen_reg_rtx (mode);
46901  emit_insn (gen_round (res, e2, GEN_INT (ROUND_TRUNC)));
46902
46903  emit_move_insn (op0, res);
46904}
46905
46906
46907/* Table of valid machine attributes.  */
46908static const struct attribute_spec ix86_attribute_table[] =
46909{
46910  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
46911       affects_type_identity } */
46912  /* Stdcall attribute says callee is responsible for popping arguments
46913     if they are not variable.  */
46914  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46915    true },
46916  /* Fastcall attribute says callee is responsible for popping arguments
46917     if they are not variable.  */
46918  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46919    true },
46920  /* Thiscall attribute says callee is responsible for popping arguments
46921     if they are not variable.  */
46922  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46923    true },
46924  /* Cdecl attribute says the callee is a normal C declaration */
46925  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute,
46926    true },
46927  /* Regparm attribute specifies how many integer arguments are to be
46928     passed in registers.  */
46929  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute,
46930    true },
46931  /* Sseregparm attribute says we are using x86_64 calling conventions
46932     for FP arguments.  */
46933  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute,
46934    true },
46935  /* The transactional memory builtins are implicitly regparm or fastcall
46936     depending on the ABI.  Override the generic do-nothing attribute that
46937     these builtins were declared with.  */
46938  { "*tm regparm", 0, 0, false, true, true, ix86_handle_tm_regparm_attribute,
46939    true },
46940  /* force_align_arg_pointer says this function realigns the stack at entry.  */
46941  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
46942    false, true,  true, ix86_handle_force_align_arg_pointer_attribute, false },
46943#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
46944  { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
46945  { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
46946  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute,
46947    false },
46948#endif
46949  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46950    false },
46951  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute,
46952    false },
46953#ifdef SUBTARGET_ATTRIBUTE_TABLE
46954  SUBTARGET_ATTRIBUTE_TABLE,
46955#endif
46956  /* ms_abi and sysv_abi calling convention function attributes.  */
46957  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46958  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute, true },
46959  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute,
46960    false },
46961  { "callee_pop_aggregate_return", 1, 1, false, true, true,
46962    ix86_handle_callee_pop_aggregate_return, true },
46963  /* End element.  */
46964  { NULL,        0, 0, false, false, false, NULL, false }
46965};
46966
46967/* Implement targetm.vectorize.builtin_vectorization_cost.  */
46968static int
46969ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
46970                                 tree vectype, int)
46971{
46972  unsigned elements;
46973
46974  switch (type_of_cost)
46975    {
46976      case scalar_stmt:
46977        return ix86_cost->scalar_stmt_cost;
46978
46979      case scalar_load:
46980        return ix86_cost->scalar_load_cost;
46981
46982      case scalar_store:
46983        return ix86_cost->scalar_store_cost;
46984
46985      case vector_stmt:
46986        return ix86_cost->vec_stmt_cost;
46987
46988      case vector_load:
46989        return ix86_cost->vec_align_load_cost;
46990
46991      case vector_store:
46992        return ix86_cost->vec_store_cost;
46993
46994      case vec_to_scalar:
46995        return ix86_cost->vec_to_scalar_cost;
46996
46997      case scalar_to_vec:
46998        return ix86_cost->scalar_to_vec_cost;
46999
47000      case unaligned_load:
47001      case unaligned_store:
47002        return ix86_cost->vec_unalign_load_cost;
47003
47004      case cond_branch_taken:
47005        return ix86_cost->cond_taken_branch_cost;
47006
47007      case cond_branch_not_taken:
47008        return ix86_cost->cond_not_taken_branch_cost;
47009
47010      case vec_perm:
47011      case vec_promote_demote:
47012        return ix86_cost->vec_stmt_cost;
47013
47014      case vec_construct:
47015	elements = TYPE_VECTOR_SUBPARTS (vectype);
47016	return elements / 2 + 1;
47017
47018      default:
47019        gcc_unreachable ();
47020    }
47021}
47022
47023/* A cached (set (nil) (vselect (vconcat (nil) (nil)) (parallel [])))
47024   insn, so that expand_vselect{,_vconcat} doesn't have to create a fresh
47025   insn every time.  */
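/* The insn is created inside a discarded sequence (see init_vselect_insn),
   so it never appears on the main insn stream; expand_vselect patches its
   operands and mode in place, asks recog_memoized whether the result is
   valid, and then restores the insn to its neutral state.  */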
47026
47027static GTY(()) rtx_insn *vselect_insn;
47028
47029/* Initialize vselect_insn.  */
47030
47031static void
47032init_vselect_insn (void)
47033{
47034  unsigned i;
47035  rtx x;
47036
47037  x = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (MAX_VECT_LEN));
47038  for (i = 0; i < MAX_VECT_LEN; ++i)
47039    XVECEXP (x, 0, i) = const0_rtx;
47040  x = gen_rtx_VEC_SELECT (V2DFmode, gen_rtx_VEC_CONCAT (V4DFmode, const0_rtx,
47041							const0_rtx), x);
47042  x = gen_rtx_SET (VOIDmode, const0_rtx, x);
47043  start_sequence ();
47044  vselect_insn = emit_insn (x);
47045  end_sequence ();
47046}
47047
47048/* Construct (set target (vec_select op0 (parallel perm))) and
47049   return true if that's a valid instruction in the active ISA.  */
47050
47051static bool
47052expand_vselect (rtx target, rtx op0, const unsigned char *perm,
47053		unsigned nelt, bool testing_p)
47054{
47055  unsigned int i;
47056  rtx x, save_vconcat;
47057  int icode;
47058
47059  if (vselect_insn == NULL_RTX)
47060    init_vselect_insn ();
47061
47062  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 1);
47063  PUT_NUM_ELEM (XVEC (x, 0), nelt);
47064  for (i = 0; i < nelt; ++i)
47065    XVECEXP (x, 0, i) = GEN_INT (perm[i]);
47066  save_vconcat = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47067  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = op0;
47068  PUT_MODE (SET_SRC (PATTERN (vselect_insn)), GET_MODE (target));
47069  SET_DEST (PATTERN (vselect_insn)) = target;
47070  icode = recog_memoized (vselect_insn);
47071
47072  if (icode >= 0 && !testing_p)
47073    emit_insn (copy_rtx (PATTERN (vselect_insn)));
47074
47075  SET_DEST (PATTERN (vselect_insn)) = const0_rtx;
47076  XEXP (SET_SRC (PATTERN (vselect_insn)), 0) = save_vconcat;
47077  INSN_CODE (vselect_insn) = -1;
47078
47079  return icode >= 0;
47080}
47081
47082/* Similar, but generate a vec_concat from op0 and op1 as well.  */
47083
47084static bool
47085expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
47086			const unsigned char *perm, unsigned nelt,
47087			bool testing_p)
47088{
47089  machine_mode v2mode;
47090  rtx x;
47091  bool ok;
47092
47093  if (vselect_insn == NULL_RTX)
47094    init_vselect_insn ();
47095
47096  v2mode = GET_MODE_2XWIDER_MODE (GET_MODE (op0));
47097  x = XEXP (SET_SRC (PATTERN (vselect_insn)), 0);
47098  PUT_MODE (x, v2mode);
47099  XEXP (x, 0) = op0;
47100  XEXP (x, 1) = op1;
47101  ok = expand_vselect (target, x, perm, nelt, testing_p);
47102  XEXP (x, 0) = const0_rtx;
47103  XEXP (x, 1) = const0_rtx;
47104  return ok;
47105}
47106
47107/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47108   in terms of blendp[sd] / pblendw / pblendvb / vpblendd.  */
47109
47110static bool
47111expand_vec_perm_blend (struct expand_vec_perm_d *d)
47112{
47113  machine_mode mmode, vmode = d->vmode;
47114  unsigned i, mask, nelt = d->nelt;
47115  rtx target, op0, op1, maskop, x;
47116  rtx rperm[32], vperm;
47117
47118  if (d->one_operand_p)
47119    return false;
47120  if (TARGET_AVX512F && GET_MODE_SIZE (vmode) == 64
47121      && (TARGET_AVX512BW
47122	  || GET_MODE_SIZE (GET_MODE_INNER (vmode)) >= 4))
47123    ;
47124  else if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47125    ;
47126  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47127    ;
47128  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47129    ;
47130  else
47131    return false;
47132
47133  /* This is a blend, not a permute.  Elements must stay in their
47134     respective lanes.  */
47135  for (i = 0; i < nelt; ++i)
47136    {
47137      unsigned e = d->perm[i];
47138      if (!(e == i || e == i + nelt))
47139	return false;
47140    }
47141
47142  if (d->testing_p)
47143    return true;
47144
47145  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
47146     decision should be extracted elsewhere, so that we only try that
47147     sequence once all budget==3 options have been tried.  */
47148  target = d->target;
47149  op0 = d->op0;
47150  op1 = d->op1;
47151  mask = 0;
47152
47153  switch (vmode)
47154    {
47155    case V8DFmode:
47156    case V16SFmode:
47157    case V4DFmode:
47158    case V8SFmode:
47159    case V2DFmode:
47160    case V4SFmode:
47161    case V8HImode:
47162    case V8SImode:
47163    case V32HImode:
47164    case V64QImode:
47165    case V16SImode:
47166    case V8DImode:
47167      for (i = 0; i < nelt; ++i)
47168	mask |= (d->perm[i] >= nelt) << i;
47169      break;
47170
47171    case V2DImode:
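      /* Blend at HImode granularity via pblendw: each 64-bit element spans
	 four 16-bit lanes, so its selector bit is replicated across a 4-bit
	 group below.  */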
47172      for (i = 0; i < 2; ++i)
47173	mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
47174      vmode = V8HImode;
47175      goto do_subreg;
47176
47177    case V4SImode:
47178      for (i = 0; i < 4; ++i)
47179	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47180      vmode = V8HImode;
47181      goto do_subreg;
47182
47183    case V16QImode:
47184      /* See if bytes move in pairs so we can use pblendw with
47185	 an immediate argument, rather than pblendvb with a vector
47186	 argument.  */
47187      for (i = 0; i < 16; i += 2)
47188	if (d->perm[i] + 1 != d->perm[i + 1])
47189	  {
47190	  use_pblendvb:
47191	    for (i = 0; i < nelt; ++i)
47192	      rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
47193
47194	  finish_pblendvb:
47195	    vperm = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
47196	    vperm = force_reg (vmode, vperm);
47197
47198	    if (GET_MODE_SIZE (vmode) == 16)
47199	      emit_insn (gen_sse4_1_pblendvb (target, op0, op1, vperm));
47200	    else
47201	      emit_insn (gen_avx2_pblendvb (target, op0, op1, vperm));
47202	    if (target != d->target)
47203	      emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47204	    return true;
47205	  }
47206
47207      for (i = 0; i < 8; ++i)
47208	mask |= (d->perm[i * 2] >= 16) << i;
47209      vmode = V8HImode;
47210      /* FALLTHRU */
47211
47212    do_subreg:
47213      target = gen_reg_rtx (vmode);
47214      op0 = gen_lowpart (vmode, op0);
47215      op1 = gen_lowpart (vmode, op1);
47216      break;
47217
47218    case V32QImode:
47219      /* See if bytes move in pairs.  If not, vpblendvb must be used.  */
47220      for (i = 0; i < 32; i += 2)
47221	if (d->perm[i] + 1 != d->perm[i + 1])
47222	  goto use_pblendvb;
47223      /* See if bytes move in quadruplets.  If yes, vpblendd
47224	 with immediate can be used.  */
47225      for (i = 0; i < 32; i += 4)
47226	if (d->perm[i] + 2 != d->perm[i + 2])
47227	  break;
47228      if (i < 32)
47229	{
47230	  /* See if bytes move the same in both lanes.  If yes,
47231	     vpblendw with immediate can be used.  */
47232	  for (i = 0; i < 16; i += 2)
47233	    if (d->perm[i] + 16 != d->perm[i + 16])
47234	      goto use_pblendvb;
47235
47236	  /* Use vpblendw.  */
47237	  for (i = 0; i < 16; ++i)
47238	    mask |= (d->perm[i * 2] >= 32) << i;
47239	  vmode = V16HImode;
47240	  goto do_subreg;
47241	}
47242
47243      /* Use vpblendd.  */
47244      for (i = 0; i < 8; ++i)
47245	mask |= (d->perm[i * 4] >= 32) << i;
47246      vmode = V8SImode;
47247      goto do_subreg;
47248
47249    case V16HImode:
47250      /* See if words move in pairs.  If yes, vpblendd can be used.  */
47251      for (i = 0; i < 16; i += 2)
47252	if (d->perm[i] + 1 != d->perm[i + 1])
47253	  break;
47254      if (i < 16)
47255	{
47256	  /* See if words move the same in both lanes.  If not,
47257	     vpblendvb must be used.  */
47258	  for (i = 0; i < 8; i++)
47259	    if (d->perm[i] + 8 != d->perm[i + 8])
47260	      {
47261		/* Use vpblendvb.  */
47262		for (i = 0; i < 32; ++i)
47263		  rperm[i] = (d->perm[i / 2] < 16 ? const0_rtx : constm1_rtx);
47264
47265		vmode = V32QImode;
47266		nelt = 32;
47267		target = gen_reg_rtx (vmode);
47268		op0 = gen_lowpart (vmode, op0);
47269		op1 = gen_lowpart (vmode, op1);
47270		goto finish_pblendvb;
47271	      }
47272
47273	  /* Use vpblendw.  */
47274	  for (i = 0; i < 16; ++i)
47275	    mask |= (d->perm[i] >= 16) << i;
47276	  break;
47277	}
47278
47279      /* Use vpblendd.  */
47280      for (i = 0; i < 8; ++i)
47281	mask |= (d->perm[i * 2] >= 16) << i;
47282      vmode = V8SImode;
47283      goto do_subreg;
47284
47285    case V4DImode:
47286      /* Use vpblendd.  */
47287      for (i = 0; i < 4; ++i)
47288	mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
47289      vmode = V8SImode;
47290      goto do_subreg;
47291
47292    default:
47293      gcc_unreachable ();
47294    }
47295
47296  switch (vmode)
47297    {
47298    case V8DFmode:
47299    case V8DImode:
47300      mmode = QImode;
47301      break;
47302    case V16SFmode:
47303    case V16SImode:
47304      mmode = HImode;
47305      break;
47306    case V32HImode:
47307      mmode = SImode;
47308      break;
47309    case V64QImode:
47310      mmode = DImode;
47311      break;
47312    default:
47313      mmode = VOIDmode;
47314    }
47315
47316  if (mmode != VOIDmode)
47317    maskop = force_reg (mmode, gen_int_mode (mask, mmode));
47318  else
47319    maskop = GEN_INT (mask);
47320
47321  /* This matches five different patterns with the different modes.  */
47322  x = gen_rtx_VEC_MERGE (vmode, op1, op0, maskop);
47323  x = gen_rtx_SET (VOIDmode, target, x);
47324  emit_insn (x);
47325  if (target != d->target)
47326    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47327
47328  return true;
47329}
47330
47331/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47332   in terms of the variable form of vpermilps.
47333
47334   Note that we will have already failed the immediate input vpermilps,
47335   which requires that the high and low part shuffle be identical; the
47336   variable form doesn't require that.  */
47337
47338static bool
47339expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
47340{
47341  rtx rperm[8], vperm;
47342  unsigned i;
47343
47344  if (!TARGET_AVX || d->vmode != V8SFmode || !d->one_operand_p)
47345    return false;
47346
47347  /* We can only permute within the 128-bit lane.  */
47348  for (i = 0; i < 8; ++i)
47349    {
47350      unsigned e = d->perm[i];
47351      if (i < 4 ? e >= 4 : e < 4)
47352	return false;
47353    }
47354
47355  if (d->testing_p)
47356    return true;
47357
47358  for (i = 0; i < 8; ++i)
47359    {
47360      unsigned e = d->perm[i];
47361
47362      /* Within each 128-bit lane, the elements of op0 are numbered
47363	 from 0 and the elements of op1 are numbered from 4.  */
47364      if (e >= 8 + 4)
47365	e -= 8;
47366      else if (e >= 4)
47367	e -= 4;
47368
47369      rperm[i] = GEN_INT (e);
47370    }
47371
47372  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
47373  vperm = force_reg (V8SImode, vperm);
47374  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
47375
47376  return true;
47377}
47378
47379/* Return true if permutation D can be performed as VMODE permutation
47380   instead.  */
47381
47382static bool
47383valid_perm_using_mode_p (machine_mode vmode, struct expand_vec_perm_d *d)
47384{
47385  unsigned int i, j, chunk;
47386
47387  if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT
47388      || GET_MODE_CLASS (d->vmode) != MODE_VECTOR_INT
47389      || GET_MODE_SIZE (vmode) != GET_MODE_SIZE (d->vmode))
47390    return false;
47391
47392  if (GET_MODE_NUNITS (vmode) >= d->nelt)
47393    return true;
47394
47395  chunk = d->nelt / GET_MODE_NUNITS (vmode);
47396  for (i = 0; i < d->nelt; i += chunk)
47397    if (d->perm[i] & (chunk - 1))
47398      return false;
47399    else
47400      for (j = 1; j < chunk; ++j)
47401	if (d->perm[i] + j != d->perm[i + j])
47402	  return false;
47403
47404  return true;
47405}
47406
47407/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47408   in terms of pshufb, vpperm, vpermq, vpermd, vpermps or vperm2i128.  */
47409
47410static bool
47411expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
47412{
47413  unsigned i, nelt, eltsz, mask;
47414  unsigned char perm[64];
47415  machine_mode vmode = V16QImode;
47416  rtx rperm[64], vperm, target, op0, op1;
47417
47418  nelt = d->nelt;
47419
47420  if (!d->one_operand_p)
47421    {
47422      if (!TARGET_XOP || GET_MODE_SIZE (d->vmode) != 16)
47423	{
47424	  if (TARGET_AVX2
47425	      && valid_perm_using_mode_p (V2TImode, d))
47426	    {
47427	      if (d->testing_p)
47428		return true;
47429
47430	      /* Use vperm2i128 insn.  The pattern uses
47431		 V4DImode instead of V2TImode.  */
47432	      target = d->target;
47433	      if (d->vmode != V4DImode)
47434		target = gen_reg_rtx (V4DImode);
47435	      op0 = gen_lowpart (V4DImode, d->op0);
47436	      op1 = gen_lowpart (V4DImode, d->op1);
47437	      rperm[0]
47438		= GEN_INT ((d->perm[0] / (nelt / 2))
47439			   | ((d->perm[nelt / 2] / (nelt / 2)) * 16));
47440	      emit_insn (gen_avx2_permv2ti (target, op0, op1, rperm[0]));
47441	      if (target != d->target)
47442		emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47443	      return true;
47444	    }
47445	  return false;
47446	}
47447    }
47448  else
47449    {
47450      if (GET_MODE_SIZE (d->vmode) == 16)
47451	{
47452	  if (!TARGET_SSSE3)
47453	    return false;
47454	}
47455      else if (GET_MODE_SIZE (d->vmode) == 32)
47456	{
47457	  if (!TARGET_AVX2)
47458	    return false;
47459
47460	  /* V4DImode should be already handled through
47461	     expand_vselect by vpermq instruction.  */
47462	  gcc_assert (d->vmode != V4DImode);
47463
47464	  vmode = V32QImode;
47465	  if (d->vmode == V8SImode
47466	      || d->vmode == V16HImode
47467	      || d->vmode == V32QImode)
47468	    {
47469	      /* First see if vpermq can be used for
47470		 V8SImode/V16HImode/V32QImode.  */
47471	      if (valid_perm_using_mode_p (V4DImode, d))
47472		{
47473		  for (i = 0; i < 4; i++)
47474		    perm[i] = (d->perm[i * nelt / 4] * 4 / nelt) & 3;
47475		  if (d->testing_p)
47476		    return true;
47477		  target = gen_reg_rtx (V4DImode);
47478		  if (expand_vselect (target, gen_lowpart (V4DImode, d->op0),
47479				      perm, 4, false))
47480		    {
47481		      emit_move_insn (d->target,
47482				      gen_lowpart (d->vmode, target));
47483		      return true;
47484		    }
47485		  return false;
47486		}
47487
47488	      /* Next see if vpermd can be used.  */
47489	      if (valid_perm_using_mode_p (V8SImode, d))
47490		vmode = V8SImode;
47491	    }
47492	  /* Or if vpermps can be used.  */
47493	  else if (d->vmode == V8SFmode)
47494	    vmode = V8SImode;
47495
47496	  if (vmode == V32QImode)
47497	    {
	      /* vpshufb only works within a lane; it is not possible to
		 shuffle bytes between the 128-bit lanes.  */
47500	      for (i = 0; i < nelt; ++i)
47501		if ((d->perm[i] ^ i) & (nelt / 2))
47502		  return false;
47503	    }
47504	}
47505      else if (GET_MODE_SIZE (d->vmode) == 64)
47506	{
47507	  if (!TARGET_AVX512BW)
47508	    return false;
47509
47510	  /* If vpermq didn't work, vpshufb won't work either.  */
47511	  if (d->vmode == V8DFmode || d->vmode == V8DImode)
47512	    return false;
47513
47514	  vmode = V64QImode;
47515	  if (d->vmode == V16SImode
47516	      || d->vmode == V32HImode
47517	      || d->vmode == V64QImode)
47518	    {
47519	      /* First see if vpermq can be used for
47520		 V16SImode/V32HImode/V64QImode.  */
47521	      if (valid_perm_using_mode_p (V8DImode, d))
47522		{
47523		  for (i = 0; i < 8; i++)
47524		    perm[i] = (d->perm[i * nelt / 8] * 8 / nelt) & 7;
47525		  if (d->testing_p)
47526		    return true;
47527		  target = gen_reg_rtx (V8DImode);
47528		  if (expand_vselect (target, gen_lowpart (V8DImode, d->op0),
47529				      perm, 8, false))
47530		    {
47531		      emit_move_insn (d->target,
47532				      gen_lowpart (d->vmode, target));
47533		      return true;
47534		    }
47535		  return false;
47536		}
47537
47538	      /* Next see if vpermd can be used.  */
47539	      if (valid_perm_using_mode_p (V16SImode, d))
47540		vmode = V16SImode;
47541	    }
47542	  /* Or if vpermps can be used.  */
47543	  else if (d->vmode == V16SFmode)
47544	    vmode = V16SImode;
47545	  if (vmode == V64QImode)
47546	    {
	      /* vpshufb only works within a lane; it is not possible to
		 shuffle bytes between the 128-bit lanes.  */
47549	      for (i = 0; i < nelt; ++i)
47550		if ((d->perm[i] ^ i) & (nelt / 4))
47551		  return false;
47552	    }
47553	}
47554      else
47555	return false;
47556    }
47557
47558  if (d->testing_p)
47559    return true;
47560
47561  if (vmode == V8SImode)
47562    for (i = 0; i < 8; ++i)
47563      rperm[i] = GEN_INT ((d->perm[i * nelt / 8] * 8 / nelt) & 7);
47564  else if (vmode == V16SImode)
47565    for (i = 0; i < 16; ++i)
47566      rperm[i] = GEN_INT ((d->perm[i * nelt / 16] * 16 / nelt) & 15);
47567  else
47568    {
47569      eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
47570      if (!d->one_operand_p)
47571	mask = 2 * nelt - 1;
47572      else if (vmode == V16QImode)
47573	mask = nelt - 1;
47574      else if (vmode == V64QImode)
47575	mask = nelt / 4 - 1;
47576      else
47577	mask = nelt / 2 - 1;
47578
47579      for (i = 0; i < nelt; ++i)
47580	{
47581	  unsigned j, e = d->perm[i] & mask;
47582	  for (j = 0; j < eltsz; ++j)
47583	    rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
47584	}
47585    }
47586
47587  vperm = gen_rtx_CONST_VECTOR (vmode,
47588				gen_rtvec_v (GET_MODE_NUNITS (vmode), rperm));
47589  vperm = force_reg (vmode, vperm);
47590
47591  target = d->target;
47592  if (d->vmode != vmode)
47593    target = gen_reg_rtx (vmode);
47594  op0 = gen_lowpart (vmode, d->op0);
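  /* A single-operand permutation is done with pshufb or one of the
     variable-permute (vperm*var) insns; a two-operand permutation
     falls through to the XOP vpperm below.  */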
47595  if (d->one_operand_p)
47596    {
47597      if (vmode == V16QImode)
47598	emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
47599      else if (vmode == V32QImode)
47600	emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
47601      else if (vmode == V64QImode)
47602	emit_insn (gen_avx512bw_pshufbv64qi3 (target, op0, vperm));
47603      else if (vmode == V8SFmode)
47604	emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
47605      else if (vmode == V8SImode)
47606	emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
47607      else if (vmode == V16SFmode)
47608	emit_insn (gen_avx512f_permvarv16sf (target, op0, vperm));
47609      else if (vmode == V16SImode)
47610	emit_insn (gen_avx512f_permvarv16si (target, op0, vperm));
47611      else
47612	gcc_unreachable ();
47613    }
47614  else
47615    {
47616      op1 = gen_lowpart (vmode, d->op1);
47617      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
47618    }
47619  if (target != d->target)
47620    emit_move_insn (d->target, gen_lowpart (d->vmode, target));
47621
47622  return true;
47623}
47624
47625/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
47626   in a single instruction.  */
47627
47628static bool
47629expand_vec_perm_1 (struct expand_vec_perm_d *d)
47630{
47631  unsigned i, nelt = d->nelt;
47632  unsigned char perm2[MAX_VECT_LEN];
47633
47634  /* Check plain VEC_SELECT first, because AVX has instructions that could
47635     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
47636     input where SEL+CONCAT may not.  */
47637  if (d->one_operand_p)
47638    {
47639      int mask = nelt - 1;
47640      bool identity_perm = true;
47641      bool broadcast_perm = true;
47642
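      /* Reduce the permutation to single-operand form and detect the
	 identity and broadcast (all element 0) special cases.  */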
47643      for (i = 0; i < nelt; i++)
47644	{
47645	  perm2[i] = d->perm[i] & mask;
47646	  if (perm2[i] != i)
47647	    identity_perm = false;
47648	  if (perm2[i])
47649	    broadcast_perm = false;
47650	}
47651
47652      if (identity_perm)
47653	{
47654	  if (!d->testing_p)
47655	    emit_move_insn (d->target, d->op0);
47656	  return true;
47657	}
47658      else if (broadcast_perm && TARGET_AVX2)
47659	{
47660	  /* Use vpbroadcast{b,w,d}.  */
47661	  rtx (*gen) (rtx, rtx) = NULL;
47662	  switch (d->vmode)
47663	    {
47664	    case V64QImode:
47665	      if (TARGET_AVX512BW)
47666		gen = gen_avx512bw_vec_dupv64qi_1;
47667	      break;
47668	    case V32QImode:
47669	      gen = gen_avx2_pbroadcastv32qi_1;
47670	      break;
47671	    case V32HImode:
47672	      if (TARGET_AVX512BW)
47673		gen = gen_avx512bw_vec_dupv32hi_1;
47674	      break;
47675	    case V16HImode:
47676	      gen = gen_avx2_pbroadcastv16hi_1;
47677	      break;
47678	    case V16SImode:
47679	      if (TARGET_AVX512F)
47680		gen = gen_avx512f_vec_dupv16si_1;
47681	      break;
47682	    case V8SImode:
47683	      gen = gen_avx2_pbroadcastv8si_1;
47684	      break;
47685	    case V16QImode:
47686	      gen = gen_avx2_pbroadcastv16qi;
47687	      break;
47688	    case V8HImode:
47689	      gen = gen_avx2_pbroadcastv8hi;
47690	      break;
47691	    case V16SFmode:
47692	      if (TARGET_AVX512F)
47693		gen = gen_avx512f_vec_dupv16sf_1;
47694	      break;
47695	    case V8SFmode:
47696	      gen = gen_avx2_vec_dupv8sf_1;
47697	      break;
47698	    case V8DFmode:
47699	      if (TARGET_AVX512F)
47700		gen = gen_avx512f_vec_dupv8df_1;
47701	      break;
47702	    case V8DImode:
47703	      if (TARGET_AVX512F)
47704		gen = gen_avx512f_vec_dupv8di_1;
47705	      break;
47706	    /* For other modes prefer other shuffles this function creates.  */
47707	    default: break;
47708	    }
47709	  if (gen != NULL)
47710	    {
47711	      if (!d->testing_p)
47712		emit_insn (gen (d->target, d->op0));
47713	      return true;
47714	    }
47715	}
47716
47717      if (expand_vselect (d->target, d->op0, perm2, nelt, d->testing_p))
47718	return true;
47719
47720      /* There are plenty of patterns in sse.md that are written for
47721	 SEL+CONCAT and are not replicated for a single op.  Perhaps
47722	 that should be changed, to avoid the nastiness here.  */
47723
47724      /* Recognize interleave style patterns, which means incrementing
47725	 every other permutation operand.  */
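      /* E.g. the V4SImode single-operand permutation { 0 0 1 1 } becomes
	 { 0 4 1 5 }, i.e. punpckldq of op0 with itself.  */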
47726      for (i = 0; i < nelt; i += 2)
47727	{
47728	  perm2[i] = d->perm[i] & mask;
47729	  perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
47730	}
47731      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47732				  d->testing_p))
47733	return true;
47734
47735      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
47736      if (nelt >= 4)
47737	{
47738	  for (i = 0; i < nelt; i += 4)
47739	    {
47740	      perm2[i + 0] = d->perm[i + 0] & mask;
47741	      perm2[i + 1] = d->perm[i + 1] & mask;
47742	      perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
47743	      perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
47744	    }
47745
47746	  if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt,
47747				      d->testing_p))
47748	    return true;
47749	}
47750    }
47751
47752  /* Finally, try the fully general two operand permute.  */
47753  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
47754			      d->testing_p))
47755    return true;
47756
47757  /* Recognize interleave style patterns with reversed operands.  */
47758  if (!d->one_operand_p)
47759    {
47760      for (i = 0; i < nelt; ++i)
47761	{
47762	  unsigned e = d->perm[i];
47763	  if (e >= nelt)
47764	    e -= nelt;
47765	  else
47766	    e += nelt;
47767	  perm2[i] = e;
47768	}
47769
47770      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt,
47771				  d->testing_p))
47772	return true;
47773    }
47774
47775  /* Try the SSE4.1 blend variable merge instructions.  */
47776  if (expand_vec_perm_blend (d))
47777    return true;
47778
47779  /* Try one of the AVX vpermil variable permutations.  */
47780  if (expand_vec_perm_vpermil (d))
47781    return true;
47782
47783  /* Try the SSSE3 pshufb or XOP vpperm or AVX2 vperm2i128,
47784     vpshufb, vpermd, vpermps or vpermq variable permutation.  */
47785  if (expand_vec_perm_pshufb (d))
47786    return true;
47787
47788  /* Try the AVX2 vpalignr instruction.  */
47789  if (expand_vec_perm_palignr (d, true))
47790    return true;
47791
47792  /* Try the AVX512F vpermi2 instructions.  */
47793  if (ix86_expand_vec_perm_vpermi2 (NULL_RTX, NULL_RTX, NULL_RTX, NULL_RTX, d))
47794    return true;
47795
47796  return false;
47797}
47798
47799/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
47800   in terms of a pair of pshuflw + pshufhw instructions.  */
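/* For example, the V8HImode permutation { 2 0 3 1 5 4 7 6 } keeps the low
   four and the high four words within their 64-bit halves, so it can be
   handled here with one pshuflw followed by one pshufhw.  */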
47801
47802static bool
47803expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
47804{
47805  unsigned char perm2[MAX_VECT_LEN];
47806  unsigned i;
47807  bool ok;
47808
47809  if (d->vmode != V8HImode || !d->one_operand_p)
47810    return false;
47811
47812  /* The two permutations only operate in 64-bit lanes.  */
47813  for (i = 0; i < 4; ++i)
47814    if (d->perm[i] >= 4)
47815      return false;
47816  for (i = 4; i < 8; ++i)
47817    if (d->perm[i] < 4)
47818      return false;
47819
47820  if (d->testing_p)
47821    return true;
47822
47823  /* Emit the pshuflw.  */
47824  memcpy (perm2, d->perm, 4);
47825  for (i = 4; i < 8; ++i)
47826    perm2[i] = i;
47827  ok = expand_vselect (d->target, d->op0, perm2, 8, d->testing_p);
47828  gcc_assert (ok);
47829
47830  /* Emit the pshufhw.  */
47831  memcpy (perm2 + 4, d->perm + 4, 4);
47832  for (i = 0; i < 4; ++i)
47833    perm2[i] = i;
47834  ok = expand_vselect (d->target, d->target, perm2, 8, d->testing_p);
47835  gcc_assert (ok);
47836
47837  return true;
47838}
47839
47840/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
47841   the permutation using the SSSE3 palignr instruction.  This succeeds
47842   when all of the elements in PERM fit within one vector and we merely
47843   need to shift them down so that a single vector permutation has a
47844   chance to succeed.  If SINGLE_INSN_ONLY_P, succeed only if
47845   the vpalignr instruction itself can perform the requested permutation.  */
47846
47847static bool
47848expand_vec_perm_palignr (struct expand_vec_perm_d *d, bool single_insn_only_p)
47849{
47850  unsigned i, nelt = d->nelt;
47851  unsigned min, max, minswap, maxswap;
47852  bool in_order, ok, swap = false;
47853  rtx shift, target;
47854  struct expand_vec_perm_d dcopy;
47855
47856  /* Even with AVX, palignr only operates on 128-bit vectors;
47857     with AVX2, palignr operates within each 128-bit lane separately.  */
47858  if ((!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
47859      && (!TARGET_AVX2 || GET_MODE_SIZE (d->vmode) != 32))
47860    return false;
47861
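  /* Compute the range of elements referenced by the permutation, and the
     same range with the two operands swapped.  If every referenced
     element fits within one vector's worth of elements (one 128-bit
     lane's worth for 32-byte modes) starting at MIN, a palignr shift by
     MIN elements can gather them into a single operand.  */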
47862  min = 2 * nelt;
47863  max = 0;
47864  minswap = 2 * nelt;
47865  maxswap = 0;
47866  for (i = 0; i < nelt; ++i)
47867    {
47868      unsigned e = d->perm[i];
47869      unsigned eswap = d->perm[i] ^ nelt;
47870      if (GET_MODE_SIZE (d->vmode) == 32)
47871	{
47872	  e = (e & ((nelt / 2) - 1)) | ((e & nelt) >> 1);
47873	  eswap = e ^ (nelt / 2);
47874	}
47875      if (e < min)
47876	min = e;
47877      if (e > max)
47878	max = e;
47879      if (eswap < minswap)
47880	minswap = eswap;
47881      if (eswap > maxswap)
47882	maxswap = eswap;
47883    }
47884  if (min == 0
47885      || max - min >= (GET_MODE_SIZE (d->vmode) == 32 ? nelt / 2 : nelt))
47886    {
47887      if (d->one_operand_p
47888	  || minswap == 0
47889	  || maxswap - minswap >= (GET_MODE_SIZE (d->vmode) == 32
47890				   ? nelt / 2 : nelt))
47891	return false;
47892      swap = true;
47893      min = minswap;
47894      max = maxswap;
47895    }
47896
47897  /* Given that we have SSSE3, we know we'll be able to implement the
47898     single operand permutation after the palignr with pshufb for
47899     128-bit vectors.  If SINGLE_INSN_ONLY_P, in_order has to be computed
47900     first.  */
47901  if (d->testing_p && GET_MODE_SIZE (d->vmode) == 16 && !single_insn_only_p)
47902    return true;
47903
47904  dcopy = *d;
47905  if (swap)
47906    {
47907      dcopy.op0 = d->op1;
47908      dcopy.op1 = d->op0;
47909      for (i = 0; i < nelt; ++i)
47910	dcopy.perm[i] ^= nelt;
47911    }
47912
47913  in_order = true;
47914  for (i = 0; i < nelt; ++i)
47915    {
47916      unsigned e = dcopy.perm[i];
47917      if (GET_MODE_SIZE (d->vmode) == 32
47918	  && e >= nelt
47919	  && (e & (nelt / 2 - 1)) < min)
47920	e = e - min - (nelt / 2);
47921      else
47922	e = e - min;
47923      if (e != i)
47924	in_order = false;
47925      dcopy.perm[i] = e;
47926    }
47927  dcopy.one_operand_p = true;
47928
47929  if (single_insn_only_p && !in_order)
47930    return false;
47931
47932  /* For AVX2, test whether we can permute the result in one instruction.  */
47933  if (d->testing_p)
47934    {
47935      if (in_order)
47936	return true;
47937      dcopy.op1 = dcopy.op0;
47938      return expand_vec_perm_1 (&dcopy);
47939    }
47940
47941  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
47942  if (GET_MODE_SIZE (d->vmode) == 16)
47943    {
47944      target = gen_reg_rtx (TImode);
47945      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, dcopy.op1),
47946				      gen_lowpart (TImode, dcopy.op0), shift));
47947    }
47948  else
47949    {
47950      target = gen_reg_rtx (V2TImode);
47951      emit_insn (gen_avx2_palignrv2ti (target,
47952				       gen_lowpart (V2TImode, dcopy.op1),
47953				       gen_lowpart (V2TImode, dcopy.op0),
47954				       shift));
47955    }
47956
47957  dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
47958
47959  /* Test for the degenerate case where the alignment by itself
47960     produces the desired permutation.  */
47961  if (in_order)
47962    {
47963      emit_move_insn (d->target, dcopy.op0);
47964      return true;
47965    }
47966
47967  ok = expand_vec_perm_1 (&dcopy);
47968  gcc_assert (ok || GET_MODE_SIZE (d->vmode) == 32);
47969
47970  return ok;
47971}
47972
47973/* A subroutine of ix86_expand_vec_perm_const_1.  Try to simplify
47974   the permutation using the SSE4_1 pblendv instruction.  Potentially
47975   reduces the permutation from 2 pshufb plus an or to 1 pshufb plus a pblendv.  */
47976
47977static bool
47978expand_vec_perm_pblendv (struct expand_vec_perm_d *d)
47979{
47980  unsigned i, which, nelt = d->nelt;
47981  struct expand_vec_perm_d dcopy, dcopy1;
47982  machine_mode vmode = d->vmode;
47983  bool ok;
47984
47985  /* Use the same checks as in expand_vec_perm_blend.  */
47986  if (d->one_operand_p)
47987    return false;
47988  if (TARGET_AVX2 && GET_MODE_SIZE (vmode) == 32)
47989    ;
47990  else if (TARGET_AVX && (vmode == V4DFmode || vmode == V8SFmode))
47991    ;
47992  else if (TARGET_SSE4_1 && GET_MODE_SIZE (vmode) == 16)
47993    ;
47994  else
47995    return false;
47996
47997  /* Figure out which permutation elements do not stay in their
47998     original positions.  */
47999  for (i = 0, which = 0; i < nelt; ++i)
48000    {
48001      unsigned e = d->perm[i];
48002      if (e != i)
48003	which |= (e < nelt ? 1 : 2);
48004    }
48005  /* We can pblend the out-of-place elements only when they all
48006     come from the same half of the concatenated input, i.e. from
48007     the same operand.
48008     {0 1 8 3 4 5 9 7} is ok: 8 and 9 are out of place, but both
48009     are >= 8.
48010     {0 1 8 3 4 5 2 7} is not ok: 2 and 8 are out of place, and
48011     8 >= 8 but 2 is not.  */
48012  if (which != 1 && which != 2)
48013    return false;
48014  if (d->testing_p && GET_MODE_SIZE (vmode) == 16)
48015    return true;
48016
48017  /* First apply a single-operand permutation to the operand that
48018     supplies the out-of-place elements.  */
48019  dcopy = *d;
48020  if (which == 2)
48021    dcopy.op0 = dcopy.op1 = d->op1;
48022  else
48023    dcopy.op0 = dcopy.op1 = d->op0;
48024  if (!d->testing_p)
48025    dcopy.target = gen_reg_rtx (vmode);
48026  dcopy.one_operand_p = true;
48027
48028  for (i = 0; i < nelt; ++i)
48029    dcopy.perm[i] = d->perm[i] & (nelt - 1);
48030
48031  ok = expand_vec_perm_1 (&dcopy);
48032  if (GET_MODE_SIZE (vmode) != 16 && !ok)
48033    return false;
48034  else
48035    gcc_assert (ok);
48036  if (d->testing_p)
48037    return true;
48038
48039  /* Next we put permuted elements into their positions.  */
48040  dcopy1 = *d;
48041  if (which == 2)
48042    dcopy1.op1 = dcopy.target;
48043  else
48044    dcopy1.op0 = dcopy.target;
48045
48046  for (i = 0; i < nelt; ++i)
48047    dcopy1.perm[i] = ((d->perm[i] >= nelt) ? (nelt + i) : i);
48048
48049  ok = expand_vec_perm_blend (&dcopy1);
48050  gcc_assert (ok);
48051
48052  return true;
48053}
48054
48055static bool expand_vec_perm_interleave3 (struct expand_vec_perm_d *d);
48056
48057/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48058   a two vector permutation into a single vector permutation by using
48059   an interleave operation to merge the vectors.  */
48060
48061static bool
48062expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
48063{
48064  struct expand_vec_perm_d dremap, dfinal;
48065  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
48066  unsigned HOST_WIDE_INT contents;
48067  unsigned char remap[2 * MAX_VECT_LEN];
48068  rtx_insn *seq;
48069  bool ok, same_halves = false;
48070
48071  if (GET_MODE_SIZE (d->vmode) == 16)
48072    {
48073      if (d->one_operand_p)
48074	return false;
48075    }
48076  else if (GET_MODE_SIZE (d->vmode) == 32)
48077    {
48078      if (!TARGET_AVX)
48079	return false;
48080      /* For 32-byte modes allow this even for d->one_operand_p.
48081	 The lack of cross-lane shuffling in some instructions
48082	 might prevent a single insn shuffle.  */
48083      dfinal = *d;
48084      dfinal.testing_p = true;
48085      /* If expand_vec_perm_interleave3 can expand this into
48086	 a 3 insn sequence, give up and let it be expanded as
48087	 a 3 insn sequence.  While that is one insn longer,
48088	 it doesn't need a memory operand, and in the common
48089	 case where the interleave low and interleave high
48090	 permutations with the same operands are adjacent, the
48091	 pair needs only 4 insns after CSE.  */
48092      if (expand_vec_perm_interleave3 (&dfinal))
48093	return false;
48094    }
48095  else
48096    return false;
48097
48098  /* Examine from whence the elements come.  */
48099  contents = 0;
48100  for (i = 0; i < nelt; ++i)
48101    contents |= ((unsigned HOST_WIDE_INT) 1) << d->perm[i];
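  /* Bit J of CONTENTS is now set iff element J of the concatenated
     input { op0, op1 } is referenced by the permutation.  */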
48102
48103  memset (remap, 0xff, sizeof (remap));
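  /* REMAP[J] records where input element J ends up after the first
     (dremap) shuffle, or 0xff if it is not used.  */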
48104  dremap = *d;
48105
48106  if (GET_MODE_SIZE (d->vmode) == 16)
48107    {
48108      unsigned HOST_WIDE_INT h1, h2, h3, h4;
48109
48110      /* Split the two input vectors into 4 halves.  */
48111      h1 = (((unsigned HOST_WIDE_INT) 1) << nelt2) - 1;
48112      h2 = h1 << nelt2;
48113      h3 = h2 << nelt2;
48114      h4 = h3 << nelt2;
48115
48116      /* If all elements come from the low halves use interleave low;
48117	 similarly for interleave high.  If the elements are from mis-matched
48118	 halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
48119      if ((contents & (h1 | h3)) == contents)
48120	{
48121	  /* punpckl* */
48122	  for (i = 0; i < nelt2; ++i)
48123	    {
48124	      remap[i] = i * 2;
48125	      remap[i + nelt] = i * 2 + 1;
48126	      dremap.perm[i * 2] = i;
48127	      dremap.perm[i * 2 + 1] = i + nelt;
48128	    }
48129	  if (!TARGET_SSE2 && d->vmode == V4SImode)
48130	    dremap.vmode = V4SFmode;
48131	}
48132      else if ((contents & (h2 | h4)) == contents)
48133	{
48134	  /* punpckh* */
48135	  for (i = 0; i < nelt2; ++i)
48136	    {
48137	      remap[i + nelt2] = i * 2;
48138	      remap[i + nelt + nelt2] = i * 2 + 1;
48139	      dremap.perm[i * 2] = i + nelt2;
48140	      dremap.perm[i * 2 + 1] = i + nelt + nelt2;
48141	    }
48142	  if (!TARGET_SSE2 && d->vmode == V4SImode)
48143	    dremap.vmode = V4SFmode;
48144	}
48145      else if ((contents & (h1 | h4)) == contents)
48146	{
48147	  /* shufps */
48148	  for (i = 0; i < nelt2; ++i)
48149	    {
48150	      remap[i] = i;
48151	      remap[i + nelt + nelt2] = i + nelt2;
48152	      dremap.perm[i] = i;
48153	      dremap.perm[i + nelt2] = i + nelt + nelt2;
48154	    }
48155	  if (nelt != 4)
48156	    {
48157	      /* shufpd */
48158	      dremap.vmode = V2DImode;
48159	      dremap.nelt = 2;
48160	      dremap.perm[0] = 0;
48161	      dremap.perm[1] = 3;
48162	    }
48163	}
48164      else if ((contents & (h2 | h3)) == contents)
48165	{
48166	  /* shufps */
48167	  for (i = 0; i < nelt2; ++i)
48168	    {
48169	      remap[i + nelt2] = i;
48170	      remap[i + nelt] = i + nelt2;
48171	      dremap.perm[i] = i + nelt2;
48172	      dremap.perm[i + nelt2] = i + nelt;
48173	    }
48174	  if (nelt != 4)
48175	    {
48176	      /* shufpd */
48177	      dremap.vmode = V2DImode;
48178	      dremap.nelt = 2;
48179	      dremap.perm[0] = 1;
48180	      dremap.perm[1] = 2;
48181	    }
48182	}
48183      else
48184	return false;
48185    }
48186  else
48187    {
48188      unsigned int nelt4 = nelt / 4, nzcnt = 0;
48189      unsigned HOST_WIDE_INT q[8];
48190      unsigned int nonzero_halves[4];
48191
48192      /* Split the two input vectors into 8 quarters.  */
48193      q[0] = (((unsigned HOST_WIDE_INT) 1) << nelt4) - 1;
48194      for (i = 1; i < 8; ++i)
48195	q[i] = q[0] << (nelt4 * i);
48196      for (i = 0; i < 4; ++i)
48197	if (((q[2 * i] | q[2 * i + 1]) & contents) != 0)
48198	  {
48199	    nonzero_halves[nzcnt] = i;
48200	    ++nzcnt;
48201	  }
48202
48203      if (nzcnt == 1)
48204	{
48205	  gcc_assert (d->one_operand_p);
48206	  nonzero_halves[1] = nonzero_halves[0];
48207	  same_halves = true;
48208	}
48209      else if (d->one_operand_p)
48210	{
48211	  gcc_assert (nonzero_halves[0] == 0);
48212	  gcc_assert (nonzero_halves[1] == 1);
48213	}
48214
48215      if (nzcnt <= 2)
48216	{
48217	  if (d->perm[0] / nelt2 == nonzero_halves[1])
48218	    {
48219	      /* Attempt to increase the likelihood that dfinal
48220		 shuffle will be intra-lane.  */
48221	      char tmph = nonzero_halves[0];
48222	      nonzero_halves[0] = nonzero_halves[1];
48223	      nonzero_halves[1] = tmph;
48224	    }
48225
48226	  /* vperm2f128 or vperm2i128.  */
48227	  for (i = 0; i < nelt2; ++i)
48228	    {
48229	      remap[i + nonzero_halves[1] * nelt2] = i + nelt2;
48230	      remap[i + nonzero_halves[0] * nelt2] = i;
48231	      dremap.perm[i + nelt2] = i + nonzero_halves[1] * nelt2;
48232	      dremap.perm[i] = i + nonzero_halves[0] * nelt2;
48233	    }
48234
48235	  if (d->vmode != V8SFmode
48236	      && d->vmode != V4DFmode
48237	      && d->vmode != V8SImode)
48238	    {
48239	      dremap.vmode = V8SImode;
48240	      dremap.nelt = 8;
48241	      for (i = 0; i < 4; ++i)
48242		{
48243		  dremap.perm[i] = i + nonzero_halves[0] * 4;
48244		  dremap.perm[i + 4] = i + nonzero_halves[1] * 4;
48245		}
48246	    }
48247	}
48248      else if (d->one_operand_p)
48249	return false;
48250      else if (TARGET_AVX2
48251	       && (contents & (q[0] | q[2] | q[4] | q[6])) == contents)
48252	{
48253	  /* vpunpckl* */
48254	  for (i = 0; i < nelt4; ++i)
48255	    {
48256	      remap[i] = i * 2;
48257	      remap[i + nelt] = i * 2 + 1;
48258	      remap[i + nelt2] = i * 2 + nelt2;
48259	      remap[i + nelt + nelt2] = i * 2 + nelt2 + 1;
48260	      dremap.perm[i * 2] = i;
48261	      dremap.perm[i * 2 + 1] = i + nelt;
48262	      dremap.perm[i * 2 + nelt2] = i + nelt2;
48263	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2;
48264	    }
48265	}
48266      else if (TARGET_AVX2
48267	       && (contents & (q[1] | q[3] | q[5] | q[7])) == contents)
48268	{
48269	  /* vpunpckh* */
48270	  for (i = 0; i < nelt4; ++i)
48271	    {
48272	      remap[i + nelt4] = i * 2;
48273	      remap[i + nelt + nelt4] = i * 2 + 1;
48274	      remap[i + nelt2 + nelt4] = i * 2 + nelt2;
48275	      remap[i + nelt + nelt2 + nelt4] = i * 2 + nelt2 + 1;
48276	      dremap.perm[i * 2] = i + nelt4;
48277	      dremap.perm[i * 2 + 1] = i + nelt + nelt4;
48278	      dremap.perm[i * 2 + nelt2] = i + nelt2 + nelt4;
48279	      dremap.perm[i * 2 + nelt2 + 1] = i + nelt + nelt2 + nelt4;
48280	    }
48281	}
48282      else
48283	return false;
48284    }
48285
48286  /* Use the remapping array set up above to move the elements from their
48287     swizzled locations into their final destinations.  */
48288  dfinal = *d;
48289  for (i = 0; i < nelt; ++i)
48290    {
48291      unsigned e = remap[d->perm[i]];
48292      gcc_assert (e < nelt);
48293      /* If same_halves is true, both halves of the remapped vector are the
48294	 same.  Avoid cross-lane accesses if possible.  */
48295      if (same_halves && i >= nelt2)
48296	{
48297	  gcc_assert (e < nelt2);
48298	  dfinal.perm[i] = e + nelt2;
48299	}
48300      else
48301	dfinal.perm[i] = e;
48302    }
48303  if (!d->testing_p)
48304    {
48305      dremap.target = gen_reg_rtx (dremap.vmode);
48306      dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48307    }
48308  dfinal.op1 = dfinal.op0;
48309  dfinal.one_operand_p = true;
48310
48311  /* Test if the final remap can be done with a single insn.  For V4SFmode or
48312     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
48313  start_sequence ();
48314  ok = expand_vec_perm_1 (&dfinal);
48315  seq = get_insns ();
48316  end_sequence ();
48317
48318  if (!ok)
48319    return false;
48320
48321  if (d->testing_p)
48322    return true;
48323
48324  if (dremap.vmode != dfinal.vmode)
48325    {
48326      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
48327      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
48328    }
48329
48330  ok = expand_vec_perm_1 (&dremap);
48331  gcc_assert (ok);
48332
48333  emit_insn (seq);
48334  return true;
48335}
48336
48337/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48338   a single vector cross-lane permutation into vpermq followed
48339   by any of the single insn permutations.  */
48340
48341static bool
48342expand_vec_perm_vpermq_perm_1 (struct expand_vec_perm_d *d)
48343{
48344  struct expand_vec_perm_d dremap, dfinal;
48345  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, nelt4 = nelt / 4;
48346  unsigned contents[2];
48347  bool ok;
48348
48349  if (!(TARGET_AVX2
48350	&& (d->vmode == V32QImode || d->vmode == V16HImode)
48351	&& d->one_operand_p))
48352    return false;
48353
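  /* For each half of the result record which of the four 64-bit quarters
     of the input it draws from; the vpermq step below can supply at most
     two distinct quarters to each 128-bit lane of the result.  */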
48354  contents[0] = 0;
48355  contents[1] = 0;
48356  for (i = 0; i < nelt2; ++i)
48357    {
48358      contents[0] |= 1u << (d->perm[i] / nelt4);
48359      contents[1] |= 1u << (d->perm[i + nelt2] / nelt4);
48360    }
48361
48362  for (i = 0; i < 2; ++i)
48363    {
48364      unsigned int cnt = 0;
48365      for (j = 0; j < 4; ++j)
48366	if ((contents[i] & (1u << j)) != 0 && ++cnt > 2)
48367	  return false;
48368    }
48369
48370  if (d->testing_p)
48371    return true;
48372
48373  dremap = *d;
48374  dremap.vmode = V4DImode;
48375  dremap.nelt = 4;
48376  dremap.target = gen_reg_rtx (V4DImode);
48377  dremap.op0 = gen_lowpart (V4DImode, d->op0);
48378  dremap.op1 = dremap.op0;
48379  dremap.one_operand_p = true;
48380  for (i = 0; i < 2; ++i)
48381    {
48382      unsigned int cnt = 0;
48383      for (j = 0; j < 4; ++j)
48384	if ((contents[i] & (1u << j)) != 0)
48385	  dremap.perm[2 * i + cnt++] = j;
48386      for (; cnt < 2; ++cnt)
48387	dremap.perm[2 * i + cnt] = 0;
48388    }
48389
48390  dfinal = *d;
48391  dfinal.op0 = gen_lowpart (dfinal.vmode, dremap.target);
48392  dfinal.op1 = dfinal.op0;
48393  dfinal.one_operand_p = true;
48394  for (i = 0, j = 0; i < nelt; ++i)
48395    {
48396      if (i == nelt2)
48397	j = 2;
48398      dfinal.perm[i] = (d->perm[i] & (nelt4 - 1)) | (j ? nelt2 : 0);
48399      if ((d->perm[i] / nelt4) == dremap.perm[j])
48400	;
48401      else if ((d->perm[i] / nelt4) == dremap.perm[j + 1])
48402	dfinal.perm[i] |= nelt4;
48403      else
48404	gcc_unreachable ();
48405    }
48406
48407  ok = expand_vec_perm_1 (&dremap);
48408  gcc_assert (ok);
48409
48410  ok = expand_vec_perm_1 (&dfinal);
48411  gcc_assert (ok);
48412
48413  return true;
48414}
48415
48416/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to expand
48417   a vector permutation using two instructions, vperm2f128 (or
48418   vperm2i128) followed by any single in-lane permutation.  */
48419
48420static bool
48421expand_vec_perm_vperm2f128 (struct expand_vec_perm_d *d)
48422{
48423  struct expand_vec_perm_d dfirst, dsecond;
48424  unsigned i, j, nelt = d->nelt, nelt2 = nelt / 2, perm;
48425  bool ok;
48426
48427  if (!TARGET_AVX
48428      || GET_MODE_SIZE (d->vmode) != 32
48429      || (d->vmode != V8SFmode && d->vmode != V4DFmode && !TARGET_AVX2))
48430    return false;
48431
48432  dsecond = *d;
48433  dsecond.one_operand_p = false;
48434  dsecond.testing_p = true;
48435
48436  /* ((perm << 2)|perm) & 0x33 is the vperm2[fi]128
48437     immediate.  For perm < 16 the second permutation uses
48438     d->op0 as first operand, for perm >= 16 it uses d->op1
48439     as first operand.  The second operand is the result of
48440     vperm2[fi]128.  */
48441  for (perm = 0; perm < 32; perm++)
48442    {
48443      /* Ignore permutations which do not move anything cross-lane.  */
48444      if (perm < 16)
48445	{
48446	  /* The second shuffle for e.g. V4DFmode has
48447	     0123 and ABCD operands.
48448	     Ignore AB23, as 23 is already in the second lane
48449	     of the first operand.  */
48450	  if ((perm & 0xc) == (1 << 2)) continue;
48451	  /* And 01CD, as 01 is in the first lane of the first
48452	     operand.  */
48453	  if ((perm & 3) == 0) continue;
48454	  /* And 4567, as then the vperm2[fi]128 doesn't change
48455	     anything on the original 4567 second operand.  */
48456	  if ((perm & 0xf) == ((3 << 2) | 2)) continue;
48457	}
48458      else
48459	{
48460	  /* The second shuffle for e.g. V4DFmode has
48461	     4567 and ABCD operands.
48462	     Ignore AB67, as 67 is already in the second lane
48463	     of the first operand.  */
48464	  if ((perm & 0xc) == (3 << 2)) continue;
48465	  /* And 45CD, as 45 is in the first lane of the first
48466	     operand.  */
48467	  if ((perm & 3) == 2) continue;
48468	  /* And 0123, as then the vperm2[fi]128 doesn't change
48469	     anything on the original 0123 first operand.  */
48470	  if ((perm & 0xf) == (1 << 2)) continue;
48471	}
48472
48473      for (i = 0; i < nelt; i++)
48474	{
48475	  j = d->perm[i] / nelt2;
48476	  if (j == ((perm >> (2 * (i >= nelt2))) & 3))
48477	    dsecond.perm[i] = nelt + (i & nelt2) + (d->perm[i] & (nelt2 - 1));
48478	  else if (j == (unsigned) (i >= nelt2) + 2 * (perm >= 16))
48479	    dsecond.perm[i] = d->perm[i] & (nelt - 1);
48480	  else
48481	    break;
48482	}
48483
48484      if (i == nelt)
48485	{
48486	  start_sequence ();
48487	  ok = expand_vec_perm_1 (&dsecond);
48488	  end_sequence ();
48489	}
48490      else
48491	ok = false;
48492
48493      if (ok)
48494	{
48495	  if (d->testing_p)
48496	    return true;
48497
48498	  /* Found a usable second shuffle.  dfirst will be
48499	     vperm2f128 on d->op0 and d->op1.  */
48500	  dsecond.testing_p = false;
48501	  dfirst = *d;
48502	  dfirst.target = gen_reg_rtx (d->vmode);
48503	  for (i = 0; i < nelt; i++)
48504	    dfirst.perm[i] = (i & (nelt2 - 1))
48505			     + ((perm >> (2 * (i >= nelt2))) & 3) * nelt2;
48506
48507	  canonicalize_perm (&dfirst);
48508	  ok = expand_vec_perm_1 (&dfirst);
48509	  gcc_assert (ok);
48510
48511	  /* And dsecond is some single insn shuffle, taking
48512	     d->op0 and result of vperm2f128 (if perm < 16) or
48513	     d->op1 and result of vperm2f128 (otherwise).  */
48514	  if (perm >= 16)
48515	    dsecond.op0 = dsecond.op1;
48516	  dsecond.op1 = dfirst.target;
48517
48518	  ok = expand_vec_perm_1 (&dsecond);
48519	  gcc_assert (ok);
48520
48521	  return true;
48522	}
48523
48524      /* For one operand, the only useful vperm2f128 permutation is 0x01,
48525	 i.e. a swap of the two lanes.  */
48526      if (d->one_operand_p)
48527	return false;
48528    }
48529
48530  return false;
48531}
48532
48533/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
48534   a two vector permutation using 2 intra-lane interleave insns
48535   and cross-lane shuffle for 32-byte vectors.  */
48536
48537static bool
48538expand_vec_perm_interleave3 (struct expand_vec_perm_d *d)
48539{
48540  unsigned i, nelt;
48541  rtx (*gen) (rtx, rtx, rtx);
48542
48543  if (d->one_operand_p)
48544    return false;
48545  if (TARGET_AVX2 && GET_MODE_SIZE (d->vmode) == 32)
48546    ;
48547  else if (TARGET_AVX && (d->vmode == V8SFmode || d->vmode == V4DFmode))
48548    ;
48549  else
48550    return false;
48551
48552  nelt = d->nelt;
48553  if (d->perm[0] != 0 && d->perm[0] != nelt / 2)
48554    return false;
48555  for (i = 0; i < nelt; i += 2)
48556    if (d->perm[i] != d->perm[0] + i / 2
48557	|| d->perm[i + 1] != d->perm[0] + i / 2 + nelt)
48558      return false;
48559
48560  if (d->testing_p)
48561    return true;
48562
48563  switch (d->vmode)
48564    {
48565    case V32QImode:
48566      if (d->perm[0])
48567	gen = gen_vec_interleave_highv32qi;
48568      else
48569	gen = gen_vec_interleave_lowv32qi;
48570      break;
48571    case V16HImode:
48572      if (d->perm[0])
48573	gen = gen_vec_interleave_highv16hi;
48574      else
48575	gen = gen_vec_interleave_lowv16hi;
48576      break;
48577    case V8SImode:
48578      if (d->perm[0])
48579	gen = gen_vec_interleave_highv8si;
48580      else
48581	gen = gen_vec_interleave_lowv8si;
48582      break;
48583    case V4DImode:
48584      if (d->perm[0])
48585	gen = gen_vec_interleave_highv4di;
48586      else
48587	gen = gen_vec_interleave_lowv4di;
48588      break;
48589    case V8SFmode:
48590      if (d->perm[0])
48591	gen = gen_vec_interleave_highv8sf;
48592      else
48593	gen = gen_vec_interleave_lowv8sf;
48594      break;
48595    case V4DFmode:
48596      if (d->perm[0])
48597	gen = gen_vec_interleave_highv4df;
48598      else
48599	gen = gen_vec_interleave_lowv4df;
48600      break;
48601    default:
48602      gcc_unreachable ();
48603    }
48604
48605  emit_insn (gen (d->target, d->op0, d->op1));
48606  return true;
48607}
48608
48609/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement
48610   a single vector permutation using a single intra-lane vector
48611   permutation, vperm2f128 swapping the lanes and vblend* insn blending
48612   the non-swapped and swapped vectors together.  */
48613
48614static bool
48615expand_vec_perm_vperm2f128_vblend (struct expand_vec_perm_d *d)
48616{
48617  struct expand_vec_perm_d dfirst, dsecond;
48618  unsigned i, j, msk, nelt = d->nelt, nelt2 = nelt / 2;
48619  rtx_insn *seq;
48620  bool ok;
48621  rtx (*blend) (rtx, rtx, rtx, rtx) = NULL;
48622
48623  if (!TARGET_AVX
48624      || TARGET_AVX2
48625      || (d->vmode != V8SFmode && d->vmode != V4DFmode)
48626      || !d->one_operand_p)
48627    return false;
48628
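  /* DFIRST permutes each requested element within its original 128-bit
     lane, at the in-lane position where it is ultimately needed.  DSECOND
     then swaps the two lanes of that result, and the final vblendp{s,d}
     with mask MSK picks, for each position, whichever copy has the
     element in the correct lane.  */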
48629  dfirst = *d;
48630  for (i = 0; i < nelt; i++)
48631    dfirst.perm[i] = 0xff;
48632  for (i = 0, msk = 0; i < nelt; i++)
48633    {
48634      j = (d->perm[i] & nelt2) ? i | nelt2 : i & ~nelt2;
48635      if (dfirst.perm[j] != 0xff && dfirst.perm[j] != d->perm[i])
48636	return false;
48637      dfirst.perm[j] = d->perm[i];
48638      if (j != i)
48639	msk |= (1 << i);
48640    }
48641  for (i = 0; i < nelt; i++)
48642    if (dfirst.perm[i] == 0xff)
48643      dfirst.perm[i] = i;
48644
48645  if (!d->testing_p)
48646    dfirst.target = gen_reg_rtx (dfirst.vmode);
48647
48648  start_sequence ();
48649  ok = expand_vec_perm_1 (&dfirst);
48650  seq = get_insns ();
48651  end_sequence ();
48652
48653  if (!ok)
48654    return false;
48655
48656  if (d->testing_p)
48657    return true;
48658
48659  emit_insn (seq);
48660
48661  dsecond = *d;
48662  dsecond.op0 = dfirst.target;
48663  dsecond.op1 = dfirst.target;
48664  dsecond.one_operand_p = true;
48665  dsecond.target = gen_reg_rtx (dsecond.vmode);
48666  for (i = 0; i < nelt; i++)
48667    dsecond.perm[i] = i ^ nelt2;
48668
48669  ok = expand_vec_perm_1 (&dsecond);
48670  gcc_assert (ok);
48671
48672  blend = d->vmode == V8SFmode ? gen_avx_blendps256 : gen_avx_blendpd256;
48673  emit_insn (blend (d->target, dfirst.target, dsecond.target, GEN_INT (msk)));
48674  return true;
48675}
48676
48677/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement a V4DF
48678   permutation using two vperm2f128, followed by a vshufpd insn blending
48679   the two vectors together.  */
48680
48681static bool
48682expand_vec_perm_2vperm2f128_vshuf (struct expand_vec_perm_d *d)
48683{
48684  struct expand_vec_perm_d dfirst, dsecond, dthird;
48685  bool ok;
48686
48687  if (!TARGET_AVX || (d->vmode != V4DFmode))
48688    return false;
48689
48690  if (d->testing_p)
48691    return true;
48692
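  /* DFIRST uses vperm2f128 to place the 128-bit pair containing
     d->perm[0] in its low lane and the pair containing d->perm[2] in its
     high lane; DSECOND does the same for d->perm[1] and d->perm[3].
     DTHIRD then selects one double from each pair, which is exactly the
     vshufpd pattern.  */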
48693  dfirst = *d;
48694  dsecond = *d;
48695  dthird = *d;
48696
48697  dfirst.perm[0] = (d->perm[0] & ~1);
48698  dfirst.perm[1] = (d->perm[0] & ~1) + 1;
48699  dfirst.perm[2] = (d->perm[2] & ~1);
48700  dfirst.perm[3] = (d->perm[2] & ~1) + 1;
48701  dsecond.perm[0] = (d->perm[1] & ~1);
48702  dsecond.perm[1] = (d->perm[1] & ~1) + 1;
48703  dsecond.perm[2] = (d->perm[3] & ~1);
48704  dsecond.perm[3] = (d->perm[3] & ~1) + 1;
48705  dthird.perm[0] = (d->perm[0] % 2);
48706  dthird.perm[1] = (d->perm[1] % 2) + 4;
48707  dthird.perm[2] = (d->perm[2] % 2) + 2;
48708  dthird.perm[3] = (d->perm[3] % 2) + 6;
48709
48710  dfirst.target = gen_reg_rtx (dfirst.vmode);
48711  dsecond.target = gen_reg_rtx (dsecond.vmode);
48712  dthird.op0 = dfirst.target;
48713  dthird.op1 = dsecond.target;
48714  dthird.one_operand_p = false;
48715
48716  canonicalize_perm (&dfirst);
48717  canonicalize_perm (&dsecond);
48718
48719  ok = expand_vec_perm_1 (&dfirst)
48720       && expand_vec_perm_1 (&dsecond)
48721       && expand_vec_perm_1 (&dthird);
48722
48723  gcc_assert (ok);
48724
48725  return true;
48726}
48727
48728/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
48729   permutation with two pshufb insns and an ior.  We should have already
48730   failed all two instruction sequences.  */
48731
48732static bool
48733expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
48734{
48735  rtx rperm[2][16], vperm, l, h, op, m128;
48736  unsigned int i, nelt, eltsz;
48737
48738  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
48739    return false;
48740  gcc_assert (!d->one_operand_p);
48741
48742  if (d->testing_p)
48743    return true;
48744
48745  nelt = d->nelt;
48746  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48747
48748  /* Generate two permutation masks.  If the required element is within
48749     the given vector it is shuffled into the proper lane.  If the required
48750     element is in the other vector, force a zero into the lane by setting
48751     bit 7 in the permutation mask.  */
48752  m128 = GEN_INT (-128);
48753  for (i = 0; i < nelt; ++i)
48754    {
48755      unsigned j, e = d->perm[i];
48756      unsigned which = (e >= nelt);
48757      if (e >= nelt)
48758	e -= nelt;
48759
48760      for (j = 0; j < eltsz; ++j)
48761	{
48762	  rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
48763	  rperm[1-which][i*eltsz + j] = m128;
48764	}
48765    }
48766
48767  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
48768  vperm = force_reg (V16QImode, vperm);
48769
48770  l = gen_reg_rtx (V16QImode);
48771  op = gen_lowpart (V16QImode, d->op0);
48772  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
48773
48774  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
48775  vperm = force_reg (V16QImode, vperm);
48776
48777  h = gen_reg_rtx (V16QImode);
48778  op = gen_lowpart (V16QImode, d->op1);
48779  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
48780
48781  op = d->target;
48782  if (d->vmode != V16QImode)
48783    op = gen_reg_rtx (V16QImode);
48784  emit_insn (gen_iorv16qi3 (op, l, h));
48785  if (op != d->target)
48786    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48787
48788  return true;
48789}
48790
48791/* Implement arbitrary permutation of one V32QImode or V16HImode operand
48792   with two vpshufb insns, vpermq and vpor.  We should have already failed
48793   all two or three instruction sequences.  */
48794
48795static bool
48796expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d)
48797{
48798  rtx rperm[2][32], vperm, l, h, hp, op, m128;
48799  unsigned int i, nelt, eltsz;
48800
48801  if (!TARGET_AVX2
48802      || !d->one_operand_p
48803      || (d->vmode != V32QImode && d->vmode != V16HImode))
48804    return false;
48805
48806  if (d->testing_p)
48807    return true;
48808
48809  nelt = d->nelt;
48810  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48811
48812  /* Generate two permutation masks.  If the required element is within
48813     the same lane, it is shuffled in.  If the required element is from
48814     the other lane, force a zero by setting bit 7 in the permutation
48815     mask.  The other mask has a non-negative element whenever the
48816     element is requested from the other lane; that element is also
48817     moved to the other lane, so that the result of vpshufb can have its
48818     two V2TImode halves swapped.  */
48819  m128 = GEN_INT (-128);
48820  for (i = 0; i < nelt; ++i)
48821    {
48822      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48823      unsigned which = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
48824
48825      for (j = 0; j < eltsz; ++j)
48826	{
48827	  rperm[!!which][(i * eltsz + j) ^ which] = GEN_INT (e * eltsz + j);
48828	  rperm[!which][(i * eltsz + j) ^ (which ^ 16)] = m128;
48829	}
48830    }
48831
48832  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48833  vperm = force_reg (V32QImode, vperm);
48834
48835  h = gen_reg_rtx (V32QImode);
48836  op = gen_lowpart (V32QImode, d->op0);
48837  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48838
48839  /* Swap the 128-bit lanes of h into hp.  */
48840  hp = gen_reg_rtx (V4DImode);
48841  op = gen_lowpart (V4DImode, h);
48842  emit_insn (gen_avx2_permv4di_1 (hp, op, const2_rtx, GEN_INT (3), const0_rtx,
48843				  const1_rtx));
48844
48845  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48846  vperm = force_reg (V32QImode, vperm);
48847
48848  l = gen_reg_rtx (V32QImode);
48849  op = gen_lowpart (V32QImode, d->op0);
48850  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48851
48852  op = d->target;
48853  if (d->vmode != V32QImode)
48854    op = gen_reg_rtx (V32QImode);
48855  emit_insn (gen_iorv32qi3 (op, l, gen_lowpart (V32QImode, hp)));
48856  if (op != d->target)
48857    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48858
48859  return true;
48860}
48861
48862/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48863   and extract-odd permutations of two V32QImode or V16HImode operands
48864   with two vpshufb insns, vpor and vpermq.  We should have already
48865   failed all two or three instruction sequences.  */
48866
48867static bool
48868expand_vec_perm_vpshufb2_vpermq_even_odd (struct expand_vec_perm_d *d)
48869{
48870  rtx rperm[2][32], vperm, l, h, ior, op, m128;
48871  unsigned int i, nelt, eltsz;
48872
48873  if (!TARGET_AVX2
48874      || d->one_operand_p
48875      || (d->vmode != V32QImode && d->vmode != V16HImode))
48876    return false;
48877
48878  for (i = 0; i < d->nelt; ++i)
48879    if ((d->perm[i] ^ (i * 2)) & (3 * d->nelt / 2))
48880      return false;
48881
48882  if (d->testing_p)
48883    return true;
48884
48885  nelt = d->nelt;
48886  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
48887
48888  /* Generate two permutation masks.  In the first permutation mask
48889     the first quarter will contain indexes for the first half
48890     of the op0, the second quarter will contain bit 7 set, third quarter
48891     will contain indexes for the second half of the op0 and the
48892     last quarter bit 7 set.  In the second permutation mask
48893     the first quarter will contain bit 7 set, the second quarter
48894     indexes for the first half of the op1, the third quarter bit 7 set
48895     and last quarter indexes for the second half of the op1.
48896     I.e. the first mask e.g. for V32QImode extract even will be:
48897     0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128
48898     (all values masked with 0xf except for -128) and second mask
48899     for extract even will be
48900     -128, ..., -128, 0, 2, ..., 0xe, -128, ..., -128, 0, 2, ..., 0xe.  */
48901  m128 = GEN_INT (-128);
48902  for (i = 0; i < nelt; ++i)
48903    {
48904      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
48905      unsigned which = d->perm[i] >= nelt;
48906      unsigned xorv = (i >= nelt / 4 && i < 3 * nelt / 4) ? 24 : 0;
48907
48908      for (j = 0; j < eltsz; ++j)
48909	{
48910	  rperm[which][(i * eltsz + j) ^ xorv] = GEN_INT (e * eltsz + j);
48911	  rperm[1 - which][(i * eltsz + j) ^ xorv] = m128;
48912	}
48913    }
48914
48915  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[0]));
48916  vperm = force_reg (V32QImode, vperm);
48917
48918  l = gen_reg_rtx (V32QImode);
48919  op = gen_lowpart (V32QImode, d->op0);
48920  emit_insn (gen_avx2_pshufbv32qi3 (l, op, vperm));
48921
48922  vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[1]));
48923  vperm = force_reg (V32QImode, vperm);
48924
48925  h = gen_reg_rtx (V32QImode);
48926  op = gen_lowpart (V32QImode, d->op1);
48927  emit_insn (gen_avx2_pshufbv32qi3 (h, op, vperm));
48928
48929  ior = gen_reg_rtx (V32QImode);
48930  emit_insn (gen_iorv32qi3 (ior, l, h));
48931
48932  /* Permute the V4DImode quarters using { 0, 2, 1, 3 } permutation.  */
48933  op = gen_reg_rtx (V4DImode);
48934  ior = gen_lowpart (V4DImode, ior);
48935  emit_insn (gen_avx2_permv4di_1 (op, ior, const0_rtx, const2_rtx,
48936				  const1_rtx, GEN_INT (3)));
48937  emit_move_insn (d->target, gen_lowpart (d->vmode, op));
48938
48939  return true;
48940}
48941
48942/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
48943   and extract-odd permutations of two V16QI, V8HI, V16HI or V32QI operands
48944   with two "and" and "pack" or two "shift" and "pack" insns.  We should
48945   have already failed all two instruction sequences.  */
48946
48947static bool
48948expand_vec_perm_even_odd_pack (struct expand_vec_perm_d *d)
48949{
48950  rtx op, dop0, dop1, t, rperm[16];
48951  unsigned i, odd, c, s, nelt = d->nelt;
48952  bool end_perm = false;
48953  machine_mode half_mode;
48954  rtx (*gen_and) (rtx, rtx, rtx);
48955  rtx (*gen_pack) (rtx, rtx, rtx);
48956  rtx (*gen_shift) (rtx, rtx, rtx);
48957
48958  if (d->one_operand_p)
48959    return false;
48960
48961  switch (d->vmode)
48962    {
48963    case V8HImode:
48964      /* Required for "pack".  */
48965      if (!TARGET_SSE4_1)
48966        return false;
48967      c = 0xffff;
48968      s = 16;
48969      half_mode = V4SImode;
48970      gen_and = gen_andv4si3;
48971      gen_pack = gen_sse4_1_packusdw;
48972      gen_shift = gen_lshrv4si3;
48973      break;
48974    case V16QImode:
48975      /* No check as all instructions are SSE2.  */
48976      c = 0xff;
48977      s = 8;
48978      half_mode = V8HImode;
48979      gen_and = gen_andv8hi3;
48980      gen_pack = gen_sse2_packuswb;
48981      gen_shift = gen_lshrv8hi3;
48982      break;
48983    case V16HImode:
48984      if (!TARGET_AVX2)
48985        return false;
48986      c = 0xffff;
48987      s = 16;
48988      half_mode = V8SImode;
48989      gen_and = gen_andv8si3;
48990      gen_pack = gen_avx2_packusdw;
48991      gen_shift = gen_lshrv8si3;
48992      end_perm = true;
48993      break;
48994    case V32QImode:
48995      if (!TARGET_AVX2)
48996        return false;
48997      c = 0xff;
48998      s = 8;
48999      half_mode = V16HImode;
49000      gen_and = gen_andv16hi3;
49001      gen_pack = gen_avx2_packuswb;
49002      gen_shift = gen_lshrv16hi3;
49003      end_perm = true;
49004      break;
49005    default:
49006      /* Only V8HI, V16QI, V16HI and V32QI modes are more profitable than
49007	 general shuffles.  */
49008      return false;
49009    }
49010
49011  /* Check that permutation is even or odd.  */
49012  odd = d->perm[0];
49013  if (odd > 1)
49014    return false;
49015
49016  for (i = 1; i < nelt; ++i)
49017    if (d->perm[i] != 2 * i + odd)
49018      return false;
49019
49020  if (d->testing_p)
49021    return true;
49022
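  /* For the even permutation, mask each widened element with C so that
     only the low (even-numbered) narrow element survives before packing;
     for the odd permutation, shift each widened element right by S bits
     instead, so the high (odd-numbered) narrow element is kept.  */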
49023  dop0 = gen_reg_rtx (half_mode);
49024  dop1 = gen_reg_rtx (half_mode);
49025  if (odd == 0)
49026    {
49027      for (i = 0; i < nelt / 2; i++)
49028	rperm[i] = GEN_INT (c);
49029      t = gen_rtx_CONST_VECTOR (half_mode, gen_rtvec_v (nelt / 2, rperm));
49030      t = force_reg (half_mode, t);
49031      emit_insn (gen_and (dop0, t, gen_lowpart (half_mode, d->op0)));
49032      emit_insn (gen_and (dop1, t, gen_lowpart (half_mode, d->op1)));
49033    }
49034  else
49035    {
49036      emit_insn (gen_shift (dop0,
49037			    gen_lowpart (half_mode, d->op0),
49038			    GEN_INT (s)));
49039      emit_insn (gen_shift (dop1,
49040			    gen_lowpart (half_mode, d->op1),
49041			    GEN_INT (s)));
49042    }
49043  /* In the AVX2 256-bit case we need to permute the result of the pack.  */
49044  if (TARGET_AVX2 && end_perm)
49045    {
49046      op = gen_reg_rtx (d->vmode);
49047      t = gen_reg_rtx (V4DImode);
49048      emit_insn (gen_pack (op, dop0, dop1));
49049      emit_insn (gen_avx2_permv4di_1 (t,
49050				      gen_lowpart (V4DImode, op),
49051				      const0_rtx,
49052				      const2_rtx,
49053				      const1_rtx,
49054				      GEN_INT (3)));
49055      emit_move_insn (d->target, gen_lowpart (d->vmode, t));
49056    }
49057  else
49058    emit_insn (gen_pack (d->target, dop0, dop1));
49059
49060  return true;
49061}
49062
49063/* A subroutine of expand_vec_perm_even_odd_1.  Implement extract-even
49064   and extract-odd permutations of two V64QImode operands
49065   with two "shift", two "trunc" and one "concat" insn for "odd"
49066   and with two "trunc" and one "concat" insn for "even".
49067   We should have already failed all two instruction sequences.  */
49068
49069static bool
49070expand_vec_perm_even_odd_trunc (struct expand_vec_perm_d *d)
49071{
49072  rtx t1, t2, t3, t4;
49073  unsigned i, odd, nelt = d->nelt;
49074
49075  if (!TARGET_AVX512BW
49076      || d->one_operand_p
49077      || d->vmode != V64QImode)
49078    return false;
49079
49080  /* Check that permutation is even or odd.  */
49081  odd = d->perm[0];
49082  if (odd > 1)
49083    return false;
49084
49085  for (i = 1; i < nelt; ++i)
49086    if (d->perm[i] != 2 * i + odd)
49087      return false;
49088
49089  if (d->testing_p)
49090    return true;
49091
49092
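  /* For the odd permutation shift each 16-bit element right by 8 so the
     following truncation keeps the odd-numbered bytes; for the even
     permutation the truncation alone keeps the even-numbered bytes.  */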
49093  if (odd)
49094    {
49095      t1 = gen_reg_rtx (V32HImode);
49096      t2 = gen_reg_rtx (V32HImode);
49097      emit_insn (gen_lshrv32hi3 (t1,
49098				 gen_lowpart (V32HImode, d->op0),
49099				 GEN_INT (8)));
49100      emit_insn (gen_lshrv32hi3 (t2,
49101				 gen_lowpart (V32HImode, d->op1),
49102				 GEN_INT (8)));
49103    }
49104  else
49105    {
49106      t1 = gen_lowpart (V32HImode, d->op0);
49107      t2 = gen_lowpart (V32HImode, d->op1);
49108    }
49109
49110  t3 = gen_reg_rtx (V32QImode);
49111  t4 = gen_reg_rtx (V32QImode);
49112  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t3, t1));
49113  emit_insn (gen_avx512bw_truncatev32hiv32qi2 (t4, t2));
49114  emit_insn (gen_avx_vec_concatv64qi (d->target, t3, t4));
49115
49116  return true;
49117}
49118
49119/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
49120   and extract-odd permutations.  */
49121
49122static bool
49123expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
49124{
49125  rtx t1, t2, t3, t4, t5;
49126
49127  switch (d->vmode)
49128    {
49129    case V4DFmode:
49130      if (d->testing_p)
49131	break;
49132      t1 = gen_reg_rtx (V4DFmode);
49133      t2 = gen_reg_rtx (V4DFmode);
49134
49135      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
49136      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
49137      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
49138
49139      /* Now an unpck[lh]pd will produce the result required.  */
49140      if (odd)
49141	t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
49142      else
49143	t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
49144      emit_insn (t3);
49145      break;
49146
49147    case V8SFmode:
49148      {
49149	int mask = odd ? 0xdd : 0x88;
49150
49151	if (d->testing_p)
49152	  break;
49153	t1 = gen_reg_rtx (V8SFmode);
49154	t2 = gen_reg_rtx (V8SFmode);
49155	t3 = gen_reg_rtx (V8SFmode);
49156
49157	/* Shuffle within the 128-bit lanes to produce:
49158	   { 0 2 8 a 4 6 c e } | { 1 3 9 b 5 7 d f }.  */
49159	emit_insn (gen_avx_shufps256 (t1, d->op0, d->op1,
49160				      GEN_INT (mask)));
49161
49162	/* Shuffle the lanes around to produce:
49163	   { 4 6 c e 0 2 8 a } and { 5 7 d f 1 3 9 b }.  */
49164	emit_insn (gen_avx_vperm2f128v8sf3 (t2, t1, t1,
49165					    GEN_INT (0x3)));
49166
49167	/* Shuffle within the 128-bit lanes to produce:
49168	   { 0 2 4 6 4 6 0 2 } | { 1 3 5 7 5 7 1 3 }.  */
49169	emit_insn (gen_avx_shufps256 (t3, t1, t2, GEN_INT (0x44)));
49170
49171	/* Shuffle within the 128-bit lanes to produce:
49172	   { 8 a c e c e 8 a } | { 9 b d f d f 9 b }.  */
49173	emit_insn (gen_avx_shufps256 (t2, t1, t2, GEN_INT (0xee)));
49174
49175	/* Shuffle the lanes around to produce:
49176	   { 0 2 4 6 8 a c e } | { 1 3 5 7 9 b d f }.  */
49177	emit_insn (gen_avx_vperm2f128v8sf3 (d->target, t3, t2,
49178					    GEN_INT (0x20)));
49179      }
49180      break;
49181
49182    case V2DFmode:
49183    case V4SFmode:
49184    case V2DImode:
49185    case V4SImode:
49186      /* These are always directly implementable by expand_vec_perm_1.  */
49187      gcc_unreachable ();
49188
49189    case V8HImode:
49190      if (TARGET_SSE4_1)
49191	return expand_vec_perm_even_odd_pack (d);
49192      else if (TARGET_SSSE3 && !TARGET_SLOW_PSHUFB)
49193	return expand_vec_perm_pshufb2 (d);
49194      else
49195	{
49196	  if (d->testing_p)
49197	    break;
49198	  /* We need 2*log2(N)-1 operations to achieve odd/even
49199	     with interleave. */
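	  /* For V8HImode this is the five interleave insns emitted below.  */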
49200	  t1 = gen_reg_rtx (V8HImode);
49201	  t2 = gen_reg_rtx (V8HImode);
49202	  emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
49203	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
49204	  emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
49205	  emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
49206	  if (odd)
49207	    t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
49208	  else
49209	    t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
49210	  emit_insn (t3);
49211	}
49212      break;
49213
49214    case V16QImode:
49215      return expand_vec_perm_even_odd_pack (d);
49216
49217    case V16HImode:
49218    case V32QImode:
49219      return expand_vec_perm_even_odd_pack (d);
49220
49221    case V64QImode:
49222      return expand_vec_perm_even_odd_trunc (d);
49223
49224    case V4DImode:
49225      if (!TARGET_AVX2)
49226	{
49227	  struct expand_vec_perm_d d_copy = *d;
49228	  d_copy.vmode = V4DFmode;
49229	  if (d->testing_p)
49230	    d_copy.target = gen_lowpart (V4DFmode, d->target);
49231	  else
49232	    d_copy.target = gen_reg_rtx (V4DFmode);
49233	  d_copy.op0 = gen_lowpart (V4DFmode, d->op0);
49234	  d_copy.op1 = gen_lowpart (V4DFmode, d->op1);
49235	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49236	    {
49237	      if (!d->testing_p)
49238		emit_move_insn (d->target,
49239				gen_lowpart (V4DImode, d_copy.target));
49240	      return true;
49241	    }
49242	  return false;
49243	}
49244
49245      if (d->testing_p)
49246	break;
49247
49248      t1 = gen_reg_rtx (V4DImode);
49249      t2 = gen_reg_rtx (V4DImode);
49250
49251      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
49252      emit_insn (gen_avx2_permv2ti (t1, d->op0, d->op1, GEN_INT (0x20)));
49253      emit_insn (gen_avx2_permv2ti (t2, d->op0, d->op1, GEN_INT (0x31)));
49254
49255      /* Now a vpunpck[lh]qdq will produce the result required.  */
49256      if (odd)
49257	t3 = gen_avx2_interleave_highv4di (d->target, t1, t2);
49258      else
49259	t3 = gen_avx2_interleave_lowv4di (d->target, t1, t2);
49260      emit_insn (t3);
49261      break;
49262
49263    case V8SImode:
49264      if (!TARGET_AVX2)
49265	{
49266	  struct expand_vec_perm_d d_copy = *d;
49267	  d_copy.vmode = V8SFmode;
49268	  if (d->testing_p)
49269	    d_copy.target = gen_lowpart (V8SFmode, d->target);
49270	  else
49271	    d_copy.target = gen_reg_rtx (V8SFmode);
49272	  d_copy.op0 = gen_lowpart (V8SFmode, d->op0);
49273	  d_copy.op1 = gen_lowpart (V8SFmode, d->op1);
49274	  if (expand_vec_perm_even_odd_1 (&d_copy, odd))
49275	    {
49276	      if (!d->testing_p)
49277		emit_move_insn (d->target,
49278				gen_lowpart (V8SImode, d_copy.target));
49279	      return true;
49280	    }
49281	  return false;
49282	}
49283
49284      if (d->testing_p)
49285	break;
49286
49287      t1 = gen_reg_rtx (V8SImode);
49288      t2 = gen_reg_rtx (V8SImode);
49289      t3 = gen_reg_rtx (V4DImode);
49290      t4 = gen_reg_rtx (V4DImode);
49291      t5 = gen_reg_rtx (V4DImode);
49292
49293      /* Shuffle the lanes around into
49294	 { 0 1 2 3 8 9 a b } and { 4 5 6 7 c d e f }.  */
49295      emit_insn (gen_avx2_permv2ti (t3, gen_lowpart (V4DImode, d->op0),
49296				    gen_lowpart (V4DImode, d->op1),
49297				    GEN_INT (0x20)));
49298      emit_insn (gen_avx2_permv2ti (t4, gen_lowpart (V4DImode, d->op0),
49299				    gen_lowpart (V4DImode, d->op1),
49300				    GEN_INT (0x31)));
49301
49302      /* Swap the 2nd and 3rd position in each lane into
49303	 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
49304      emit_insn (gen_avx2_pshufdv3 (t1, gen_lowpart (V8SImode, t3),
49305				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49306      emit_insn (gen_avx2_pshufdv3 (t2, gen_lowpart (V8SImode, t4),
49307				    GEN_INT (2 * 4 + 1 * 16 + 3 * 64)));
49308
      /* Now a vpunpck[lh]qdq will produce
49310	 { 0 2 4 6 8 a c e } resp. { 1 3 5 7 9 b d f }.  */
49311      if (odd)
49312	t3 = gen_avx2_interleave_highv4di (t5, gen_lowpart (V4DImode, t1),
49313					   gen_lowpart (V4DImode, t2));
49314      else
49315	t3 = gen_avx2_interleave_lowv4di (t5, gen_lowpart (V4DImode, t1),
49316					  gen_lowpart (V4DImode, t2));
49317      emit_insn (t3);
49318      emit_move_insn (d->target, gen_lowpart (V8SImode, t5));
49319      break;
49320
49321    default:
49322      gcc_unreachable ();
49323    }
49324
49325  return true;
49326}
49327
49328/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
49329   extract-even and extract-odd permutations.  */
49330
49331static bool
49332expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
49333{
49334  unsigned i, odd, nelt = d->nelt;
49335
49336  odd = d->perm[0];
49337  if (odd != 0 && odd != 1)
49338    return false;
49339
49340  for (i = 1; i < nelt; ++i)
49341    if (d->perm[i] != 2 * i + odd)
49342      return false;
49343
49344  return expand_vec_perm_even_odd_1 (d, odd);
49345}
49346
49347/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
49348   permutations.  We assume that expand_vec_perm_1 has already failed.  */
49349
49350static bool
49351expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
49352{
49353  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
49354  machine_mode vmode = d->vmode;
49355  unsigned char perm2[4];
49356  rtx op0 = d->op0, dest;
49357  bool ok;
49358
49359  switch (vmode)
49360    {
49361    case V4DFmode:
49362    case V8SFmode:
49363      /* These are special-cased in sse.md so that we can optionally
49364	 use the vbroadcast instruction.  They expand to two insns
49365	 if the input happens to be in a register.  */
49366      gcc_unreachable ();
49367
49368    case V2DFmode:
49369    case V2DImode:
49370    case V4SFmode:
49371    case V4SImode:
49372      /* These are always implementable using standard shuffle patterns.  */
49373      gcc_unreachable ();
49374
49375    case V8HImode:
49376    case V16QImode:
49377      /* These can be implemented via interleave.  We save one insn by
49378	 stopping once we have promoted to V4SImode and then use pshufd.  */
49379      if (d->testing_p)
49380	return true;
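      /* Each self-interleave doubles the element width while keeping the
	 requested element duplicated; ELT and NELT2 are adjusted to keep
	 track of which half to interleave next.  */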
49381      do
49382	{
49383	  rtx dest;
49384	  rtx (*gen) (rtx, rtx, rtx)
49385	    = vmode == V16QImode ? gen_vec_interleave_lowv16qi
49386				 : gen_vec_interleave_lowv8hi;
49387
49388	  if (elt >= nelt2)
49389	    {
49390	      gen = vmode == V16QImode ? gen_vec_interleave_highv16qi
49391				       : gen_vec_interleave_highv8hi;
49392	      elt -= nelt2;
49393	    }
49394	  nelt2 /= 2;
49395
49396	  dest = gen_reg_rtx (vmode);
49397	  emit_insn (gen (dest, op0, op0));
49398	  vmode = get_mode_wider_vector (vmode);
49399	  op0 = gen_lowpart (vmode, dest);
49400	}
49401      while (vmode != V4SImode);
49402
49403      memset (perm2, elt, 4);
49404      dest = gen_reg_rtx (V4SImode);
49405      ok = expand_vselect (dest, op0, perm2, 4, d->testing_p);
49406      gcc_assert (ok);
49407      if (!d->testing_p)
49408	emit_move_insn (d->target, gen_lowpart (d->vmode, dest));
49409      return true;
49410
49411    case V64QImode:
49412    case V32QImode:
49413    case V16HImode:
49414    case V8SImode:
49415    case V4DImode:
      /* For AVX2, broadcasts of the first element should already have
	 been handled by expand_vec_perm_1 via vpbroadcast* or vpermq.  */
49418      gcc_assert (!TARGET_AVX2 || d->perm[0]);
49419      return false;
49420
49421    default:
49422      gcc_unreachable ();
49423    }
49424}
49425
49426/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
49427   broadcast permutations.  */
49428
49429static bool
49430expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
49431{
49432  unsigned i, elt, nelt = d->nelt;
49433
49434  if (!d->one_operand_p)
49435    return false;
49436
49437  elt = d->perm[0];
49438  for (i = 1; i < nelt; ++i)
49439    if (d->perm[i] != elt)
49440      return false;
49441
49442  return expand_vec_perm_broadcast_1 (d);
49443}
49444
49445/* Implement arbitrary permutations of two V64QImode operands
   with 2 vpermi2w, 2 vpshufb and one vpor instruction.  */
49447static bool
49448expand_vec_perm_vpermi2_vpshub2 (struct expand_vec_perm_d *d)
49449{
  if (!TARGET_AVX512BW || d->vmode != V64QImode)
49451    return false;
49452
49453  if (d->testing_p)
49454    return true;
49455
49456  struct expand_vec_perm_d ds[2];
49457  rtx rperm[128], vperm, target0, target1;
49458  unsigned int i, nelt;
49459  machine_mode vmode;
49460
49461  nelt = d->nelt;
49462  vmode = V64QImode;
49463
49464  for (i = 0; i < 2; i++)
49465    {
49466      ds[i] = *d;
49467      ds[i].vmode = V32HImode;
49468      ds[i].nelt = 32;
49469      ds[i].target = gen_reg_rtx (V32HImode);
49470      ds[i].op0 = gen_lowpart (V32HImode, d->op0);
49471      ds[i].op1 = gen_lowpart (V32HImode, d->op1);
49472    }
49473
49474  /* Prepare permutations such that the first one takes care of
49475     putting the even bytes into the right positions or one higher
49476     positions (ds[0]) and the second one takes care of
49477     putting the odd bytes into the right positions or one below
49478     (ds[1]).  */
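  /* For output byte I, the word permutation ds[I & 1] places the word that
     contains the desired byte at word position I / 2; the vpshufb masks
     built below then pick its low or high byte (D->PERM[I] & 1) and zero
     the other output byte via the constm1 (bit 7 set) entries.  */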
49479
49480  for (i = 0; i < nelt; i++)
49481    {
49482      ds[i & 1].perm[i / 2] = d->perm[i] / 2;
49483      if (i & 1)
49484	{
49485	  rperm[i] = constm1_rtx;
49486	  rperm[i + 64] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49487	}
49488      else
49489	{
49490	  rperm[i] = GEN_INT ((i & 14) + (d->perm[i] & 1));
49491	  rperm[i + 64] = constm1_rtx;
49492	}
49493    }
49494
49495  bool ok = expand_vec_perm_1 (&ds[0]);
49496  gcc_assert (ok);
49497  ds[0].target = gen_lowpart (V64QImode, ds[0].target);
49498
49499  ok = expand_vec_perm_1 (&ds[1]);
49500  gcc_assert (ok);
49501  ds[1].target = gen_lowpart (V64QImode, ds[1].target);
49502
49503  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm));
49504  vperm = force_reg (vmode, vperm);
49505  target0 = gen_reg_rtx (V64QImode);
49506  emit_insn (gen_avx512bw_pshufbv64qi3 (target0, ds[0].target, vperm));
49507
49508  vperm = gen_rtx_CONST_VECTOR (V64QImode, gen_rtvec_v (64, rperm + 64));
49509  vperm = force_reg (vmode, vperm);
49510  target1 = gen_reg_rtx (V64QImode);
49511  emit_insn (gen_avx512bw_pshufbv64qi3 (target1, ds[1].target, vperm));
49512
49513  emit_insn (gen_iorv64qi3 (d->target, target0, target1));
49514  return true;
49515}
49516
49517/* Implement arbitrary permutation of two V32QImode and V16QImode operands
49518   with 4 vpshufb insns, 2 vpermq and 3 vpor.  We should have already failed
49519   all the shorter instruction sequences.  */
49520
49521static bool
49522expand_vec_perm_vpshufb4_vpermq2 (struct expand_vec_perm_d *d)
49523{
49524  rtx rperm[4][32], vperm, l[2], h[2], op, m128;
49525  unsigned int i, nelt, eltsz;
49526  bool used[4];
49527
49528  if (!TARGET_AVX2
49529      || d->one_operand_p
49530      || (d->vmode != V32QImode && d->vmode != V16HImode))
49531    return false;
49532
49533  if (d->testing_p)
49534    return true;
49535
49536  nelt = d->nelt;
49537  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
49538
  /* Generate 4 permutation masks.  If the required element is within
     the same lane, it is shuffled in.  If the required element comes from
     the other lane, force a zero by setting bit 7 in the permutation mask.
     In the cross-lane masks an element is non-negative only if it is
     requested from the other lane; it is also placed into the other lane,
     so that the result of vpshufb can have its two V2TImode halves
     swapped.  */
49546  m128 = GEN_INT (-128);
49547  for (i = 0; i < 32; ++i)
49548    {
49549      rperm[0][i] = m128;
49550      rperm[1][i] = m128;
49551      rperm[2][i] = m128;
49552      rperm[3][i] = m128;
49553    }
49554  used[0] = false;
49555  used[1] = false;
49556  used[2] = false;
49557  used[3] = false;
49558  for (i = 0; i < nelt; ++i)
49559    {
49560      unsigned j, e = d->perm[i] & (nelt / 2 - 1);
49561      unsigned xlane = ((d->perm[i] ^ i) & (nelt / 2)) * eltsz;
49562      unsigned int which = ((d->perm[i] & nelt) ? 2 : 0) + (xlane ? 1 : 0);
49563
49564      for (j = 0; j < eltsz; ++j)
49565	rperm[which][(i * eltsz + j) ^ xlane] = GEN_INT (e * eltsz + j);
49566      used[which] = true;
49567    }
49568
49569  for (i = 0; i < 2; ++i)
49570    {
49571      if (!used[2 * i + 1])
49572	{
49573	  h[i] = NULL_RTX;
49574	  continue;
49575	}
49576      vperm = gen_rtx_CONST_VECTOR (V32QImode,
49577				    gen_rtvec_v (32, rperm[2 * i + 1]));
49578      vperm = force_reg (V32QImode, vperm);
49579      h[i] = gen_reg_rtx (V32QImode);
49580      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49581      emit_insn (gen_avx2_pshufbv32qi3 (h[i], op, vperm));
49582    }
49583
  /* Swap the 128-bit lanes of h[X].  */
49585  for (i = 0; i < 2; ++i)
49586   {
49587     if (h[i] == NULL_RTX)
49588       continue;
49589     op = gen_reg_rtx (V4DImode);
49590     emit_insn (gen_avx2_permv4di_1 (op, gen_lowpart (V4DImode, h[i]),
49591				     const2_rtx, GEN_INT (3), const0_rtx,
49592				     const1_rtx));
49593     h[i] = gen_lowpart (V32QImode, op);
49594   }
49595
49596  for (i = 0; i < 2; ++i)
49597    {
49598      if (!used[2 * i])
49599	{
49600	  l[i] = NULL_RTX;
49601	  continue;
49602	}
49603      vperm = gen_rtx_CONST_VECTOR (V32QImode, gen_rtvec_v (32, rperm[2 * i]));
49604      vperm = force_reg (V32QImode, vperm);
49605      l[i] = gen_reg_rtx (V32QImode);
49606      op = gen_lowpart (V32QImode, i ? d->op1 : d->op0);
49607      emit_insn (gen_avx2_pshufbv32qi3 (l[i], op, vperm));
49608    }
49609
49610  for (i = 0; i < 2; ++i)
49611    {
49612      if (h[i] && l[i])
49613	{
49614	  op = gen_reg_rtx (V32QImode);
49615	  emit_insn (gen_iorv32qi3 (op, l[i], h[i]));
49616	  l[i] = op;
49617	}
49618      else if (h[i])
49619	l[i] = h[i];
49620    }
49621
49622  gcc_assert (l[0] && l[1]);
49623  op = d->target;
49624  if (d->vmode != V32QImode)
49625    op = gen_reg_rtx (V32QImode);
49626  emit_insn (gen_iorv32qi3 (op, l[0], l[1]));
49627  if (op != d->target)
49628    emit_move_insn (d->target, gen_lowpart (d->vmode, op));
49629  return true;
49630}
49631
49632/* The guts of ix86_expand_vec_perm_const, also used by the ok hook.
49633   With all of the interface bits taken care of, perform the expansion
49634   in D and return true on success.  */
49635
49636static bool
49637ix86_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
49638{
49639  /* Try a single instruction expansion.  */
49640  if (expand_vec_perm_1 (d))
49641    return true;
49642
49643  /* Try sequences of two instructions.  */
49644
49645  if (expand_vec_perm_pshuflw_pshufhw (d))
49646    return true;
49647
49648  if (expand_vec_perm_palignr (d, false))
49649    return true;
49650
49651  if (expand_vec_perm_interleave2 (d))
49652    return true;
49653
49654  if (expand_vec_perm_broadcast (d))
49655    return true;
49656
49657  if (expand_vec_perm_vpermq_perm_1 (d))
49658    return true;
49659
49660  if (expand_vec_perm_vperm2f128 (d))
49661    return true;
49662
49663  if (expand_vec_perm_pblendv (d))
49664    return true;
49665
49666  /* Try sequences of three instructions.  */
49667
49668  if (expand_vec_perm_even_odd_pack (d))
49669    return true;
49670
49671  if (expand_vec_perm_2vperm2f128_vshuf (d))
49672    return true;
49673
49674  if (expand_vec_perm_pshufb2 (d))
49675    return true;
49676
49677  if (expand_vec_perm_interleave3 (d))
49678    return true;
49679
49680  if (expand_vec_perm_vperm2f128_vblend (d))
49681    return true;
49682
49683  /* Try sequences of four instructions.  */
49684
49685  if (expand_vec_perm_even_odd_trunc (d))
49686    return true;
49687  if (expand_vec_perm_vpshufb2_vpermq (d))
49688    return true;
49689
49690  if (expand_vec_perm_vpshufb2_vpermq_even_odd (d))
49691    return true;
49692
49693  if (expand_vec_perm_vpermi2_vpshub2 (d))
49694    return true;
49695
49696  /* ??? Look for narrow permutations whose element orderings would
49697     allow the promotion to a wider mode.  */
49698
49699  /* ??? Look for sequences of interleave or a wider permute that place
49700     the data into the correct lanes for a half-vector shuffle like
49701     pshuf[lh]w or vpermilps.  */
49702
49703  /* ??? Look for sequences of interleave that produce the desired results.
49704     The combinatorics of punpck[lh] get pretty ugly... */
49705
49706  if (expand_vec_perm_even_odd (d))
49707    return true;
49708
49709  /* Even longer sequences.  */
49710  if (expand_vec_perm_vpshufb4_vpermq2 (d))
49711    return true;
49712
49713  return false;
49714}
49715
/* If a permutation only uses one operand, make that explicit by
   canonicalizing D.  Return true if the permutation references both
   operands.  */
49718
49719static bool
49720canonicalize_perm (struct expand_vec_perm_d *d)
49721{
49722  int i, which, nelt = d->nelt;
49723
49724  for (i = which = 0; i < nelt; ++i)
49725      which |= (d->perm[i] < nelt ? 1 : 2);
49726
49727  d->one_operand_p = true;
49728  switch (which)
49729    {
49730    default:
49731      gcc_unreachable();
49732
49733    case 3:
49734      if (!rtx_equal_p (d->op0, d->op1))
49735        {
49736	  d->one_operand_p = false;
49737	  break;
49738        }
49739      /* The elements of PERM do not suggest that only the first operand
49740	 is used, but both operands are identical.  Allow easier matching
49741	 of the permutation by folding the permutation into the single
49742	 input vector.  */
49743      /* FALLTHRU */
49744
49745    case 2:
49746      for (i = 0; i < nelt; ++i)
49747        d->perm[i] &= nelt - 1;
49748      d->op0 = d->op1;
49749      break;
49750
49751    case 1:
49752      d->op1 = d->op0;
49753      break;
49754    }
49755
49756  return (which == 3);
49757}
49758
49759bool
49760ix86_expand_vec_perm_const (rtx operands[4])
49761{
49762  struct expand_vec_perm_d d;
49763  unsigned char perm[MAX_VECT_LEN];
49764  int i, nelt;
49765  bool two_args;
49766  rtx sel;
49767
49768  d.target = operands[0];
49769  d.op0 = operands[1];
49770  d.op1 = operands[2];
49771  sel = operands[3];
49772
49773  d.vmode = GET_MODE (d.target);
49774  gcc_assert (VECTOR_MODE_P (d.vmode));
49775  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49776  d.testing_p = false;
49777
49778  gcc_assert (GET_CODE (sel) == CONST_VECTOR);
49779  gcc_assert (XVECLEN (sel, 0) == nelt);
49780  gcc_checking_assert (sizeof (d.perm) == sizeof (perm));
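  /* PERM keeps an unmodified copy of the selector so the expansion can be
     retried below if canonicalize_perm folds both operands into one.  */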
49781
49782  for (i = 0; i < nelt; ++i)
49783    {
49784      rtx e = XVECEXP (sel, 0, i);
49785      int ei = INTVAL (e) & (2 * nelt - 1);
49786      d.perm[i] = ei;
49787      perm[i] = ei;
49788    }
49789
49790  two_args = canonicalize_perm (&d);
49791
49792  if (ix86_expand_vec_perm_const_1 (&d))
49793    return true;
49794
49795  /* If the selector says both arguments are needed, but the operands are the
49796     same, the above tried to expand with one_operand_p and flattened selector.
49797     If that didn't work, retry without one_operand_p; we succeeded with that
49798     during testing.  */
49799  if (two_args && d.one_operand_p)
49800    {
49801      d.one_operand_p = false;
49802      memcpy (d.perm, perm, sizeof (perm));
49803      return ix86_expand_vec_perm_const_1 (&d);
49804    }
49805
49806  return false;
49807}
49808
49809/* Implement targetm.vectorize.vec_perm_const_ok.  */
49810
49811static bool
49812ix86_vectorize_vec_perm_const_ok (machine_mode vmode,
49813				  const unsigned char *sel)
49814{
49815  struct expand_vec_perm_d d;
49816  unsigned int i, nelt, which;
49817  bool ret;
49818
49819  d.vmode = vmode;
49820  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49821  d.testing_p = true;
49822
49823  /* Given sufficient ISA support we can just return true here
49824     for selected vector modes.  */
49825  switch (d.vmode)
49826    {
49827    case V16SFmode:
49828    case V16SImode:
49829    case V8DImode:
49830    case V8DFmode:
49831      if (TARGET_AVX512F)
49832	/* All implementable with a single vpermi2 insn.  */
49833	return true;
49834      break;
49835    case V32HImode:
49836      if (TARGET_AVX512BW)
49837	/* All implementable with a single vpermi2 insn.  */
49838	return true;
49839      break;
49840    case V64QImode:
49841      if (TARGET_AVX512BW)
49842	/* Implementable with 2 vpermi2, 2 vpshufb and 1 or insn.  */
49843	return true;
49844      break;
49845    case V8SImode:
49846    case V8SFmode:
49847    case V4DFmode:
49848    case V4DImode:
49849      if (TARGET_AVX512VL)
49850	/* All implementable with a single vpermi2 insn.  */
49851	return true;
49852      break;
49853    case V16HImode:
49854      if (TARGET_AVX2)
49855	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49856	return true;
49857      break;
49858    case V32QImode:
49859      if (TARGET_AVX2)
49860	/* Implementable with 4 vpshufb insns, 2 vpermq and 3 vpor insns.  */
49861	return true;
49862      break;
49863    case V4SImode:
49864    case V4SFmode:
49865    case V8HImode:
49866    case V16QImode:
49867      /* All implementable with a single vpperm insn.  */
49868      if (TARGET_XOP)
49869	return true;
49870      /* All implementable with 2 pshufb + 1 ior.  */
49871      if (TARGET_SSSE3)
49872	return true;
49873      break;
49874    case V2DImode:
49875    case V2DFmode:
49876      /* All implementable with shufpd or unpck[lh]pd.  */
49877      return true;
49878    default:
49879      return false;
49880    }
49881
49882  /* Extract the values from the vector CST into the permutation
49883     array in D.  */
49884  memcpy (d.perm, sel, nelt);
49885  for (i = which = 0; i < nelt; ++i)
49886    {
49887      unsigned char e = d.perm[i];
49888      gcc_assert (e < 2 * nelt);
49889      which |= (e < nelt ? 1 : 2);
49890    }
49891
  /* If all elements are from the second vector, fold them onto the first.  */
49893  if (which == 2)
49894    for (i = 0; i < nelt; ++i)
49895      d.perm[i] -= nelt;
49896
49897  /* Check whether the mask can be applied to the vector type.  */
49898  d.one_operand_p = (which != 3);
49899
49900  /* Implementable with shufps or pshufd.  */
49901  if (d.one_operand_p && (d.vmode == V4SFmode || d.vmode == V4SImode))
49902    return true;
49903
49904  /* Otherwise we have to go through the motions and see if we can
49905     figure out how to generate the requested permutation.  */
49906  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
49907  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
49908  if (!d.one_operand_p)
49909    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
49910
49911  start_sequence ();
49912  ret = ix86_expand_vec_perm_const_1 (&d);
49913  end_sequence ();
49914
49915  return ret;
49916}
49917
49918void
49919ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
49920{
49921  struct expand_vec_perm_d d;
49922  unsigned i, nelt;
49923
49924  d.target = targ;
49925  d.op0 = op0;
49926  d.op1 = op1;
49927  d.vmode = GET_MODE (targ);
49928  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49929  d.one_operand_p = false;
49930  d.testing_p = false;
49931
49932  for (i = 0; i < nelt; ++i)
49933    d.perm[i] = i * 2 + odd;
49934
49935  /* We'll either be able to implement the permutation directly...  */
49936  if (expand_vec_perm_1 (&d))
49937    return;
49938
49939  /* ... or we use the special-case patterns.  */
49940  expand_vec_perm_even_odd_1 (&d, odd);
49941}
49942
49943static void
49944ix86_expand_vec_interleave (rtx targ, rtx op0, rtx op1, bool high_p)
49945{
49946  struct expand_vec_perm_d d;
49947  unsigned i, nelt, base;
49948  bool ok;
49949
49950  d.target = targ;
49951  d.op0 = op0;
49952  d.op1 = op1;
49953  d.vmode = GET_MODE (targ);
49954  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
49955  d.one_operand_p = false;
49956  d.testing_p = false;
49957
49958  base = high_p ? nelt / 2 : 0;
49959  for (i = 0; i < nelt / 2; ++i)
49960    {
49961      d.perm[i * 2] = i + base;
49962      d.perm[i * 2 + 1] = i + base + nelt;
49963    }
49964
49965  /* Note that for AVX this isn't one instruction.  */
49966  ok = ix86_expand_vec_perm_const_1 (&d);
49967  gcc_assert (ok);
49968}
49969
49970
49971/* Expand a vector operation CODE for a V*QImode in terms of the
49972   same operation on V*HImode.  */
49973
49974void
49975ix86_expand_vecop_qihi (enum rtx_code code, rtx dest, rtx op1, rtx op2)
49976{
49977  machine_mode qimode = GET_MODE (dest);
49978  machine_mode himode;
49979  rtx (*gen_il) (rtx, rtx, rtx);
49980  rtx (*gen_ih) (rtx, rtx, rtx);
49981  rtx op1_l, op1_h, op2_l, op2_h, res_l, res_h;
49982  struct expand_vec_perm_d d;
49983  bool ok, full_interleave;
49984  bool uns_p = false;
49985  int i;
49986
49987  switch (qimode)
49988    {
49989    case V16QImode:
49990      himode = V8HImode;
49991      gen_il = gen_vec_interleave_lowv16qi;
49992      gen_ih = gen_vec_interleave_highv16qi;
49993      break;
49994    case V32QImode:
49995      himode = V16HImode;
49996      gen_il = gen_avx2_interleave_lowv32qi;
49997      gen_ih = gen_avx2_interleave_highv32qi;
49998      break;
49999    case V64QImode:
50000      himode = V32HImode;
50001      gen_il = gen_avx512bw_interleave_lowv64qi;
50002      gen_ih = gen_avx512bw_interleave_highv64qi;
50003      break;
50004    default:
50005      gcc_unreachable ();
50006    }
50007
50008  op2_l = op2_h = op2;
50009  switch (code)
50010    {
50011    case MULT:
50012      /* Unpack data such that we've got a source byte in each low byte of
50013	 each word.  We don't care what goes into the high byte of each word.
50014	 Rather than trying to get zero in there, most convenient is to let
50015	 it be a copy of the low byte.  */
50016      op2_l = gen_reg_rtx (qimode);
50017      op2_h = gen_reg_rtx (qimode);
50018      emit_insn (gen_il (op2_l, op2, op2));
50019      emit_insn (gen_ih (op2_h, op2, op2));
50021
50022      op1_l = gen_reg_rtx (qimode);
50023      op1_h = gen_reg_rtx (qimode);
50024      emit_insn (gen_il (op1_l, op1, op1));
50025      emit_insn (gen_ih (op1_h, op1, op1));
50026      full_interleave = qimode == V16QImode;
50027      break;
50028
50029    case ASHIFT:
50030    case LSHIFTRT:
50031      uns_p = true;
50032      /* FALLTHRU */
50033    case ASHIFTRT:
50034      op1_l = gen_reg_rtx (himode);
50035      op1_h = gen_reg_rtx (himode);
50036      ix86_expand_sse_unpack (op1_l, op1, uns_p, false);
50037      ix86_expand_sse_unpack (op1_h, op1, uns_p, true);
50038      full_interleave = true;
50039      break;
50040    default:
50041      gcc_unreachable ();
50042    }
50043
50044  /* Perform the operation.  */
50045  res_l = expand_simple_binop (himode, code, op1_l, op2_l, NULL_RTX,
50046			       1, OPTAB_DIRECT);
50047  res_h = expand_simple_binop (himode, code, op1_h, op2_h, NULL_RTX,
50048			       1, OPTAB_DIRECT);
50049  gcc_assert (res_l && res_h);
50050
50051  /* Merge the data back into the right place.  */
50052  d.target = dest;
50053  d.op0 = gen_lowpart (qimode, res_l);
50054  d.op1 = gen_lowpart (qimode, res_h);
50055  d.vmode = qimode;
50056  d.nelt = GET_MODE_NUNITS (qimode);
50057  d.one_operand_p = false;
50058  d.testing_p = false;
50059
50060  if (full_interleave)
50061    {
      /* For SSE2, we used a full interleave, so the desired
50063	 results are in the even elements.  */
50064      for (i = 0; i < d.nelt; ++i)
50065	d.perm[i] = i * 2;
50066    }
50067  else
50068    {
50069      /* For AVX, the interleave used above was not cross-lane.  So the
50070	 extraction is evens but with the second and third quarter swapped.
50071	 Happily, that is even one insn shorter than even extraction.
50072	 For AVX512BW we have 4 lanes.  We extract evens from within a lane,
50073	 always first from the first and then from the second source operand,
	 the index bits above the low 4 bits remain the same.
50075	 Thus, for d.nelt == 32 we want permutation
50076	 0,2,4,..14, 32,34,36,..46, 16,18,20,..30, 48,50,52,..62
50077	 and for d.nelt == 64 we want permutation
50078	 0,2,4,..14, 64,66,68,..78, 16,18,20,..30, 80,82,84,..94,
50079	 32,34,36,..46, 96,98,100,..110, 48,50,52,..62, 112,114,116,..126.  */
50080      for (i = 0; i < d.nelt; ++i)
50081	d.perm[i] = ((i * 2) & 14) + ((i & 8) ? d.nelt : 0) + (i & ~15);
50082    }
50083
50084  ok = ix86_expand_vec_perm_const_1 (&d);
50085  gcc_assert (ok);
50086
50087  set_unique_reg_note (get_last_insn (), REG_EQUAL,
50088		       gen_rtx_fmt_ee (code, qimode, op1, op2));
50089}
50090
50091/* Helper function of ix86_expand_mul_widen_evenodd.  Return true
50092   if op is CONST_VECTOR with all odd elements equal to their
50093   preceding element.  */
50094
50095static bool
50096const_vector_equal_evenodd_p (rtx op)
50097{
50098  machine_mode mode = GET_MODE (op);
50099  int i, nunits = GET_MODE_NUNITS (mode);
50100  if (GET_CODE (op) != CONST_VECTOR
50101      || nunits != CONST_VECTOR_NUNITS (op))
50102    return false;
50103  for (i = 0; i < nunits; i += 2)
50104    if (CONST_VECTOR_ELT (op, i) != CONST_VECTOR_ELT (op, i + 1))
50105      return false;
50106  return true;
50107}
50108
50109void
50110ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
50111			       bool uns_p, bool odd_p)
50112{
50113  machine_mode mode = GET_MODE (op1);
50114  machine_mode wmode = GET_MODE (dest);
50115  rtx x;
50116  rtx orig_op1 = op1, orig_op2 = op2;
50117
50118  if (!nonimmediate_operand (op1, mode))
50119    op1 = force_reg (mode, op1);
50120  if (!nonimmediate_operand (op2, mode))
50121    op2 = force_reg (mode, op2);
50122
50123  /* We only play even/odd games with vectors of SImode.  */
50124  gcc_assert (mode == V4SImode || mode == V8SImode || mode == V16SImode);
50125
50126  /* If we're looking for the odd results, shift those members down to
50127     the even slots.  For some cpus this is faster than a PSHUFD.  */
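  /* The shift count is the 32-bit element width; shifting the wider
     DImode view right by 32 moves each odd SImode element down into
     the even slot of its pair.  */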
50128  if (odd_p)
50129    {
50130      /* For XOP use vpmacsdqh, but only for smult, as it is only
50131	 signed.  */
50132      if (TARGET_XOP && mode == V4SImode && !uns_p)
50133	{
50134	  x = force_reg (wmode, CONST0_RTX (wmode));
50135	  emit_insn (gen_xop_pmacsdqh (dest, op1, op2, x));
50136	  return;
50137	}
50138
50139      x = GEN_INT (GET_MODE_UNIT_BITSIZE (mode));
50140      if (!const_vector_equal_evenodd_p (orig_op1))
50141	op1 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op1),
50142			    x, NULL, 1, OPTAB_DIRECT);
50143      if (!const_vector_equal_evenodd_p (orig_op2))
50144	op2 = expand_binop (wmode, lshr_optab, gen_lowpart (wmode, op2),
50145			    x, NULL, 1, OPTAB_DIRECT);
50146      op1 = gen_lowpart (mode, op1);
50147      op2 = gen_lowpart (mode, op2);
50148    }
50149
50150  if (mode == V16SImode)
50151    {
50152      if (uns_p)
50153	x = gen_vec_widen_umult_even_v16si (dest, op1, op2);
50154      else
50155	x = gen_vec_widen_smult_even_v16si (dest, op1, op2);
50156    }
50157  else if (mode == V8SImode)
50158    {
50159      if (uns_p)
50160	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
50161      else
50162	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
50163    }
50164  else if (uns_p)
50165    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
50166  else if (TARGET_SSE4_1)
50167    x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
50168  else
50169    {
50170      rtx s1, s2, t0, t1, t2;
50171
      /* The easiest way to implement this without PMULDQ is to go through
	 the motions as if we were performing a full 64-bit multiply, except
	 that we need to do less shuffling of the elements.  */
50175
50176      /* Compute the sign-extension, aka highparts, of the two operands.  */
50177      s1 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50178				op1, pc_rtx, pc_rtx);
50179      s2 = ix86_expand_sse_cmp (gen_reg_rtx (mode), GT, CONST0_RTX (mode),
50180				op2, pc_rtx, pc_rtx);
50181
50182      /* Multiply LO(A) * HI(B), and vice-versa.  */
50183      t1 = gen_reg_rtx (wmode);
50184      t2 = gen_reg_rtx (wmode);
50185      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
50186      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
50187
50188      /* Multiply LO(A) * LO(B).  */
50189      t0 = gen_reg_rtx (wmode);
50190      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
50191
50192      /* Combine and shift the highparts into place.  */
50193      t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
50194      t1 = expand_binop (wmode, ashl_optab, t1, GEN_INT (32), t1,
50195			 1, OPTAB_DIRECT);
50196
50197      /* Combine high and low parts.  */
50198      force_expand_binop (wmode, add_optab, t0, t1, dest, 1, OPTAB_DIRECT);
50199      return;
50200    }
50201  emit_insn (x);
50202}
50203
50204void
50205ix86_expand_mul_widen_hilo (rtx dest, rtx op1, rtx op2,
50206			    bool uns_p, bool high_p)
50207{
50208  machine_mode wmode = GET_MODE (dest);
50209  machine_mode mode = GET_MODE (op1);
50210  rtx t1, t2, t3, t4, mask;
50211
50212  switch (mode)
50213    {
50214    case V4SImode:
50215      t1 = gen_reg_rtx (mode);
50216      t2 = gen_reg_rtx (mode);
50217      if (TARGET_XOP && !uns_p)
50218	{
50219	  /* With XOP, we have pmacsdqh, aka mul_widen_odd.  In this case,
50220	     shuffle the elements once so that all elements are in the right
50221	     place for immediate use: { A C B D }.  */
50222	  emit_insn (gen_sse2_pshufd_1 (t1, op1, const0_rtx, const2_rtx,
50223					const1_rtx, GEN_INT (3)));
50224	  emit_insn (gen_sse2_pshufd_1 (t2, op2, const0_rtx, const2_rtx,
50225					const1_rtx, GEN_INT (3)));
50226	}
50227      else
50228	{
50229	  /* Put the elements into place for the multiply.  */
50230	  ix86_expand_vec_interleave (t1, op1, op1, high_p);
50231	  ix86_expand_vec_interleave (t2, op2, op2, high_p);
50232	  high_p = false;
50233	}
50234      ix86_expand_mul_widen_evenodd (dest, t1, t2, uns_p, high_p);
50235      break;
50236
50237    case V8SImode:
50238      /* Shuffle the elements between the lanes.  After this we
50239	 have { A B E F | C D G H } for each operand.  */
50240      t1 = gen_reg_rtx (V4DImode);
50241      t2 = gen_reg_rtx (V4DImode);
50242      emit_insn (gen_avx2_permv4di_1 (t1, gen_lowpart (V4DImode, op1),
50243				      const0_rtx, const2_rtx,
50244				      const1_rtx, GEN_INT (3)));
50245      emit_insn (gen_avx2_permv4di_1 (t2, gen_lowpart (V4DImode, op2),
50246				      const0_rtx, const2_rtx,
50247				      const1_rtx, GEN_INT (3)));
50248
50249      /* Shuffle the elements within the lanes.  After this we
50250	 have { A A B B | C C D D } or { E E F F | G G H H }.  */
50251      t3 = gen_reg_rtx (V8SImode);
50252      t4 = gen_reg_rtx (V8SImode);
50253      mask = GEN_INT (high_p
50254		      ? 2 + (2 << 2) + (3 << 4) + (3 << 6)
50255		      : 0 + (0 << 2) + (1 << 4) + (1 << 6));
50256      emit_insn (gen_avx2_pshufdv3 (t3, gen_lowpart (V8SImode, t1), mask));
50257      emit_insn (gen_avx2_pshufdv3 (t4, gen_lowpart (V8SImode, t2), mask));
50258
50259      ix86_expand_mul_widen_evenodd (dest, t3, t4, uns_p, false);
50260      break;
50261
50262    case V8HImode:
50263    case V16HImode:
50264      t1 = expand_binop (mode, smul_optab, op1, op2, NULL_RTX,
50265			 uns_p, OPTAB_DIRECT);
50266      t2 = expand_binop (mode,
50267			 uns_p ? umul_highpart_optab : smul_highpart_optab,
50268			 op1, op2, NULL_RTX, uns_p, OPTAB_DIRECT);
50269      gcc_assert (t1 && t2);
50270
50271      t3 = gen_reg_rtx (mode);
50272      ix86_expand_vec_interleave (t3, t1, t2, high_p);
50273      emit_move_insn (dest, gen_lowpart (wmode, t3));
50274      break;
50275
50276    case V16QImode:
50277    case V32QImode:
50278    case V32HImode:
50279    case V16SImode:
50280    case V64QImode:
50281      t1 = gen_reg_rtx (wmode);
50282      t2 = gen_reg_rtx (wmode);
50283      ix86_expand_sse_unpack (t1, op1, uns_p, high_p);
50284      ix86_expand_sse_unpack (t2, op2, uns_p, high_p);
50285
50286      emit_insn (gen_rtx_SET (VOIDmode, dest, gen_rtx_MULT (wmode, t1, t2)));
50287      break;
50288
50289    default:
50290      gcc_unreachable ();
50291    }
50292}
50293
50294void
50295ix86_expand_sse2_mulv4si3 (rtx op0, rtx op1, rtx op2)
50296{
50297  rtx res_1, res_2, res_3, res_4;
50298
50299  res_1 = gen_reg_rtx (V4SImode);
50300  res_2 = gen_reg_rtx (V4SImode);
50301  res_3 = gen_reg_rtx (V2DImode);
50302  res_4 = gen_reg_rtx (V2DImode);
50303  ix86_expand_mul_widen_evenodd (res_3, op1, op2, true, false);
50304  ix86_expand_mul_widen_evenodd (res_4, op1, op2, true, true);
50305
50306  /* Move the results in element 2 down to element 1; we don't care
50307     what goes in elements 2 and 3.  Then we can merge the parts
50308     back together with an interleave.
50309
50310     Note that two other sequences were tried:
50311     (1) Use interleaves at the start instead of psrldq, which allows
50312     us to use a single shufps to merge things back at the end.
50313     (2) Use shufps here to combine the two vectors, then pshufd to
50314     put the elements in the correct order.
50315     In both cases the cost of the reformatting stall was too high
50316     and the overall sequence slower.  */
50317
50318  emit_insn (gen_sse2_pshufd_1 (res_1, gen_lowpart (V4SImode, res_3),
50319				const0_rtx, const2_rtx,
50320				const0_rtx, const0_rtx));
50321  emit_insn (gen_sse2_pshufd_1 (res_2, gen_lowpart (V4SImode, res_4),
50322				const0_rtx, const2_rtx,
50323				const0_rtx, const0_rtx));
50324  res_1 = emit_insn (gen_vec_interleave_lowv4si (op0, res_1, res_2));
50325
50326  set_unique_reg_note (res_1, REG_EQUAL, gen_rtx_MULT (V4SImode, op1, op2));
50327}
50328
50329void
50330ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
50331{
50332  machine_mode mode = GET_MODE (op0);
50333  rtx t1, t2, t3, t4, t5, t6;
50334
50335  if (TARGET_AVX512DQ && mode == V8DImode)
50336    emit_insn (gen_avx512dq_mulv8di3 (op0, op1, op2));
50337  else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V4DImode)
50338    emit_insn (gen_avx512dq_mulv4di3 (op0, op1, op2));
50339  else if (TARGET_AVX512DQ && TARGET_AVX512VL && mode == V2DImode)
50340    emit_insn (gen_avx512dq_mulv2di3 (op0, op1, op2));
50341  else if (TARGET_XOP && mode == V2DImode)
50342    {
50343      /* op1: A,B,C,D, op2: E,F,G,H */
50344      op1 = gen_lowpart (V4SImode, op1);
50345      op2 = gen_lowpart (V4SImode, op2);
50346
50347      t1 = gen_reg_rtx (V4SImode);
50348      t2 = gen_reg_rtx (V4SImode);
50349      t3 = gen_reg_rtx (V2DImode);
50350      t4 = gen_reg_rtx (V2DImode);
50351
50352      /* t1: B,A,D,C */
50353      emit_insn (gen_sse2_pshufd_1 (t1, op1,
50354				    GEN_INT (1),
50355				    GEN_INT (0),
50356				    GEN_INT (3),
50357				    GEN_INT (2)));
50358
50359      /* t2: (B*E),(A*F),(D*G),(C*H) */
50360      emit_insn (gen_mulv4si3 (t2, t1, op2));
50361
50362      /* t3: (B*E)+(A*F), (D*G)+(C*H) */
50363      emit_insn (gen_xop_phadddq (t3, t2));
50364
50365      /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
50366      emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
50367
      /* Multiply the lower parts and add them all together.  */
50369      t5 = gen_reg_rtx (V2DImode);
50370      emit_insn (gen_vec_widen_umult_even_v4si (t5,
50371					gen_lowpart (V4SImode, op1),
50372					gen_lowpart (V4SImode, op2)));
      op0 = expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
    }
50376  else
50377    {
50378      machine_mode nmode;
50379      rtx (*umul) (rtx, rtx, rtx);
50380
50381      if (mode == V2DImode)
50382	{
50383	  umul = gen_vec_widen_umult_even_v4si;
50384	  nmode = V4SImode;
50385	}
50386      else if (mode == V4DImode)
50387	{
50388	  umul = gen_vec_widen_umult_even_v8si;
50389	  nmode = V8SImode;
50390	}
50391      else if (mode == V8DImode)
50392	{
50393	  umul = gen_vec_widen_umult_even_v16si;
50394	  nmode = V16SImode;
50395	}
50396      else
	gcc_unreachable ();

50400      /* Multiply low parts.  */
50401      t1 = gen_reg_rtx (mode);
50402      emit_insn (umul (t1, gen_lowpart (nmode, op1), gen_lowpart (nmode, op2)));
50403
50404      /* Shift input vectors right 32 bits so we can multiply high parts.  */
50405      t6 = GEN_INT (32);
50406      t2 = expand_binop (mode, lshr_optab, op1, t6, NULL, 1, OPTAB_DIRECT);
50407      t3 = expand_binop (mode, lshr_optab, op2, t6, NULL, 1, OPTAB_DIRECT);
50408
50409      /* Multiply high parts by low parts.  */
50410      t4 = gen_reg_rtx (mode);
50411      t5 = gen_reg_rtx (mode);
50412      emit_insn (umul (t4, gen_lowpart (nmode, t2), gen_lowpart (nmode, op2)));
50413      emit_insn (umul (t5, gen_lowpart (nmode, t3), gen_lowpart (nmode, op1)));
50414
50415      /* Combine and shift the highparts back.  */
50416      t4 = expand_binop (mode, add_optab, t4, t5, t4, 1, OPTAB_DIRECT);
50417      t4 = expand_binop (mode, ashl_optab, t4, t6, t4, 1, OPTAB_DIRECT);
50418
50419      /* Combine high and low parts.  */
50420      force_expand_binop (mode, add_optab, t1, t4, op0, 1, OPTAB_DIRECT);
50421    }
50422
50423  set_unique_reg_note (get_last_insn (), REG_EQUAL,
50424		       gen_rtx_MULT (mode, op1, op2));
50425}
50426
/* Return 1 if control transfer instruction INSN
   should be encoded with a bnd prefix.
   If INSN is NULL then return 1 when control
   transfer instructions should be prefixed with
   bnd by default for the current function.  */
50432
50433bool
50434ix86_bnd_prefixed_insn_p (rtx insn)
50435{
50436  /* For call insns check special flag.  */
50437  if (insn && CALL_P (insn))
50438    {
50439      rtx call = get_call_rtx_from (insn);
50440      if (call)
50441	return CALL_EXPR_WITH_BOUNDS_P (call);
50442    }
50443
50444  /* All other insns are prefixed only if function is instrumented.  */
50445  return chkp_function_instrumented_p (current_function_decl);
50446}
50447
50448/* Calculate integer abs() using only SSE2 instructions.  */
50449
50450void
50451ix86_expand_sse2_abs (rtx target, rtx input)
50452{
50453  machine_mode mode = GET_MODE (target);
50454  rtx tmp0, tmp1, x;
50455
50456  switch (mode)
50457    {
50458      /* For 32-bit signed integer X, the best way to calculate the absolute
50459	 value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)).  */
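      /* For example, with X == -5: X >> 31 == -1, (-1 ^ -5) == 4,
	 and 4 - (-1) == 5.  */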
50460      case V4SImode:
50461	tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
50462				    GEN_INT (GET_MODE_BITSIZE
50463					     (GET_MODE_INNER (mode)) - 1),
50464				    NULL, 0, OPTAB_DIRECT);
50465	tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
50466				    NULL, 0, OPTAB_DIRECT);
50467	x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
50468				 target, 0, OPTAB_DIRECT);
50469	break;
50470
50471      /* For 16-bit signed integer X, the best way to calculate the absolute
50472	 value of X is max (X, -X), as SSE2 provides the PMAXSW insn.  */
50473      case V8HImode:
50474	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50475
50476	x = expand_simple_binop (mode, SMAX, tmp0, input,
50477				 target, 0, OPTAB_DIRECT);
50478	break;
50479
50480      /* For 8-bit signed integer X, the best way to calculate the absolute
50481	 value of X is min ((unsigned char) X, (unsigned char) (-X)),
50482	 as SSE2 provides the PMINUB insn.  */
50483      case V16QImode:
50484	tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
50485
50486	x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
50487				 target, 0, OPTAB_DIRECT);
50488	break;
50489
50490      default:
50491	gcc_unreachable ();
50492    }
50493
50494  if (x != target)
50495    emit_move_insn (target, x);
50496}
50497
50498/* Expand an insert into a vector register through pinsr insn.
50499   Return true if successful.  */
50500
50501bool
50502ix86_expand_pinsr (rtx *operands)
50503{
50504  rtx dst = operands[0];
50505  rtx src = operands[3];
50506
50507  unsigned int size = INTVAL (operands[1]);
50508  unsigned int pos = INTVAL (operands[2]);
50509
50510  if (GET_CODE (src) == SUBREG)
50511    {
50512      /* Reject non-lowpart subregs.  */
50513      if (SUBREG_BYTE (src) != 0)
50514       return false;
50515      src = SUBREG_REG (src);
50516    }
50517
50518  if (GET_CODE (dst) == SUBREG)
50519    {
50520      pos += SUBREG_BYTE (dst) * BITS_PER_UNIT;
50521      dst = SUBREG_REG (dst);
50522    }
50523
50524  switch (GET_MODE (dst))
50525    {
50526    case V16QImode:
50527    case V8HImode:
50528    case V4SImode:
50529    case V2DImode:
50530      {
50531	machine_mode srcmode, dstmode;
50532	rtx (*pinsr)(rtx, rtx, rtx, rtx);
50533
50534	srcmode = mode_for_size (size, MODE_INT, 0);
50535
50536	switch (srcmode)
50537	  {
50538	  case QImode:
50539	    if (!TARGET_SSE4_1)
50540	      return false;
50541	    dstmode = V16QImode;
50542	    pinsr = gen_sse4_1_pinsrb;
50543	    break;
50544
50545	  case HImode:
50546	    if (!TARGET_SSE2)
50547	      return false;
50548	    dstmode = V8HImode;
50549	    pinsr = gen_sse2_pinsrw;
50550	    break;
50551
50552	  case SImode:
50553	    if (!TARGET_SSE4_1)
50554	      return false;
50555	    dstmode = V4SImode;
50556	    pinsr = gen_sse4_1_pinsrd;
50557	    break;
50558
50559	  case DImode:
50560	    gcc_assert (TARGET_64BIT);
50561	    if (!TARGET_SSE4_1)
50562	      return false;
50563	    dstmode = V2DImode;
50564	    pinsr = gen_sse4_1_pinsrq;
50565	    break;
50566
50567	  default:
50568	    return false;
50569	  }
50570
50571	/* Reject insertions to misaligned positions.  */
50572	if (pos & (size-1))
50573	  return false;
50574
50575	rtx d = dst;
50576	if (GET_MODE (dst) != dstmode)
50577	  d = gen_reg_rtx (dstmode);
50578	src = gen_lowpart (srcmode, src);
50579
50580	pos /= size;
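	/* The pinsr patterns in sse.md express the position as a
	   single-bit vec_merge mask, hence the 1 << POS immediate.  */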
50581
50582	emit_insn (pinsr (d, gen_lowpart (dstmode, dst), src,
50583			  GEN_INT (1 << pos)));
50584	if (d != dst)
50585	  emit_move_insn (dst, gen_lowpart (GET_MODE (dst), d));
50586	return true;
50587      }
50588
50589    default:
50590      return false;
50591    }
50592}
50593
/* This function returns the calling-ABI-specific va_list type node.
   It returns the FNDECL-specific va_list type.  */
50596
50597static tree
50598ix86_fn_abi_va_list (tree fndecl)
50599{
50600  if (!TARGET_64BIT)
50601    return va_list_type_node;
50602  gcc_assert (fndecl != NULL_TREE);
50603
50604  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
50605    return ms_va_list_type_node;
50606  else
50607    return sysv_va_list_type_node;
50608}
50609
/* Returns the canonical va_list type specified by TYPE.  If no
   valid TYPE is provided, it returns NULL_TREE.  */
50612
50613static tree
50614ix86_canonical_va_list_type (tree type)
50615{
50616  tree wtype, htype;
50617
50618  /* Resolve references and pointers to va_list type.  */
50619  if (TREE_CODE (type) == MEM_REF)
50620    type = TREE_TYPE (type);
50621  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
50622    type = TREE_TYPE (type);
50623  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
50624    type = TREE_TYPE (type);
50625
50626  if (TARGET_64BIT && va_list_type_node != NULL_TREE)
50627    {
50628      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
50630      htype = type;
50631      if (TREE_CODE (wtype) == ARRAY_TYPE)
50632	{
50633	  /* If va_list is an array type, the argument may have decayed
50634	     to a pointer type, e.g. by being passed to another function.
50635	     In that case, unwrap both types so that we can compare the
50636	     underlying records.  */
50637	  if (TREE_CODE (htype) == ARRAY_TYPE
50638	      || POINTER_TYPE_P (htype))
50639	    {
50640	      wtype = TREE_TYPE (wtype);
50641	      htype = TREE_TYPE (htype);
50642	    }
50643	}
50644      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50645	return va_list_type_node;
50646      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
50648      htype = type;
50649      if (TREE_CODE (wtype) == ARRAY_TYPE)
50650	{
50651	  /* If va_list is an array type, the argument may have decayed
50652	     to a pointer type, e.g. by being passed to another function.
50653	     In that case, unwrap both types so that we can compare the
50654	     underlying records.  */
50655	  if (TREE_CODE (htype) == ARRAY_TYPE
50656	      || POINTER_TYPE_P (htype))
50657	    {
50658	      wtype = TREE_TYPE (wtype);
50659	      htype = TREE_TYPE (htype);
50660	    }
50661	}
50662      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50663	return sysv_va_list_type_node;
50664      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
50666      htype = type;
50667      if (TREE_CODE (wtype) == ARRAY_TYPE)
50668	{
50669	  /* If va_list is an array type, the argument may have decayed
50670	     to a pointer type, e.g. by being passed to another function.
50671	     In that case, unwrap both types so that we can compare the
50672	     underlying records.  */
50673	  if (TREE_CODE (htype) == ARRAY_TYPE
50674	      || POINTER_TYPE_P (htype))
50675	    {
50676	      wtype = TREE_TYPE (wtype);
50677	      htype = TREE_TYPE (htype);
50678	    }
50679	}
50680      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
50681	return ms_va_list_type_node;
50682      return NULL_TREE;
50683    }
50684  return std_canonical_va_list_type (type);
50685}
50686
50687/* Iterate through the target-specific builtin types for va_list.
50688   IDX denotes the iterator, *PTREE is set to the result type of
50689   the va_list builtin, and *PNAME to its internal type.
50690   Returns zero if there is no element for this index, otherwise
50691   IDX should be increased upon the next call.
50692   Note, do not iterate a base builtin's name like __builtin_va_list.
50693   Used from c_common_nodes_and_builtins.  */
50694
50695static int
50696ix86_enum_va_list (int idx, const char **pname, tree *ptree)
50697{
50698  if (TARGET_64BIT)
50699    {
50700      switch (idx)
50701	{
50702	default:
50703	  break;
50704
50705	case 0:
50706	  *ptree = ms_va_list_type_node;
50707	  *pname = "__builtin_ms_va_list";
50708	  return 1;
50709
50710	case 1:
50711	  *ptree = sysv_va_list_type_node;
50712	  *pname = "__builtin_sysv_va_list";
50713	  return 1;
50714	}
50715    }
50716
50717  return 0;
50718}
50719
50720#undef TARGET_SCHED_DISPATCH
50721#define TARGET_SCHED_DISPATCH has_dispatch
50722#undef TARGET_SCHED_DISPATCH_DO
50723#define TARGET_SCHED_DISPATCH_DO do_dispatch
50724#undef TARGET_SCHED_REASSOCIATION_WIDTH
50725#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
50726#undef TARGET_SCHED_REORDER
50727#define TARGET_SCHED_REORDER ix86_sched_reorder
50728#undef TARGET_SCHED_ADJUST_PRIORITY
50729#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
50730#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
50731#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
50732  ix86_dependencies_evaluation_hook
50733
50734/* The size of the dispatch window is the total number of bytes of
50735   object code allowed in a window.  */
50736#define DISPATCH_WINDOW_SIZE 16
50737
50738/* Number of dispatch windows considered for scheduling.  */
50739#define MAX_DISPATCH_WINDOWS 3
50740
50741/* Maximum number of instructions in a window.  */
50742#define MAX_INSN 4
50743
50744/* Maximum number of immediate operands in a window.  */
50745#define MAX_IMM 4
50746
50747/* Maximum number of immediate bits allowed in a window.  */
50748#define MAX_IMM_SIZE 128
50749
50750/* Maximum number of 32 bit immediates allowed in a window.  */
50751#define MAX_IMM_32 4
50752
50753/* Maximum number of 64 bit immediates allowed in a window.  */
50754#define MAX_IMM_64 2
50755
50756/* Maximum total of loads or prefetches allowed in a window.  */
50757#define MAX_LOAD 2
50758
50759/* Maximum total of stores allowed in a window.  */
50760#define MAX_STORE 1
50761
50762#undef BIG
50763#define BIG 100
50764
50765
/* Dispatch groups.  Instructions that affect the mix in a dispatch window.  */
50767enum dispatch_group {
50768  disp_no_group = 0,
50769  disp_load,
50770  disp_store,
50771  disp_load_store,
50772  disp_prefetch,
50773  disp_imm,
50774  disp_imm_32,
50775  disp_imm_64,
50776  disp_branch,
50777  disp_cmp,
50778  disp_jcc,
50779  disp_last
50780};
50781
/* Number of allowable groups in a dispatch window.  It is an array
   indexed by the dispatch_group enum.  100 is used as a big number
   because the number of these kinds of operations does not have any
   effect on the dispatch window, but we still need entries for them
   in the table.  */
50787static unsigned int num_allowable_groups[disp_last] = {
50788  0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
50789};
50790
50791char group_name[disp_last + 1][16] = {
50792  "disp_no_group", "disp_load", "disp_store", "disp_load_store",
50793  "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
50794  "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
50795};
50796
50797/* Instruction path.  */
50798enum insn_path {
50799  no_path = 0,
50800  path_single, /* Single micro op.  */
50801  path_double, /* Double micro op.  */
  path_multi,  /* Instructions with more than 2 micro ops.  */
50803  last_path
50804};
50805
/* sched_insn_info describes one instruction scheduled in the basic
   block: the insn itself together with its dispatch group, decode
   path and size information.
50809
50810   Windows are allocated for each basic block and are linked
50811   together.  */
50812typedef struct sched_insn_info_s {
50813  rtx insn;
50814  enum dispatch_group group;
50815  enum insn_path path;
50816  int byte_len;
50817  int imm_bytes;
50818} sched_insn_info;
50819
/* Linked list of dispatch windows.  This is a doubly linked list of
   the dispatch windows of a basic block.  It contains information about
50822   the number of uops in the window and the total number of
50823   instructions and of bytes in the object code for this dispatch
50824   window.  */
50825typedef struct dispatch_windows_s {
50826  int num_insn;            /* Number of insn in the window.  */
50827  int num_uops;            /* Number of uops in the window.  */
50828  int window_size;         /* Number of bytes in the window.  */
  int window_num;          /* Window number, either 0 or 1.  */
50830  int num_imm;             /* Number of immediates in an insn.  */
50831  int num_imm_32;          /* Number of 32 bit immediates in an insn.  */
50832  int num_imm_64;          /* Number of 64 bit immediates in an insn.  */
50833  int imm_size;            /* Total immediates in the window.  */
50834  int num_loads;           /* Total memory loads in the window.  */
50835  int num_stores;          /* Total memory stores in the window.  */
50836  int violation;          /* Violation exists in window.  */
50837  sched_insn_info *window; /* Pointer to the window.  */
50838  struct dispatch_windows_s *next;
50839  struct dispatch_windows_s *prev;
50840} dispatch_windows;
50841
/* Immediate values used in an insn.  */
typedef struct imm_info_s {
  int imm;
  int imm32;
  int imm64;
} imm_info;
50849
50850static dispatch_windows *dispatch_window_list;
50851static dispatch_windows *dispatch_window_list1;
50852
50853/* Get dispatch group of insn.  */
50854
50855static enum dispatch_group
50856get_mem_group (rtx_insn *insn)
50857{
50858  enum attr_memory memory;
50859
50860  if (INSN_CODE (insn) < 0)
50861    return disp_no_group;
50862  memory = get_attr_memory (insn);
50863  if (memory == MEMORY_STORE)
50864    return disp_store;
50865
50866  if (memory == MEMORY_LOAD)
50867    return disp_load;
50868
50869  if (memory == MEMORY_BOTH)
50870    return disp_load_store;
50871
50872  return disp_no_group;
50873}
50874
50875/* Return true if insn is a compare instruction.  */
50876
50877static bool
50878is_cmp (rtx_insn *insn)
50879{
50880  enum attr_type type;
50881
50882  type = get_attr_type (insn);
50883  return (type == TYPE_TEST
50884	  || type == TYPE_ICMP
50885	  || type == TYPE_FCMP
50886	  || GET_CODE (PATTERN (insn)) == COMPARE);
50887}
50888
/* Return true if a dispatch violation was encountered.  */
50890
50891static bool
50892dispatch_violation (void)
50893{
50894  if (dispatch_window_list->next)
50895    return dispatch_window_list->next->violation;
50896  return dispatch_window_list->violation;
50897}
50898
50899/* Return true if insn is a branch instruction.  */
50900
50901static bool
50902is_branch (rtx insn)
50903{
50904  return (CALL_P (insn) || JUMP_P (insn));
50905}
50906
50907/* Return true if insn is a prefetch instruction.  */
50908
50909static bool
50910is_prefetch (rtx insn)
50911{
50912  return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
50913}
50914
50915/* This function initializes a dispatch window and the list container holding a
50916   pointer to the window.  */
50917
50918static void
50919init_window (int window_num)
50920{
50921  int i;
50922  dispatch_windows *new_list;
50923
50924  if (window_num == 0)
50925    new_list = dispatch_window_list;
50926  else
50927    new_list = dispatch_window_list1;
50928
50929  new_list->num_insn = 0;
50930  new_list->num_uops = 0;
50931  new_list->window_size = 0;
50932  new_list->next = NULL;
50933  new_list->prev = NULL;
50934  new_list->window_num = window_num;
50935  new_list->num_imm = 0;
50936  new_list->num_imm_32 = 0;
50937  new_list->num_imm_64 = 0;
50938  new_list->imm_size = 0;
50939  new_list->num_loads = 0;
50940  new_list->num_stores = 0;
50941  new_list->violation = false;
50942
50943  for (i = 0; i < MAX_INSN; i++)
50944    {
50945      new_list->window[i].insn = NULL;
50946      new_list->window[i].group = disp_no_group;
50947      new_list->window[i].path = no_path;
50948      new_list->window[i].byte_len = 0;
50949      new_list->window[i].imm_bytes = 0;
50950    }
50951  return;
50952}
50953
50954/* This function allocates and initializes a dispatch window and the
50955   list container holding a pointer to the window.  */
50956
50957static dispatch_windows *
50958allocate_window (void)
50959{
50960  dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
50961  new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
50962
50963  return new_list;
50964}
50965
50966/* This routine initializes the dispatch scheduling information.  It
50967   initiates building dispatch scheduler tables and constructs the
50968   first dispatch window.  */
50969
50970static void
50971init_dispatch_sched (void)
50972{
50973  /* Allocate a dispatch list and a window.  */
50974  dispatch_window_list = allocate_window ();
50975  dispatch_window_list1 = allocate_window ();
50976  init_window (0);
50977  init_window (1);
50978}
50979
50980/* This function returns true if a branch is detected.  End of a basic block
50981   does not have to be a branch, but here we assume only branches end a
50982   window.  */
50983
50984static bool
50985is_end_basic_block (enum dispatch_group group)
50986{
50987  return group == disp_branch;
50988}
50989
50990/* This function is called when the end of a window processing is reached.  */
50991
50992static void
50993process_end_window (void)
50994{
50995  gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
50996  if (dispatch_window_list->next)
50997    {
50998      gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
50999      gcc_assert (dispatch_window_list->window_size
51000		  + dispatch_window_list1->window_size <= 48);
51001      init_window (1);
51002    }
51003  init_window (0);
51004}
51005
51006/* Allocates a new dispatch window and adds it to WINDOW_LIST.
51007   WINDOW_NUM is either 0 or 1.  A maximum of two windows are generated
   for 48 bytes of instructions.  Note that these windows are not dispatch
   windows whose sizes are DISPATCH_WINDOW_SIZE.  */
51010
51011static dispatch_windows *
51012allocate_next_window (int window_num)
51013{
51014  if (window_num == 0)
51015    {
51016      if (dispatch_window_list->next)
51017	  init_window (1);
51018      init_window (0);
51019      return dispatch_window_list;
51020    }
51021
51022  dispatch_window_list->next = dispatch_window_list1;
51023  dispatch_window_list1->prev = dispatch_window_list;
51024
51025  return dispatch_window_list1;
51026}
51027
51028/* Compute number of immediate operands of an instruction.  */
51029
51030static void
51031find_constant (rtx in_rtx, imm_info *imm_values)
51032{
51033  if (INSN_P (in_rtx))
51034    in_rtx = PATTERN (in_rtx);
51035  subrtx_iterator::array_type array;
51036  FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
51037    if (const_rtx x = *iter)
51038      switch (GET_CODE (x))
51039	{
51040	case CONST:
51041	case SYMBOL_REF:
51042	case CONST_INT:
51043	  (imm_values->imm)++;
51044	  if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
51045	    (imm_values->imm32)++;
51046	  else
51047	    (imm_values->imm64)++;
51048	  break;
51049
51050	case CONST_DOUBLE:
51051	  (imm_values->imm)++;
51052	  (imm_values->imm64)++;
51053	  break;
51054
51055	case CODE_LABEL:
51056	  if (LABEL_KIND (x) == LABEL_NORMAL)
51057	    {
51058	      (imm_values->imm)++;
51059	      (imm_values->imm32)++;
51060	    }
51061	  break;
51062
51063	default:
51064	  break;
51065	}
51066}
51067
/* Return the total size of the immediate operands of an instruction
   along with the number of corresponding immediate operands.  It
   initializes its parameters to zero before calling FIND_CONSTANT.
   INSN is the input instruction.  IMM is the total number of immediates.
51072   IMM32 is the number of 32 bit immediates.  IMM64 is the number of 64
51073   bit immediates.  */
51074
51075static int
51076get_num_immediates (rtx insn, int *imm, int *imm32, int *imm64)
51077{
51078  imm_info imm_values = {0, 0, 0};
51079
51080  find_constant (insn, &imm_values);
51081  *imm = imm_values.imm;
51082  *imm32 = imm_values.imm32;
51083  *imm64 = imm_values.imm64;
51084  return imm_values.imm32 * 4 + imm_values.imm64 * 8;
51085}
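
/* Illustrative sketch (not part of the scheduler): for an insn whose
   pattern contains a single CONST_INT that fits in 32 bits, e.g.

     (set (reg:SI 0 ax) (const_int 305419896))   ;; 0x12345678

   the call

     int imm, imm32, imm64;
     int size = get_num_immediates (insn, &imm, &imm32, &imm64);

   leaves imm == 1, imm32 == 1, imm64 == 0 and returns size == 4 bytes.  */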
51086
/* Return true if INSN has at least one immediate operand.  */
51089
51090static bool
51091has_immediate (rtx insn)
51092{
51093  int num_imm_operand;
51094  int num_imm32_operand;
51095  int num_imm64_operand;
51096
51097  if (insn)
51098    return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51099			       &num_imm64_operand);
51100  return false;
51101}
51102
/* Return the single, double or multi decode path for INSN.  */
51104
51105static enum insn_path
51106get_insn_path (rtx_insn *insn)
51107{
51108  enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
51109
51110  if ((int)path == 0)
51111    return path_single;
51112
51113  if ((int)path == 1)
51114    return path_double;
51115
51116  return path_multi;
51117}
51118
51119/* Return insn dispatch group.  */
51120
51121static enum dispatch_group
51122get_insn_group (rtx_insn *insn)
51123{
51124  enum dispatch_group group = get_mem_group (insn);
51125  if (group)
51126    return group;
51127
51128  if (is_branch (insn))
51129    return disp_branch;
51130
51131  if (is_cmp (insn))
51132    return disp_cmp;
51133
51134  if (has_immediate (insn))
51135    return disp_imm;
51136
51137  if (is_prefetch (insn))
51138    return disp_prefetch;
51139
51140  return disp_no_group;
51141}
51142
51143/* Count number of GROUP restricted instructions in a dispatch
51144   window WINDOW_LIST.  */
51145
51146static int
51147count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
51148{
51149  enum dispatch_group group = get_insn_group (insn);
51150  int imm_size;
51151  int num_imm_operand;
51152  int num_imm32_operand;
51153  int num_imm64_operand;
51154
51155  if (group == disp_no_group)
51156    return 0;
51157
51158  if (group == disp_imm)
51159    {
51160      imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51161			      &num_imm64_operand);
51162      if (window_list->imm_size + imm_size > MAX_IMM_SIZE
51163	  || num_imm_operand + window_list->num_imm > MAX_IMM
51164	  || (num_imm32_operand > 0
51165	      && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
51166		  || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
51167	  || (num_imm64_operand > 0
51168	      && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
51169		  || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
51170	  || (window_list->imm_size + imm_size == MAX_IMM_SIZE
51171	      && num_imm64_operand > 0
51172	      && ((window_list->num_imm_64 > 0
51173		   && window_list->num_insn >= 2)
51174		  || window_list->num_insn >= 3)))
51175	return BIG;
51176
51177      return 1;
51178    }
51179
51180  if ((group == disp_load_store
51181       && (window_list->num_loads >= MAX_LOAD
51182	   || window_list->num_stores >= MAX_STORE))
51183      || ((group == disp_load
51184	   || group == disp_prefetch)
51185	  && window_list->num_loads >= MAX_LOAD)
51186      || (group == disp_store
51187	  && window_list->num_stores >= MAX_STORE))
51188    return BIG;
51189
51190  return 1;
51191}
51192
51193/* This function returns true if insn satisfies dispatch rules on the
51194   last window scheduled.  */
51195
51196static bool
51197fits_dispatch_window (rtx_insn *insn)
51198{
51199  dispatch_windows *window_list = dispatch_window_list;
51200  dispatch_windows *window_list_next = dispatch_window_list->next;
51201  unsigned int num_restrict;
51202  enum dispatch_group group = get_insn_group (insn);
51203  enum insn_path path = get_insn_path (insn);
51204  int sum;
51205
  /* Make disp_cmp and disp_jcc get scheduled as late as possible.  These
     instructions should be given the lowest priority in the Haifa
     scheduler so that they are scheduled in the same dispatch window as
     the instructions that reference them.  */
51210  if (group == disp_jcc || group == disp_cmp)
51211    return false;
51212
51213  /* Check nonrestricted.  */
51214  if (group == disp_no_group || group == disp_branch)
51215    return true;
51216
51217  /* Get last dispatch window.  */
51218  if (window_list_next)
51219    window_list = window_list_next;
51220
51221  if (window_list->window_num == 1)
51222    {
51223      sum = window_list->prev->window_size + window_list->window_size;
51224
51225      if (sum == 32
51226	  || (min_insn_size (insn) + sum) >= 48)
51227	/* Window 1 is full.  Go for next window.  */
51228	return true;
51229    }
51230
51231  num_restrict = count_num_restricted (insn, window_list);
51232
51233  if (num_restrict > num_allowable_groups[group])
51234    return false;
51235
51236  /* See if it fits in the first window.  */
51237  if (window_list->window_num == 0)
51238    {
      /* The first window should have only single- and double-path uops.  */
51241      if (path == path_double
51242	  && (window_list->num_uops + 2) > MAX_INSN)
51243	return false;
51244      else if (path != path_single)
51245        return false;
51246    }
51247  return true;
51248}
51249
51250/* Add an instruction INSN with NUM_UOPS micro-operations to the
51251   dispatch window WINDOW_LIST.  */
51252
51253static void
51254add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
51255{
51256  int byte_len = min_insn_size (insn);
51257  int num_insn = window_list->num_insn;
51258  int imm_size;
51259  sched_insn_info *window = window_list->window;
51260  enum dispatch_group group = get_insn_group (insn);
51261  enum insn_path path = get_insn_path (insn);
51262  int num_imm_operand;
51263  int num_imm32_operand;
51264  int num_imm64_operand;
51265
51266  if (!window_list->violation && group != disp_cmp
51267      && !fits_dispatch_window (insn))
51268    window_list->violation = true;
51269
51270  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51271				 &num_imm64_operand);
51272
51273  /* Initialize window with new instruction.  */
51274  window[num_insn].insn = insn;
51275  window[num_insn].byte_len = byte_len;
51276  window[num_insn].group = group;
51277  window[num_insn].path = path;
51278  window[num_insn].imm_bytes = imm_size;
51279
51280  window_list->window_size += byte_len;
51281  window_list->num_insn = num_insn + 1;
51282  window_list->num_uops = window_list->num_uops + num_uops;
51283  window_list->imm_size += imm_size;
51284  window_list->num_imm += num_imm_operand;
51285  window_list->num_imm_32 += num_imm32_operand;
51286  window_list->num_imm_64 += num_imm64_operand;
51287
51288  if (group == disp_store)
51289    window_list->num_stores += 1;
51290  else if (group == disp_load
51291	   || group == disp_prefetch)
51292    window_list->num_loads += 1;
51293  else if (group == disp_load_store)
51294    {
51295      window_list->num_stores += 1;
51296      window_list->num_loads += 1;
51297    }
51298}
51299
/* Add a scheduled instruction, INSN, to the current dispatch window.
   If the total byte count or the number of instructions in the window
   would exceed the allowable maximum, allocate a new window.  */
51303
51304static void
51305add_to_dispatch_window (rtx_insn *insn)
51306{
51307  int byte_len;
51308  dispatch_windows *window_list;
51309  dispatch_windows *next_list;
51310  dispatch_windows *window0_list;
51311  enum insn_path path;
51312  enum dispatch_group insn_group;
51313  bool insn_fits;
51314  int num_insn;
51315  int num_uops;
51316  int window_num;
51317  int insn_num_uops;
51318  int sum;
51319
51320  if (INSN_CODE (insn) < 0)
51321    return;
51322
51323  byte_len = min_insn_size (insn);
51324  window_list = dispatch_window_list;
51325  next_list = window_list->next;
51326  path = get_insn_path (insn);
51327  insn_group = get_insn_group (insn);
51328
51329  /* Get the last dispatch window.  */
51330  if (next_list)
51331      window_list = dispatch_window_list->next;
51332
51333  if (path == path_single)
51334    insn_num_uops = 1;
51335  else if (path == path_double)
51336    insn_num_uops = 2;
51337  else
51338    insn_num_uops = (int) path;
51339
  /* If the current window is full, get a new window.
     Window number zero is full if MAX_INSN uops are scheduled in it.
     Window number one is full if window zero's bytes plus window
     one's bytes reach 32, if adding the bytes of the new instruction
     would push the total to 48 or beyond, or if it already holds
     MAX_INSN instructions.  */
51346  num_insn = window_list->num_insn;
51347  num_uops = window_list->num_uops;
51348  window_num = window_list->window_num;
51349  insn_fits = fits_dispatch_window (insn);
51350
51351  if (num_insn >= MAX_INSN
51352      || num_uops + insn_num_uops > MAX_INSN
51353      || !(insn_fits))
51354    {
51355      window_num = ~window_num & 1;
51356      window_list = allocate_next_window (window_num);
51357    }
51358
51359  if (window_num == 0)
51360    {
51361      add_insn_window (insn, window_list, insn_num_uops);
51362      if (window_list->num_insn >= MAX_INSN
51363	  && insn_group == disp_branch)
51364	{
51365	  process_end_window ();
51366	  return;
51367	}
51368    }
51369  else if (window_num == 1)
51370    {
51371      window0_list = window_list->prev;
51372      sum = window0_list->window_size + window_list->window_size;
51373      if (sum == 32
51374	  || (byte_len + sum) >= 48)
51375	{
51376	  process_end_window ();
51377	  window_list = dispatch_window_list;
51378	}
51379
51380      add_insn_window (insn, window_list, insn_num_uops);
51381    }
51382  else
51383    gcc_unreachable ();
51384
51385  if (is_end_basic_block (insn_group))
51386    {
      /* End of basic block reached; do end-of-basic-block processing.  */
51388      process_end_window ();
51389      return;
51390    }
51391}
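
/* Illustrative note on the rotation above (not part of the
   implementation): WINDOW_NUM is flipped between 0 and 1 with
   "window_num = ~window_num & 1" (~0 & 1 == 1, ~1 & 1 == 0), so at most
   two windows exist at a time; once window one fills up, or a branch is
   seen, process_end_window reinitializes both and filling restarts at
   window zero.  */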
51392
51393/* Print the dispatch window, WINDOW_NUM, to FILE.  */
51394
51395DEBUG_FUNCTION static void
51396debug_dispatch_window_file (FILE *file, int window_num)
51397{
51398  dispatch_windows *list;
51399  int i;
51400
51401  if (window_num == 0)
51402    list = dispatch_window_list;
51403  else
51404    list = dispatch_window_list1;
51405
51406  fprintf (file, "Window #%d:\n", list->window_num);
51407  fprintf (file, "  num_insn = %d, num_uops = %d, window_size = %d\n",
51408	  list->num_insn, list->num_uops, list->window_size);
51409  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51410	   list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
51411
51412  fprintf (file, "  num_loads = %d, num_stores = %d\n", list->num_loads,
51413	  list->num_stores);
51414  fprintf (file, " insn info:\n");
51415
51416  for (i = 0; i < MAX_INSN; i++)
51417    {
51418      if (!list->window[i].insn)
51419	break;
51420      fprintf (file, "    group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
51421	      i, group_name[list->window[i].group],
51422	      i, (void *)list->window[i].insn,
51423	      i, list->window[i].path,
51424	      i, list->window[i].byte_len,
51425	      i, list->window[i].imm_bytes);
51426    }
51427}
51428
51429/* Print to stdout a dispatch window.  */
51430
51431DEBUG_FUNCTION void
51432debug_dispatch_window (int window_num)
51433{
51434  debug_dispatch_window_file (stdout, window_num);
51435}
51436
51437/* Print INSN dispatch information to FILE.  */
51438
51439DEBUG_FUNCTION static void
51440debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
51441{
51442  int byte_len;
51443  enum insn_path path;
51444  enum dispatch_group group;
51445  int imm_size;
51446  int num_imm_operand;
51447  int num_imm32_operand;
51448  int num_imm64_operand;
51449
51450  if (INSN_CODE (insn) < 0)
51451    return;
51452
51453  byte_len = min_insn_size (insn);
51454  path = get_insn_path (insn);
51455  group = get_insn_group (insn);
51456  imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
51457				 &num_imm64_operand);
51458
51459  fprintf (file, " insn info:\n");
51460  fprintf (file, "  group = %s, path = %d, byte_len = %d\n",
51461	   group_name[group], path, byte_len);
51462  fprintf (file, "  num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
51463	   num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
51464}
51465
51466/* Print to STDERR the status of the ready list with respect to
51467   dispatch windows.  */
51468
51469DEBUG_FUNCTION void
51470debug_ready_dispatch (void)
51471{
51472  int i;
51473  int no_ready = number_in_ready ();
51474
51475  fprintf (stdout, "Number of ready: %d\n", no_ready);
51476
51477  for (i = 0; i < no_ready; i++)
51478    debug_insn_dispatch_info_file (stdout, get_ready_element (i));
51479}
51480
51481/* This routine is the driver of the dispatch scheduler.  */
51482
51483static void
51484do_dispatch (rtx_insn *insn, int mode)
51485{
51486  if (mode == DISPATCH_INIT)
51487    init_dispatch_sched ();
51488  else if (mode == ADD_TO_DISPATCH_WINDOW)
51489    add_to_dispatch_window (insn);
51490}
51491
/* Answer the dispatch scheduling query ACTION for INSN.  Return false
   if dispatch scheduling is not enabled for the current target.  */
51493
51494static bool
51495has_dispatch (rtx_insn *insn, int action)
51496{
51497  if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3 || TARGET_BDVER4)
51498      && flag_dispatch_scheduler)
51499    switch (action)
51500      {
51501      default:
51502	return false;
51503
51504      case IS_DISPATCH_ON:
51505	return true;
51507
51508      case IS_CMP:
51509	return is_cmp (insn);
51510
51511      case DISPATCH_VIOLATION:
51512	return dispatch_violation ();
51513
51514      case FITS_DISPATCH_WINDOW:
51515	return fits_dispatch_window (insn);
51516      }
51517
51518  return false;
51519}
51520
/* Implementation of the reassociation_width target hook, used by
   the reassoc phase to identify the parallelism level of a
   reassociated tree.  The statement's tree code is passed in OPC.
   The type of the arguments is passed in MODE.
51525
51526   Currently parallel reassociation is enabled for Atom
51527   processors only and we set reassociation width to be 2
51528   because Atom may issue up to 2 instructions per cycle.
51529
51530   Return value should be fixed if parallel reassociation is
51531   enabled for other processors.  */
51532
51533static int
51534ix86_reassociation_width (unsigned int, machine_mode mode)
51535{
51536  /* Vector part.  */
51537  if (VECTOR_MODE_P (mode))
51538    {
51539      if (TARGET_VECTOR_PARALLEL_EXECUTION)
51540	return 2;
51541      else
51542	return 1;
51543    }
51544
51545  /* Scalar part.  */
51546  if (INTEGRAL_MODE_P (mode) && TARGET_REASSOC_INT_TO_PARALLEL)
51547    return 2;
51548  else if (FLOAT_MODE_P (mode) && TARGET_REASSOC_FP_TO_PARALLEL)
51549    return 2;
51550  else
51551    return 1;
51552}
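
/* Illustrative example (an assumption about how the reassoc pass uses
   the reported width, not something enforced here): with a width of 2,
   a chain such as

     s = a + b + c + d;

   may be rebalanced into two independent partial sums,

     t1 = a + b;  t2 = c + d;  s = t1 + t2;

   letting a 2-issue machine such as Atom execute the two additions in
   parallel.  */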
51553
51554/* ??? No autovectorization into MMX or 3DNOW until we can reliably
51555   place emms and femms instructions.  */
51556
51557static machine_mode
51558ix86_preferred_simd_mode (machine_mode mode)
51559{
51560  if (!TARGET_SSE)
51561    return word_mode;
51562
51563  switch (mode)
51564    {
51565    case QImode:
51566      return TARGET_AVX512BW ? V64QImode :
51567       (TARGET_AVX && !TARGET_PREFER_AVX128) ? V32QImode : V16QImode;
51568    case HImode:
51569      return TARGET_AVX512BW ? V32HImode :
51570       (TARGET_AVX && !TARGET_PREFER_AVX128) ? V16HImode : V8HImode;
51571    case SImode:
51572      return TARGET_AVX512F ? V16SImode :
51573	(TARGET_AVX && !TARGET_PREFER_AVX128) ? V8SImode : V4SImode;
51574    case DImode:
51575      return TARGET_AVX512F ? V8DImode :
51576	(TARGET_AVX && !TARGET_PREFER_AVX128) ? V4DImode : V2DImode;
51577
51578    case SFmode:
51579      if (TARGET_AVX512F)
51580	return V16SFmode;
51581      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51582	return V8SFmode;
51583      else
51584	return V4SFmode;
51585
51586    case DFmode:
51587      if (!TARGET_VECTORIZE_DOUBLE)
51588	return word_mode;
51589      else if (TARGET_AVX512F)
51590	return V8DFmode;
51591      else if (TARGET_AVX && !TARGET_PREFER_AVX128)
51592	return V4DFmode;
51593      else if (TARGET_SSE2)
51594	return V2DFmode;
51595      /* FALLTHRU */
51596
51597    default:
51598      return word_mode;
51599    }
51600}
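
/* For example, when compiling with -mavx2 (TARGET_AVX set and
   TARGET_PREFER_AVX128 not set) the switch above maps SImode to V8SImode
   and SFmode to V8SFmode, i.e. the vectorizer works with 8-lane 256-bit
   vectors; with plain -msse2 it gets V4SImode and V4SFmode instead.  */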
51601
51602/* If AVX is enabled then try vectorizing with both 256bit and 128bit
51603   vectors.  If AVX512F is enabled then try vectorizing with 512bit,
51604   256bit and 128bit vectors.  */
51605
51606static unsigned int
51607ix86_autovectorize_vector_sizes (void)
51608{
51609  return TARGET_AVX512F ? 64 | 32 | 16 :
51610    (TARGET_AVX && !TARGET_PREFER_AVX128) ? 32 | 16 : 0;
51611}
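
/* The returned value is a bit mask of vector sizes in bytes; e.g. with
   -mavx512f it is 64 | 32 | 16 == 112, telling the vectorizer to try
   512-bit, 256-bit and 128-bit vectors in turn, while a return value of
   0 leaves only the preferred SIMD mode to be tried.  */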
51612
51613
51614
/* Return the class of registers which could be used for a pseudo of MODE
   and of class RCLASS for spilling instead of memory.  Return NO_REGS
   if it is not possible or not profitable.  */
51618static reg_class_t
51619ix86_spill_class (reg_class_t rclass, machine_mode mode)
51620{
51621  if (TARGET_SSE && TARGET_GENERAL_REGS_SSE_SPILL && ! TARGET_MMX
51622      && (mode == SImode || (TARGET_64BIT && mode == DImode))
51623      && rclass != NO_REGS && INTEGER_CLASS_P (rclass))
51624    return ALL_SSE_REGS;
51625  return NO_REGS;
51626}
51627
51628/* Implement targetm.vectorize.init_cost.  */
51629
51630static void *
51631ix86_init_cost (struct loop *)
51632{
51633  unsigned *cost = XNEWVEC (unsigned, 3);
51634  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
51635  return cost;
51636}
51637
51638/* Implement targetm.vectorize.add_stmt_cost.  */
51639
51640static unsigned
51641ix86_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
51642		    struct _stmt_vec_info *stmt_info, int misalign,
51643		    enum vect_cost_model_location where)
51644{
51645  unsigned *cost = (unsigned *) data;
51646  unsigned retval = 0;
51647
51648  tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
51649  int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
51650
51651  /* Statements in an inner loop relative to the loop being
51652     vectorized are weighted more heavily.  The value here is
51653      arbitrary and could potentially be improved with analysis.  */
51654  if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
51655    count *= 50;  /* FIXME.  */
51656
51657  retval = (unsigned) (count * stmt_cost);
51658
  /* We need to multiply all vector stmt costs by 1.7 (estimated cost)
     for Silvermont, as it has an out-of-order integer pipeline and can
     execute 2 scalar instructions per tick, but has an in-order SIMD
     pipeline.  */
51662  if (TARGET_SILVERMONT || TARGET_INTEL)
51663    if (stmt_info && stmt_info->stmt)
51664      {
51665	tree lhs_op = gimple_get_lhs (stmt_info->stmt);
51666	if (lhs_op && TREE_CODE (TREE_TYPE (lhs_op)) == INTEGER_TYPE)
51667	  retval = (retval * 17) / 10;
51668      }
51669
51670  cost[where] += retval;
51671
51672  return retval;
51673}
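
/* Worked example (illustrative numbers only): if COUNT is 4 and the
   per-statement cost returned for this KIND and vectype is 1, RETVAL
   starts as 4; on Silvermont, if the statement produces an integer
   result, it is scaled to (4 * 17) / 10 == 6 before being accumulated
   into the cost slot selected by WHERE.  */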
51674
51675/* Implement targetm.vectorize.finish_cost.  */
51676
51677static void
51678ix86_finish_cost (void *data, unsigned *prologue_cost,
51679		  unsigned *body_cost, unsigned *epilogue_cost)
51680{
51681  unsigned *cost = (unsigned *) data;
51682  *prologue_cost = cost[vect_prologue];
51683  *body_cost     = cost[vect_body];
51684  *epilogue_cost = cost[vect_epilogue];
51685}
51686
51687/* Implement targetm.vectorize.destroy_cost_data.  */
51688
51689static void
51690ix86_destroy_cost_data (void *data)
51691{
51692  free (data);
51693}
51694
51695/* Validate target specific memory model bits in VAL. */
51696
51697static unsigned HOST_WIDE_INT
51698ix86_memmodel_check (unsigned HOST_WIDE_INT val)
51699{
51700  enum memmodel model = memmodel_from_int (val);
51701  bool strong;
51702
51703  if (val & ~(unsigned HOST_WIDE_INT)(IX86_HLE_ACQUIRE|IX86_HLE_RELEASE
51704				      |MEMMODEL_MASK)
51705      || ((val & IX86_HLE_ACQUIRE) && (val & IX86_HLE_RELEASE)))
51706    {
51707      warning (OPT_Winvalid_memory_model,
51708	       "Unknown architecture specific memory model");
51709      return MEMMODEL_SEQ_CST;
51710    }
51711  strong = (is_mm_acq_rel (model) || is_mm_seq_cst (model));
51712  if (val & IX86_HLE_ACQUIRE && !(is_mm_acquire (model) || strong))
51713    {
51714      warning (OPT_Winvalid_memory_model,
51715              "HLE_ACQUIRE not used with ACQUIRE or stronger memory model");
51716      return MEMMODEL_SEQ_CST | IX86_HLE_ACQUIRE;
51717    }
51718  if (val & IX86_HLE_RELEASE && !(is_mm_release (model) || strong))
51719    {
51720      warning (OPT_Winvalid_memory_model,
51721              "HLE_RELEASE not used with RELEASE or stronger memory model");
51722      return MEMMODEL_SEQ_CST | IX86_HLE_RELEASE;
51723    }
51724  return val;
51725}
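
/* For reference, IX86_HLE_ACQUIRE and IX86_HLE_RELEASE correspond to the
   __ATOMIC_HLE_ACQUIRE and __ATOMIC_HLE_RELEASE macros predefined for
   x86, so a well-formed use looks like

     while (__atomic_exchange_n (&lock, 1,
				 __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE))
       ;

   whereas a mismatch such as __ATOMIC_ACQUIRE | __ATOMIC_HLE_RELEASE
   triggers the warning above and falls back to a SEQ_CST model with the
   requested HLE bit retained.  */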
51726
51727/* Set CLONEI->vecsize_mangle, CLONEI->vecsize_int,
51728   CLONEI->vecsize_float and if CLONEI->simdlen is 0, also
51729   CLONEI->simdlen.  Return 0 if SIMD clones shouldn't be emitted,
51730   or number of vecsize_mangle variants that should be emitted.  */
51731
51732static int
51733ix86_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
51734					     struct cgraph_simd_clone *clonei,
51735					     tree base_type, int num)
51736{
51737  int ret = 1;
51738
51739  if (clonei->simdlen
51740      && (clonei->simdlen < 2
51741	  || clonei->simdlen > 16
51742	  || (clonei->simdlen & (clonei->simdlen - 1)) != 0))
51743    {
51744      warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51745		  "unsupported simdlen %d", clonei->simdlen);
51746      return 0;
51747    }
51748
51749  tree ret_type = TREE_TYPE (TREE_TYPE (node->decl));
51750  if (TREE_CODE (ret_type) != VOID_TYPE)
51751    switch (TYPE_MODE (ret_type))
51752      {
51753      case QImode:
51754      case HImode:
51755      case SImode:
51756      case DImode:
51757      case SFmode:
51758      case DFmode:
51759      /* case SCmode: */
51760      /* case DCmode: */
51761	break;
51762      default:
51763	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51764		    "unsupported return type %qT for simd\n", ret_type);
51765	return 0;
51766      }
51767
51768  tree t;
51769  int i;
51770
51771  for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
51772    /* FIXME: Shouldn't we allow such arguments if they are uniform?  */
51773    switch (TYPE_MODE (TREE_TYPE (t)))
51774      {
51775      case QImode:
51776      case HImode:
51777      case SImode:
51778      case DImode:
51779      case SFmode:
51780      case DFmode:
51781      /* case SCmode: */
51782      /* case DCmode: */
51783	break;
51784      default:
51785	warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
51786		    "unsupported argument type %qT for simd\n", TREE_TYPE (t));
51787	return 0;
51788      }
51789
51790  if (clonei->cilk_elemental)
51791    {
      /* Parse the processor clause here.  If not present, default to 'b'.  */
51793      clonei->vecsize_mangle = 'b';
51794    }
51795  else if (!TREE_PUBLIC (node->decl))
51796    {
51797      /* If the function isn't exported, we can pick up just one ISA
51798	 for the clones.  */
51799      if (TARGET_AVX2)
51800	clonei->vecsize_mangle = 'd';
51801      else if (TARGET_AVX)
51802	clonei->vecsize_mangle = 'c';
51803      else
51804	clonei->vecsize_mangle = 'b';
51805      ret = 1;
51806    }
51807  else
51808    {
51809      clonei->vecsize_mangle = "bcd"[num];
51810      ret = 3;
51811    }
51812  switch (clonei->vecsize_mangle)
51813    {
51814    case 'b':
51815      clonei->vecsize_int = 128;
51816      clonei->vecsize_float = 128;
51817      break;
51818    case 'c':
51819      clonei->vecsize_int = 128;
51820      clonei->vecsize_float = 256;
51821      break;
51822    case 'd':
51823      clonei->vecsize_int = 256;
51824      clonei->vecsize_float = 256;
51825      break;
51826    }
51827  if (clonei->simdlen == 0)
51828    {
51829      if (SCALAR_INT_MODE_P (TYPE_MODE (base_type)))
51830	clonei->simdlen = clonei->vecsize_int;
51831      else
51832	clonei->simdlen = clonei->vecsize_float;
51833      clonei->simdlen /= GET_MODE_BITSIZE (TYPE_MODE (base_type));
51834      if (clonei->simdlen > 16)
51835	clonei->simdlen = 16;
51836    }
51837  return ret;
51838}
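
/* Illustrative sketch (not part of the hook itself): for an exported
   function

     #pragma omp declare simd
     float scale (float x) { return 2.0f * x; }

   the hook reports three variants (returning 3) and is called with NUM
   equal to 0, 1 and 2, yielding the 'b' (SSE2), 'c' (AVX) and 'd' (AVX2)
   mangles; with BASE_TYPE float and no simdlen clause, simdlen becomes
   128 / 32 == 4 for 'b' and 256 / 32 == 8 for 'c' and 'd'.  */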
51839
51840/* Add target attribute to SIMD clone NODE if needed.  */
51841
51842static void
51843ix86_simd_clone_adjust (struct cgraph_node *node)
51844{
51845  const char *str = NULL;
51846  gcc_assert (node->decl == cfun->decl);
51847  switch (node->simdclone->vecsize_mangle)
51848    {
51849    case 'b':
51850      if (!TARGET_SSE2)
51851	str = "sse2";
51852      break;
51853    case 'c':
51854      if (!TARGET_AVX)
51855	str = "avx";
51856      break;
51857    case 'd':
51858      if (!TARGET_AVX2)
51859	str = "avx2";
51860      break;
51861    default:
51862      gcc_unreachable ();
51863    }
51864  if (str == NULL)
51865    return;
51866  push_cfun (NULL);
51867  tree args = build_tree_list (NULL_TREE, build_string (strlen (str), str));
51868  bool ok = ix86_valid_target_attribute_p (node->decl, NULL, args, 0);
51869  gcc_assert (ok);
51870  pop_cfun ();
51871  ix86_reset_previous_fndecl ();
51872  ix86_set_current_function (node->decl);
51873}
51874
/* If SIMD clone NODE can't be used in a vectorized loop in the current
   function, return -1; otherwise return the badness of using it (0 if it
   is the most desirable from the vecsize_mangle point of view, 1 slightly
   less desirable, etc.).  */
51879
51880static int
51881ix86_simd_clone_usable (struct cgraph_node *node)
51882{
51883  switch (node->simdclone->vecsize_mangle)
51884    {
51885    case 'b':
51886      if (!TARGET_SSE2)
51887	return -1;
51888      if (!TARGET_AVX)
51889	return 0;
51890      return TARGET_AVX2 ? 2 : 1;
51891    case 'c':
51892      if (!TARGET_AVX)
51893	return -1;
51894      return TARGET_AVX2 ? 1 : 0;
51896    case 'd':
51897      if (!TARGET_AVX2)
51898	return -1;
51899      return 0;
51900    default:
51901      gcc_unreachable ();
51902    }
51903}
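
/* For example, when the caller is compiled with -mavx2 all three clones
   are usable and the ranking above gives 'd' badness 0, 'c' badness 1
   and 'b' badness 2, so the vectorizer prefers the AVX2 variant; with
   plain -msse2 only the 'b' clone is usable (badness 0).  */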
51904
/* This function adjusts the unroll factor based on
   the hardware capabilities.  For example, bdver3 has
   a loop buffer which makes unrolling of smaller
   loops less important.  This function decides the
   unroll factor using the number of memory references
   (the value 32 is used) as a heuristic.  */
51911
51912static unsigned
51913ix86_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
51914{
51915  basic_block *bbs;
51916  rtx_insn *insn;
51917  unsigned i;
51918  unsigned mem_count = 0;
51919
51920  if (!TARGET_ADJUST_UNROLL)
51921     return nunroll;
51922
51923  /* Count the number of memory references within the loop body.
51924     This value determines the unrolling factor for bdver3 and bdver4
51925     architectures. */
51926  subrtx_iterator::array_type array;
51927  bbs = get_loop_body (loop);
51928  for (i = 0; i < loop->num_nodes; i++)
51929    FOR_BB_INSNS (bbs[i], insn)
51930      if (NONDEBUG_INSN_P (insn))
51931	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
51932	  if (const_rtx x = *iter)
51933	    if (MEM_P (x))
51934	      {
51935		machine_mode mode = GET_MODE (x);
51936		unsigned int n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
51937		if (n_words > 4)
51938		  mem_count += 2;
51939		else
51940		  mem_count += 1;
51941	      }
51942  free (bbs);
51943
  if (mem_count && mem_count <= 32)
    return 32 / mem_count;
51946
51947  return nunroll;
51948}
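
/* Worked example (illustrative): a loop body containing four word-sized
   memory references gives mem_count == 4, so the hook returns an unroll
   factor of 32 / 4 == 8; a loop with no memory references, or with more
   than 32 of them, keeps the factor NUNROLL chosen by the generic
   heuristics.  */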
51949
51950
51951/* Implement TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P.  */
51952
51953static bool
51954ix86_float_exceptions_rounding_supported_p (void)
51955{
51956  /* For x87 floating point with standard excess precision handling,
51957     there is no adddf3 pattern (since x87 floating point only has
51958     XFmode operations) so the default hook implementation gets this
51959     wrong.  */
51960  return TARGET_80387 || TARGET_SSE_MATH;
51961}
51962
51963/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.  */
51964
51965static void
51966ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
51967{
51968  if (!TARGET_80387 && !TARGET_SSE_MATH)
51969    return;
51970  tree exceptions_var = create_tmp_var (integer_type_node);
51971  if (TARGET_80387)
51972    {
51973      tree fenv_index_type = build_index_type (size_int (6));
51974      tree fenv_type = build_array_type (unsigned_type_node, fenv_index_type);
51975      tree fenv_var = create_tmp_var (fenv_type);
51976      mark_addressable (fenv_var);
51977      tree fenv_ptr = build_pointer_type (fenv_type);
51978      tree fenv_addr = build1 (ADDR_EXPR, fenv_ptr, fenv_var);
51979      fenv_addr = fold_convert (ptr_type_node, fenv_addr);
51980      tree fnstenv = ix86_builtins[IX86_BUILTIN_FNSTENV];
51981      tree fldenv = ix86_builtins[IX86_BUILTIN_FLDENV];
51982      tree fnstsw = ix86_builtins[IX86_BUILTIN_FNSTSW];
51983      tree fnclex = ix86_builtins[IX86_BUILTIN_FNCLEX];
51984      tree hold_fnstenv = build_call_expr (fnstenv, 1, fenv_addr);
51985      tree hold_fnclex = build_call_expr (fnclex, 0);
51986      *hold = build2 (COMPOUND_EXPR, void_type_node, hold_fnstenv,
51987		      hold_fnclex);
51988      *clear = build_call_expr (fnclex, 0);
51989      tree sw_var = create_tmp_var (short_unsigned_type_node);
51990      tree fnstsw_call = build_call_expr (fnstsw, 0);
51991      tree sw_mod = build2 (MODIFY_EXPR, short_unsigned_type_node,
51992			    sw_var, fnstsw_call);
51993      tree exceptions_x87 = fold_convert (integer_type_node, sw_var);
51994      tree update_mod = build2 (MODIFY_EXPR, integer_type_node,
51995				exceptions_var, exceptions_x87);
51996      *update = build2 (COMPOUND_EXPR, integer_type_node,
51997			sw_mod, update_mod);
51998      tree update_fldenv = build_call_expr (fldenv, 1, fenv_addr);
51999      *update = build2 (COMPOUND_EXPR, void_type_node, *update, update_fldenv);
52000    }
52001  if (TARGET_SSE_MATH)
52002    {
52003      tree mxcsr_orig_var = create_tmp_var (unsigned_type_node);
52004      tree mxcsr_mod_var = create_tmp_var (unsigned_type_node);
52005      tree stmxcsr = ix86_builtins[IX86_BUILTIN_STMXCSR];
52006      tree ldmxcsr = ix86_builtins[IX86_BUILTIN_LDMXCSR];
52007      tree stmxcsr_hold_call = build_call_expr (stmxcsr, 0);
52008      tree hold_assign_orig = build2 (MODIFY_EXPR, unsigned_type_node,
52009				      mxcsr_orig_var, stmxcsr_hold_call);
52010      tree hold_mod_val = build2 (BIT_IOR_EXPR, unsigned_type_node,
52011				  mxcsr_orig_var,
52012				  build_int_cst (unsigned_type_node, 0x1f80));
52013      hold_mod_val = build2 (BIT_AND_EXPR, unsigned_type_node, hold_mod_val,
52014			     build_int_cst (unsigned_type_node, 0xffffffc0));
52015      tree hold_assign_mod = build2 (MODIFY_EXPR, unsigned_type_node,
52016				     mxcsr_mod_var, hold_mod_val);
52017      tree ldmxcsr_hold_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52018      tree hold_all = build2 (COMPOUND_EXPR, unsigned_type_node,
52019			      hold_assign_orig, hold_assign_mod);
52020      hold_all = build2 (COMPOUND_EXPR, void_type_node, hold_all,
52021			 ldmxcsr_hold_call);
52022      if (*hold)
52023	*hold = build2 (COMPOUND_EXPR, void_type_node, *hold, hold_all);
52024      else
52025	*hold = hold_all;
52026      tree ldmxcsr_clear_call = build_call_expr (ldmxcsr, 1, mxcsr_mod_var);
52027      if (*clear)
52028	*clear = build2 (COMPOUND_EXPR, void_type_node, *clear,
52029			 ldmxcsr_clear_call);
52030      else
52031	*clear = ldmxcsr_clear_call;
52032      tree stxmcsr_update_call = build_call_expr (stmxcsr, 0);
52033      tree exceptions_sse = fold_convert (integer_type_node,
52034					  stxmcsr_update_call);
52035      if (*update)
52036	{
52037	  tree exceptions_mod = build2 (BIT_IOR_EXPR, integer_type_node,
52038					exceptions_var, exceptions_sse);
52039	  tree exceptions_assign = build2 (MODIFY_EXPR, integer_type_node,
52040					   exceptions_var, exceptions_mod);
52041	  *update = build2 (COMPOUND_EXPR, integer_type_node, *update,
52042			    exceptions_assign);
52043	}
52044      else
52045	*update = build2 (MODIFY_EXPR, integer_type_node,
52046			  exceptions_var, exceptions_sse);
52047      tree ldmxcsr_update_call = build_call_expr (ldmxcsr, 1, mxcsr_orig_var);
52048      *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52049			ldmxcsr_update_call);
52050    }
52051  tree atomic_feraiseexcept
52052    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
52053  tree atomic_feraiseexcept_call = build_call_expr (atomic_feraiseexcept,
52054						    1, exceptions_var);
52055  *update = build2 (COMPOUND_EXPR, void_type_node, *update,
52056		    atomic_feraiseexcept_call);
52057}
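
/* Roughly, for a target with both x87 and SSE math, the trees built
   above correspond to the following pseudo C (a sketch only; the helper
   names stand for the machine-specific built-ins used above):

     hold:    fnstenv (&env); fnclex ();
              mxcsr_orig = stmxcsr ();
              mxcsr_mod = (mxcsr_orig | 0x1f80) & 0xffffffc0;
              ldmxcsr (mxcsr_mod);
     clear:   fnclex (); ldmxcsr (mxcsr_mod);
     update:  exceptions = fnstsw (); fldenv (&env);
              exceptions |= stmxcsr (); ldmxcsr (mxcsr_orig);
              __atomic_feraiseexcept (exceptions);

   i.e. HOLD saves the FP environment and masks/clears exceptions, CLEAR
   re-clears them after a failed compare-and-exchange, and UPDATE
   re-raises whatever exceptions were accumulated.  */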
52058
52059/* Return mode to be used for bounds or VOIDmode
52060   if bounds are not supported.  */
52061
52062static enum machine_mode
52063ix86_mpx_bound_mode ()
52064{
52065  /* Do not support pointer checker if MPX
52066     is not enabled.  */
52067  if (!TARGET_MPX)
52068    {
52069      if (flag_check_pointer_bounds)
	warning (0, "Pointer Checker requires MPX support on this target."
		 " Use the -mmpx option to enable MPX.");
52072      return VOIDmode;
52073    }
52074
52075  return BNDmode;
52076}
52077
52078/*  Return constant used to statically initialize constant bounds.
52079
52080    This function is used to create special bound values.  For now
52081    only INIT bounds and NONE bounds are expected.  More special
52082    values may be added later.  */
52083
52084static tree
52085ix86_make_bounds_constant (HOST_WIDE_INT lb, HOST_WIDE_INT ub)
52086{
52087  tree low = lb ? build_minus_one_cst (pointer_sized_int_node)
52088    : build_zero_cst (pointer_sized_int_node);
52089  tree high = ub ? build_zero_cst (pointer_sized_int_node)
52090    : build_minus_one_cst (pointer_sized_int_node);
52091
52092  /* This function is supposed to be used to create INIT and
52093     NONE bounds only.  */
52094  gcc_assert ((lb == 0 && ub == -1)
52095	      || (lb == -1 && ub == 0));
52096
52097  return build_complex (NULL, low, high);
52098}
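
/* Note on the encoding (also relied upon by ix86_initialize_bounds
   below): bounds are kept as a complex constant (LOW, HIGH) where HIGH
   holds the one's complement of the upper bound.  INIT bounds
   (LB == 0, UB == -1, i.e. the whole address space) therefore become
   (0, 0), and NONE bounds (LB == -1, UB == 0) become (-1, -1).  */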
52099
52100/* Generate a list of statements STMTS to initialize pointer bounds
52101   variable VAR with bounds LB and UB.  Return the number of generated
52102   statements.  */
52103
52104static int
52105ix86_initialize_bounds (tree var, tree lb, tree ub, tree *stmts)
52106{
52107  tree bnd_ptr = build_pointer_type (pointer_sized_int_node);
52108  tree lhs, modify, var_p;
52109
52110  ub = build1 (BIT_NOT_EXPR, pointer_sized_int_node, ub);
52111  var_p = fold_convert (bnd_ptr, build_fold_addr_expr (var));
52112
52113  lhs = build1 (INDIRECT_REF, pointer_sized_int_node, var_p);
52114  modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, lb);
52115  append_to_statement_list (modify, stmts);
52116
52117  lhs = build1 (INDIRECT_REF, pointer_sized_int_node,
52118		build2 (POINTER_PLUS_EXPR, bnd_ptr, var_p,
52119			TYPE_SIZE_UNIT (pointer_sized_int_node)));
52120  modify = build2 (MODIFY_EXPR, TREE_TYPE (lhs), lhs, ub);
52121  append_to_statement_list (modify, stmts);
52122
52123  return 2;
52124}
52125
52126#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* For i386, a common symbol is local only for non-PIE binaries.  For
   x86-64, a common symbol is local only for non-PIE binaries or when
   the linker supports copy relocs in PIE binaries.  */
52130
52131static bool
52132ix86_binds_local_p (const_tree exp)
52133{
52134  return default_binds_local_p_3 (exp, flag_shlib != 0, true, true,
52135				  (!flag_pic
52136				   || (TARGET_64BIT
52137				       && HAVE_LD_PIE_COPYRELOC != 0)));
52138}
52139#endif
52140
52141/* Initialize the GCC target structure.  */
52142#undef TARGET_RETURN_IN_MEMORY
52143#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
52144
52145#undef TARGET_LEGITIMIZE_ADDRESS
52146#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
52147
52148#undef TARGET_ATTRIBUTE_TABLE
52149#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
52150#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
52151#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
52152#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52153#  undef TARGET_MERGE_DECL_ATTRIBUTES
52154#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
52155#endif
52156
52157#undef TARGET_COMP_TYPE_ATTRIBUTES
52158#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
52159
52160#undef TARGET_INIT_BUILTINS
52161#define TARGET_INIT_BUILTINS ix86_init_builtins
52162#undef TARGET_BUILTIN_DECL
52163#define TARGET_BUILTIN_DECL ix86_builtin_decl
52164#undef TARGET_EXPAND_BUILTIN
52165#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
52166
52167#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
52168#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
52169  ix86_builtin_vectorized_function
52170
52171#undef TARGET_VECTORIZE_BUILTIN_TM_LOAD
52172#define TARGET_VECTORIZE_BUILTIN_TM_LOAD ix86_builtin_tm_load
52173
52174#undef TARGET_VECTORIZE_BUILTIN_TM_STORE
52175#define TARGET_VECTORIZE_BUILTIN_TM_STORE ix86_builtin_tm_store
52176
52177#undef TARGET_VECTORIZE_BUILTIN_GATHER
52178#define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
52179
52180#undef TARGET_BUILTIN_RECIPROCAL
52181#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
52182
52183#undef TARGET_ASM_FUNCTION_EPILOGUE
52184#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
52185
52186#undef TARGET_ENCODE_SECTION_INFO
52187#ifndef SUBTARGET_ENCODE_SECTION_INFO
52188#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
52189#else
52190#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
52191#endif
52192
52193#undef TARGET_ASM_OPEN_PAREN
52194#define TARGET_ASM_OPEN_PAREN ""
52195#undef TARGET_ASM_CLOSE_PAREN
52196#define TARGET_ASM_CLOSE_PAREN ""
52197
52198#undef TARGET_ASM_BYTE_OP
52199#define TARGET_ASM_BYTE_OP ASM_BYTE
52200
52201#undef TARGET_ASM_ALIGNED_HI_OP
52202#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
52203#undef TARGET_ASM_ALIGNED_SI_OP
52204#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
52205#ifdef ASM_QUAD
52206#undef TARGET_ASM_ALIGNED_DI_OP
52207#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
52208#endif
52209
52210#undef TARGET_PROFILE_BEFORE_PROLOGUE
52211#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
52212
52213#undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
52214#define TARGET_MANGLE_DECL_ASSEMBLER_NAME ix86_mangle_decl_assembler_name
52215
52216#undef TARGET_ASM_UNALIGNED_HI_OP
52217#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
52218#undef TARGET_ASM_UNALIGNED_SI_OP
52219#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
52220#undef TARGET_ASM_UNALIGNED_DI_OP
52221#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
52222
52223#undef TARGET_PRINT_OPERAND
52224#define TARGET_PRINT_OPERAND ix86_print_operand
52225#undef TARGET_PRINT_OPERAND_ADDRESS
52226#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
52227#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
52228#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
52229#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
52230#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
52231
52232#undef TARGET_SCHED_INIT_GLOBAL
52233#define TARGET_SCHED_INIT_GLOBAL ix86_sched_init_global
52234#undef TARGET_SCHED_ADJUST_COST
52235#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
52236#undef TARGET_SCHED_ISSUE_RATE
52237#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
52238#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
52239#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
52240  ia32_multipass_dfa_lookahead
52241#undef TARGET_SCHED_MACRO_FUSION_P
52242#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
52243#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
52244#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
52245
52246#undef TARGET_FUNCTION_OK_FOR_SIBCALL
52247#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
52248
52249#undef TARGET_MEMMODEL_CHECK
52250#define TARGET_MEMMODEL_CHECK ix86_memmodel_check
52251
52252#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
52253#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV ix86_atomic_assign_expand_fenv
52254
52255#ifdef HAVE_AS_TLS
52256#undef TARGET_HAVE_TLS
52257#define TARGET_HAVE_TLS true
52258#endif
52259#undef TARGET_CANNOT_FORCE_CONST_MEM
52260#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
52261#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
52262#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
52263
52264#undef TARGET_DELEGITIMIZE_ADDRESS
52265#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
52266
52267#undef TARGET_MS_BITFIELD_LAYOUT_P
52268#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
52269
52270#if TARGET_MACHO
52271#undef TARGET_BINDS_LOCAL_P
52272#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
52273#else
52274#undef TARGET_BINDS_LOCAL_P
52275#define TARGET_BINDS_LOCAL_P ix86_binds_local_p
52276#endif
52277#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
52278#undef TARGET_BINDS_LOCAL_P
52279#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
52280#endif
52281
52282#undef TARGET_ASM_OUTPUT_MI_THUNK
52283#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
52284#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
52285#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
52286
52287#undef TARGET_ASM_FILE_START
52288#define TARGET_ASM_FILE_START x86_file_start
52289
52290#undef TARGET_OPTION_OVERRIDE
52291#define TARGET_OPTION_OVERRIDE ix86_option_override
52292
52293#undef TARGET_REGISTER_MOVE_COST
52294#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
52295#undef TARGET_MEMORY_MOVE_COST
52296#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
52297#undef TARGET_RTX_COSTS
52298#define TARGET_RTX_COSTS ix86_rtx_costs
52299#undef TARGET_ADDRESS_COST
52300#define TARGET_ADDRESS_COST ix86_address_cost
52301
52302#undef TARGET_FIXED_CONDITION_CODE_REGS
52303#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
52304#undef TARGET_CC_MODES_COMPATIBLE
52305#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
52306
52307#undef TARGET_MACHINE_DEPENDENT_REORG
52308#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
52309
52310#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
52311#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
52312
52313#undef TARGET_BUILD_BUILTIN_VA_LIST
52314#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
52315
52316#undef TARGET_FOLD_BUILTIN
52317#define TARGET_FOLD_BUILTIN ix86_fold_builtin
52318
52319#undef TARGET_COMPARE_VERSION_PRIORITY
52320#define TARGET_COMPARE_VERSION_PRIORITY ix86_compare_version_priority
52321
52322#undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
52323#define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
52324  ix86_generate_version_dispatcher_body
52325
52326#undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
52327#define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
52328  ix86_get_function_versions_dispatcher
52329
52330#undef TARGET_ENUM_VA_LIST_P
52331#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
52332
52333#undef TARGET_FN_ABI_VA_LIST
52334#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
52335
52336#undef TARGET_CANONICAL_VA_LIST_TYPE
52337#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
52338
52339#undef TARGET_EXPAND_BUILTIN_VA_START
52340#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
52341
52342#undef TARGET_MD_ASM_CLOBBERS
52343#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
52344
52345#undef TARGET_PROMOTE_PROTOTYPES
52346#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
52347#undef TARGET_SETUP_INCOMING_VARARGS
52348#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
52349#undef TARGET_MUST_PASS_IN_STACK
52350#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
52351#undef TARGET_FUNCTION_ARG_ADVANCE
52352#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
52353#undef TARGET_FUNCTION_ARG
52354#define TARGET_FUNCTION_ARG ix86_function_arg
52355#undef TARGET_INIT_PIC_REG
52356#define TARGET_INIT_PIC_REG ix86_init_pic_reg
52357#undef TARGET_USE_PSEUDO_PIC_REG
52358#define TARGET_USE_PSEUDO_PIC_REG ix86_use_pseudo_pic_reg
52359#undef TARGET_FUNCTION_ARG_BOUNDARY
52360#define TARGET_FUNCTION_ARG_BOUNDARY ix86_function_arg_boundary
52361#undef TARGET_PASS_BY_REFERENCE
52362#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
52363#undef TARGET_INTERNAL_ARG_POINTER
52364#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
52365#undef TARGET_UPDATE_STACK_BOUNDARY
52366#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
52367#undef TARGET_GET_DRAP_RTX
52368#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
52369#undef TARGET_STRICT_ARGUMENT_NAMING
52370#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
52371#undef TARGET_STATIC_CHAIN
52372#define TARGET_STATIC_CHAIN ix86_static_chain
52373#undef TARGET_TRAMPOLINE_INIT
52374#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
52375#undef TARGET_RETURN_POPS_ARGS
52376#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
52377
52378#undef TARGET_LEGITIMATE_COMBINED_INSN
52379#define TARGET_LEGITIMATE_COMBINED_INSN ix86_legitimate_combined_insn
52380
52381#undef TARGET_ASAN_SHADOW_OFFSET
52382#define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
52383
52384#undef TARGET_GIMPLIFY_VA_ARG_EXPR
52385#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
52386
52387#undef TARGET_SCALAR_MODE_SUPPORTED_P
52388#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
52389
52390#undef TARGET_VECTOR_MODE_SUPPORTED_P
52391#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
52392
52393#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
52394#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
52395  ix86_libgcc_floating_mode_supported_p
52396
52397#undef TARGET_C_MODE_FOR_SUFFIX
52398#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
52399
52400#ifdef HAVE_AS_TLS
52401#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
52402#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
52403#endif
52404
52405#ifdef SUBTARGET_INSERT_ATTRIBUTES
52406#undef TARGET_INSERT_ATTRIBUTES
52407#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
52408#endif
52409
52410#undef TARGET_MANGLE_TYPE
52411#define TARGET_MANGLE_TYPE ix86_mangle_type
52412
52413#if !TARGET_MACHO
52414#undef TARGET_STACK_PROTECT_FAIL
52415#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
52416#endif
52417
52418#undef TARGET_FUNCTION_VALUE
52419#define TARGET_FUNCTION_VALUE ix86_function_value
52420
52421#undef TARGET_FUNCTION_VALUE_REGNO_P
52422#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
52423
52424#undef TARGET_PROMOTE_FUNCTION_MODE
52425#define TARGET_PROMOTE_FUNCTION_MODE ix86_promote_function_mode
52426
52427#undef TARGET_MEMBER_TYPE_FORCES_BLK
52428#define TARGET_MEMBER_TYPE_FORCES_BLK ix86_member_type_forces_blk
52429
52430#undef TARGET_INSTANTIATE_DECLS
52431#define TARGET_INSTANTIATE_DECLS ix86_instantiate_decls
52432
52433#undef TARGET_SECONDARY_RELOAD
52434#define TARGET_SECONDARY_RELOAD ix86_secondary_reload
52435
52436#undef TARGET_CLASS_MAX_NREGS
52437#define TARGET_CLASS_MAX_NREGS ix86_class_max_nregs
52438
52439#undef TARGET_PREFERRED_RELOAD_CLASS
52440#define TARGET_PREFERRED_RELOAD_CLASS ix86_preferred_reload_class
52441#undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
52442#define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS ix86_preferred_output_reload_class
52443#undef TARGET_CLASS_LIKELY_SPILLED_P
52444#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
52445
52446#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
52447#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
52448  ix86_builtin_vectorization_cost
52449#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
52450#define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
52451  ix86_vectorize_vec_perm_const_ok
52452#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
52453#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
52454  ix86_preferred_simd_mode
52455#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
52456#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
52457  ix86_autovectorize_vector_sizes
52458#undef TARGET_VECTORIZE_INIT_COST
52459#define TARGET_VECTORIZE_INIT_COST ix86_init_cost
52460#undef TARGET_VECTORIZE_ADD_STMT_COST
52461#define TARGET_VECTORIZE_ADD_STMT_COST ix86_add_stmt_cost
52462#undef TARGET_VECTORIZE_FINISH_COST
52463#define TARGET_VECTORIZE_FINISH_COST ix86_finish_cost
52464#undef TARGET_VECTORIZE_DESTROY_COST_DATA
52465#define TARGET_VECTORIZE_DESTROY_COST_DATA ix86_destroy_cost_data
52466
52467#undef TARGET_SET_CURRENT_FUNCTION
52468#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
52469
52470#undef TARGET_OPTION_VALID_ATTRIBUTE_P
52471#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
52472
52473#undef TARGET_OPTION_SAVE
52474#define TARGET_OPTION_SAVE ix86_function_specific_save
52475
52476#undef TARGET_OPTION_RESTORE
52477#define TARGET_OPTION_RESTORE ix86_function_specific_restore
52478
52479#undef TARGET_OPTION_POST_STREAM_IN
52480#define TARGET_OPTION_POST_STREAM_IN ix86_function_specific_post_stream_in
52481
52482#undef TARGET_OPTION_PRINT
52483#define TARGET_OPTION_PRINT ix86_function_specific_print
52484
52485#undef TARGET_OPTION_FUNCTION_VERSIONS
52486#define TARGET_OPTION_FUNCTION_VERSIONS ix86_function_versions
52487
52488#undef TARGET_CAN_INLINE_P
52489#define TARGET_CAN_INLINE_P ix86_can_inline_p
52490
52491#undef TARGET_LEGITIMATE_ADDRESS_P
52492#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
52493
52494#undef TARGET_LRA_P
52495#define TARGET_LRA_P hook_bool_void_true
52496
52497#undef TARGET_REGISTER_PRIORITY
52498#define TARGET_REGISTER_PRIORITY ix86_register_priority
52499
52500#undef TARGET_REGISTER_USAGE_LEVELING_P
52501#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
52502
52503#undef TARGET_LEGITIMATE_CONSTANT_P
52504#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
52505
52506#undef TARGET_FRAME_POINTER_REQUIRED
52507#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
52508
52509#undef TARGET_CAN_ELIMINATE
52510#define TARGET_CAN_ELIMINATE ix86_can_eliminate
52511
52512#undef TARGET_EXTRA_LIVE_ON_ENTRY
52513#define TARGET_EXTRA_LIVE_ON_ENTRY ix86_live_on_entry
52514
52515#undef TARGET_ASM_CODE_END
52516#define TARGET_ASM_CODE_END ix86_code_end
52517
52518#undef TARGET_CONDITIONAL_REGISTER_USAGE
52519#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
52520
52521#if TARGET_MACHO
52522#undef TARGET_INIT_LIBFUNCS
52523#define TARGET_INIT_LIBFUNCS darwin_rename_builtins
52524#endif
52525
52526#undef TARGET_LOOP_UNROLL_ADJUST
52527#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
52528
52529#undef TARGET_SPILL_CLASS
52530#define TARGET_SPILL_CLASS ix86_spill_class
52531
52532#undef TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN
52533#define TARGET_SIMD_CLONE_COMPUTE_VECSIZE_AND_SIMDLEN \
52534  ix86_simd_clone_compute_vecsize_and_simdlen
52535
52536#undef TARGET_SIMD_CLONE_ADJUST
52537#define TARGET_SIMD_CLONE_ADJUST \
52538  ix86_simd_clone_adjust
52539
52540#undef TARGET_SIMD_CLONE_USABLE
52541#define TARGET_SIMD_CLONE_USABLE \
52542  ix86_simd_clone_usable
52543
52544#undef TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P
52545#define TARGET_FLOAT_EXCEPTIONS_ROUNDING_SUPPORTED_P \
52546  ix86_float_exceptions_rounding_supported_p
52547
52548#undef TARGET_MODE_EMIT
52549#define TARGET_MODE_EMIT ix86_emit_mode_set
52550
52551#undef TARGET_MODE_NEEDED
52552#define TARGET_MODE_NEEDED ix86_mode_needed
52553
52554#undef TARGET_MODE_AFTER
52555#define TARGET_MODE_AFTER ix86_mode_after
52556
52557#undef TARGET_MODE_ENTRY
52558#define TARGET_MODE_ENTRY ix86_mode_entry
52559
52560#undef TARGET_MODE_EXIT
52561#define TARGET_MODE_EXIT ix86_mode_exit
52562
52563#undef TARGET_MODE_PRIORITY
52564#define TARGET_MODE_PRIORITY ix86_mode_priority
52565
52566#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
52567#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
52568
52569#undef TARGET_LOAD_BOUNDS_FOR_ARG
52570#define TARGET_LOAD_BOUNDS_FOR_ARG ix86_load_bounds
52571
52572#undef TARGET_STORE_BOUNDS_FOR_ARG
52573#define TARGET_STORE_BOUNDS_FOR_ARG ix86_store_bounds
52574
52575#undef TARGET_LOAD_RETURNED_BOUNDS
52576#define TARGET_LOAD_RETURNED_BOUNDS ix86_load_returned_bounds
52577
52578#undef TARGET_STORE_RETURNED_BOUNDS
52579#define TARGET_STORE_RETURNED_BOUNDS ix86_store_returned_bounds
52580
52581#undef TARGET_CHKP_BOUND_MODE
52582#define TARGET_CHKP_BOUND_MODE ix86_mpx_bound_mode
52583
52584#undef TARGET_BUILTIN_CHKP_FUNCTION
52585#define TARGET_BUILTIN_CHKP_FUNCTION ix86_builtin_mpx_function
52586
52587#undef TARGET_CHKP_FUNCTION_VALUE_BOUNDS
52588#define TARGET_CHKP_FUNCTION_VALUE_BOUNDS ix86_function_value_bounds
52589
52590#undef TARGET_CHKP_MAKE_BOUNDS_CONSTANT
52591#define TARGET_CHKP_MAKE_BOUNDS_CONSTANT ix86_make_bounds_constant
52592
52593#undef TARGET_CHKP_INITIALIZE_BOUNDS
52594#define TARGET_CHKP_INITIALIZE_BOUNDS ix86_initialize_bounds
52595
52596#undef TARGET_SETUP_INCOMING_VARARG_BOUNDS
52597#define TARGET_SETUP_INCOMING_VARARG_BOUNDS ix86_setup_incoming_vararg_bounds
52598
52599#undef TARGET_OFFLOAD_OPTIONS
52600#define TARGET_OFFLOAD_OPTIONS \
52601  ix86_offload_options
52602
52603#undef TARGET_ABSOLUTE_BIGGEST_ALIGNMENT
52604#define TARGET_ABSOLUTE_BIGGEST_ALIGNMENT 512
52605
52606struct gcc_target targetm = TARGET_INITIALIZER;
52607
52608#include "gt-i386.h"
52609