/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to
the Free Software Foundation, 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "tree-gimple.h"
#include "dwarf2.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)					\
  ((mode) == QImode ? 0						\
   : (mode) == HImode ? 1					\
   : (mode) == SImode ? 2					\
   : (mode) == DImode ? 3					\
   : 4)
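
/* Illustrative sketch (not part of the port proper): the cost tables
   below are consulted through the active ix86_cost pointer, indexed by
   MODE_INDEX, roughly like

     cost = ix86_cost->mult_init[MODE_INDEX (mode)]
	    + nbits * ix86_cost->mult_bit;

   The field names mult_init/mult_bit are assumed to follow the
   struct processor_costs declaration in i386.h; treat the snippet as a
   sketch of the lookup pattern, not a definitive reference.  */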

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  1,					/* Branch cost */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  1,					/* Branch cost */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k8_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* Branch cost */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  19,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  4,					/* constant shift costs */
  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  2,					/* Branch cost */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs nocona_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  {10, 10, 10, 10, 10},			/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  {66, 66, 66, 66, 66},			/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  1,					/* Branch cost */
  6,					/* cost of FADD and FSUB insns.  */
  8,					/* cost of FMUL instruction.  */
  40,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  44,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6  (1<<PROCESSOR_K6)
#define m_ATHLON  (1<<PROCESSOR_ATHLON)
#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
#define m_K8  (1<<PROCESSOR_K8)
#define m_ATHLON_K8  (m_K8 | m_ATHLON)
#define m_NOCONA  (1<<PROCESSOR_NOCONA)
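
/* Sketch of how these masks are consumed (an assumed pattern, mirroring
   the TARGET_* tuning macros in i386.h rather than quoting them):

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   i.e. each flag below is a bitmask of the processors for which the
   corresponding optimization is considered profitable.  */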

const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_fisttp = m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
/* Branch hints were put in P4 based on simulation result. But
   after P4 was made, no performance benefit was observed with
   branch hints.  It also increases the code size.  As a result,
   icc never generates branch hints.  */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4 | m_NOCONA;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4 | m_NOCONA;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4 | m_NOCONA;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO;
const int x86_decompose_lea = m_PENT4 | m_NOCONA;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined.  */
const int x86_sse_split_regs = m_ATHLON_K8;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;

/* ??? Allowing interunit moves makes it all too easy for the compiler to put
   integer data in xmm registers, which results in pretty abysmal code.  */
const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;

const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window.  */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6 | m_PENT;
const int x86_use_bt = m_ATHLON_K8;
/* Compare and exchange was added for 80486.  */
const int x86_cmpxchg = ~m_386;
/* Exchange and add was added for 80486.  */
const int x86_xadd = ~m_386;

/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
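
/* For example, with the layout above REGNO_REG_CLASS (0) is AREG (%eax),
   REGNO_REG_CLASS (4) is SIREG (%esi) and REGNO_REG_CLASS (7) is
   NON_Q_REGS, since the stack pointer has no addressable 8-bit low part.
   (Illustrative only; REGNO_REG_CLASS itself is defined in i386.h.)  */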

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
rtx ix86_compare_emitted = NULL_RTX;

/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
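
/* Under the x86-64 ABI this works out to 6 * 8 + 8 * 16 = 176 bytes
   (assuming REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8
   in 64-bit mode): six general-purpose argument registers plus eight SSE
   argument registers saved for va_arg.  */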

/* Define the structure for the machine field in struct function.  */

struct stack_local_entry GTY(())
{
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};

/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;

  /* When save_regs_using_mov is set, emit prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};

/* Code model option.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which CPU we are scheduling for.  */
enum processor_type ix86_tune;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* True if the SSE prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
static int ix86_regparm;

/* Preferred alignment for stack boundary in bits.  */
unsigned int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;

/* Variables which are this size or smaller are put in the data/bss
   or ldata/lbss sections.  */

int ix86_section_threshold = 65536;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
char internal_label_prefix[16];
int internal_label_prefix_len;

static bool ix86_handle_option (size_t, const char *, int);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
				int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
						   rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
						   enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					 tree, int *, int);
static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
static bool ix86_vector_mode_supported_p (enum machine_mode);

static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);

static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
				 tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
				    tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode, tree, tree);
static bool contains_128bit_aligned_vector_p (tree);
static rtx ix86_struct_value_rtx (tree, int);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
				    tree, bool);
static void ix86_init_builtins (void);
static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static const char *ix86_mangle_fundamental_type (tree);
static tree ix86_stack_protect_fail (void);
static rtx ix86_internal_arg_pointer (void);
static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);

/* This function is only used on Solaris.  */
static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
  ATTRIBUTE_UNUSED;

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).
 */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] = {
  "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
  "sseup", "x87", "x87up", "cplx87", "no"
};
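
/* A worked example of the classification (a sketch following the psABI
   rules, not the exact output of the classification routine): for

     struct { double d; int i; };

   the first eightbyte (d) classifies as X86_64_SSEDF_CLASS and the
   second eightbyte (i plus padding) as X86_64_INTEGERSI_CLASS, so the
   struct is passed in one SSE register and one general register.  */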

#define MAX_CLASSES 4

/* Table of constants used by fldpi, fldln2, etc....  */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
static void x86_64_elf_select_section (tree decl, int reloc,
				       unsigned HOST_WIDE_INT align)
				      ATTRIBUTE_UNUSED;

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS	\
  (TARGET_DEFAULT			\
   | TARGET_64BIT_DEFAULT		\
   | TARGET_SUBTARGET_DEFAULT		\
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
#define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_FUNDAMENTAL_TYPE
#define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

struct gcc_target targetm = TARGET_INITIALIZER;


/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif

/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_m3dnow:
      if (!value)
	{
	  target_flags &= ~MASK_3DNOW_A;
	  target_flags_explicit |= MASK_3DNOW_A;
	}
      return true;

    case OPT_mmmx:
      if (!value)
	{
	  target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
	  target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
	}
      return true;

    case OPT_msse:
      if (!value)
	{
	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
	}
      return true;

    case OPT_msse2:
      if (!value)
	{
	  target_flags &= ~MASK_SSE3;
	  target_flags_explicit |= MASK_SSE3;
	}
      return true;

    default:
      return true;
    }
}
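
/* Behavioural note on the handler above (a description of the cascading
   logic, not of the generic option machinery): "-mno-sse" also clears
   MASK_SSE2 and MASK_SSE3, because SSE2/SSE3 code cannot be generated
   without the base SSE instruction set, while "-mno-sse2" only drops
   MASK_SSE3 in addition to its own mask.  */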

/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;

  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
      {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
      {&nocona_cost, 0, 0, 0, 0, 0, 0, 0}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_SSE3 = 4,
	  PTA_MMX = 8,
	  PTA_PREFETCH_SSE = 16,
	  PTA_3DNOW = 32,
	  PTA_3DNOW_A = 64,
	  PTA_64BIT = 128
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				       | PTA_MMX | PTA_PREFETCH_SSE},
      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
				        | PTA_MMX | PTA_PREFETCH_SSE},
      {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
				        | PTA_MMX | PTA_PREFETCH_SSE},
      {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
				        | PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				    | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
			       | PTA_SSE | PTA_SSE2 },
      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Set the default values for switches whose default depends on TARGET_64BIT
     in case they weren't overwritten by command line options.  */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 0;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 0;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (!ix86_tune_string && ix86_arch_string)
    ix86_tune_string = ix86_arch_string;
  if (!ix86_tune_string)
    {
      ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
      ix86_tune_defaulted = 1;
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model %qs not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model %<large%> not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_tune = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_SSE3
	    && !(target_flags_explicit & MASK_SSE3))
	  target_flags |= MASK_SSE3;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
	ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
        /* Intel CPUs have always interpreted SSE prefetch instructions as
	   NOPs; so, we can enable SSE prefetch instructions even when
	   -mtune (rather than -march) points us to a processor that has them.
	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
	   higher processors.  */
	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
  target_flags |= processor_target_table[ix86_tune].target_enable;
  target_flags &= ~processor_target_table[ix86_tune].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else
   if (TARGET_64BIT)
     ix86_regparm = REGPARM_MAX;
1446  /* If the user has provided any of the -malign-* options,
1447     warn and use that value only if -falign-* is not set.
1448     Remove this code in GCC 3.2 or later.  */
1449  if (ix86_align_loops_string)
1450    {
1451      warning (0, "-malign-loops is obsolete, use -falign-loops");
1452      if (align_loops == 0)
1453	{
1454	  i = atoi (ix86_align_loops_string);
1455	  if (i < 0 || i > MAX_CODE_ALIGN)
1456	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1457	  else
1458	    align_loops = 1 << i;
1459	}
1460    }
1461
1462  if (ix86_align_jumps_string)
1463    {
1464      warning (0, "-malign-jumps is obsolete, use -falign-jumps");
1465      if (align_jumps == 0)
1466	{
1467	  i = atoi (ix86_align_jumps_string);
1468	  if (i < 0 || i > MAX_CODE_ALIGN)
1469	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1470	  else
1471	    align_jumps = 1 << i;
1472	}
1473    }
1474
1475  if (ix86_align_funcs_string)
1476    {
1477      warning (0, "-malign-functions is obsolete, use -falign-functions");
1478      if (align_functions == 0)
1479	{
1480	  i = atoi (ix86_align_funcs_string);
1481	  if (i < 0 || i > MAX_CODE_ALIGN)
1482	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1483	  else
1484	    align_functions = 1 << i;
1485	}
1486    }
1487
1488  /* Default align_* from the processor table.  */
1489  if (align_loops == 0)
1490    {
1491      align_loops = processor_target_table[ix86_tune].align_loop;
1492      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1493    }
1494  if (align_jumps == 0)
1495    {
1496      align_jumps = processor_target_table[ix86_tune].align_jump;
1497      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1498    }
1499  if (align_functions == 0)
1500    {
1501      align_functions = processor_target_table[ix86_tune].align_func;
1502    }
1503
1504  /* Validate -mpreferred-stack-boundary= value, or provide default.
1505     The default of 128 bits is for Pentium III's SSE __m128.  We can't
1506     lower it for optimize_size, since otherwise we could not mix object
1507     files compiled with -Os and -On.  */
1508  ix86_preferred_stack_boundary = 128;
1509  if (ix86_preferred_stack_boundary_string)
1510    {
1511      i = atoi (ix86_preferred_stack_boundary_string);
1512      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1513	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1514	       TARGET_64BIT ? 4 : 2);
1515      else
1516	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1517    }
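
  /* For example (illustrative, not part of the original sources):
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128,
     i.e. the stack is kept aligned to a 16 byte boundary, which is the
     same alignment the 128 bit default above corresponds to.  */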
1518
1519  /* Validate -mbranch-cost= value, or provide default.  */
1520  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1521  if (ix86_branch_cost_string)
1522    {
1523      i = atoi (ix86_branch_cost_string);
1524      if (i < 0 || i > 5)
1525	error ("-mbranch-cost=%d is not between 0 and 5", i);
1526      else
1527	ix86_branch_cost = i;
1528    }
1529  if (ix86_section_threshold_string)
1530    {
1531      i = atoi (ix86_section_threshold_string);
1532      if (i < 0)
1533	error ("-mlarge-data-threshold=%d is negative", i);
1534      else
1535	ix86_section_threshold = i;
1536    }
1537
1538  if (ix86_tls_dialect_string)
1539    {
1540      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1541	ix86_tls_dialect = TLS_DIALECT_GNU;
1542      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1543	ix86_tls_dialect = TLS_DIALECT_SUN;
1544      else
1545	error ("bad value (%s) for -mtls-dialect= switch",
1546	       ix86_tls_dialect_string);
1547    }
1548
1549  /* Keep nonleaf frame pointers.  */
1550  if (flag_omit_frame_pointer)
1551    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
1552  else if (TARGET_OMIT_LEAF_FRAME_POINTER)
1553    flag_omit_frame_pointer = 1;
1554
1555  /* If we're doing fast math, we don't care about comparison order
1556     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1557  if (flag_unsafe_math_optimizations)
1558    target_flags &= ~MASK_IEEE_FP;
1559
1560  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1561     since the insns won't need emulation.  */
1562  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1563    target_flags &= ~MASK_NO_FANCY_MATH_387;
1564
1565  /* Likewise, if the target doesn't have a 387, or we've specified
1566     software floating point, don't use 387 inline intrinsics.  */
1567  if (!TARGET_80387)
1568    target_flags |= MASK_NO_FANCY_MATH_387;
1569
1570  /* Turn on SSE2 builtins for -msse3.  */
1571  if (TARGET_SSE3)
1572    target_flags |= MASK_SSE2;
1573
1574  /* Turn on SSE builtins for -msse2.  */
1575  if (TARGET_SSE2)
1576    target_flags |= MASK_SSE;
1577
1578  /* Turn on MMX builtins for -msse.  */
1579  if (TARGET_SSE)
1580    {
1581      target_flags |= MASK_MMX & ~target_flags_explicit;
1582      x86_prefetch_sse = true;
1583    }
1584
1585  /* Turn on MMX builtins for 3Dnow.  */
1586  if (TARGET_3DNOW)
1587    target_flags |= MASK_MMX;
1588
1589  if (TARGET_64BIT)
1590    {
1591      if (TARGET_ALIGN_DOUBLE)
1592	error ("-malign-double makes no sense in the 64bit mode");
1593      if (TARGET_RTD)
1594	error ("-mrtd calling convention not supported in the 64bit mode");
1595
1596      /* Enable by default the SSE and MMX builtins.  Do allow the user to
1597	 explicitly disable any of these.  In particular, disabling SSE and
1598	 MMX for kernel code is extremely useful.  */
1599      target_flags
1600	|= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
1601	    & ~target_flags_explicit);
1602     }
1603  else
1604    {
1605      /* The i386 ABI does not specify a red zone.  It can still be used
1606         when the programmer takes care to keep the stack from being clobbered.  */
1607      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1608        target_flags |= MASK_NO_RED_ZONE;
1609    }
1610
1611  /* Accept -msseregparm only if at least SSE support is enabled.  */
1612  if (TARGET_SSEREGPARM
1613      && ! TARGET_SSE)
1614    error ("-msseregparm used without SSE enabled");
1615
1616  ix86_fpmath = TARGET_FPMATH_DEFAULT;
1617
1618  if (ix86_fpmath_string != 0)
1619    {
1620      if (! strcmp (ix86_fpmath_string, "387"))
1621	ix86_fpmath = FPMATH_387;
1622      else if (! strcmp (ix86_fpmath_string, "sse"))
1623	{
1624	  if (!TARGET_SSE)
1625	    {
1626	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
1627	      ix86_fpmath = FPMATH_387;
1628	    }
1629	  else
1630	    ix86_fpmath = FPMATH_SSE;
1631	}
1632      else if (! strcmp (ix86_fpmath_string, "387,sse")
1633	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1634	{
1635	  if (!TARGET_SSE)
1636	    {
1637	      warning (0, "SSE instruction set disabled, using 387 arithmetic");
1638	      ix86_fpmath = FPMATH_387;
1639	    }
1640	  else if (!TARGET_80387)
1641	    {
1642	      warning (0, "387 instruction set disabled, using SSE arithmetic");
1643	      ix86_fpmath = FPMATH_SSE;
1644	    }
1645	  else
1646	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1647	}
1648      else
1649	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1650    }
1651
1652  /* If the i387 is disabled, then do not return values in it. */
1653  if (!TARGET_80387)
1654    target_flags &= ~MASK_FLOAT_RETURNS;
1655
1656  if ((x86_accumulate_outgoing_args & TUNEMASK)
1657      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1658      && !optimize_size)
1659    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1660
1661  /* ??? Unwind info is not correct around the CFG unless either a frame
1662     pointer is present or M_A_O_A is set.  Fixing this requires rewriting
1663     unwind info generation to be aware of the CFG and propagating states
1664     around edges.  */
1665  if ((flag_unwind_tables || flag_asynchronous_unwind_tables
1666       || flag_exceptions || flag_non_call_exceptions)
1667      && flag_omit_frame_pointer
1668      && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
1669    {
1670      if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1671	warning (0, "unwind tables currently require either a frame pointer "
1672		 "or -maccumulate-outgoing-args for correctness");
1673      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1674    }
1675
1676  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1677  {
1678    char *p;
1679    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1680    p = strchr (internal_label_prefix, 'X');
1681    internal_label_prefix_len = p - internal_label_prefix;
1682    *p = '\0';
1683  }
1684
1685  /* When the scheduling description is not available, disable the scheduler
1686     pass so it won't slow down compilation and make x87 code slower.  */
1687  if (!TARGET_SCHEDULE)
1688    flag_schedule_insns_after_reload = flag_schedule_insns = 0;
1689}
1690
1691/* Switch to the appropriate section for output of DECL.
1692   DECL is either a `VAR_DECL' node or a constant of some sort.
1693   RELOC indicates whether forming the initial value of DECL requires
1694   link-time relocations.  */
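/* For instance (an illustrative case, not from this file): when compiling
   with -m64 -mcmodel=medium, a variable such as

     static char big_buffer[1 << 20];

   exceeds the large-data threshold, so ix86_in_large_data_p is true and
   the switch below places it in .lbss instead of the default .bss.  */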
1695
1696static void
1697x86_64_elf_select_section (tree decl, int reloc,
1698		         unsigned HOST_WIDE_INT align)
1699{
1700  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1701      && ix86_in_large_data_p (decl))
1702    {
1703      const char *sname = NULL;
1704      unsigned int flags = SECTION_WRITE;
1705      switch (categorize_decl_for_section (decl, reloc, flag_pic))
1706	{
1707	case SECCAT_DATA:
1708	  sname = ".ldata";
1709	  break;
1710	case SECCAT_DATA_REL:
1711	  sname = ".ldata.rel";
1712	  break;
1713	case SECCAT_DATA_REL_LOCAL:
1714	  sname = ".ldata.rel.local";
1715	  break;
1716	case SECCAT_DATA_REL_RO:
1717	  sname = ".ldata.rel.ro";
1718	  break;
1719	case SECCAT_DATA_REL_RO_LOCAL:
1720	  sname = ".ldata.rel.ro.local";
1721	  break;
1722	case SECCAT_BSS:
1723	  sname = ".lbss";
1724	  flags |= SECTION_BSS;
1725	  break;
1726	case SECCAT_RODATA:
1727	case SECCAT_RODATA_MERGE_STR:
1728	case SECCAT_RODATA_MERGE_STR_INIT:
1729	case SECCAT_RODATA_MERGE_CONST:
1730	  sname = ".lrodata";
1731	  flags = 0;
1732	  break;
1733	case SECCAT_SRODATA:
1734	case SECCAT_SDATA:
1735	case SECCAT_SBSS:
1736	  gcc_unreachable ();
1737	case SECCAT_TEXT:
1738	case SECCAT_TDATA:
1739	case SECCAT_TBSS:
1740	  /* We don't split these for the medium model.  Place them into the
1741	     default sections and hope for the best.  */
1742	  break;
1743	}
1744      if (sname)
1745	{
1746	  /* We might get called with string constants, but named_section
1747	     doesn't like them as they are not DECLs.  Also, we need to set
1748	     flags in that case.  */
1749	  if (!DECL_P (decl))
1750	    named_section_flags (sname, flags);
1751	  else
1752	    named_section (decl, sname, reloc);
1753	  return;
1754	}
1755    }
1756  default_elf_select_section (decl, reloc, align);
1757}
1758
1759/* Build up a unique section name, expressed as a
1760   STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
1761   RELOC indicates whether the initial value of EXP requires
1762   link-time relocations.  */
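/* For example (illustrative, not from this file): with -mcmodel=medium and
   -fdata-sections, an initialized global such as

     int big_table[1 << 20] = { 1 };

   would be given the section name ".ldata.big_table", or
   ".gnu.linkonce.ld.big_table" when it is one-only and COMDAT groups are
   not available.  */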
1763
1764static void
1765x86_64_elf_unique_section (tree decl, int reloc)
1766{
1767  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1768      && ix86_in_large_data_p (decl))
1769    {
1770      const char *prefix = NULL;
1771      /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
1772      bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
1773
1774      switch (categorize_decl_for_section (decl, reloc, flag_pic))
1775	{
1776	case SECCAT_DATA:
1777	case SECCAT_DATA_REL:
1778	case SECCAT_DATA_REL_LOCAL:
1779	case SECCAT_DATA_REL_RO:
1780	case SECCAT_DATA_REL_RO_LOCAL:
1781          prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
1782	  break;
1783	case SECCAT_BSS:
1784          prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
1785	  break;
1786	case SECCAT_RODATA:
1787	case SECCAT_RODATA_MERGE_STR:
1788	case SECCAT_RODATA_MERGE_STR_INIT:
1789	case SECCAT_RODATA_MERGE_CONST:
1790          prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
1791	  break;
1792	case SECCAT_SRODATA:
1793	case SECCAT_SDATA:
1794	case SECCAT_SBSS:
1795	  gcc_unreachable ();
1796	case SECCAT_TEXT:
1797	case SECCAT_TDATA:
1798	case SECCAT_TBSS:
1799	  /* We don't split these for the medium model.  Place them into the
1800	     default sections and hope for the best.  */
1801	  break;
1802	}
1803      if (prefix)
1804	{
1805	  const char *name;
1806	  size_t nlen, plen;
1807	  char *string;
1808	  plen = strlen (prefix);
1809
1810	  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
1811	  name = targetm.strip_name_encoding (name);
1812	  nlen = strlen (name);
1813
1814	  string = alloca (nlen + plen + 1);
1815	  memcpy (string, prefix, plen);
1816	  memcpy (string + plen, name, nlen + 1);
1817
1818	  DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
1819	  return;
1820	}
1821    }
1822  default_unique_section (decl, reloc);
1823}
1824
1825#ifdef COMMON_ASM_OP
1826/* This says how to output assembler code to declare an
1827   uninitialized external linkage data object.
1828
1829   For medium model x86-64 we need to use the .largecomm directive for
1830   large objects.  */
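/* For example (illustrative output, assuming the default threshold): a
   common object of 1048576 bytes aligned to 32 bytes in the medium code
   model would be emitted as

	.largecomm	big_common,1048576,32

   while smaller objects keep using COMMON_ASM_OP below.  */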
1831void
1832x86_elf_aligned_common (FILE *file,
1833			const char *name, unsigned HOST_WIDE_INT size,
1834			int align)
1835{
1836  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1837      && size > (unsigned int)ix86_section_threshold)
1838    fprintf (file, ".largecomm\t");
1839  else
1840    fprintf (file, "%s", COMMON_ASM_OP);
1841  assemble_name (file, name);
1842  fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
1843	   size, align / BITS_PER_UNIT);
1844}
1845
1846/* Utility function for targets to use in implementing
1847   ASM_OUTPUT_ALIGNED_BSS.  */
1848
1849void
1850x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
1851			const char *name, unsigned HOST_WIDE_INT size,
1852			int align)
1853{
1854  if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
1855      && size > (unsigned int)ix86_section_threshold)
1856    named_section (decl, ".lbss", 0);
1857  else
1858    bss_section ();
1859  ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
1860#ifdef ASM_DECLARE_OBJECT_NAME
1861  last_assemble_variable_decl = decl;
1862  ASM_DECLARE_OBJECT_NAME (file, name, decl);
1863#else
1864  /* The standard thing is to just output a label for the object.  */
1865  ASM_OUTPUT_LABEL (file, name);
1866#endif /* ASM_DECLARE_OBJECT_NAME */
1867  ASM_OUTPUT_SKIP (file, size ? size : 1);
1868}
1869#endif
1870
1871void
1872optimization_options (int level, int size ATTRIBUTE_UNUSED)
1873{
1874  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1875     make the problem with not enough registers even worse.  */
1876#ifdef INSN_SCHEDULING
1877  if (level > 1)
1878    flag_schedule_insns = 0;
1879#endif
1880
1881  if (TARGET_MACHO)
1882    /* The Darwin libraries never set errno, so we might as well
1883       avoid calling them when that's the only reason we would.  */
1884    flag_errno_math = 0;
1885
1886  /* The default values of these switches depend on TARGET_64BIT,
1887     which is not known at this point.  Mark these values with 2 and
1888     let the user override them.  If there is no command line option
1889     specifying them, we will set the defaults in override_options.  */
1890  if (optimize >= 1)
1891    flag_omit_frame_pointer = 2;
1892  flag_pcc_struct_return = 2;
1893  flag_asynchronous_unwind_tables = 2;
1894#ifdef SUBTARGET_OPTIMIZATION_OPTIONS
1895  SUBTARGET_OPTIMIZATION_OPTIONS;
1896#endif
1897}
1898
1899/* Table of valid machine attributes.  */
1900const struct attribute_spec ix86_attribute_table[] =
1901{
1902  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1903  /* Stdcall attribute says callee is responsible for popping arguments
1904     if they are not variable.  */
1905  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
1906  /* Fastcall attribute says callee is responsible for popping arguments
1907     if they are not variable.  */
1908  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
1909  /* Cdecl attribute says the callee is a normal C declaration */
1910  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
1911  /* Regparm attribute specifies how many integer arguments are to be
1912     passed in registers.  */
1913  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
1914  /* Sseregparm attribute says we are using x86_64 calling conventions
1915     for FP arguments.  */
1916  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
1917#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1918  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
1919  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
1920  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1921#endif
1922  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1923  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1924#ifdef SUBTARGET_ATTRIBUTE_TABLE
1925  SUBTARGET_ATTRIBUTE_TABLE,
1926#endif
1927  { NULL,        0, 0, false, false, false, NULL }
1928};
1929
1930/* Decide whether we can make a sibling call to a function.  DECL is the
1931   declaration of the function being targeted by the call and EXP is the
1932   CALL_EXPR representing the call.  */
1933
1934static bool
1935ix86_function_ok_for_sibcall (tree decl, tree exp)
1936{
1937  tree func;
1938  rtx a, b;
1939
1940  /* If we are generating position-independent code, we cannot sibcall
1941     optimize any indirect call, or a direct call to a global function,
1942     as the PLT requires %ebx be live.  */
1943  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1944    return false;
1945
1946  if (decl)
1947    func = decl;
1948  else
1949    {
1950      func = TREE_TYPE (TREE_OPERAND (exp, 0));
1951      if (POINTER_TYPE_P (func))
1952        func = TREE_TYPE (func);
1953    }
1954
1955  /* Check that the return value locations are the same.  For example,
1956     if we are returning floats on the 80387 register stack, we cannot
1957     make a sibcall from a function that doesn't return a float to a
1958     function that does or, conversely, from a function that does return
1959     a float to a function that doesn't; the necessary stack adjustment
1960     would not be executed.  This is also the place we notice
1961     differences in the return value ABI.  Note that it is ok for one
1962     of the functions to have void return type as long as the return
1963     value of the other is passed in a register.  */
1964  a = ix86_function_value (TREE_TYPE (exp), func, false);
1965  b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
1966			   cfun->decl, false);
1967  if (STACK_REG_P (a) || STACK_REG_P (b))
1968    {
1969      if (!rtx_equal_p (a, b))
1970	return false;
1971    }
1972  else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
1973    ;
1974  else if (!rtx_equal_p (a, b))
1975    return false;
1976
1977  /* If this call is indirect, we'll need to be able to use a call-clobbered
1978     register for the address of the target function.  Make sure that all
1979     such registers are not used for passing parameters.  */
1980  if (!decl && !TARGET_64BIT)
1981    {
1982      tree type;
1983
1984      /* We're looking at the CALL_EXPR, we need the type of the function.  */
1985      type = TREE_OPERAND (exp, 0);		/* pointer expression */
1986      type = TREE_TYPE (type);			/* pointer type */
1987      type = TREE_TYPE (type);			/* function type */
1988
1989      if (ix86_function_regparm (type, NULL) >= 3)
1990	{
1991	  /* ??? Need to count the actual number of registers to be used,
1992	     not the possible number of registers.  Fix later.  */
1993	  return false;
1994	}
1995    }
1996
1997#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1998  /* Dllimport'd functions are also called indirectly.  */
1999  if (decl && DECL_DLLIMPORT_P (decl)
2000      && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2001    return false;
2002#endif
2003
2004  /* If we force-aligned the stack, then sibcalling would unalign the
2005     stack, which may break the called function.  */
2006  if (cfun->machine->force_align_arg_pointer)
2007    return false;
2008
2009  /* Otherwise okay.  That also includes certain types of indirect calls.  */
2010  return true;
2011}
2012
2013/* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2014   calling convention attributes;
2015   arguments as in struct attribute_spec.handler.  */
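
/* For example (illustrative declarations, not from this file):

     int __attribute__ ((regparm (3))) f (int, int, int);
     int __attribute__ ((fastcall, regparm (2))) g (int);

   the first declaration is accepted, while the second triggers the
   "fastcall and regparm attributes are not compatible" error below.  */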
2016
2017static tree
2018ix86_handle_cconv_attribute (tree *node, tree name,
2019				   tree args,
2020				   int flags ATTRIBUTE_UNUSED,
2021				   bool *no_add_attrs)
2022{
2023  if (TREE_CODE (*node) != FUNCTION_TYPE
2024      && TREE_CODE (*node) != METHOD_TYPE
2025      && TREE_CODE (*node) != FIELD_DECL
2026      && TREE_CODE (*node) != TYPE_DECL)
2027    {
2028      warning (OPT_Wattributes, "%qs attribute only applies to functions",
2029	       IDENTIFIER_POINTER (name));
2030      *no_add_attrs = true;
2031      return NULL_TREE;
2032    }
2033
2034  /* Can combine regparm with all attributes but fastcall.  */
2035  if (is_attribute_p ("regparm", name))
2036    {
2037      tree cst;
2038
2039      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2040        {
2041	  error ("fastcall and regparm attributes are not compatible");
2042	}
2043
2044      cst = TREE_VALUE (args);
2045      if (TREE_CODE (cst) != INTEGER_CST)
2046	{
2047	  warning (OPT_Wattributes,
2048		   "%qs attribute requires an integer constant argument",
2049		   IDENTIFIER_POINTER (name));
2050	  *no_add_attrs = true;
2051	}
2052      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2053	{
2054	  warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2055		   IDENTIFIER_POINTER (name), REGPARM_MAX);
2056	  *no_add_attrs = true;
2057	}
2058
2059      return NULL_TREE;
2060    }
2061
2062  if (TARGET_64BIT)
2063    {
2064      warning (OPT_Wattributes, "%qs attribute ignored",
2065	       IDENTIFIER_POINTER (name));
2066      *no_add_attrs = true;
2067      return NULL_TREE;
2068    }
2069
2070  /* Can combine fastcall only with sseregparm.  */
2071  if (is_attribute_p ("fastcall", name))
2072    {
2073      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2074        {
2075	  error ("fastcall and cdecl attributes are not compatible");
2076	}
2077      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2078        {
2079	  error ("fastcall and stdcall attributes are not compatible");
2080	}
2081      if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2082        {
2083	  error ("fastcall and regparm attributes are not compatible");
2084	}
2085    }
2086
2087  /* Can combine stdcall with regparm and
2088     sseregparm.  */
2089  else if (is_attribute_p ("stdcall", name))
2090    {
2091      if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2092        {
2093	  error ("stdcall and cdecl attributes are not compatible");
2094	}
2095      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2096        {
2097	  error ("stdcall and fastcall attributes are not compatible");
2098	}
2099    }
2100
2101  /* Can combine cdecl with regparm and sseregparm.  */
2102  else if (is_attribute_p ("cdecl", name))
2103    {
2104      if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2105        {
2106	  error ("stdcall and cdecl attributes are not compatible");
2107	}
2108      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2109        {
2110	  error ("fastcall and cdecl attributes are not compatible");
2111	}
2112    }
2113
2114  /* Can combine sseregparm with all attributes.  */
2115
2116  return NULL_TREE;
2117}
2118
2119/* Return 0 if the attributes for two types are incompatible, 1 if they
2120   are compatible, and 2 if they are nearly compatible (which causes a
2121   warning to be generated).  */
2122
2123static int
2124ix86_comp_type_attributes (tree type1, tree type2)
2125{
2126  /* Check for mismatch of non-default calling convention.  */
2127  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2128
2129  if (TREE_CODE (type1) != FUNCTION_TYPE)
2130    return 1;
2131
2132  /* Check for mismatched fastcall/regparm types.  */
2133  if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2134       != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2135      || (ix86_function_regparm (type1, NULL)
2136	  != ix86_function_regparm (type2, NULL)))
2137    return 0;
2138
2139  /* Check for mismatched sseregparm types.  */
2140  if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2141      != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2142    return 0;
2143
2144  /* Check for mismatched return types (cdecl vs stdcall).  */
2145  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2146      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2147    return 0;
2148
2149  return 1;
2150}
2151
2152/* Return the regparm value for a function with the indicated TYPE and DECL.
2153   DECL may be NULL when calling function indirectly
2154   or considering a libcall.  */
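
/* For example (illustrative): for a type declared as

     void f (int, int) __attribute__ ((regparm (2)));

   this returns 2, so the first two integer arguments go in registers
   rather than on the stack.  For a function local to the compilation
   unit, the code below may raise the value to 3 even without an explicit
   attribute, provided unit-at-a-time is on and no global register
   variables interfere.  */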
2155
2156static int
2157ix86_function_regparm (tree type, tree decl)
2158{
2159  tree attr;
2160  int regparm = ix86_regparm;
2161  bool user_convention = false;
2162
2163  if (!TARGET_64BIT)
2164    {
2165      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2166      if (attr)
2167	{
2168	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2169	  user_convention = true;
2170	}
2171
2172      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2173	{
2174	  regparm = 2;
2175	  user_convention = true;
2176	}
2177
2178      /* Use register calling convention for local functions when possible.  */
2179      if (!TARGET_64BIT && !user_convention && decl
2180	  && flag_unit_at_a_time && !profile_flag)
2181	{
2182	  struct cgraph_local_info *i = cgraph_local_info (decl);
2183	  if (i && i->local)
2184	    {
2185	      int local_regparm, globals = 0, regno;
2186
2187	      /* Make sure no regparm register is taken by a global register
2188		 variable.  */
2189	      for (local_regparm = 0; local_regparm < 3; local_regparm++)
2190		if (global_regs[local_regparm])
2191		  break;
2192	      /* We can't use regparm(3) for nested functions as these use
2193		 the static chain pointer in the third argument.  */
2194	      if (local_regparm == 3
2195		  && decl_function_context (decl)
2196		  && !DECL_NO_STATIC_CHAIN (decl))
2197		local_regparm = 2;
2198	      /* Each global register variable increases register pressure,
2199		 so the more global register variables there are, the less
2200		 the regparm optimization helps, unless explicitly requested.  */
2201	      for (regno = 0; regno < 6; regno++)
2202		if (global_regs[regno])
2203		  globals++;
2204	      local_regparm
2205		= globals < local_regparm ? local_regparm - globals : 0;
2206
2207	      if (local_regparm > regparm)
2208		regparm = local_regparm;
2209	    }
2210	}
2211    }
2212  return regparm;
2213}
2214
2215/* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2216   DFmode (2) arguments in SSE registers for a function with the
2217   indicated TYPE and DECL.  DECL may be NULL when calling function
2218   indirectly or considering a libcall.  Otherwise return 0.  */
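
/* For example (illustrative): with SSE enabled, a declaration such as

     double f (double) __attribute__ ((sseregparm));

   makes this function return 2, so the DFmode argument is passed in
   %xmm0 instead of on the stack.  */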
2219
2220static int
2221ix86_function_sseregparm (tree type, tree decl)
2222{
2223  /* Use SSE registers to pass SFmode and DFmode arguments if requested
2224     by the sseregparm attribute.  */
2225  if (TARGET_SSEREGPARM
2226      || (type
2227	  && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2228    {
2229      if (!TARGET_SSE)
2230	{
2231	  if (decl)
2232	    error ("calling %qD with attribute sseregparm without "
2233		   "SSE/SSE2 enabled", decl);
2234	  else
2235	    error ("calling %qT with attribute sseregparm without "
2236		   "SSE/SSE2 enabled", type);
2237	  return 0;
2238	}
2239
2240      return 2;
2241    }
2242
2243  /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2244     (and DFmode for SSE2) arguments in SSE registers,
2245     even for 32-bit targets.  */
2246  if (!TARGET_64BIT && decl
2247      && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2248    {
2249      struct cgraph_local_info *i = cgraph_local_info (decl);
2250      if (i && i->local)
2251	return TARGET_SSE2 ? 2 : 1;
2252    }
2253
2254  return 0;
2255}
2256
2257/* Return true if EAX is live at the start of the function.  Used by
2258   ix86_expand_prologue to determine if we need special help before
2259   calling allocate_stack_worker.  */
2260
2261static bool
2262ix86_eax_live_at_start_p (void)
2263{
2264  /* Cheat.  Don't bother working forward from ix86_function_regparm
2265     to the function type to whether an actual argument is located in
2266     eax.  Instead just look at cfg info, which is still close enough
2267     to correct at this point.  This gives false positives for broken
2268     functions that might use uninitialized data that happens to be
2269     allocated in eax, but who cares?  */
2270  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2271}
2272
2273/* Value is the number of bytes of arguments automatically
2274   popped when returning from a subroutine call.
2275   FUNDECL is the declaration node of the function (as a tree),
2276   FUNTYPE is the data type of the function (as a tree),
2277   or for a library call it is an identifier node for the subroutine name.
2278   SIZE is the number of bytes of arguments passed on the stack.
2279
2280   On the 80386, the RTD insn may be used to pop them if the number
2281     of args is fixed, but if the number is variable then the caller
2282     must pop them all.  RTD can't be used for library calls now
2283     because the library is compiled with the Unix compiler.
2284   Use of RTD is a selectable option, since it is incompatible with
2285   standard Unix calling sequences.  If the option is not selected,
2286   the caller must always pop the args.
2287
2288   The attribute stdcall is equivalent to RTD on a per module basis.  */
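
/* For example (illustrative): under these rules a function declared as

     int __attribute__ ((stdcall)) f (int a, int b);

   pops its 8 bytes of arguments on return, so this function returns SIZE
   (8 here), whereas the same function declared with a trailing ellipsis
   leaves the arguments for the caller to pop and this function
   returns 0.  */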
2289
2290int
2291ix86_return_pops_args (tree fundecl, tree funtype, int size)
2292{
2293  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2294
2295  /* Cdecl functions override -mrtd, and never pop the stack.  */
2296  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2297
2298    /* Stdcall and fastcall functions will pop the stack if not
2299       variable args.  */
2300    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2301        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2302      rtd = 1;
2303
2304    if (rtd
2305        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2306	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2307		== void_type_node)))
2308      return size;
2309  }
2310
2311  /* Lose any fake structure return argument if it is passed on the stack.  */
2312  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2313      && !TARGET_64BIT
2314      && !KEEP_AGGREGATE_RETURN_POINTER)
2315    {
2316      int nregs = ix86_function_regparm (funtype, fundecl);
2317
2318      if (!nregs)
2319	return GET_MODE_SIZE (Pmode);
2320    }
2321
2322  return 0;
2323}
2324
2325/* Argument support functions.  */
2326
2327/* Return true when register may be used to pass function parameters.  */
2328bool
2329ix86_function_arg_regno_p (int regno)
2330{
2331  int i;
2332  if (!TARGET_64BIT)
2333    return (regno < REGPARM_MAX
2334	    || (TARGET_MMX && MMX_REGNO_P (regno)
2335		&& (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2336	    || (TARGET_SSE && SSE_REGNO_P (regno)
2337		&& (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2338
2339  if (TARGET_SSE && SSE_REGNO_P (regno)
2340      && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2341    return true;
2342  /* RAX is used as hidden argument to va_arg functions.  */
2343  if (!regno)
2344    return true;
2345  for (i = 0; i < REGPARM_MAX; i++)
2346    if (regno == x86_64_int_parameter_registers[i])
2347      return true;
2348  return false;
2349}
2350
2351/* Return true if we do not know how to pass TYPE solely in registers.  */
2352
2353static bool
2354ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2355{
2356  if (must_pass_in_stack_var_size_or_pad (mode, type))
2357    return true;
2358
2359  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch out!
2360     The layout_type routine is crafty and tries to trick us into passing
2361     currently unsupported vector types on the stack by using TImode.  */
2362  return (!TARGET_64BIT && mode == TImode
2363	  && type && TREE_CODE (type) != VECTOR_TYPE);
2364}
2365
2366/* Initialize a variable CUM of type CUMULATIVE_ARGS
2367   for a call to a function whose data type is FNTYPE.
2368   For a library call, FNTYPE is 0.  */
2369
2370void
2371init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
2372		      tree fntype,	/* tree ptr for function decl */
2373		      rtx libname,	/* SYMBOL_REF of library name or 0 */
2374		      tree fndecl)
2375{
2376  static CUMULATIVE_ARGS zero_cum;
2377  tree param, next_param;
2378
2379  if (TARGET_DEBUG_ARG)
2380    {
2381      fprintf (stderr, "\ninit_cumulative_args (");
2382      if (fntype)
2383	fprintf (stderr, "fntype code = %s, ret code = %s",
2384		 tree_code_name[(int) TREE_CODE (fntype)],
2385		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
2386      else
2387	fprintf (stderr, "no fntype");
2388
2389      if (libname)
2390	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
2391    }
2392
2393  *cum = zero_cum;
2394
2395  /* Set up the number of registers to use for passing arguments.  */
2396  cum->nregs = ix86_regparm;
2397  if (TARGET_SSE)
2398    cum->sse_nregs = SSE_REGPARM_MAX;
2399  if (TARGET_MMX)
2400    cum->mmx_nregs = MMX_REGPARM_MAX;
2401  cum->warn_sse = true;
2402  cum->warn_mmx = true;
2403  cum->maybe_vaarg = false;
2404
2405  /* Use ecx and edx registers if function has fastcall attribute,
2406     else look for regparm information.  */
2407  if (fntype && !TARGET_64BIT)
2408    {
2409      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
2410	{
2411	  cum->nregs = 2;
2412	  cum->fastcall = 1;
2413	}
2414      else
2415	cum->nregs = ix86_function_regparm (fntype, fndecl);
2416    }
2417
2418  /* Set up the number of SSE registers used for passing SFmode
2419     and DFmode arguments.  Warn for mismatching ABI.  */
2420  cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
2421
2422  /* Determine if this function has variable arguments.  This is
2423     indicated by the last argument being 'void_type_node' if there
2424     are no variable arguments.  If there are variable arguments, then
2425     we won't pass anything in registers in 32-bit mode. */
2426
2427  if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
2428    {
2429      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
2430	   param != 0; param = next_param)
2431	{
2432	  next_param = TREE_CHAIN (param);
2433	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
2434	    {
2435	      if (!TARGET_64BIT)
2436		{
2437		  cum->nregs = 0;
2438		  cum->sse_nregs = 0;
2439		  cum->mmx_nregs = 0;
2440		  cum->warn_sse = 0;
2441		  cum->warn_mmx = 0;
2442		  cum->fastcall = 0;
2443		  cum->float_in_sse = 0;
2444		}
2445	      cum->maybe_vaarg = true;
2446	    }
2447	}
2448    }
2449  if ((!fntype && !libname)
2450      || (fntype && !TYPE_ARG_TYPES (fntype)))
2451    cum->maybe_vaarg = true;
2452
2453  if (TARGET_DEBUG_ARG)
2454    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
2455
2456  return;
2457}
2458
2459/* Return the "natural" mode for TYPE.  In most cases, this is just TYPE_MODE.
2460   But in the case of vector types, it is some vector mode.
2461
2462   When we have only some of our vector isa extensions enabled, then there
2463   are some modes for which vector_mode_supported_p is false.  For these
2464   modes, the generic vector support in gcc will choose some non-vector mode
2465   in order to implement the type.  By computing the natural mode, we'll
2466   select the proper ABI location for the operand and not depend on whatever
2467   the middle-end decides to do with these vector types.  */
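
/* For example (an illustrative case): a type declared as

     typedef float v4sf __attribute__ ((vector_size (16)));

   is given V4SFmode here even when the SSE ISA is disabled and the
   middle end has therefore chosen a non-vector mode for it, so the ABI
   location of such an argument does not change with -msse.  */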
2468
2469static enum machine_mode
2470type_natural_mode (tree type)
2471{
2472  enum machine_mode mode = TYPE_MODE (type);
2473
2474  if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
2475    {
2476      HOST_WIDE_INT size = int_size_in_bytes (type);
2477      if ((size == 8 || size == 16)
2478	  /* ??? Generic code allows us to create width 1 vectors.  Ignore.  */
2479	  && TYPE_VECTOR_SUBPARTS (type) > 1)
2480	{
2481	  enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
2482
2483	  if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
2484	    mode = MIN_MODE_VECTOR_FLOAT;
2485	  else
2486	    mode = MIN_MODE_VECTOR_INT;
2487
2488	  /* Get the mode which has this inner mode and number of units.  */
2489	  for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
2490	    if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
2491		&& GET_MODE_INNER (mode) == innermode)
2492	      return mode;
2493
2494	  gcc_unreachable ();
2495	}
2496    }
2497
2498  return mode;
2499}
2500
2501/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
2502   this may not agree with the mode that the type system has chosen for the
2503   register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, then we can
2504   go ahead and use it.  Otherwise we have to build a PARALLEL instead.  */
2505
2506static rtx
2507gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
2508		     unsigned int regno)
2509{
2510  rtx tmp;
2511
2512  if (orig_mode != BLKmode)
2513    tmp = gen_rtx_REG (orig_mode, regno);
2514  else
2515    {
2516      tmp = gen_rtx_REG (mode, regno);
2517      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2518      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2519    }
2520
2521  return tmp;
2522}
2523
2524/* x86-64 register passing implementation.  See the x86-64 ABI for details.
2525   The goal of this code is to classify each eightbyte of an incoming argument
2526   by register class and assign registers accordingly.  */
2527
2528/* Return the union class of CLASS1 and CLASS2.
2529   See the x86-64 PS ABI for details.  */
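
/* For example (illustrative): merging X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS yields X86_64_INTEGERSI_CLASS by rule #4 below,
   while merging any class with X86_64_MEMORY_CLASS yields
   X86_64_MEMORY_CLASS by rule #3.  */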
2530
2531static enum x86_64_reg_class
2532merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
2533{
2534  /* Rule #1: If both classes are equal, this is the resulting class.  */
2535  if (class1 == class2)
2536    return class1;
2537
2538  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
2539     the other class.  */
2540  if (class1 == X86_64_NO_CLASS)
2541    return class2;
2542  if (class2 == X86_64_NO_CLASS)
2543    return class1;
2544
2545  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
2546  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
2547    return X86_64_MEMORY_CLASS;
2548
2549  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
2550  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
2551      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
2552    return X86_64_INTEGERSI_CLASS;
2553  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
2554      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
2555    return X86_64_INTEGER_CLASS;
2556
2557  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
2558     MEMORY is used.  */
2559  if (class1 == X86_64_X87_CLASS
2560      || class1 == X86_64_X87UP_CLASS
2561      || class1 == X86_64_COMPLEX_X87_CLASS
2562      || class2 == X86_64_X87_CLASS
2563      || class2 == X86_64_X87UP_CLASS
2564      || class2 == X86_64_COMPLEX_X87_CLASS)
2565    return X86_64_MEMORY_CLASS;
2566
2567  /* Rule #6: Otherwise class SSE is used.  */
2568  return X86_64_SSE_CLASS;
2569}
2570
2571/* Classify the argument of type TYPE and mode MODE.
2572   CLASSES will be filled by the register class used to pass each word
2573   of the operand.  The number of words is returned.  In case the parameter
2574   should be passed in memory, 0 is returned. As a special case for zero
2575   sized containers, classes[0] will be NO_CLASS and 1 is returned.
2576
2577   BIT_OFFSET is used internally for handling records; it specifies the
2578   offset of the argument in bits, modulo 256, to avoid overflow cases.
2579
2580   See the x86-64 PS ABI for details.
2581*/
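
/* As an illustrative example (not from this file), for

     struct s { double d; int i; };

   the first eightbyte is classified as X86_64_SSEDF_CLASS and the second
   as X86_64_INTEGER_CLASS, so classify_argument returns 2 and the struct
   is passed with D in an SSE register and I in a general register.  */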
2582
2583static int
2584classify_argument (enum machine_mode mode, tree type,
2585		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
2586{
2587  HOST_WIDE_INT bytes =
2588    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2589  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2590
2591  /* Variable sized entities are always passed/returned in memory.  */
2592  if (bytes < 0)
2593    return 0;
2594
2595  if (mode != VOIDmode
2596      && targetm.calls.must_pass_in_stack (mode, type))
2597    return 0;
2598
2599  if (type && AGGREGATE_TYPE_P (type))
2600    {
2601      int i;
2602      tree field;
2603      enum x86_64_reg_class subclasses[MAX_CLASSES];
2604
2605      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2606      if (bytes > 16)
2607	return 0;
2608
2609      for (i = 0; i < words; i++)
2610	classes[i] = X86_64_NO_CLASS;
2611
2612      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2613	 signal the memory class, so handle this as a special case.  */
2614      if (!words)
2615	{
2616	  classes[0] = X86_64_NO_CLASS;
2617	  return 1;
2618	}
2619
2620      /* Classify each field of record and merge classes.  */
2621      switch (TREE_CODE (type))
2622	{
2623	case RECORD_TYPE:
2624	  /* For classes first merge in the field of the subclasses.  */
2625	  if (TYPE_BINFO (type))
2626	    {
2627	      tree binfo, base_binfo;
2628	      int basenum;
2629
2630	      for (binfo = TYPE_BINFO (type), basenum = 0;
2631		   BINFO_BASE_ITERATE (binfo, basenum, base_binfo); basenum++)
2632		{
2633		   int num;
2634		   int offset = tree_low_cst (BINFO_OFFSET (base_binfo), 0) * 8;
2635		   tree type = BINFO_TYPE (base_binfo);
2636
2637		   num = classify_argument (TYPE_MODE (type),
2638					    type, subclasses,
2639					    (offset + bit_offset) % 256);
2640		   if (!num)
2641		     return 0;
2642		   for (i = 0; i < num; i++)
2643		     {
2644		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2645		       classes[i + pos] =
2646			 merge_classes (subclasses[i], classes[i + pos]);
2647		     }
2648		}
2649	    }
2650	  /* And now merge the fields of structure.  */
2651	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2652	    {
2653	      if (TREE_CODE (field) == FIELD_DECL)
2654		{
2655		  int num;
2656
2657		  if (TREE_TYPE (field) == error_mark_node)
2658		    continue;
2659
2660		  /* Bitfields are always classified as integer.  Handle them
2661		     early, since later code would consider them to be
2662		     misaligned integers.  */
2663		  if (DECL_BIT_FIELD (field))
2664		    {
2665		      for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2666			   i < ((int_bit_position (field) + (bit_offset % 64))
2667			        + tree_low_cst (DECL_SIZE (field), 0)
2668				+ 63) / 8 / 8; i++)
2669			classes[i] =
2670			  merge_classes (X86_64_INTEGER_CLASS,
2671					 classes[i]);
2672		    }
2673		  else
2674		    {
2675		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2676					       TREE_TYPE (field), subclasses,
2677					       (int_bit_position (field)
2678						+ bit_offset) % 256);
2679		      if (!num)
2680			return 0;
2681		      for (i = 0; i < num; i++)
2682			{
2683			  int pos =
2684			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2685			  classes[i + pos] =
2686			    merge_classes (subclasses[i], classes[i + pos]);
2687			}
2688		    }
2689		}
2690	    }
2691	  break;
2692
2693	case ARRAY_TYPE:
2694	  /* Arrays are handled as small records.  */
2695	  {
2696	    int num;
2697	    num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2698				     TREE_TYPE (type), subclasses, bit_offset);
2699	    if (!num)
2700	      return 0;
2701
2702	    /* The partial classes are now full classes.  */
2703	    if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2704	      subclasses[0] = X86_64_SSE_CLASS;
2705	    if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2706	      subclasses[0] = X86_64_INTEGER_CLASS;
2707
2708	    for (i = 0; i < words; i++)
2709	      classes[i] = subclasses[i % num];
2710
2711	    break;
2712	  }
2713	case UNION_TYPE:
2714	case QUAL_UNION_TYPE:
2715	  /* Unions are similar to RECORD_TYPE but the offset is
2716	     always 0.  */
2717
2718	  /* Unions are not derived.  */
2719	  gcc_assert (!TYPE_BINFO (type)
2720		      || !BINFO_N_BASE_BINFOS (TYPE_BINFO (type)));
2721	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2722	    {
2723	      if (TREE_CODE (field) == FIELD_DECL)
2724		{
2725		  int num;
2726
2727		  if (TREE_TYPE (field) == error_mark_node)
2728		    continue;
2729
2730		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2731					   TREE_TYPE (field), subclasses,
2732					   bit_offset);
2733		  if (!num)
2734		    return 0;
2735		  for (i = 0; i < num; i++)
2736		    classes[i] = merge_classes (subclasses[i], classes[i]);
2737		}
2738	    }
2739	  break;
2740
2741	default:
2742	  gcc_unreachable ();
2743	}
2744
2745      /* Final merger cleanup.  */
2746      for (i = 0; i < words; i++)
2747	{
2748	  /* If one class is MEMORY, everything should be passed in
2749	     memory.  */
2750	  if (classes[i] == X86_64_MEMORY_CLASS)
2751	    return 0;
2752
2753	  /* The X86_64_SSEUP_CLASS should be always preceded by
2754	     X86_64_SSE_CLASS.  */
2755	  if (classes[i] == X86_64_SSEUP_CLASS
2756	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2757	    classes[i] = X86_64_SSE_CLASS;
2758
2759	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2760	  if (classes[i] == X86_64_X87UP_CLASS
2761	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2762	    classes[i] = X86_64_SSE_CLASS;
2763	}
2764      return words;
2765    }
2766
2767  /* Compute the alignment needed.  We align all types to their natural
2768     boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
2769  if (mode != VOIDmode && mode != BLKmode)
2770    {
2771      int mode_alignment = GET_MODE_BITSIZE (mode);
2772
2773      if (mode == XFmode)
2774	mode_alignment = 128;
2775      else if (mode == XCmode)
2776	mode_alignment = 256;
2777      if (COMPLEX_MODE_P (mode))
2778	mode_alignment /= 2;
2779      /* Misaligned fields are always returned in memory.  */
2780      if (bit_offset % mode_alignment)
2781	return 0;
2782    }
2783
2784  /* For V1xx modes, just use the base mode.  */
2785  if (VECTOR_MODE_P (mode)
2786      && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
2787    mode = GET_MODE_INNER (mode);
2788
2789  /* Classification of atomic types.  */
2790  switch (mode)
2791    {
2792    case DImode:
2793    case SImode:
2794    case HImode:
2795    case QImode:
2796    case CSImode:
2797    case CHImode:
2798    case CQImode:
2799      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2800	classes[0] = X86_64_INTEGERSI_CLASS;
2801      else
2802	classes[0] = X86_64_INTEGER_CLASS;
2803      return 1;
2804    case CDImode:
2805    case TImode:
2806      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2807      return 2;
2808    case CTImode:
2809      return 0;
2810    case SFmode:
2811      if (!(bit_offset % 64))
2812	classes[0] = X86_64_SSESF_CLASS;
2813      else
2814	classes[0] = X86_64_SSE_CLASS;
2815      return 1;
2816    case DFmode:
2817      classes[0] = X86_64_SSEDF_CLASS;
2818      return 1;
2819    case XFmode:
2820      classes[0] = X86_64_X87_CLASS;
2821      classes[1] = X86_64_X87UP_CLASS;
2822      return 2;
2823    case TFmode:
2824      classes[0] = X86_64_SSE_CLASS;
2825      classes[1] = X86_64_SSEUP_CLASS;
2826      return 2;
2827    case SCmode:
2828      classes[0] = X86_64_SSE_CLASS;
2829      return 1;
2830    case DCmode:
2831      classes[0] = X86_64_SSEDF_CLASS;
2832      classes[1] = X86_64_SSEDF_CLASS;
2833      return 2;
2834    case XCmode:
2835      classes[0] = X86_64_COMPLEX_X87_CLASS;
2836      return 1;
2837    case TCmode:
2838      /* This mode is larger than 16 bytes.  */
2839      return 0;
2840    case V4SFmode:
2841    case V4SImode:
2842    case V16QImode:
2843    case V8HImode:
2844    case V2DFmode:
2845    case V2DImode:
2846      classes[0] = X86_64_SSE_CLASS;
2847      classes[1] = X86_64_SSEUP_CLASS;
2848      return 2;
2849    case V2SFmode:
2850    case V2SImode:
2851    case V4HImode:
2852    case V8QImode:
2853      classes[0] = X86_64_SSE_CLASS;
2854      return 1;
2855    case BLKmode:
2856    case VOIDmode:
2857      return 0;
2858    default:
2859      gcc_assert (VECTOR_MODE_P (mode));
2860
2861      if (bytes > 16)
2862	return 0;
2863
2864      gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
2865
2866      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2867	classes[0] = X86_64_INTEGERSI_CLASS;
2868      else
2869	classes[0] = X86_64_INTEGER_CLASS;
2870      classes[1] = X86_64_INTEGER_CLASS;
2871      return 1 + (bytes > 8);
2872    }
2873}
2874
2875/* Examine the argument and set the number of registers required in each
2876   class.  Return 0 iff the parameter should be passed in memory.  */
2877static int
2878examine_argument (enum machine_mode mode, tree type, int in_return,
2879		  int *int_nregs, int *sse_nregs)
2880{
2881  enum x86_64_reg_class class[MAX_CLASSES];
2882  int n = classify_argument (mode, type, class, 0);
2883
2884  *int_nregs = 0;
2885  *sse_nregs = 0;
2886  if (!n)
2887    return 0;
2888  for (n--; n >= 0; n--)
2889    switch (class[n])
2890      {
2891      case X86_64_INTEGER_CLASS:
2892      case X86_64_INTEGERSI_CLASS:
2893	(*int_nregs)++;
2894	break;
2895      case X86_64_SSE_CLASS:
2896      case X86_64_SSESF_CLASS:
2897      case X86_64_SSEDF_CLASS:
2898	(*sse_nregs)++;
2899	break;
2900      case X86_64_NO_CLASS:
2901      case X86_64_SSEUP_CLASS:
2902	break;
2903      case X86_64_X87_CLASS:
2904      case X86_64_X87UP_CLASS:
2905	if (!in_return)
2906	  return 0;
2907	break;
2908      case X86_64_COMPLEX_X87_CLASS:
2909	return in_return ? 2 : 0;
2910      case X86_64_MEMORY_CLASS:
2911	gcc_unreachable ();
2912      }
2913  return 1;
2914}
2915
2916/* Construct a container for the argument as used by the GCC interface.  See
2917   FUNCTION_ARG for the detailed description.  */
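
/* Continuing the illustrative struct { double d; int i; } example from
   classify_argument above, the container built here is a PARALLEL holding
   a DFmode SSE register at offset 0 and a DImode integer register at
   offset 8, describing where each eightbyte of the argument lives.  */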
2918
2919static rtx
2920construct_container (enum machine_mode mode, enum machine_mode orig_mode,
2921		     tree type, int in_return, int nintregs, int nsseregs,
2922		     const int *intreg, int sse_regno)
2923{
2924  /* The following variables hold the static issued_error state.  */
2925  static bool issued_sse_arg_error;
2926  static bool issued_sse_ret_error;
2927  static bool issued_x87_ret_error;
2928
2929  enum machine_mode tmpmode;
2930  int bytes =
2931    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2932  enum x86_64_reg_class class[MAX_CLASSES];
2933  int n;
2934  int i;
2935  int nexps = 0;
2936  int needed_sseregs, needed_intregs;
2937  rtx exp[MAX_CLASSES];
2938  rtx ret;
2939
2940  n = classify_argument (mode, type, class, 0);
2941  if (TARGET_DEBUG_ARG)
2942    {
2943      if (!n)
2944	fprintf (stderr, "Memory class\n");
2945      else
2946	{
2947	  fprintf (stderr, "Classes:");
2948	  for (i = 0; i < n; i++)
2949	    {
2950	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2951	    }
2952	   fprintf (stderr, "\n");
2953	}
2954    }
2955  if (!n)
2956    return NULL;
2957  if (!examine_argument (mode, type, in_return, &needed_intregs,
2958			 &needed_sseregs))
2959    return NULL;
2960  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2961    return NULL;
2962
2963  /* We allowed the user to turn off SSE for kernel mode.  Don't crash if
2964     some less clueful developer tries to use floating-point anyway.  */
2965  if (needed_sseregs && !TARGET_SSE)
2966    {
2967      if (in_return)
2968	{
2969	  if (!issued_sse_ret_error)
2970	    {
2971	      error ("SSE register return with SSE disabled");
2972	      issued_sse_ret_error = true;
2973	    }
2974	}
2975      else if (!issued_sse_arg_error)
2976	{
2977	  error ("SSE register argument with SSE disabled");
2978	  issued_sse_arg_error = true;
2979	}
2980      return NULL;
2981    }
2982
2983  /* Likewise, error if the ABI requires us to return values in the
2984     x87 registers and the user specified -mno-80387.  */
2985  if (!TARGET_80387 && in_return)
2986    for (i = 0; i < n; i++)
2987      if (class[i] == X86_64_X87_CLASS
2988	  || class[i] == X86_64_X87UP_CLASS
2989	  || class[i] == X86_64_COMPLEX_X87_CLASS)
2990	{
2991	  if (!issued_x87_ret_error)
2992	    {
2993	      error ("x87 register return with x87 disabled");
2994	      issued_x87_ret_error = true;
2995	    }
2996	  return NULL;
2997	}
2998
2999  /* First construct simple cases.  Avoid SCmode, since we want to use
3000     a single register to pass this type.  */
3001  if (n == 1 && mode != SCmode)
3002    switch (class[0])
3003      {
3004      case X86_64_INTEGER_CLASS:
3005      case X86_64_INTEGERSI_CLASS:
3006	return gen_rtx_REG (mode, intreg[0]);
3007      case X86_64_SSE_CLASS:
3008      case X86_64_SSESF_CLASS:
3009      case X86_64_SSEDF_CLASS:
3010	return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3011      case X86_64_X87_CLASS:
3012      case X86_64_COMPLEX_X87_CLASS:
3013	return gen_rtx_REG (mode, FIRST_STACK_REG);
3014      case X86_64_NO_CLASS:
3015	/* Zero sized array, struct or class.  */
3016	return NULL;
3017      default:
3018	gcc_unreachable ();
3019      }
3020  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3021      && mode != BLKmode)
3022    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3023  if (n == 2
3024      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3025    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3026  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3027      && class[1] == X86_64_INTEGER_CLASS
3028      && (mode == CDImode || mode == TImode || mode == TFmode)
3029      && intreg[0] + 1 == intreg[1])
3030    return gen_rtx_REG (mode, intreg[0]);
3031
3032  /* Otherwise figure out the entries of the PARALLEL.  */
3033  for (i = 0; i < n; i++)
3034    {
3035      switch (class[i])
3036        {
3037	  case X86_64_NO_CLASS:
3038	    break;
3039	  case X86_64_INTEGER_CLASS:
3040	  case X86_64_INTEGERSI_CLASS:
3041	    /* Merge TImodes on aligned occasions here too.  */
3042	    if (i * 8 + 8 > bytes)
3043	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3044	    else if (class[i] == X86_64_INTEGERSI_CLASS)
3045	      tmpmode = SImode;
3046	    else
3047	      tmpmode = DImode;
3048	    /* We've requested a size with no integer mode (e.g. 24 bits); use DImode.  */
3049	    if (tmpmode == BLKmode)
3050	      tmpmode = DImode;
3051	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3052					       gen_rtx_REG (tmpmode, *intreg),
3053					       GEN_INT (i*8));
3054	    intreg++;
3055	    break;
3056	  case X86_64_SSESF_CLASS:
3057	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3058					       gen_rtx_REG (SFmode,
3059							    SSE_REGNO (sse_regno)),
3060					       GEN_INT (i*8));
3061	    sse_regno++;
3062	    break;
3063	  case X86_64_SSEDF_CLASS:
3064	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3065					       gen_rtx_REG (DFmode,
3066							    SSE_REGNO (sse_regno)),
3067					       GEN_INT (i*8));
3068	    sse_regno++;
3069	    break;
3070	  case X86_64_SSE_CLASS:
3071	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3072	      tmpmode = TImode;
3073	    else
3074	      tmpmode = DImode;
3075	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3076					       gen_rtx_REG (tmpmode,
3077							    SSE_REGNO (sse_regno)),
3078					       GEN_INT (i*8));
3079	    if (tmpmode == TImode)
3080	      i++;
3081	    sse_regno++;
3082	    break;
3083	  default:
3084	    gcc_unreachable ();
3085	}
3086    }
3087
3088  /* Empty aligned struct, union or class.  */
3089  if (nexps == 0)
3090    return NULL;
3091
3092  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3093  for (i = 0; i < nexps; i++)
3094    XVECEXP (ret, 0, i) = exp [i];
3095  return ret;
3096}
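
/* As a rough illustration of the classification above, a hypothetical
   argument such as

       struct example { double d; long l; };

   occupies two eightbytes classified as SSE and INTEGER respectively, so
   the PARALLEL built here looks roughly like

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   with the double travelling in the first free SSE register and the long
   in the first free integer register.  */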
3097
3098/* Update the data in CUM to advance over an argument
3099   of mode MODE and data type TYPE.
3100   (TYPE is null for libcalls where that information may not be available.)  */
3101
3102void
3103function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3104		      tree type, int named)
3105{
3106  int bytes =
3107    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3108  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3109
3110  if (type)
3111    mode = type_natural_mode (type);
3112
3113  if (TARGET_DEBUG_ARG)
3114    fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3115	     "mode=%s, named=%d)\n\n",
3116	     words, cum->words, cum->nregs, cum->sse_nregs,
3117	     GET_MODE_NAME (mode), named);
3118
3119  if (TARGET_64BIT)
3120    {
3121      int int_nregs, sse_nregs;
3122      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3123	cum->words += words;
3124      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3125	{
3126	  cum->nregs -= int_nregs;
3127	  cum->sse_nregs -= sse_nregs;
3128	  cum->regno += int_nregs;
3129	  cum->sse_regno += sse_nregs;
3130	}
3131      else
3132	cum->words += words;
3133    }
3134  else
3135    {
3136      switch (mode)
3137	{
3138	default:
3139	  break;
3140
3141	case BLKmode:
3142	  if (bytes < 0)
3143	    break;
3144	  /* FALLTHRU */
3145
3146	case DImode:
3147	case SImode:
3148	case HImode:
3149	case QImode:
3150	  cum->words += words;
3151	  cum->nregs -= words;
3152	  cum->regno += words;
3153
3154	  if (cum->nregs <= 0)
3155	    {
3156	      cum->nregs = 0;
3157	      cum->regno = 0;
3158	    }
3159	  break;
3160
3161	case DFmode:
3162	  if (cum->float_in_sse < 2)
3163	    break;
3164	case SFmode:
3165	  if (cum->float_in_sse < 1)
3166	    break;
3167	  /* FALLTHRU */
3168
3169	case TImode:
3170	case V16QImode:
3171	case V8HImode:
3172	case V4SImode:
3173	case V2DImode:
3174	case V4SFmode:
3175	case V2DFmode:
3176	  if (!type || !AGGREGATE_TYPE_P (type))
3177	    {
3178	      cum->sse_words += words;
3179	      cum->sse_nregs -= 1;
3180	      cum->sse_regno += 1;
3181	      if (cum->sse_nregs <= 0)
3182		{
3183		  cum->sse_nregs = 0;
3184		  cum->sse_regno = 0;
3185		}
3186	    }
3187	  break;
3188
3189	case V8QImode:
3190	case V4HImode:
3191	case V2SImode:
3192	case V2SFmode:
3193	  if (!type || !AGGREGATE_TYPE_P (type))
3194	    {
3195	      cum->mmx_words += words;
3196	      cum->mmx_nregs -= 1;
3197	      cum->mmx_regno += 1;
3198	      if (cum->mmx_nregs <= 0)
3199		{
3200		  cum->mmx_nregs = 0;
3201		  cum->mmx_regno = 0;
3202		}
3203	    }
3204	  break;
3205	}
3206    }
3207}
3208
3209/* Define where to put the arguments to a function.
3210   Value is zero to push the argument on the stack,
3211   or a hard register in which to store the argument.
3212
3213   MODE is the argument's machine mode.
3214   TYPE is the data type of the argument (as a tree).
3215    This is null for libcalls where that information may
3216    not be available.
3217   CUM is a variable of type CUMULATIVE_ARGS which gives info about
3218    the preceding args and about the function being called.
3219   NAMED is nonzero if this argument is a named parameter
3220    (otherwise it is an extra parameter matching an ellipsis).  */
3221
3222rtx
3223function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3224	      tree type, int named)
3225{
3226  enum machine_mode mode = orig_mode;
3227  rtx ret = NULL_RTX;
3228  int bytes =
3229    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3230  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3231  static bool warnedsse, warnedmmx;
3232
3233  /* To simplify the code below, represent vector types with a vector mode
3234     even if MMX/SSE are not active.  */
3235  if (type && TREE_CODE (type) == VECTOR_TYPE)
3236    mode = type_natural_mode (type);
3237
3238  /* Handle a hidden AL argument containing the number of SSE registers used
3239     by a varargs x86-64 function.  For the i386 ABI just return constm1_rtx
3240     to avoid any AL settings.  */
3241  if (mode == VOIDmode)
3242    {
3243      if (TARGET_64BIT)
3244	return GEN_INT (cum->maybe_vaarg
3245			? (cum->sse_nregs < 0
3246			   ? SSE_REGPARM_MAX
3247			   : cum->sse_regno)
3248			: -1);
3249      else
3250	return constm1_rtx;
3251    }
3252  if (TARGET_64BIT)
3253    ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3254			       cum->sse_nregs,
3255			       &x86_64_int_parameter_registers [cum->regno],
3256			       cum->sse_regno);
3257  else
3258    switch (mode)
3259      {
3260	/* For now, pass fp/complex values on the stack.  */
3261      default:
3262	break;
3263
3264      case BLKmode:
3265	if (bytes < 0)
3266	  break;
3267	/* FALLTHRU */
3268      case DImode:
3269      case SImode:
3270      case HImode:
3271      case QImode:
3272	if (words <= cum->nregs)
3273	  {
3274	    int regno = cum->regno;
3275
3276	    /* Fastcall allocates the first two DWORD (SImode) or
3277	       smaller arguments to ECX and EDX.  */
3278	    if (cum->fastcall)
3279	      {
3280	        if (mode == BLKmode || mode == DImode)
3281	          break;
3282
3283	        /* ECX, not EAX, is the first allocated register.  */
3284	        if (regno == 0)
3285		  regno = 2;
3286	      }
3287	    ret = gen_rtx_REG (mode, regno);
3288	  }
3289	break;
3290      case DFmode:
3291	if (cum->float_in_sse < 2)
3292	  break;
3293      case SFmode:
3294	if (cum->float_in_sse < 1)
3295	  break;
3296	/* FALLTHRU */
3297      case TImode:
3298      case V16QImode:
3299      case V8HImode:
3300      case V4SImode:
3301      case V2DImode:
3302      case V4SFmode:
3303      case V2DFmode:
3304	if (!type || !AGGREGATE_TYPE_P (type))
3305	  {
3306	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3307	      {
3308		warnedsse = true;
3309		warning (0, "SSE vector argument without SSE enabled "
3310			 "changes the ABI");
3311	      }
3312	    if (cum->sse_nregs)
3313	      ret = gen_reg_or_parallel (mode, orig_mode,
3314					 cum->sse_regno + FIRST_SSE_REG);
3315	  }
3316	break;
3317      case V8QImode:
3318      case V4HImode:
3319      case V2SImode:
3320      case V2SFmode:
3321	if (!type || !AGGREGATE_TYPE_P (type))
3322	  {
3323	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3324	      {
3325		warnedmmx = true;
3326		warning (0, "MMX vector argument without MMX enabled "
3327			 "changes the ABI");
3328	      }
3329	    if (cum->mmx_nregs)
3330	      ret = gen_reg_or_parallel (mode, orig_mode,
3331					 cum->mmx_regno + FIRST_MMX_REG);
3332	  }
3333	break;
3334      }
3335
3336  if (TARGET_DEBUG_ARG)
3337    {
3338      fprintf (stderr,
3339	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3340	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
3341
3342      if (ret)
3343	print_simple_rtl (stderr, ret);
3344      else
3345	fprintf (stderr, ", stack");
3346
3347      fprintf (stderr, " )\n");
3348    }
3349
3350  return ret;
3351}
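
/* As an illustration of the 32-bit fastcall handling above, a hypothetical

       int __attribute__((fastcall)) f (int a, int b, int c);

   receives A in %ecx, B in %edx and C on the stack, while DImode and
   BLKmode fastcall arguments always fall back to the stack.  */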
3352
3353/* A C expression that indicates when an argument must be passed by
3354   reference.  If nonzero for an argument, a copy of that argument is
3355   made in memory and a pointer to the argument is passed instead of
3356   the argument itself.  The pointer is passed in whatever way is
3357   appropriate for passing a pointer to that type.  */
3358
3359static bool
3360ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3361			enum machine_mode mode ATTRIBUTE_UNUSED,
3362			tree type, bool named ATTRIBUTE_UNUSED)
3363{
3364  if (!TARGET_64BIT)
3365    return 0;
3366
3367  if (type && int_size_in_bytes (type) == -1)
3368    {
3369      if (TARGET_DEBUG_ARG)
3370	fprintf (stderr, "function_arg_pass_by_reference\n");
3371      return 1;
3372    }
3373
3374  return 0;
3375}
3376
3377/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
3378   passing ABI.  Only called if TARGET_SSE.  */
3379static bool
3380contains_128bit_aligned_vector_p (tree type)
3381{
3382  enum machine_mode mode = TYPE_MODE (type);
3383  if (SSE_REG_MODE_P (mode)
3384      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3385    return true;
3386  if (TYPE_ALIGN (type) < 128)
3387    return false;
3388
3389  if (AGGREGATE_TYPE_P (type))
3390    {
3391      /* Walk the aggregates recursively.  */
3392      switch (TREE_CODE (type))
3393	{
3394	case RECORD_TYPE:
3395	case UNION_TYPE:
3396	case QUAL_UNION_TYPE:
3397	  {
3398	    tree field;
3399
3400	    if (TYPE_BINFO (type))
3401	      {
3402		tree binfo, base_binfo;
3403		int i;
3404
3405		for (binfo = TYPE_BINFO (type), i = 0;
3406		     BINFO_BASE_ITERATE (binfo, i, base_binfo); i++)
3407		  if (contains_128bit_aligned_vector_p
3408		      (BINFO_TYPE (base_binfo)))
3409		    return true;
3410	      }
3411	    /* And now merge the fields of structure.  */
3412	    for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3413	      {
3414		if (TREE_CODE (field) == FIELD_DECL
3415		    && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3416		  return true;
3417	      }
3418	    break;
3419	  }
3420
3421	case ARRAY_TYPE:
3422	  /* Just in case some language passes arrays by value.  */
3423	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3424	    return true;
3425	  break;
3426
3427	default:
3428	  gcc_unreachable ();
3429	}
3430    }
3431  return false;
3432}
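
/* For instance, a hypothetical wrapper such as

       struct wrap { int pad; __m128 v; };

   is reported as true because of its 128-bit vector member, whereas a
   struct of plain scalars fails the TYPE_ALIGN check above and keeps the
   default argument alignment.  */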
3433
3434/* Gives the alignment boundary, in bits, of an argument with the
3435   specified mode and type.  */
3436
3437int
3438ix86_function_arg_boundary (enum machine_mode mode, tree type)
3439{
3440  int align;
3441  if (type)
3442    align = TYPE_ALIGN (type);
3443  else
3444    align = GET_MODE_ALIGNMENT (mode);
3445  if (align < PARM_BOUNDARY)
3446    align = PARM_BOUNDARY;
3447  if (!TARGET_64BIT)
3448    {
3449      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
3450	 make an exception for SSE modes since these require 128bit
3451	 alignment.
3452
3453	 The handling here differs from field_alignment.  ICC aligns MMX
3454	 arguments to 4 byte boundaries, while structure fields are aligned
3455	 to 8 byte boundaries.  */
3456      if (!TARGET_SSE)
3457	align = PARM_BOUNDARY;
3458      else if (!type)
3459	{
3460	  if (!SSE_REG_MODE_P (mode))
3461	    align = PARM_BOUNDARY;
3462	}
3463      else
3464	{
3465	  if (!contains_128bit_aligned_vector_p (type))
3466	    align = PARM_BOUNDARY;
3467	}
3468    }
3469  if (align > 128)
3470    align = 128;
3471  return align;
3472}
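
/* A few illustrative results for the 32-bit ABI with SSE enabled, in bits:

       double                  ->  32  (PARM_BOUNDARY)
       __m128 (V4SFmode)       -> 128
       struct { __m128 v; }    -> 128

   With -mno-sse everything stays at PARM_BOUNDARY; on the 64-bit ABI the
   type's own alignment is used (at least PARM_BOUNDARY), capped at 128.  */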
3473
3474/* Return true if N is a possible register number of function value.  */
3475bool
3476ix86_function_value_regno_p (int regno)
3477{
3478  if (regno == 0
3479      || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
3480      || (regno == FIRST_SSE_REG && TARGET_SSE))
3481    return true;
3482
3483  if (!TARGET_64BIT
3484      && (regno == FIRST_MMX_REG && TARGET_MMX))
3485	return true;
3486
3487  return false;
3488}
3489
3490/* Define how to find the value returned by a function.
3491   VALTYPE is the data type of the value (as a tree).
3492   If the precise function being called is known, FUNC is its FUNCTION_DECL;
3493   otherwise, FUNC is 0.  */
3494rtx
3495ix86_function_value (tree valtype, tree fntype_or_decl,
3496		     bool outgoing ATTRIBUTE_UNUSED)
3497{
3498  enum machine_mode natmode = type_natural_mode (valtype);
3499
3500  if (TARGET_64BIT)
3501    {
3502      rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
3503				     1, REGPARM_MAX, SSE_REGPARM_MAX,
3504				     x86_64_int_return_registers, 0);
3505      /* For zero-sized structures, construct_container returns NULL, but we
3506	 need to keep the rest of the compiler happy by returning a meaningful value.  */
3507      if (!ret)
3508	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
3509      return ret;
3510    }
3511  else
3512    {
3513      tree fn = NULL_TREE, fntype;
3514      if (fntype_or_decl
3515	  && DECL_P (fntype_or_decl))
3516        fn = fntype_or_decl;
3517      fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
3518      return gen_rtx_REG (TYPE_MODE (valtype),
3519			  ix86_value_regno (natmode, fn, fntype));
3520    }
3521}
3522
3523/* Return nonzero iff TYPE must be returned in memory.  */
3524int
3525ix86_return_in_memory (tree type)
3526{
3527  int needed_intregs, needed_sseregs, size;
3528  enum machine_mode mode = type_natural_mode (type);
3529
3530  if (TARGET_64BIT)
3531    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
3532
3533  if (mode == BLKmode)
3534    return 1;
3535
3536  size = int_size_in_bytes (type);
3537
3538  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
3539    return 0;
3540
3541  if (VECTOR_MODE_P (mode) || mode == TImode)
3542    {
3543      /* User-created vectors small enough to fit in EAX.  */
3544      if (size < 8)
3545	return 0;
3546
3547      /* MMX/3dNow values are returned in MM0,
3548	 except when it doesn't exist.  */
3549      if (size == 8)
3550	return (TARGET_MMX ? 0 : 1);
3551
3552      /* SSE values are returned in XMM0, except when it doesn't exist.  */
3553      if (size == 16)
3554	return (TARGET_SSE ? 0 : 1);
3555    }
3556
3557  if (mode == XFmode)
3558    return 0;
3559
3560  if (size > 12)
3561    return 1;
3562  return 0;
3563}
3564
3565/* When returning SSE vector types, we have a choice of either
3566     (1) being ABI incompatible with a -march switch, or
3567     (2) generating an error.
3568   Given no good solution, I think the safest thing is one warning.
3569   The user won't be able to use -Werror, but....
3570
3571   Choose the STRUCT_VALUE_RTX hook because that's (at present) only
3572   called in response to actually generating a caller or callee that
3573   uses such a type.  As opposed to RETURN_IN_MEMORY, which is called
3574   via aggregate_value_p for general type probing from tree-ssa.  */
3575
3576static rtx
3577ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
3578{
3579  static bool warnedsse, warnedmmx;
3580
3581  if (type)
3582    {
3583      /* Look at the return type of the function, not the function type.  */
3584      enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
3585
3586      if (!TARGET_SSE && !warnedsse)
3587	{
3588	  if (mode == TImode
3589	      || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3590	    {
3591	      warnedsse = true;
3592	      warning (0, "SSE vector return without SSE enabled "
3593		       "changes the ABI");
3594	    }
3595	}
3596
3597      if (!TARGET_MMX && !warnedmmx)
3598	{
3599	  if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
3600	    {
3601	      warnedmmx = true;
3602	      warning (0, "MMX vector return without MMX enabled "
3603		       "changes the ABI");
3604	    }
3605	}
3606    }
3607
3608  return NULL;
3609}
3610
3611/* Define how to find the value returned by a library function
3612   assuming the value has mode MODE.  */
3613rtx
3614ix86_libcall_value (enum machine_mode mode)
3615{
3616  if (TARGET_64BIT)
3617    {
3618      switch (mode)
3619	{
3620	case SFmode:
3621	case SCmode:
3622	case DFmode:
3623	case DCmode:
3624	case TFmode:
3625	  return gen_rtx_REG (mode, FIRST_SSE_REG);
3626	case XFmode:
3627	case XCmode:
3628	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
3629	case TCmode:
3630	  return NULL;
3631	default:
3632	  return gen_rtx_REG (mode, 0);
3633	}
3634    }
3635  else
3636    return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
3637}
3638
3639/* Given a mode, return the register to use for a return value.  */
3640
3641static int
3642ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
3643{
3644  gcc_assert (!TARGET_64BIT);
3645
3646  /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
3647     we prevent this case when mmx is not available.  */
3648  if ((VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8))
3649    return FIRST_MMX_REG;
3650
3651  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
3652     we prevent this case when sse is not available.  */
3653  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
3654    return FIRST_SSE_REG;
3655
3656  /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values.  */
3657  if (GET_MODE_CLASS (mode) != MODE_FLOAT || !TARGET_FLOAT_RETURNS_IN_80387)
3658    return 0;
3659
3660  /* Floating point return values in %st(0), except for local functions when
3661     SSE math is enabled or for functions with sseregparm attribute.  */
3662  if ((func || fntype)
3663      && (mode == SFmode || mode == DFmode))
3664    {
3665      int sse_level = ix86_function_sseregparm (fntype, func);
3666      if ((sse_level >= 1 && mode == SFmode)
3667	  || (sse_level == 2 && mode == DFmode))
3668        return FIRST_SSE_REG;
3669    }
3670
3671  return FIRST_FLOAT_REG;
3672}
3673
3674/* Create the va_list data type.  */
3675
3676static tree
3677ix86_build_builtin_va_list (void)
3678{
3679  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
3680
3681  /* For i386 we use plain pointer to argument area.  */
3682  if (!TARGET_64BIT)
3683    return build_pointer_type (char_type_node);
3684
3685  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3686  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3687
3688  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
3689		      unsigned_type_node);
3690  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
3691		      unsigned_type_node);
3692  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
3693		      ptr_type_node);
3694  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
3695		      ptr_type_node);
3696
3697  va_list_gpr_counter_field = f_gpr;
3698  va_list_fpr_counter_field = f_fpr;
3699
3700  DECL_FIELD_CONTEXT (f_gpr) = record;
3701  DECL_FIELD_CONTEXT (f_fpr) = record;
3702  DECL_FIELD_CONTEXT (f_ovf) = record;
3703  DECL_FIELD_CONTEXT (f_sav) = record;
3704
3705  TREE_CHAIN (record) = type_decl;
3706  TYPE_NAME (record) = type_decl;
3707  TYPE_FIELDS (record) = f_gpr;
3708  TREE_CHAIN (f_gpr) = f_fpr;
3709  TREE_CHAIN (f_fpr) = f_ovf;
3710  TREE_CHAIN (f_ovf) = f_sav;
3711
3712  layout_type (record);
3713
3714  /* The correct type is an array type of one element.  */
3715  return build_array_type (record, build_index_type (size_zero_node));
3716}
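
/* For reference, the record built above corresponds, per the x86-64 psABI,
   to the C declaration below; the 32-bit case is just a character pointer.

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */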
3717
3718/* Worker function for TARGET_SETUP_INCOMING_VARARGS.  */
3719
3720static void
3721ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3722			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
3723			     int no_rtl)
3724{
3725  CUMULATIVE_ARGS next_cum;
3726  rtx save_area = NULL_RTX, mem;
3727  rtx label;
3728  rtx label_ref;
3729  rtx tmp_reg;
3730  rtx nsse_reg;
3731  int set;
3732  tree fntype;
3733  int stdarg_p;
3734  int i;
3735
3736  if (!TARGET_64BIT)
3737    return;
3738
3739  if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
3740    return;
3741
3742  /* Indicate to allocate space on the stack for varargs save area.  */
3743  ix86_save_varrargs_registers = 1;
3744
3745  cfun->stack_alignment_needed = 128;
3746
3747  fntype = TREE_TYPE (current_function_decl);
3748  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3749	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3750		  != void_type_node));
3751
3752  /* For varargs, we do not want to skip the dummy va_dcl argument.
3753     For stdargs, we do want to skip the last named argument.  */
3754  next_cum = *cum;
3755  if (stdarg_p)
3756    function_arg_advance (&next_cum, mode, type, 1);
3757
3758  if (!no_rtl)
3759    save_area = frame_pointer_rtx;
3760
3761  set = get_varargs_alias_set ();
3762
3763  for (i = next_cum.regno;
3764       i < ix86_regparm
3765       && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
3766       i++)
3767    {
3768      mem = gen_rtx_MEM (Pmode,
3769			 plus_constant (save_area, i * UNITS_PER_WORD));
3770      MEM_NOTRAP_P (mem) = 1;
3771      set_mem_alias_set (mem, set);
3772      emit_move_insn (mem, gen_rtx_REG (Pmode,
3773					x86_64_int_parameter_registers[i]));
3774    }
3775
3776  if (next_cum.sse_nregs && cfun->va_list_fpr_size)
3777    {
3778      /* Now emit code to save SSE registers.  The AX parameter contains the
3779	 number of SSE parameter registers used to call this function.  We use
3780	 the sse_prologue_save insn template, which produces a computed jump across
3781	 the SSE saves.  We need some preparation work to get this working.  */
3782
3783      label = gen_label_rtx ();
3784      label_ref = gen_rtx_LABEL_REF (Pmode, label);
3785
3786      /* Compute the address to jump to:
3787         label - eax*4 + nnamed_sse_arguments*4  */
3788      tmp_reg = gen_reg_rtx (Pmode);
3789      nsse_reg = gen_reg_rtx (Pmode);
3790      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3791      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3792			      gen_rtx_MULT (Pmode, nsse_reg,
3793					    GEN_INT (4))));
3794      if (next_cum.sse_regno)
3795	emit_move_insn
3796	  (nsse_reg,
3797	   gen_rtx_CONST (DImode,
3798			  gen_rtx_PLUS (DImode,
3799					label_ref,
3800					GEN_INT (next_cum.sse_regno * 4))));
3801      else
3802	emit_move_insn (nsse_reg, label_ref);
3803      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3804
3805      /* Compute the address of the memory block we save into.  We always use a
3806	 pointer pointing 127 bytes past the first byte to store; this keeps each
3807	 save instruction within 4 bytes by allowing 8-bit displacements.  */
3808      tmp_reg = gen_reg_rtx (Pmode);
3809      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3810			      plus_constant (save_area,
3811					     8 * REGPARM_MAX + 127)));
3812      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3813      MEM_NOTRAP_P (mem) = 1;
3814      set_mem_alias_set (mem, set);
3815      set_mem_align (mem, BITS_PER_WORD);
3816
3817      /* And finally do the dirty job!  */
3818      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3819					GEN_INT (next_cum.sse_regno), label));
3820    }
3821
3822}
3823
3824/* Implement va_start.  */
3825
3826void
3827ix86_va_start (tree valist, rtx nextarg)
3828{
3829  HOST_WIDE_INT words, n_gpr, n_fpr;
3830  tree f_gpr, f_fpr, f_ovf, f_sav;
3831  tree gpr, fpr, ovf, sav, t;
3832
3833  /* Only the 64-bit target needs something special.  */
3834  if (!TARGET_64BIT)
3835    {
3836      std_expand_builtin_va_start (valist, nextarg);
3837      return;
3838    }
3839
3840  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3841  f_fpr = TREE_CHAIN (f_gpr);
3842  f_ovf = TREE_CHAIN (f_fpr);
3843  f_sav = TREE_CHAIN (f_ovf);
3844
3845  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3846  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3847  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3848  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3849  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3850
3851  /* Count number of gp and fp argument registers used.  */
3852  words = current_function_args_info.words;
3853  n_gpr = current_function_args_info.regno;
3854  n_fpr = current_function_args_info.sse_regno;
3855
3856  if (TARGET_DEBUG_ARG)
3857    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3858	     (int) words, (int) n_gpr, (int) n_fpr);
3859
3860  if (cfun->va_list_gpr_size)
3861    {
3862      t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3863		 build_int_cst (NULL_TREE, n_gpr * 8));
3864      TREE_SIDE_EFFECTS (t) = 1;
3865      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3866    }
3867
3868  if (cfun->va_list_fpr_size)
3869    {
3870      t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3871		 build_int_cst (NULL_TREE, n_fpr * 16 + 8*REGPARM_MAX));
3872      TREE_SIDE_EFFECTS (t) = 1;
3873      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3874    }
3875
3876  /* Find the overflow area.  */
3877  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3878  if (words != 0)
3879    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3880	       build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
3881  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3882  TREE_SIDE_EFFECTS (t) = 1;
3883  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3884
3885  if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
3886    {
3887      /* Find the register save area.
3888	 The function prologue saves it right above the stack frame.  */
3889      t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3890      t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3891      TREE_SIDE_EFFECTS (t) = 1;
3892      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3893    }
3894}
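
/* A sketch of the state established above, assuming the standard psABI
   register save area laid out by the prologue (REGPARM_MAX == 6 integer
   slots of 8 bytes followed by 16-byte SSE slots):

       gp_offset         = <gp registers used by named args> * 8
       fp_offset         = 6 * 8 + <sse registers used by named args> * 16
       overflow_arg_area = address of the first stack-passed argument
       reg_save_area     = base of the save area (the frame pointer here)  */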
3895
3896/* Implement va_arg.  */
3897
3898tree
3899ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
3900{
3901  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3902  tree f_gpr, f_fpr, f_ovf, f_sav;
3903  tree gpr, fpr, ovf, sav, t;
3904  int size, rsize;
3905  tree lab_false, lab_over = NULL_TREE;
3906  tree addr, t2;
3907  rtx container;
3908  int indirect_p = 0;
3909  tree ptrtype;
3910  enum machine_mode nat_mode;
3911
3912  /* Only the 64-bit target needs something special.  */
3913  if (!TARGET_64BIT)
3914    return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
3915
3916  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3917  f_fpr = TREE_CHAIN (f_gpr);
3918  f_ovf = TREE_CHAIN (f_fpr);
3919  f_sav = TREE_CHAIN (f_ovf);
3920
3921  valist = build_va_arg_indirect_ref (valist);
3922  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
3923  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
3924  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
3925  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
3926
3927  indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
3928  if (indirect_p)
3929    type = build_pointer_type (type);
3930  size = int_size_in_bytes (type);
3931  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3932
3933  nat_mode = type_natural_mode (type);
3934  container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
3935				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3936
3937  /* Pull the value out of the saved registers.  */
3938
3939  addr = create_tmp_var (ptr_type_node, "addr");
3940  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3941
3942  if (container)
3943    {
3944      int needed_intregs, needed_sseregs;
3945      bool need_temp;
3946      tree int_addr, sse_addr;
3947
3948      lab_false = create_artificial_label ();
3949      lab_over = create_artificial_label ();
3950
3951      examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
3952
3953      need_temp = (!REG_P (container)
3954		   && ((needed_intregs && TYPE_ALIGN (type) > 64)
3955		       || TYPE_ALIGN (type) > 128));
3956
3957      /* In case we are passing a structure, verify that it is a consecutive
3958         block in the register save area.  If not, we need to do moves.  */
3959      if (!need_temp && !REG_P (container))
3960	{
3961	  /* Verify that all registers are strictly consecutive.  */
3962	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3963	    {
3964	      int i;
3965
3966	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3967		{
3968		  rtx slot = XVECEXP (container, 0, i);
3969		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3970		      || INTVAL (XEXP (slot, 1)) != i * 16)
3971		    need_temp = 1;
3972		}
3973	    }
3974	  else
3975	    {
3976	      int i;
3977
3978	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3979		{
3980		  rtx slot = XVECEXP (container, 0, i);
3981		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3982		      || INTVAL (XEXP (slot, 1)) != i * 8)
3983		    need_temp = 1;
3984		}
3985	    }
3986	}
3987      if (!need_temp)
3988	{
3989	  int_addr = addr;
3990	  sse_addr = addr;
3991	}
3992      else
3993	{
3994	  int_addr = create_tmp_var (ptr_type_node, "int_addr");
3995	  DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
3996	  sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
3997	  DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
3998	}
3999
4000      /* First ensure that we fit completely in registers.  */
4001      if (needed_intregs)
4002	{
4003	  t = build_int_cst (TREE_TYPE (gpr),
4004			     (REGPARM_MAX - needed_intregs + 1) * 8);
4005	  t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4006	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4007	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4008	  gimplify_and_add (t, pre_p);
4009	}
4010      if (needed_sseregs)
4011	{
4012	  t = build_int_cst (TREE_TYPE (fpr),
4013			     (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4014			     + REGPARM_MAX * 8);
4015	  t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4016	  t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4017	  t = build (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4018	  gimplify_and_add (t, pre_p);
4019	}
4020
4021      /* Compute index to start of area used for integer regs.  */
4022      if (needed_intregs)
4023	{
4024	  /* int_addr = gpr + sav; */
4025	  t = fold_convert (ptr_type_node, gpr);
4026	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4027	  t = build2 (MODIFY_EXPR, void_type_node, int_addr, t);
4028	  gimplify_and_add (t, pre_p);
4029	}
4030      if (needed_sseregs)
4031	{
4032	  /* sse_addr = fpr + sav; */
4033	  t = fold_convert (ptr_type_node, fpr);
4034	  t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4035	  t = build2 (MODIFY_EXPR, void_type_node, sse_addr, t);
4036	  gimplify_and_add (t, pre_p);
4037	}
4038      if (need_temp)
4039	{
4040	  int i;
4041	  tree temp = create_tmp_var (type, "va_arg_tmp");
4042
4043	  /* addr = &temp; */
4044	  t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4045	  t = build2 (MODIFY_EXPR, void_type_node, addr, t);
4046	  gimplify_and_add (t, pre_p);
4047
4048	  for (i = 0; i < XVECLEN (container, 0); i++)
4049	    {
4050	      rtx slot = XVECEXP (container, 0, i);
4051	      rtx reg = XEXP (slot, 0);
4052	      enum machine_mode mode = GET_MODE (reg);
4053	      tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4054	      tree addr_type = build_pointer_type (piece_type);
4055	      tree src_addr, src;
4056	      int src_offset;
4057	      tree dest_addr, dest;
4058
4059	      if (SSE_REGNO_P (REGNO (reg)))
4060		{
4061		  src_addr = sse_addr;
4062		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4063		}
4064	      else
4065		{
4066		  src_addr = int_addr;
4067		  src_offset = REGNO (reg) * 8;
4068		}
4069	      src_addr = fold_convert (addr_type, src_addr);
4070	      src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4071				       size_int (src_offset)));
4072	      src = build_va_arg_indirect_ref (src_addr);
4073
4074	      dest_addr = fold_convert (addr_type, addr);
4075	      dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4076					size_int (INTVAL (XEXP (slot, 1)))));
4077	      dest = build_va_arg_indirect_ref (dest_addr);
4078
4079	      t = build2 (MODIFY_EXPR, void_type_node, dest, src);
4080	      gimplify_and_add (t, pre_p);
4081	    }
4082	}
4083
4084      if (needed_intregs)
4085	{
4086	  t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4087		      build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4088	  t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
4089	  gimplify_and_add (t, pre_p);
4090	}
4091      if (needed_sseregs)
4092	{
4093	  t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4094		      build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4095	  t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
4096	  gimplify_and_add (t, pre_p);
4097	}
4098
4099      t = build1 (GOTO_EXPR, void_type_node, lab_over);
4100      gimplify_and_add (t, pre_p);
4101
4102      t = build1 (LABEL_EXPR, void_type_node, lab_false);
4103      append_to_statement_list (t, pre_p);
4104    }
4105
4106  /* ... otherwise out of the overflow area.  */
4107
4108  /* Care for on-stack alignment if needed.  */
4109  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4110      || integer_zerop (TYPE_SIZE (type)))
4111    t = ovf;
4112  else
4113    {
4114      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4115      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4116		 build_int_cst (TREE_TYPE (ovf), align - 1));
4117      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
4118		 build_int_cst (TREE_TYPE (t), -align));
4119    }
4120  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4121
4122  t2 = build2 (MODIFY_EXPR, void_type_node, addr, t);
4123  gimplify_and_add (t2, pre_p);
4124
4125  t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4126	      build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4127  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
4128  gimplify_and_add (t, pre_p);
4129
4130  if (container)
4131    {
4132      t = build1 (LABEL_EXPR, void_type_node, lab_over);
4133      append_to_statement_list (t, pre_p);
4134    }
4135
4136  ptrtype = build_pointer_type (type);
4137  addr = fold_convert (ptrtype, addr);
4138
4139  if (indirect_p)
4140    addr = build_va_arg_indirect_ref (addr);
4141  return build_va_arg_indirect_ref (addr);
4142}
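
/* The GIMPLE emitted above behaves roughly like this hand-written sketch
   for a single integer argument (all names purely illustrative):

       if (ap->gp_offset >= 6 * 8)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;

   SSE arguments use fp_offset and 16-byte slots instead, and values that
   need both kinds of registers are first assembled in a temporary.  */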
4143
4144/* Return nonzero if OPNUM's MEM should be matched
4145   in movabs* patterns.  */
4146
4147int
4148ix86_check_movabs (rtx insn, int opnum)
4149{
4150  rtx set, mem;
4151
4152  set = PATTERN (insn);
4153  if (GET_CODE (set) == PARALLEL)
4154    set = XVECEXP (set, 0, 0);
4155  gcc_assert (GET_CODE (set) == SET);
4156  mem = XEXP (set, opnum);
4157  while (GET_CODE (mem) == SUBREG)
4158    mem = SUBREG_REG (mem);
4159  gcc_assert (GET_CODE (mem) == MEM);
4160  return (volatile_ok || !MEM_VOLATILE_P (mem));
4161}
4162
4163/* Initialize the table of extra 80387 mathematical constants.  */
4164
4165static void
4166init_ext_80387_constants (void)
4167{
4168  static const char * cst[5] =
4169  {
4170    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4171    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4172    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4173    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4174    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4175  };
4176  int i;
4177
4178  for (i = 0; i < 5; i++)
4179    {
4180      real_from_string (&ext_80387_constants_table[i], cst[i]);
4181      /* Ensure each constant is rounded to XFmode precision.  */
4182      real_convert (&ext_80387_constants_table[i],
4183		    XFmode, &ext_80387_constants_table[i]);
4184    }
4185
4186  ext_80387_constants_init = 1;
4187}
4188
4189/* Return a nonzero code for the special 80387 instruction that can load the
4190   constant X, 0 if there is none, or -1 if X is not an FP CONST_DOUBLE.  */
4191
4192int
4193standard_80387_constant_p (rtx x)
4194{
4195  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4196    return -1;
4197
4198  if (x == CONST0_RTX (GET_MODE (x)))
4199    return 1;
4200  if (x == CONST1_RTX (GET_MODE (x)))
4201    return 2;
4202
4203  /* For XFmode constants, try to find a special 80387 instruction when
4204     optimizing for size or on those CPUs that benefit from them.  */
4205  if (GET_MODE (x) == XFmode
4206      && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4207    {
4208      REAL_VALUE_TYPE r;
4209      int i;
4210
4211      if (! ext_80387_constants_init)
4212	init_ext_80387_constants ();
4213
4214      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4215      for (i = 0; i < 5; i++)
4216        if (real_identical (&r, &ext_80387_constants_table[i]))
4217	  return i + 3;
4218    }
4219
4220  return 0;
4221}
4222
4223/* Return the opcode of the special instruction to be used to load
4224   the constant X.  */
4225
4226const char *
4227standard_80387_constant_opcode (rtx x)
4228{
4229  switch (standard_80387_constant_p (x))
4230    {
4231    case 1:
4232      return "fldz";
4233    case 2:
4234      return "fld1";
4235    case 3:
4236      return "fldlg2";
4237    case 4:
4238      return "fldln2";
4239    case 5:
4240      return "fldl2e";
4241    case 6:
4242      return "fldl2t";
4243    case 7:
4244      return "fldpi";
4245    default:
4246      gcc_unreachable ();
4247    }
4248}
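
/* A caller in the machine description would typically pair the two
   functions above along these lines (illustrative only):

       if (standard_80387_constant_p (operands[1]) > 0)
         return standard_80387_constant_opcode (operands[1]);

   so that, for example, an XFmode constant equal to pi is emitted as
   "fldpi" instead of being loaded from memory.  */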
4249
4250/* Return the CONST_DOUBLE representing the 80387 constant that is
4251   loaded by the specified special instruction.  The argument IDX
4252   matches the return value from standard_80387_constant_p.  */
4253
4254rtx
4255standard_80387_constant_rtx (int idx)
4256{
4257  int i;
4258
4259  if (! ext_80387_constants_init)
4260    init_ext_80387_constants ();
4261
4262  switch (idx)
4263    {
4264    case 3:
4265    case 4:
4266    case 5:
4267    case 6:
4268    case 7:
4269      i = idx - 3;
4270      break;
4271
4272    default:
4273      gcc_unreachable ();
4274    }
4275
4276  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4277				       XFmode);
4278}
4279
4280/* Return 1 if X is an FP constant that we can load into an SSE register
4281   without using memory.  */
4282int
4283standard_sse_constant_p (rtx x)
4284{
4285  if (x == const0_rtx)
4286    return 1;
4287  return (x == CONST0_RTX (GET_MODE (x)));
4288}
4289
4290/* Return 1 if OP contains a symbol reference.  */
4291
4292int
4293symbolic_reference_mentioned_p (rtx op)
4294{
4295  const char *fmt;
4296  int i;
4297
4298  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4299    return 1;
4300
4301  fmt = GET_RTX_FORMAT (GET_CODE (op));
4302  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4303    {
4304      if (fmt[i] == 'E')
4305	{
4306	  int j;
4307
4308	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4309	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4310	      return 1;
4311	}
4312
4313      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4314	return 1;
4315    }
4316
4317  return 0;
4318}
4319
4320/* Return 1 if it is appropriate to emit `ret' instructions in the
4321   body of a function.  Do this only if the epilogue is simple, needing a
4322   couple of insns.  Prior to reloading, we can't tell how many registers
4323   must be saved, so return 0 then.  Return 0 if there is no frame
4324   marker to de-allocate.  */
4325
4326int
4327ix86_can_use_return_insn_p (void)
4328{
4329  struct ix86_frame frame;
4330
4331  if (! reload_completed || frame_pointer_needed)
4332    return 0;
4333
4334  /* Don't allow more than 32768 bytes of arguments to be popped, since that's
4335     all we handle with a single `ret' instruction.  */
4336  if (current_function_pops_args
4337      && current_function_args_size >= 32768)
4338    return 0;
4339
4340  ix86_compute_frame_layout (&frame);
4341  return frame.to_allocate == 0 && frame.nregs == 0;
4342}
4343
4344/* Value should be nonzero if functions must have frame pointers.
4345   Zero means the frame pointer need not be set up (and parms may
4346   be accessed via the stack pointer) in functions that seem suitable.  */
4347
4348int
4349ix86_frame_pointer_required (void)
4350{
4351  /* If we accessed previous frames, then the generated code expects
4352     to be able to access the saved ebp value in our frame.  */
4353  if (cfun->machine->accesses_prev_frame)
4354    return 1;
4355
4356  /* Several x86 OSes need a frame pointer for other reasons,
4357     usually pertaining to setjmp.  */
4358  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4359    return 1;
4360
4361  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4362     the frame pointer by default.  Turn it back on now if we've not
4363     got a leaf function.  */
4364  if (TARGET_OMIT_LEAF_FRAME_POINTER
4365      && (!current_function_is_leaf))
4366    return 1;
4367
4368  if (current_function_profile)
4369    return 1;
4370
4371  return 0;
4372}
4373
4374/* Record that the current function accesses previous call frames.  */
4375
4376void
4377ix86_setup_frame_addresses (void)
4378{
4379  cfun->machine->accesses_prev_frame = 1;
4380}
4381
4382#if defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)
4383# define USE_HIDDEN_LINKONCE 1
4384#else
4385# define USE_HIDDEN_LINKONCE 0
4386#endif
4387
4388static int pic_labels_used;
4389
4390/* Fills in the label name that should be used for a pc thunk for
4391   the given register.  */
4392
4393static void
4394get_pc_thunk_name (char name[32], unsigned int regno)
4395{
4396  if (USE_HIDDEN_LINKONCE)
4397    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4398  else
4399    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4400}
4401
4402
4403/* Output the -fpic pc thunks: each one loads its register with
4404   the return address of the caller and then returns.  */
4405
4406void
4407ix86_file_end (void)
4408{
4409  rtx xops[2];
4410  int regno;
4411
4412  for (regno = 0; regno < 8; ++regno)
4413    {
4414      char name[32];
4415
4416      if (! ((pic_labels_used >> regno) & 1))
4417	continue;
4418
4419      get_pc_thunk_name (name, regno);
4420
4421      if (USE_HIDDEN_LINKONCE)
4422	{
4423	  tree decl;
4424
4425	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4426			     error_mark_node);
4427	  TREE_PUBLIC (decl) = 1;
4428	  TREE_STATIC (decl) = 1;
4429	  DECL_ONE_ONLY (decl) = 1;
4430
4431	  (*targetm.asm_out.unique_section) (decl, 0);
4432	  named_section (decl, NULL, 0);
4433
4434	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4435	  fputs ("\t.hidden\t", asm_out_file);
4436	  assemble_name (asm_out_file, name);
4437	  fputc ('\n', asm_out_file);
4438	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4439	}
4440      else
4441	{
4442	  text_section ();
4443	  ASM_OUTPUT_LABEL (asm_out_file, name);
4444	}
4445
4446      xops[0] = gen_rtx_REG (SImode, regno);
4447      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4448      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4449      output_asm_insn ("ret", xops);
4450    }
4451
4452  if (NEED_INDICATE_EXEC_STACK)
4453    file_end_indicate_exec_stack ();
4454}
4455
4456/* Emit code for the SET_GOT patterns.  */
4457
4458const char *
4459output_set_got (rtx dest)
4460{
4461  rtx xops[3];
4462
4463  xops[0] = dest;
4464  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4465
4466  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4467    {
4468      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4469
4470      if (!flag_pic)
4471	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4472      else
4473	output_asm_insn ("call\t%a2", xops);
4474
4475#if TARGET_MACHO
4476      /* Output the "canonical" label name ("Lxx$pb") here too.  This
4477         is what will be referred to by the Mach-O PIC subsystem.  */
4478      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4479#endif
4480      (*targetm.asm_out.internal_label) (asm_out_file, "L",
4481				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4482
4483      if (flag_pic)
4484	output_asm_insn ("pop{l}\t%0", xops);
4485    }
4486  else
4487    {
4488      char name[32];
4489      get_pc_thunk_name (name, REGNO (dest));
4490      pic_labels_used |= 1 << REGNO (dest);
4491
4492      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4493      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4494      output_asm_insn ("call\t%X2", xops);
4495    }
4496
4497  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4498    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4499  else if (!TARGET_MACHO)
4500    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
4501
4502  return "";
4503}
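
/* With -fpic and deep branch prediction, the sequence emitted above for
   %ebx looks roughly like

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   while the fallback form uses an inline call/pop pair:

       call    .L2
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx  */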
4504
4505/* Generate a "push" pattern for input ARG.  */
4506
4507static rtx
4508gen_push (rtx arg)
4509{
4510  return gen_rtx_SET (VOIDmode,
4511		      gen_rtx_MEM (Pmode,
4512				   gen_rtx_PRE_DEC (Pmode,
4513						    stack_pointer_rtx)),
4514		      arg);
4515}
4516
4517/* Return the number of an unused call-clobbered register that is available
4518   for the entire function, or INVALID_REGNUM if there is none.  */
4519
4520static unsigned int
4521ix86_select_alt_pic_regnum (void)
4522{
4523  if (current_function_is_leaf && !current_function_profile)
4524    {
4525      int i;
4526      for (i = 2; i >= 0; --i)
4527        if (!regs_ever_live[i])
4528	  return i;
4529    }
4530
4531  return INVALID_REGNUM;
4532}
4533
4534/* Return 1 if we need to save REGNO.  */
4535static int
4536ix86_save_reg (unsigned int regno, int maybe_eh_return)
4537{
4538  if (pic_offset_table_rtx
4539      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4540      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4541	  || current_function_profile
4542	  || current_function_calls_eh_return
4543	  || current_function_uses_const_pool))
4544    {
4545      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4546	return 0;
4547      return 1;
4548    }
4549
4550  if (current_function_calls_eh_return && maybe_eh_return)
4551    {
4552      unsigned i;
4553      for (i = 0; ; i++)
4554	{
4555	  unsigned test = EH_RETURN_DATA_REGNO (i);
4556	  if (test == INVALID_REGNUM)
4557	    break;
4558	  if (test == regno)
4559	    return 1;
4560	}
4561    }
4562
4563  if (cfun->machine->force_align_arg_pointer
4564      && regno == REGNO (cfun->machine->force_align_arg_pointer))
4565    return 1;
4566
4567  return (regs_ever_live[regno]
4568	  && !call_used_regs[regno]
4569	  && !fixed_regs[regno]
4570	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4571}
4572
4573/* Return number of registers to be saved on the stack.  */
4574
4575static int
4576ix86_nsaved_regs (void)
4577{
4578  int nregs = 0;
4579  int regno;
4580
4581  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4582    if (ix86_save_reg (regno, true))
4583      nregs++;
4584  return nregs;
4585}
4586
4587/* Return the offset between two registers, one to be eliminated, and the other
4588   its replacement, at the start of a routine.  */
4589
4590HOST_WIDE_INT
4591ix86_initial_elimination_offset (int from, int to)
4592{
4593  struct ix86_frame frame;
4594  ix86_compute_frame_layout (&frame);
4595
4596  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4597    return frame.hard_frame_pointer_offset;
4598  else if (from == FRAME_POINTER_REGNUM
4599	   && to == HARD_FRAME_POINTER_REGNUM)
4600    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
4601  else
4602    {
4603      gcc_assert (to == STACK_POINTER_REGNUM);
4604
4605      if (from == ARG_POINTER_REGNUM)
4606	return frame.stack_pointer_offset;
4607
4608      gcc_assert (from == FRAME_POINTER_REGNUM);
4609      return frame.stack_pointer_offset - frame.frame_pointer_offset;
4610    }
4611}
4612
4613/* Fill the structure FRAME with information about the frame of the currently compiled function.  */
4614
4615static void
4616ix86_compute_frame_layout (struct ix86_frame *frame)
4617{
4618  HOST_WIDE_INT total_size;
4619  unsigned int stack_alignment_needed;
4620  HOST_WIDE_INT offset;
4621  unsigned int preferred_alignment;
4622  HOST_WIDE_INT size = get_frame_size ();
4623
4624  frame->nregs = ix86_nsaved_regs ();
4625  total_size = size;
4626
4627  stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
4628  preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4629
4630  /* During reload iteration the number of registers saved can change.
4631     Recompute the value as needed.  Do not recompute when the number of
4632     registers didn't change, as reload makes multiple calls to the function
4633     and does not expect the decision to change within a single iteration.  */
4634  if (!optimize_size
4635      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
4636    {
4637      int count = frame->nregs;
4638
4639      cfun->machine->use_fast_prologue_epilogue_nregs = count;
4640      /* The fast prologue uses moves instead of pushes to save registers.  This
4641         is significantly longer, but also executes faster, as modern hardware
4642         can execute the moves in parallel but can't do that for push/pop.
4643
4644	 Be careful about choosing which prologue to emit:  when the function takes
4645	 many instructions to execute, we may as well use the slow version; the
4646	 same holds when the function is known to be outside a hot spot (known
4647	 only with profile feedback).  Weight the size of the function by the number
4648	 of registers to save, as it is cheap to use one or two push instructions
4649	 but very slow to use many of them.  */
4650      if (count)
4651	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
4652      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
4653	  || (flag_branch_probabilities
4654	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
4655        cfun->machine->use_fast_prologue_epilogue = false;
4656      else
4657        cfun->machine->use_fast_prologue_epilogue
4658	   = !expensive_function_p (count);
4659    }
4660  if (TARGET_PROLOGUE_USING_MOVE
4661      && cfun->machine->use_fast_prologue_epilogue)
4662    frame->save_regs_using_mov = true;
4663  else
4664    frame->save_regs_using_mov = false;
4665
4666
4667  /* Skip return address and saved base pointer.  */
4668  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4669
4670  frame->hard_frame_pointer_offset = offset;
4671
4672  /* Do some sanity checking of stack_alignment_needed and
4673     preferred_alignment, since the i386 port is the only one using these
4674     features, which may break easily.  */
4675
4676  gcc_assert (!size || stack_alignment_needed);
4677  gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
4678  gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4679  gcc_assert (stack_alignment_needed
4680	      <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
4681
4682  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4683    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
4684
4685  /* Register save area */
4686  offset += frame->nregs * UNITS_PER_WORD;
4687
4688  /* Va-arg area */
4689  if (ix86_save_varrargs_registers)
4690    {
4691      offset += X86_64_VARARGS_SIZE;
4692      frame->va_arg_size = X86_64_VARARGS_SIZE;
4693    }
4694  else
4695    frame->va_arg_size = 0;
4696
4697  /* Align start of frame for local function.  */
4698  frame->padding1 = ((offset + stack_alignment_needed - 1)
4699		     & -stack_alignment_needed) - offset;
4700
4701  offset += frame->padding1;
4702
4703  /* Frame pointer points here.  */
4704  frame->frame_pointer_offset = offset;
4705
4706  offset += size;
4707
4708  /* Add the outgoing arguments area.  It can be skipped if we eliminated
4709     all the function calls as dead code.
4710     Skipping is, however, impossible when the function calls alloca, as the
4711     alloca expander assumes that the last current_function_outgoing_args_size
4712     bytes of the stack frame are unused.  */
4713  if (ACCUMULATE_OUTGOING_ARGS
4714      && (!current_function_is_leaf || current_function_calls_alloca))
4715    {
4716      offset += current_function_outgoing_args_size;
4717      frame->outgoing_arguments_size = current_function_outgoing_args_size;
4718    }
4719  else
4720    frame->outgoing_arguments_size = 0;
4721
4722  /* Align stack boundary.  Only needed if we're calling another function
4723     or using alloca.  */
4724  if (!current_function_is_leaf || current_function_calls_alloca)
4725    frame->padding2 = ((offset + preferred_alignment - 1)
4726		       & -preferred_alignment) - offset;
4727  else
4728    frame->padding2 = 0;
4729
4730  offset += frame->padding2;
4731
4732  /* We've reached end of stack frame.  */
4733  frame->stack_pointer_offset = offset;
4734
4735  /* Size prologue needs to allocate.  */
4736  frame->to_allocate =
4737    (size + frame->padding1 + frame->padding2
4738     + frame->outgoing_arguments_size + frame->va_arg_size);
4739
4740  if ((!frame->to_allocate && frame->nregs <= 1)
4741      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
4742    frame->save_regs_using_mov = false;
4743
4744  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
4745      && current_function_is_leaf)
4746    {
4747      frame->red_zone_size = frame->to_allocate;
4748      if (frame->save_regs_using_mov)
4749	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
4750      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4751	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4752    }
4753  else
4754    frame->red_zone_size = 0;
4755  frame->to_allocate -= frame->red_zone_size;
4756  frame->stack_pointer_offset -= frame->red_zone_size;
4757#if 0
4758  fprintf (stderr, "nregs: %i\n", frame->nregs);
4759  fprintf (stderr, "size: %i\n", size);
4760  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4761  fprintf (stderr, "padding1: %i\n", frame->padding1);
4762  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4763  fprintf (stderr, "padding2: %i\n", frame->padding2);
4764  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
4765  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4766  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4767  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4768	   frame->hard_frame_pointer_offset);
4769  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4770#endif
4771}
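
/* The resulting layout, from higher towards lower addresses (a sketch; on
   64-bit leaf functions part of this may be placed in the red zone instead):

       return address
       saved frame pointer (if frame_pointer_needed)
           <- hard_frame_pointer_offset
       saved registers (nregs words)
       va_arg register save area (if used)
       padding1 (align to stack_alignment_needed)
           <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing argument area (if ACCUMULATE_OUTGOING_ARGS)
       padding2 (align to preferred_alignment)
           <- stack_pointer_offset  */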
4772
4773/* Emit code to save registers in the prologue.  */
4774
4775static void
4776ix86_emit_save_regs (void)
4777{
4778  unsigned int regno;
4779  rtx insn;
4780
4781  for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
4782    if (ix86_save_reg (regno, true))
4783      {
4784	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
4785	RTX_FRAME_RELATED_P (insn) = 1;
4786      }
4787}
4788
4789/* Emit code to save registers using MOV insns.  The first register
4790   is saved at POINTER + OFFSET.  */
4791static void
4792ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
4793{
4794  unsigned int regno;
4795  rtx insn;
4796
4797  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4798    if (ix86_save_reg (regno, true))
4799      {
4800	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4801					       Pmode, offset),
4802			       gen_rtx_REG (Pmode, regno));
4803	RTX_FRAME_RELATED_P (insn) = 1;
4804	offset += UNITS_PER_WORD;
4805      }
4806}
4807
4808/* Expand prologue or epilogue stack adjustment.
4809   The pattern exists to put a dependency on all ebp-based memory accesses.
4810   STYLE should be negative if instructions should be marked as frame related,
4811   zero if %r11 register is live and cannot be freely used and positive
4812   otherwise.  */
4813
4814static void
4815pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
4816{
4817  rtx insn;
4818
4819  if (! TARGET_64BIT)
4820    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
4821  else if (x86_64_immediate_operand (offset, DImode))
4822    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
4823  else
4824    {
4825      rtx r11;
      /* r11 is used by the indirect sibcall return as well; it is set
         before the epilogue and used after it.  ATM an indirect sibcall
         shouldn't be used together with huge frame sizes in one function
         because of the frame_size check in sibcall.c.  */
4830      gcc_assert (style);
4831      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
4832      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
4833      if (style < 0)
4834	RTX_FRAME_RELATED_P (insn) = 1;
4835      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
4836							       offset));
4837    }
4838  if (style < 0)
4839    RTX_FRAME_RELATED_P (insn) = 1;
4840}
4841
4842/* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
4843
4844static rtx
4845ix86_internal_arg_pointer (void)
4846{
4847  if (FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
4848      && DECL_NAME (current_function_decl)
4849      && MAIN_NAME_P (DECL_NAME (current_function_decl))
4850      && DECL_FILE_SCOPE_P (current_function_decl))
4851    {
4852      cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
4853      return copy_to_reg (cfun->machine->force_align_arg_pointer);
4854    }
4855  else
4856    return virtual_incoming_args_rtx;
4857}
4858
4859/* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
4860   This is called from dwarf2out.c to emit call frame instructions
4861   for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
4862static void
4863ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
4864{
4865  rtx unspec = SET_SRC (pattern);
4866  gcc_assert (GET_CODE (unspec) == UNSPEC);
4867
4868  switch (index)
4869    {
4870    case UNSPEC_REG_SAVE:
4871      dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
4872			      SET_DEST (pattern));
4873      break;
4874    case UNSPEC_DEF_CFA:
4875      dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
4876			 INTVAL (XVECEXP (unspec, 0, 0)));
4877      break;
4878    default:
4879      gcc_unreachable ();
4880    }
4881}
4882
4883/* Expand the prologue into a bunch of separate insns.  */
4884
4885void
4886ix86_expand_prologue (void)
4887{
4888  rtx insn;
4889  bool pic_reg_used;
4890  struct ix86_frame frame;
4891  HOST_WIDE_INT allocate;
4892
4893  ix86_compute_frame_layout (&frame);
4894
4895  if (cfun->machine->force_align_arg_pointer)
4896    {
4897      rtx x, y;
4898
4899      /* Grab the argument pointer.  */
4900      x = plus_constant (stack_pointer_rtx, 4);
4901      y = cfun->machine->force_align_arg_pointer;
4902      insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
4903      RTX_FRAME_RELATED_P (insn) = 1;
4904
      /* The unwind info consists of two parts: install the fafp as the cfa,
         and record the fafp as the "save register" of the stack pointer.
         The latter is there so that the unwinder can see where it should
         restore the stack pointer across the AND insn below.  */
4909      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
4910      x = gen_rtx_SET (VOIDmode, y, x);
4911      RTX_FRAME_RELATED_P (x) = 1;
4912      y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
4913			  UNSPEC_REG_SAVE);
4914      y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
4915      RTX_FRAME_RELATED_P (y) = 1;
4916      x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
4917      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
4918      REG_NOTES (insn) = x;
4919
4920      /* Align the stack.  */
4921      emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
4922			     GEN_INT (-16)));
4923
      /* And here we cheat like madmen with the unwind info.  We force the
         cfa register back to sp+4, which is exactly what it was at the
         start of the function.  Re-pushing the return address results in
         the return address being at the same spot relative to the cfa,
         and thus is correct wrt the unwind info.  */
4929      x = cfun->machine->force_align_arg_pointer;
4930      x = gen_frame_mem (Pmode, plus_constant (x, -4));
4931      insn = emit_insn (gen_push (x));
4932      RTX_FRAME_RELATED_P (insn) = 1;
4933
4934      x = GEN_INT (4);
4935      x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
4936      x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
4937      x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
4938      REG_NOTES (insn) = x;
4939    }
4940
4941  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
4942     slower on all targets.  Also sdb doesn't like it.  */
4943
4944  if (frame_pointer_needed)
4945    {
4946      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
4947      RTX_FRAME_RELATED_P (insn) = 1;
4948
4949      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
4950      RTX_FRAME_RELATED_P (insn) = 1;
4951    }
4952
4953  allocate = frame.to_allocate;
4954
4955  if (!frame.save_regs_using_mov)
4956    ix86_emit_save_regs ();
4957  else
4958    allocate += frame.nregs * UNITS_PER_WORD;
4959
  /* When using the red zone we may start saving registers before allocating
     the stack frame, saving one cycle of the prologue.  */
4962  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
4963    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
4964				   : stack_pointer_rtx,
4965				   -frame.nregs * UNITS_PER_WORD);
4966
4967  if (allocate == 0)
4968    ;
4969  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
4970    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
4971			       GEN_INT (-allocate), -1);
4972  else
4973    {
4974      /* Only valid for Win32.  */
4975      rtx eax = gen_rtx_REG (SImode, 0);
4976      bool eax_live = ix86_eax_live_at_start_p ();
4977      rtx t;
4978
4979      gcc_assert (!TARGET_64BIT);
4980
4981      if (eax_live)
4982	{
4983	  emit_insn (gen_push (eax));
4984	  allocate -= 4;
4985	}
4986
4987      emit_move_insn (eax, GEN_INT (allocate));
4988
4989      insn = emit_insn (gen_allocate_stack_worker (eax));
4990      RTX_FRAME_RELATED_P (insn) = 1;
4991      t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
4992      t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
4993      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
4994					    t, REG_NOTES (insn));
4995
4996      if (eax_live)
4997	{
4998	  if (frame_pointer_needed)
4999	    t = plus_constant (hard_frame_pointer_rtx,
5000			       allocate
5001			       - frame.to_allocate
5002			       - frame.nregs * UNITS_PER_WORD);
5003	  else
5004	    t = plus_constant (stack_pointer_rtx, allocate);
5005	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5006	}
5007    }
5008
5009  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5010    {
5011      if (!frame_pointer_needed || !frame.to_allocate)
5012        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5013      else
5014        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5015				       -frame.nregs * UNITS_PER_WORD);
5016    }
5017
5018  pic_reg_used = false;
5019  if (pic_offset_table_rtx
5020      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5021	  || current_function_profile))
5022    {
5023      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5024
5025      if (alt_pic_reg_used != INVALID_REGNUM)
5026	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5027
5028      pic_reg_used = true;
5029    }
5030
5031  if (pic_reg_used)
5032    {
5033      if (TARGET_64BIT)
5034        insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5035      else
5036        insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5037
      /* Even with accurate pre-reload life analysis, we can wind up
         deleting all references to the pic register after reload.
         Consider the case where cross-jumping unifies two sides of a
         branch controlled by a comparison vs. the only read from a global.
         In that case, allow the set_got to be deleted, though we're too
         late to do anything about the ebx save in the prologue.  */
5044      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5045    }
5046
  /* Prevent function calls from being scheduled before the call to mcount.
     In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5049  if (current_function_profile)
5050    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5051}
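
/* For illustration only: in the common 32-bit case with a frame pointer,
   the code above boils down to roughly

        push %ebp
        mov  %esp, %ebp
        <pushes of the call-saved registers>
        sub  $N, %esp

   or, when frame.save_regs_using_mov is set, to a single larger sub followed
   by mov stores of those registers; the PIC register, when needed, is set up
   last.  */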
5052
5053/* Emit code to restore saved registers using MOV insns.  First register
5054   is restored from POINTER + OFFSET.  */
5055static void
5056ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5057				  int maybe_eh_return)
5058{
5059  int regno;
5060  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5061
5062  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5063    if (ix86_save_reg (regno, maybe_eh_return))
5064      {
        /* Ensure that adjust_address won't be forced to produce a pointer
           outside the range allowed by the x86-64 instruction set.  */
5067	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5068	  {
5069	    rtx r11;
5070
5071	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5072	    emit_move_insn (r11, GEN_INT (offset));
5073	    emit_insn (gen_adddi3 (r11, r11, pointer));
5074	    base_address = gen_rtx_MEM (Pmode, r11);
5075	    offset = 0;
5076	  }
5077	emit_move_insn (gen_rtx_REG (Pmode, regno),
5078			adjust_address (base_address, Pmode, offset));
5079	offset += UNITS_PER_WORD;
5080      }
5081}
5082
5083/* Restore function stack, frame, and registers.  */
5084
5085void
5086ix86_expand_epilogue (int style)
5087{
5088  int regno;
5089  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5090  struct ix86_frame frame;
5091  HOST_WIDE_INT offset;
5092
5093  ix86_compute_frame_layout (&frame);
5094
5095  /* Calculate start of saved registers relative to ebp.  Special care
5096     must be taken for the normal return case of a function using
5097     eh_return: the eax and edx registers are marked as saved, but not
5098     restored along this path.  */
5099  offset = frame.nregs;
5100  if (current_function_calls_eh_return && style != 2)
5101    offset -= 2;
5102  offset *= -UNITS_PER_WORD;
5103
  /* If we're only restoring one register and sp is not valid then
     use a move instruction to restore the register, since it's
     less work than reloading sp and popping the register.

     The default code results in a stack adjustment using an add/lea
     instruction, while this code results in a LEAVE instruction (or discrete
     equivalent), so it is profitable in some other cases as well, especially
     when there are no registers to restore.  We also use this code when
     TARGET_USE_LEAVE is set and there is exactly one register to pop.  This
     heuristic may need some tuning in the future.  */
5114  if ((!sp_valid && frame.nregs <= 1)
5115      || (TARGET_EPILOGUE_USING_MOVE
5116	  && cfun->machine->use_fast_prologue_epilogue
5117	  && (frame.nregs > 1 || frame.to_allocate))
5118      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5119      || (frame_pointer_needed && TARGET_USE_LEAVE
5120	  && cfun->machine->use_fast_prologue_epilogue
5121	  && frame.nregs == 1)
5122      || current_function_calls_eh_return)
5123    {
      /* Restore registers.  We can use ebp or esp to address the memory
         locations.  If both are available, default to ebp, since offsets
         are known to be small.  The only exception is when esp points
         directly to the end of the block of saved registers, where we may
         simplify the addressing mode.  */
5129
5130      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5131	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5132					  frame.to_allocate, style == 2);
5133      else
5134	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5135					  offset, style == 2);
5136
5137      /* eh_return epilogues need %ecx added to the stack pointer.  */
5138      if (style == 2)
5139	{
5140	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5141
5142	  if (frame_pointer_needed)
5143	    {
5144	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5145	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5146	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5147
5148	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5149	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5150
5151	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5152					 const0_rtx, style);
5153	    }
5154	  else
5155	    {
5156	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5157	      tmp = plus_constant (tmp, (frame.to_allocate
5158                                         + frame.nregs * UNITS_PER_WORD));
5159	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5160	    }
5161	}
5162      else if (!frame_pointer_needed)
5163	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5164				   GEN_INT (frame.to_allocate
5165					    + frame.nregs * UNITS_PER_WORD),
5166				   style);
5167      /* If not an i386, mov & pop is faster than "leave".  */
5168      else if (TARGET_USE_LEAVE || optimize_size
5169	       || !cfun->machine->use_fast_prologue_epilogue)
5170	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5171      else
5172	{
5173	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5174				     hard_frame_pointer_rtx,
5175				     const0_rtx, style);
5176	  if (TARGET_64BIT)
5177	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5178	  else
5179	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5180	}
5181    }
5182  else
5183    {
5184      /* First step is to deallocate the stack frame so that we can
5185	 pop the registers.  */
5186      if (!sp_valid)
5187	{
5188	  gcc_assert (frame_pointer_needed);
5189	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5190				     hard_frame_pointer_rtx,
5191				     GEN_INT (offset), style);
5192	}
5193      else if (frame.to_allocate)
5194	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5195				   GEN_INT (frame.to_allocate), style);
5196
5197      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5198	if (ix86_save_reg (regno, false))
5199	  {
5200	    if (TARGET_64BIT)
5201	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5202	    else
5203	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5204	  }
5205      if (frame_pointer_needed)
5206	{
5207	  /* Leave results in shorter dependency chains on CPUs that are
5208	     able to grok it fast.  */
5209	  if (TARGET_USE_LEAVE)
5210	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5211	  else if (TARGET_64BIT)
5212	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5213	  else
5214	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5215	}
5216    }
5217
5218  if (cfun->machine->force_align_arg_pointer)
5219    {
5220      emit_insn (gen_addsi3 (stack_pointer_rtx,
5221			     cfun->machine->force_align_arg_pointer,
5222			     GEN_INT (-4)));
5223    }
5224
5225  /* Sibcall epilogues don't want a return instruction.  */
5226  if (style == 0)
5227    return;
5228
5229  if (current_function_pops_args && current_function_args_size)
5230    {
5231      rtx popc = GEN_INT (current_function_pops_args);
5232
      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
         return address, do an explicit add, and jump indirectly to the
         caller.  */
5236
5237      if (current_function_pops_args >= 65536)
5238	{
5239	  rtx ecx = gen_rtx_REG (SImode, 2);
5240
5241	  /* There is no "pascal" calling convention in 64bit ABI.  */
5242	  gcc_assert (!TARGET_64BIT);
5243
5244	  emit_insn (gen_popsi1 (ecx));
5245	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5246	  emit_jump_insn (gen_return_indirect_internal (ecx));
5247	}
5248      else
5249	emit_jump_insn (gen_return_pop_internal (popc));
5250    }
5251  else
5252    emit_jump_insn (gen_return_internal ());
5253}
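
/* Note that the return_pop_internal path above emits a `ret' with an
   immediate pop count; the immediate field is 16 bits wide, which is where
   the 64K limit checked above comes from.  This is what callee-pop
   conventions such as stdcall rely on.  */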
5254
5255/* Reset from the function's potential modifications.  */
5256
5257static void
5258ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5259			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5260{
5261  if (pic_offset_table_rtx)
5262    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5263}
5264
/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return 0 if the structure of the address is
   grossly off.  Return -1 if the address contains ASHIFT, so it is not
   strictly valid, but is still used for computing the length of a lea
   instruction.  */
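
/* For example, the canonical address

     (plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 8))

   decomposes into index A with scale 4, base B and displacement 8, i.e. the
   memory operand that prints as `8(%B,%A,4)' in AT&T syntax (A and B are
   placeholder register names).  */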
5269
5270int
5271ix86_decompose_address (rtx addr, struct ix86_address *out)
5272{
5273  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
5274  rtx base_reg, index_reg;
5275  HOST_WIDE_INT scale = 1;
5276  rtx scale_rtx = NULL_RTX;
5277  int retval = 1;
5278  enum ix86_address_seg seg = SEG_DEFAULT;
5279
5280  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5281    base = addr;
5282  else if (GET_CODE (addr) == PLUS)
5283    {
5284      rtx addends[4], op;
5285      int n = 0, i;
5286
5287      op = addr;
5288      do
5289	{
5290	  if (n >= 4)
5291	    return 0;
5292	  addends[n++] = XEXP (op, 1);
5293	  op = XEXP (op, 0);
5294	}
5295      while (GET_CODE (op) == PLUS);
5296      if (n >= 4)
5297	return 0;
5298      addends[n] = op;
5299
5300      for (i = n; i >= 0; --i)
5301	{
5302	  op = addends[i];
5303	  switch (GET_CODE (op))
5304	    {
5305	    case MULT:
5306	      if (index)
5307		return 0;
5308	      index = XEXP (op, 0);
5309	      scale_rtx = XEXP (op, 1);
5310	      break;
5311
5312	    case UNSPEC:
5313	      if (XINT (op, 1) == UNSPEC_TP
5314	          && TARGET_TLS_DIRECT_SEG_REFS
5315	          && seg == SEG_DEFAULT)
5316		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5317	      else
5318		return 0;
5319	      break;
5320
5321	    case REG:
5322	    case SUBREG:
5323	      if (!base)
5324		base = op;
5325	      else if (!index)
5326		index = op;
5327	      else
5328		return 0;
5329	      break;
5330
5331	    case CONST:
5332	    case CONST_INT:
5333	    case SYMBOL_REF:
5334	    case LABEL_REF:
5335	      if (disp)
5336		return 0;
5337	      disp = op;
5338	      break;
5339
5340	    default:
5341	      return 0;
5342	    }
5343	}
5344    }
5345  else if (GET_CODE (addr) == MULT)
5346    {
5347      index = XEXP (addr, 0);		/* index*scale */
5348      scale_rtx = XEXP (addr, 1);
5349    }
5350  else if (GET_CODE (addr) == ASHIFT)
5351    {
5352      rtx tmp;
5353
5354      /* We're called for lea too, which implements ashift on occasion.  */
5355      index = XEXP (addr, 0);
5356      tmp = XEXP (addr, 1);
5357      if (GET_CODE (tmp) != CONST_INT)
5358	return 0;
5359      scale = INTVAL (tmp);
5360      if ((unsigned HOST_WIDE_INT) scale > 3)
5361	return 0;
5362      scale = 1 << scale;
5363      retval = -1;
5364    }
5365  else
5366    disp = addr;			/* displacement */
5367
5368  /* Extract the integral value of scale.  */
5369  if (scale_rtx)
5370    {
5371      if (GET_CODE (scale_rtx) != CONST_INT)
5372	return 0;
5373      scale = INTVAL (scale_rtx);
5374    }
5375
5376  base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
5377  index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
5378
  /* Allow arg pointer and stack pointer as index if there is no scaling.  */
5380  if (base_reg && index_reg && scale == 1
5381      && (index_reg == arg_pointer_rtx
5382	  || index_reg == frame_pointer_rtx
5383	  || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
5384    {
5385      rtx tmp;
5386      tmp = base, base = index, index = tmp;
5387      tmp = base_reg, base_reg = index_reg, index_reg = tmp;
5388    }
5389
5390  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
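  /* (In the ModR/M encoding, mod=00 with a base of 101 (%ebp) actually means
     disp32 with no base at all, which is why a zero displacement has to be
     supplied here.)  */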
5391  if ((base_reg == hard_frame_pointer_rtx
5392       || base_reg == frame_pointer_rtx
5393       || base_reg == arg_pointer_rtx) && !disp)
5394    disp = const0_rtx;
5395
  /* Special case: on K6, [%esi] causes the instruction to be vector
     decoded.  Avoid this by transforming it to [%esi+0].  */
5398  if (ix86_tune == PROCESSOR_K6 && !optimize_size
5399      && base_reg && !index_reg && !disp
5400      && REG_P (base_reg)
5401      && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
5402    disp = const0_rtx;
5403
5404  /* Special case: encode reg+reg instead of reg*2.  */
  if (!base && index && scale == 2)
5406    base = index, base_reg = index_reg, scale = 1;
5407
5408  /* Special case: scaling cannot be encoded without base or displacement.  */
5409  if (!base && !disp && index && scale != 1)
5410    disp = const0_rtx;
5411
5412  out->base = base;
5413  out->index = index;
5414  out->disp = disp;
5415  out->scale = scale;
5416  out->seg = seg;
5417
5418  return retval;
5419}
5420
/* Return the cost of the memory address X.
   For i386, it is better to use a complex address than let gcc copy
   the address into a reg and make a new pseudo.  But not if the address
   requires two regs - that would mean more pseudos with longer
   lifetimes.  */
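
/* As a rough illustration of the scoring below: before reload, a bare
   symbolic displacement such as `foo' ends up with cost 0, an address made
   of a single pseudo register with cost 2, and an address combining two
   distinct pseudo registers with cost 3, so the cheaper (lower) addresses
   are the ones needing fewer new registers.  */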
5426static int
5427ix86_address_cost (rtx x)
5428{
5429  struct ix86_address parts;
5430  int cost = 1;
5431  int ok = ix86_decompose_address (x, &parts);
5432
5433  gcc_assert (ok);
5434
5435  if (parts.base && GET_CODE (parts.base) == SUBREG)
5436    parts.base = SUBREG_REG (parts.base);
5437  if (parts.index && GET_CODE (parts.index) == SUBREG)
5438    parts.index = SUBREG_REG (parts.index);
5439
5440  /* More complex memory references are better.  */
5441  if (parts.disp && parts.disp != const0_rtx)
5442    cost--;
5443  if (parts.seg != SEG_DEFAULT)
5444    cost--;
5445
5446  /* Attempt to minimize number of registers in the address.  */
5447  if ((parts.base
5448       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5449      || (parts.index
5450	  && (!REG_P (parts.index)
5451	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5452    cost++;
5453
5454  if (parts.base
5455      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5456      && parts.index
5457      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5458      && parts.base != parts.index)
5459    cost++;
5460
  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
     since its predecode logic can't detect the length of such instructions
     and decoding degenerates to vector decoding.  Increase the cost of such
     addresses here.  The penalty is minimally 2 cycles.  It may be
     worthwhile to split such addresses or even refuse them altogether.

     The following addressing modes are affected:
      [base+scale*index]
      [scale*index+disp]
      [base+index]

     The first and last cases may be avoidable by explicitly coding the zero
     displacement in the memory address, but I don't have an AMD-K6 machine
     handy to check this theory.  */
5475
5476  if (TARGET_K6
5477      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5478	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5479	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5480    cost += 10;
5481
5482  return cost;
5483}
5484
5485/* If X is a machine specific address (i.e. a symbol or label being
5486   referenced as a displacement from the GOT implemented using an
5487   UNSPEC), then return the base term.  Otherwise return X.  */
5488
5489rtx
5490ix86_find_base_term (rtx x)
5491{
5492  rtx term;
5493
5494  if (TARGET_64BIT)
5495    {
5496      if (GET_CODE (x) != CONST)
5497	return x;
5498      term = XEXP (x, 0);
5499      if (GET_CODE (term) == PLUS
5500	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5501	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5502	term = XEXP (term, 0);
5503      if (GET_CODE (term) != UNSPEC
5504	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5505	return x;
5506
5507      term = XVECEXP (term, 0, 0);
5508
5509      if (GET_CODE (term) != SYMBOL_REF
5510	  && GET_CODE (term) != LABEL_REF)
5511	return x;
5512
5513      return term;
5514    }
5515
5516  term = ix86_delegitimize_address (x);
5517
5518  if (GET_CODE (term) != SYMBOL_REF
5519      && GET_CODE (term) != LABEL_REF)
5520    return x;
5521
5522  return term;
5523}
5524
/* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
   this is used to form addresses of local data when -fPIC is in
   use.  */
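
/* Concretely, the shape accepted below is

     (minus (symbol_ref "_foo") (symbol_ref "<pic base>"))

   i.e. the offset of a symbol (here the placeholder `_foo') from the
   function's picbase label.  */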
5528
5529static bool
5530darwin_local_data_pic (rtx disp)
5531{
5532  if (GET_CODE (disp) == MINUS)
5533    {
5534      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5535          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5536        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5537          {
5538            const char *sym_name = XSTR (XEXP (disp, 1), 0);
5539            if (! strcmp (sym_name, "<pic base>"))
5540              return true;
5541          }
5542    }
5543
5544  return false;
5545}
5546
5547/* Determine if a given RTX is a valid constant.  We already know this
5548   satisfies CONSTANT_P.  */
5549
5550bool
5551legitimate_constant_p (rtx x)
5552{
5553  switch (GET_CODE (x))
5554    {
5555    case CONST:
5556      x = XEXP (x, 0);
5557
5558      if (GET_CODE (x) == PLUS)
5559	{
5560	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5561	    return false;
5562	  x = XEXP (x, 0);
5563	}
5564
5565      if (TARGET_MACHO && darwin_local_data_pic (x))
5566	return true;
5567
5568      /* Only some unspecs are valid as "constants".  */
5569      if (GET_CODE (x) == UNSPEC)
5570	switch (XINT (x, 1))
5571	  {
5572	  case UNSPEC_GOTOFF:
5573	    return TARGET_64BIT;
5574	  case UNSPEC_TPOFF:
5575	  case UNSPEC_NTPOFF:
5576	    x = XVECEXP (x, 0, 0);
5577	    return (GET_CODE (x) == SYMBOL_REF
5578		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5579	  case UNSPEC_DTPOFF:
5580	    x = XVECEXP (x, 0, 0);
5581	    return (GET_CODE (x) == SYMBOL_REF
5582		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
5583	  default:
5584	    return false;
5585	  }
5586
5587      /* We must have drilled down to a symbol.  */
5588      if (GET_CODE (x) == LABEL_REF)
5589	return true;
5590      if (GET_CODE (x) != SYMBOL_REF)
5591	return false;
5592      /* FALLTHRU */
5593
5594    case SYMBOL_REF:
5595      /* TLS symbols are never valid.  */
5596      if (SYMBOL_REF_TLS_MODEL (x))
5597	return false;
5598      break;
5599
5600    case CONST_DOUBLE:
5601      if (GET_MODE (x) == TImode
5602	  && x != CONST0_RTX (TImode)
5603          && !TARGET_64BIT)
5604	return false;
5605      break;
5606
5607    case CONST_VECTOR:
5608      if (x == CONST0_RTX (GET_MODE (x)))
5609	return true;
5610      return false;
5611
5612    default:
5613      break;
5614    }
5615
5616  /* Otherwise we handle everything else in the move patterns.  */
5617  return true;
5618}
5619
5620/* Determine if it's legal to put X into the constant pool.  This
5621   is not possible for the address of thread-local symbols, which
5622   is checked above.  */
5623
5624static bool
5625ix86_cannot_force_const_mem (rtx x)
5626{
5627  /* We can always put integral constants and vectors in memory.  */
5628  switch (GET_CODE (x))
5629    {
5630    case CONST_INT:
5631    case CONST_DOUBLE:
5632    case CONST_VECTOR:
5633      return false;
5634
5635    default:
5636      break;
5637    }
5638  return !legitimate_constant_p (x);
5639}
5640
5641/* Determine if a given RTX is a valid constant address.  */
5642
5643bool
5644constant_address_p (rtx x)
5645{
5646  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5647}
5648
5649/* Nonzero if the constant value X is a legitimate general operand
5650   when generating PIC code.  It is given that flag_pic is on and
5651   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5652
5653bool
5654legitimate_pic_operand_p (rtx x)
5655{
5656  rtx inner;
5657
5658  switch (GET_CODE (x))
5659    {
5660    case CONST:
5661      inner = XEXP (x, 0);
5662      if (GET_CODE (inner) == PLUS
5663	  && GET_CODE (XEXP (inner, 1)) == CONST_INT)
5664	inner = XEXP (inner, 0);
5665
5666      /* Only some unspecs are valid as "constants".  */
5667      if (GET_CODE (inner) == UNSPEC)
5668	switch (XINT (inner, 1))
5669	  {
5670	  case UNSPEC_GOTOFF:
5671	    return TARGET_64BIT;
5672	  case UNSPEC_TPOFF:
5673	    x = XVECEXP (inner, 0, 0);
5674	    return (GET_CODE (x) == SYMBOL_REF
5675		    && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
5676	  default:
5677	    return false;
5678	  }
5679      /* FALLTHRU */
5680
5681    case SYMBOL_REF:
5682    case LABEL_REF:
5683      return legitimate_pic_address_disp_p (x);
5684
5685    default:
5686      return true;
5687    }
5688}
5689
5690/* Determine if a given CONST RTX is a valid memory displacement
5691   in PIC mode.  */
5692
5693int
5694legitimate_pic_address_disp_p (rtx disp)
5695{
5696  bool saw_plus;
5697
5698  /* In 64bit mode we can allow direct addresses of symbols and labels
5699     when they are not dynamic symbols.  */
5700  if (TARGET_64BIT)
5701    {
5702      rtx op0 = disp, op1;
5703
5704      switch (GET_CODE (disp))
5705	{
5706	case LABEL_REF:
5707	  return true;
5708
5709	case CONST:
5710	  if (GET_CODE (XEXP (disp, 0)) != PLUS)
5711	    break;
5712	  op0 = XEXP (XEXP (disp, 0), 0);
5713	  op1 = XEXP (XEXP (disp, 0), 1);
5714	  if (GET_CODE (op1) != CONST_INT
5715	      || INTVAL (op1) >= 16*1024*1024
5716	      || INTVAL (op1) < -16*1024*1024)
5717	    break;
5718	  if (GET_CODE (op0) == LABEL_REF)
5719	    return true;
5720	  if (GET_CODE (op0) != SYMBOL_REF)
5721	    break;
5722	  /* FALLTHRU */
5723
5724	case SYMBOL_REF:
5725	  /* TLS references should always be enclosed in UNSPEC.  */
5726	  if (SYMBOL_REF_TLS_MODEL (op0))
5727	    return false;
5728	  if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
5729	    return true;
5730	  break;
5731
5732	default:
5733	  break;
5734	}
5735    }
5736  if (GET_CODE (disp) != CONST)
5737    return 0;
5738  disp = XEXP (disp, 0);
5739
5740  if (TARGET_64BIT)
5741    {
      /* It is unsafe to allow PLUS expressions here; this limits the
         allowed distance of GOT references.  We should not need these
         anyway.  */
5744      if (GET_CODE (disp) != UNSPEC
5745	  || (XINT (disp, 1) != UNSPEC_GOTPCREL
5746	      && XINT (disp, 1) != UNSPEC_GOTOFF))
5747	return 0;
5748
5749      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5750	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5751	return 0;
5752      return 1;
5753    }
5754
5755  saw_plus = false;
5756  if (GET_CODE (disp) == PLUS)
5757    {
5758      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5759	return 0;
5760      disp = XEXP (disp, 0);
5761      saw_plus = true;
5762    }
5763
5764  if (TARGET_MACHO && darwin_local_data_pic (disp))
5765    return 1;
5766
5767  if (GET_CODE (disp) != UNSPEC)
5768    return 0;
5769
5770  switch (XINT (disp, 1))
5771    {
5772    case UNSPEC_GOT:
5773      if (saw_plus)
5774	return false;
5775      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5776    case UNSPEC_GOTOFF:
      /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
         While the ABI also specifies a 32bit relocation, we don't produce
         it in the small PIC model at all.  */
5780      if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
5781	   || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
5782	  && !TARGET_64BIT)
5783        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5784      return false;
5785    case UNSPEC_GOTTPOFF:
5786    case UNSPEC_GOTNTPOFF:
5787    case UNSPEC_INDNTPOFF:
5788      if (saw_plus)
5789	return false;
5790      disp = XVECEXP (disp, 0, 0);
5791      return (GET_CODE (disp) == SYMBOL_REF
5792	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
5793    case UNSPEC_NTPOFF:
5794      disp = XVECEXP (disp, 0, 0);
5795      return (GET_CODE (disp) == SYMBOL_REF
5796	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
5797    case UNSPEC_DTPOFF:
5798      disp = XVECEXP (disp, 0, 0);
5799      return (GET_CODE (disp) == SYMBOL_REF
5800	      && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
5801    }
5802
5803  return 0;
5804}
5805
5806/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5807   memory address for an instruction.  The MODE argument is the machine mode
5808   for the MEM expression that wants to use this address.
5809
   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
5811   convert common non-canonical forms to canonical form so that they will
5812   be recognized.  */
5813
5814int
5815legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
5816{
5817  struct ix86_address parts;
5818  rtx base, index, disp;
5819  HOST_WIDE_INT scale;
5820  const char *reason = NULL;
5821  rtx reason_rtx = NULL_RTX;
5822
5823  if (TARGET_DEBUG_ADDR)
5824    {
5825      fprintf (stderr,
5826	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5827	       GET_MODE_NAME (mode), strict);
5828      debug_rtx (addr);
5829    }
5830
5831  if (ix86_decompose_address (addr, &parts) <= 0)
5832    {
5833      reason = "decomposition failed";
5834      goto report_error;
5835    }
5836
5837  base = parts.base;
5838  index = parts.index;
5839  disp = parts.disp;
5840  scale = parts.scale;
5841
5842  /* Validate base register.
5843
5844     Don't allow SUBREG's that span more than a word here.  It can lead to spill
5845     failures when the base is one word out of a two word structure, which is
5846     represented internally as a DImode int.  */
5847
5848  if (base)
5849    {
5850      rtx reg;
5851      reason_rtx = base;
5852
5853      if (REG_P (base))
5854  	reg = base;
5855      else if (GET_CODE (base) == SUBREG
5856	       && REG_P (SUBREG_REG (base))
5857	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
5858		  <= UNITS_PER_WORD)
5859  	reg = SUBREG_REG (base);
5860      else
5861	{
5862	  reason = "base is not a register";
5863	  goto report_error;
5864	}
5865
5866      if (GET_MODE (base) != Pmode)
5867	{
5868	  reason = "base is not in Pmode";
5869	  goto report_error;
5870	}
5871
5872      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5873	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5874	{
5875	  reason = "base is not valid";
5876	  goto report_error;
5877	}
5878    }
5879
5880  /* Validate index register.
5881
5882     Don't allow SUBREG's that span more than a word here -- same as above.  */
5883
5884  if (index)
5885    {
5886      rtx reg;
5887      reason_rtx = index;
5888
5889      if (REG_P (index))
5890  	reg = index;
5891      else if (GET_CODE (index) == SUBREG
5892	       && REG_P (SUBREG_REG (index))
5893	       && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
5894		  <= UNITS_PER_WORD)
5895  	reg = SUBREG_REG (index);
5896      else
5897	{
5898	  reason = "index is not a register";
5899	  goto report_error;
5900	}
5901
5902      if (GET_MODE (index) != Pmode)
5903	{
5904	  reason = "index is not in Pmode";
5905	  goto report_error;
5906	}
5907
5908      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5909	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5910	{
5911	  reason = "index is not valid";
5912	  goto report_error;
5913	}
5914    }
5915
5916  /* Validate scale factor.  */
5917  if (scale != 1)
5918    {
5919      reason_rtx = GEN_INT (scale);
5920      if (!index)
5921	{
5922	  reason = "scale without index";
5923	  goto report_error;
5924	}
5925
5926      if (scale != 2 && scale != 4 && scale != 8)
5927	{
5928	  reason = "scale is not a valid multiplier";
5929	  goto report_error;
5930	}
5931    }
5932
5933  /* Validate displacement.  */
5934  if (disp)
5935    {
5936      reason_rtx = disp;
5937
5938      if (GET_CODE (disp) == CONST
5939	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5940	switch (XINT (XEXP (disp, 0), 1))
5941	  {
          /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit
             when used.  While the ABI also specifies 32bit relocations, we
             don't produce them at all and use IP relative addressing
             instead.  */
5945	  case UNSPEC_GOT:
5946	  case UNSPEC_GOTOFF:
5947	    gcc_assert (flag_pic);
5948	    if (!TARGET_64BIT)
5949	      goto is_legitimate_pic;
5950	    reason = "64bit address unspec";
5951	    goto report_error;
5952
5953	  case UNSPEC_GOTPCREL:
5954	    gcc_assert (flag_pic);
5955	    goto is_legitimate_pic;
5956
5957	  case UNSPEC_GOTTPOFF:
5958	  case UNSPEC_GOTNTPOFF:
5959	  case UNSPEC_INDNTPOFF:
5960	  case UNSPEC_NTPOFF:
5961	  case UNSPEC_DTPOFF:
5962	    break;
5963
5964	  default:
5965	    reason = "invalid address unspec";
5966	    goto report_error;
5967	  }
5968
5969      else if (flag_pic && (SYMBOLIC_CONST (disp)
5970#if TARGET_MACHO
5971			    && !machopic_operand_p (disp)
5972#endif
5973			    ))
5974	{
5975	is_legitimate_pic:
5976	  if (TARGET_64BIT && (index || base))
5977	    {
5978	      /* foo@dtpoff(%rX) is ok.  */
5979	      if (GET_CODE (disp) != CONST
5980		  || GET_CODE (XEXP (disp, 0)) != PLUS
5981		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
5982		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
5983		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
5984		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
5985		{
5986		  reason = "non-constant pic memory reference";
5987		  goto report_error;
5988		}
5989	    }
5990	  else if (! legitimate_pic_address_disp_p (disp))
5991	    {
5992	      reason = "displacement is an invalid pic construct";
5993	      goto report_error;
5994	    }
5995
5996          /* This code used to verify that a symbolic pic displacement
5997	     includes the pic_offset_table_rtx register.
5998
             While this is a good idea, unfortunately these constructs may
             be created by the "adds using lea" optimization for incorrect
             code like:
6002
6003	     int a;
6004	     int foo(int i)
6005	       {
6006	         return *(&a+i);
6007	       }
6008
             This code is nonsensical, but results in addressing the
             GOT table with a pic_offset_table_rtx base.  We can't
             just refuse it easily, since it gets matched by the
             "addsi3" pattern, which later gets split to lea in case
             the output register differs from the input.  While this
             can be handled by a separate addsi pattern for this case
             that never results in lea, disabling this test seems to
             be the easier and correct fix for the crash.  */
6017	}
6018      else if (GET_CODE (disp) != LABEL_REF
6019	       && GET_CODE (disp) != CONST_INT
6020	       && (GET_CODE (disp) != CONST
6021		   || !legitimate_constant_p (disp))
6022	       && (GET_CODE (disp) != SYMBOL_REF
6023		   || !legitimate_constant_p (disp)))
6024	{
6025	  reason = "displacement is not constant";
6026	  goto report_error;
6027	}
6028      else if (TARGET_64BIT
6029	       && !x86_64_immediate_operand (disp, VOIDmode))
6030	{
6031	  reason = "displacement is out of range";
6032	  goto report_error;
6033	}
6034    }
6035
6036  /* Everything looks valid.  */
6037  if (TARGET_DEBUG_ADDR)
6038    fprintf (stderr, "Success.\n");
6039  return TRUE;
6040
6041 report_error:
6042  if (TARGET_DEBUG_ADDR)
6043    {
6044      fprintf (stderr, "Error: %s\n", reason);
6045      debug_rtx (reason_rtx);
6046    }
6047  return FALSE;
6048}
6049
6050/* Return a unique alias set for the GOT.  */
6051
6052static HOST_WIDE_INT
6053ix86_GOT_alias_set (void)
6054{
6055  static HOST_WIDE_INT set = -1;
6056  if (set == -1)
6057    set = new_alias_set ();
6058  return set;
6059}
6060
6061/* Return a legitimate reference for ORIG (an address) using the
6062   register REG.  If REG is 0, a new pseudo is generated.
6063
6064   There are two types of references that must be handled:
6065
6066   1. Global data references must load the address from the GOT, via
6067      the PIC reg.  An insn is emitted to do this load, and the reg is
6068      returned.
6069
6070   2. Static data references, constant pool addresses, and code labels
6071      compute the address as an offset from the GOT, whose base is in
6072      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6073      differentiate them from global data objects.  The returned
6074      address is the PIC reg + an unspec constant.
6075
6076   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6077   reg also appears in the address.  */
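
/* As a rough sketch of the 32-bit cases handled below: a global symbol `foo'
   becomes a load from its GOT slot,

     (mem (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOT))))

   copied into REG, while a local symbol stays a plain address,

     (plus pic_offset_table_rtx (const (unspec [foo] UNSPEC_GOTOFF))).  */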
6078
6079static rtx
6080legitimize_pic_address (rtx orig, rtx reg)
6081{
6082  rtx addr = orig;
6083  rtx new = orig;
6084  rtx base;
6085
6086#if TARGET_MACHO
6087  if (reg == 0)
6088    reg = gen_reg_rtx (Pmode);
6089  /* Use the generic Mach-O PIC machinery.  */
6090  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6091#endif
6092
6093  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6094    new = addr;
6095  else if (TARGET_64BIT
6096	   && ix86_cmodel != CM_SMALL_PIC
6097	   && local_symbolic_operand (addr, Pmode))
6098    {
6099      rtx tmpreg;
6100      /* This symbol may be referenced via a displacement from the PIC
6101	 base address (@GOTOFF).  */
6102
6103      if (reload_in_progress)
6104	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6105      if (GET_CODE (addr) == CONST)
6106	addr = XEXP (addr, 0);
6107      if (GET_CODE (addr) == PLUS)
6108	  {
6109            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6110	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6111	  }
6112	else
6113          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6114      new = gen_rtx_CONST (Pmode, new);
6115      if (!reg)
6116        tmpreg = gen_reg_rtx (Pmode);
6117      else
6118	tmpreg = reg;
6119      emit_move_insn (tmpreg, new);
6120
6121      if (reg != 0)
6122	{
6123	  new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6124				     tmpreg, 1, OPTAB_DIRECT);
6125	  new = reg;
6126	}
6127      else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6128    }
6129  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6130    {
6131      /* This symbol may be referenced via a displacement from the PIC
6132	 base address (@GOTOFF).  */
6133
6134      if (reload_in_progress)
6135	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6136      if (GET_CODE (addr) == CONST)
6137	addr = XEXP (addr, 0);
6138      if (GET_CODE (addr) == PLUS)
6139	  {
6140            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6141	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6142	  }
6143	else
6144          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6145      new = gen_rtx_CONST (Pmode, new);
6146      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6147
6148      if (reg != 0)
6149	{
6150	  emit_move_insn (reg, new);
6151	  new = reg;
6152	}
6153    }
6154  else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6155    {
6156      if (TARGET_64BIT)
6157	{
6158	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6159	  new = gen_rtx_CONST (Pmode, new);
6160	  new = gen_const_mem (Pmode, new);
6161	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6162
6163	  if (reg == 0)
6164	    reg = gen_reg_rtx (Pmode);
          /* Use gen_movsi directly, otherwise the address is loaded
             into a register for CSE.  We don't want to CSE these addresses;
             instead we CSE addresses from the GOT table, so skip this.  */
6168	  emit_insn (gen_movsi (reg, new));
6169	  new = reg;
6170	}
6171      else
6172	{
6173	  /* This symbol must be referenced via a load from the
6174	     Global Offset Table (@GOT).  */
6175
6176	  if (reload_in_progress)
6177	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6178	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6179	  new = gen_rtx_CONST (Pmode, new);
6180	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6181	  new = gen_const_mem (Pmode, new);
6182	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6183
6184	  if (reg == 0)
6185	    reg = gen_reg_rtx (Pmode);
6186	  emit_move_insn (reg, new);
6187	  new = reg;
6188	}
6189    }
6190  else
6191    {
6192      if (GET_CODE (addr) == CONST_INT
6193	  && !x86_64_immediate_operand (addr, VOIDmode))
6194	{
6195	  if (reg)
6196	    {
6197	      emit_move_insn (reg, addr);
6198	      new = reg;
6199	    }
6200	  else
6201	    new = force_reg (Pmode, addr);
6202	}
6203      else if (GET_CODE (addr) == CONST)
6204	{
6205	  addr = XEXP (addr, 0);
6206
6207	  /* We must match stuff we generate before.  Assume the only
6208	     unspecs that can get here are ours.  Not that we could do
6209	     anything with them anyway....  */
6210	  if (GET_CODE (addr) == UNSPEC
6211	      || (GET_CODE (addr) == PLUS
6212		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6213	    return orig;
6214	  gcc_assert (GET_CODE (addr) == PLUS);
6215	}
6216      if (GET_CODE (addr) == PLUS)
6217	{
6218	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6219
6220	  /* Check first to see if this is a constant offset from a @GOTOFF
6221	     symbol reference.  */
6222	  if (local_symbolic_operand (op0, Pmode)
6223	      && GET_CODE (op1) == CONST_INT)
6224	    {
6225	      if (!TARGET_64BIT)
6226		{
6227		  if (reload_in_progress)
6228		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6229		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6230					UNSPEC_GOTOFF);
6231		  new = gen_rtx_PLUS (Pmode, new, op1);
6232		  new = gen_rtx_CONST (Pmode, new);
6233		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6234
6235		  if (reg != 0)
6236		    {
6237		      emit_move_insn (reg, new);
6238		      new = reg;
6239		    }
6240		}
6241	      else
6242		{
6243		  if (INTVAL (op1) < -16*1024*1024
6244		      || INTVAL (op1) >= 16*1024*1024)
6245		    {
6246		      if (!x86_64_immediate_operand (op1, Pmode))
6247			op1 = force_reg (Pmode, op1);
6248		      new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6249		    }
6250		}
6251	    }
6252	  else
6253	    {
6254	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6255	      new  = legitimize_pic_address (XEXP (addr, 1),
6256					     base == reg ? NULL_RTX : reg);
6257
6258	      if (GET_CODE (new) == CONST_INT)
6259		new = plus_constant (base, INTVAL (new));
6260	      else
6261		{
6262		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6263		    {
6264		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6265		      new = XEXP (new, 1);
6266		    }
6267		  new = gen_rtx_PLUS (Pmode, base, new);
6268		}
6269	    }
6270	}
6271    }
6272  return new;
6273}
6274
6275/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6276
6277static rtx
6278get_thread_pointer (int to_reg)
6279{
6280  rtx tp, reg, insn;
6281
6282  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6283  if (!to_reg)
6284    return tp;
6285
6286  reg = gen_reg_rtx (Pmode);
6287  insn = gen_rtx_SET (VOIDmode, reg, tp);
6288  insn = emit_insn (insn);
6289
6290  return reg;
6291}
6292
6293/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6294   false if we expect this to be used for a memory address and true if
6295   we expect to load the address into a register.  */
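
/* In rough terms, the models handled below are: GLOBAL_DYNAMIC calls the
   tls_get_addr helper for the symbol itself, LOCAL_DYNAMIC calls it once for
   the module base and then adds a DTPOFF constant, INITIAL_EXEC loads the
   symbol's offset from the GOT and combines it with the thread pointer, and
   LOCAL_EXEC folds the offset into a constant displacement from the thread
   pointer.  */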
6296
6297static rtx
6298legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6299{
6300  rtx dest, base, off, pic;
6301  int type;
6302
6303  switch (model)
6304    {
6305    case TLS_MODEL_GLOBAL_DYNAMIC:
6306      dest = gen_reg_rtx (Pmode);
6307      if (TARGET_64BIT)
6308	{
6309	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6310
6311	  start_sequence ();
6312	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6313	  insns = get_insns ();
6314	  end_sequence ();
6315
6316	  emit_libcall_block (insns, dest, rax, x);
6317	}
6318      else
6319	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6320      break;
6321
6322    case TLS_MODEL_LOCAL_DYNAMIC:
6323      base = gen_reg_rtx (Pmode);
6324      if (TARGET_64BIT)
6325	{
6326	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6327
6328	  start_sequence ();
6329	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6330	  insns = get_insns ();
6331	  end_sequence ();
6332
6333	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6334	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6335	  emit_libcall_block (insns, base, rax, note);
6336	}
6337      else
6338	emit_insn (gen_tls_local_dynamic_base_32 (base));
6339
6340      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6341      off = gen_rtx_CONST (Pmode, off);
6342
6343      return gen_rtx_PLUS (Pmode, base, off);
6344
6345    case TLS_MODEL_INITIAL_EXEC:
6346      if (TARGET_64BIT)
6347	{
6348	  pic = NULL;
6349	  type = UNSPEC_GOTNTPOFF;
6350	}
6351      else if (flag_pic)
6352	{
6353	  if (reload_in_progress)
6354	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6355	  pic = pic_offset_table_rtx;
6356	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6357	}
6358      else if (!TARGET_GNU_TLS)
6359	{
6360	  pic = gen_reg_rtx (Pmode);
6361	  emit_insn (gen_set_got (pic));
6362	  type = UNSPEC_GOTTPOFF;
6363	}
6364      else
6365	{
6366	  pic = NULL;
6367	  type = UNSPEC_INDNTPOFF;
6368	}
6369
6370      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6371      off = gen_rtx_CONST (Pmode, off);
6372      if (pic)
6373	off = gen_rtx_PLUS (Pmode, pic, off);
6374      off = gen_const_mem (Pmode, off);
6375      set_mem_alias_set (off, ix86_GOT_alias_set ());
6376
6377      if (TARGET_64BIT || TARGET_GNU_TLS)
6378	{
6379          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6380	  off = force_reg (Pmode, off);
6381	  return gen_rtx_PLUS (Pmode, base, off);
6382	}
6383      else
6384	{
6385	  base = get_thread_pointer (true);
6386	  dest = gen_reg_rtx (Pmode);
6387	  emit_insn (gen_subsi3 (dest, base, off));
6388	}
6389      break;
6390
6391    case TLS_MODEL_LOCAL_EXEC:
6392      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6393			    (TARGET_64BIT || TARGET_GNU_TLS)
6394			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6395      off = gen_rtx_CONST (Pmode, off);
6396
6397      if (TARGET_64BIT || TARGET_GNU_TLS)
6398	{
6399	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6400	  return gen_rtx_PLUS (Pmode, base, off);
6401	}
6402      else
6403	{
6404	  base = get_thread_pointer (true);
6405	  dest = gen_reg_rtx (Pmode);
6406	  emit_insn (gen_subsi3 (dest, base, off));
6407	}
6408      break;
6409
6410    default:
6411      gcc_unreachable ();
6412    }
6413
6414  return dest;
6415}
6416
6417/* Try machine-dependent ways of modifying an illegitimate address
6418   to be legitimate.  If we find one, return the new, valid address.
6419   This macro is used in only one place: `memory_address' in explow.c.
6420
6421   OLDX is the address as it was before break_out_memory_refs was called.
6422   In some cases it is useful to look at this to decide what needs to be done.
6423
6424   MODE and WIN are passed so that this macro can use
6425   GO_IF_LEGITIMATE_ADDRESS.
6426
6427   It is always safe for this macro to do nothing.  It exists to recognize
6428   opportunities to optimize the output.
6429
6430   For the 80386, we handle X+REG by loading X into a register R and
6431   using R+REG.  R will go in a general reg and indexing will be used.
6432   However, if REG is a broken-out memory address or multiplication,
6433   nothing needs to be done because REG can certainly go in a general reg.
6434
6435   When -fpic is used, special handling is needed for symbolic references.
6436   See comments by legitimize_pic_address in i386.c for details.  */
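
/* One concrete canonicalization performed below: an address such as
   (plus (ashift (reg) (const_int 3)) (reg)) is rewritten as
   (plus (mult (reg) (const_int 8)) (reg)), which GO_IF_LEGITIMATE_ADDRESS
   accepts as a scaled-index form.  */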
6437
6438rtx
6439legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6440{
6441  int changed = 0;
6442  unsigned log;
6443
6444  if (TARGET_DEBUG_ADDR)
6445    {
6446      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6447	       GET_MODE_NAME (mode));
6448      debug_rtx (x);
6449    }
6450
6451  log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
6452  if (log)
6453    return legitimize_tls_address (x, log, false);
6454  if (GET_CODE (x) == CONST
6455      && GET_CODE (XEXP (x, 0)) == PLUS
6456      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
6457      && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
6458    {
6459      rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
6460      return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
6461    }
6462
6463  if (flag_pic && SYMBOLIC_CONST (x))
6464    return legitimize_pic_address (x, 0);
6465
  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6467  if (GET_CODE (x) == ASHIFT
6468      && GET_CODE (XEXP (x, 1)) == CONST_INT
6469      && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
6470    {
6471      changed = 1;
6472      log = INTVAL (XEXP (x, 1));
6473      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6474			GEN_INT (1 << log));
6475    }
6476
6477  if (GET_CODE (x) == PLUS)
6478    {
6479      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6480
6481      if (GET_CODE (XEXP (x, 0)) == ASHIFT
6482	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6483	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
6484	{
6485	  changed = 1;
6486	  log = INTVAL (XEXP (XEXP (x, 0), 1));
6487	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6488				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6489				      GEN_INT (1 << log));
6490	}
6491
6492      if (GET_CODE (XEXP (x, 1)) == ASHIFT
6493	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6494	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
6495	{
6496	  changed = 1;
6497	  log = INTVAL (XEXP (XEXP (x, 1), 1));
6498	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6499				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6500				      GEN_INT (1 << log));
6501	}
6502
6503      /* Put multiply first if it isn't already.  */
6504      if (GET_CODE (XEXP (x, 1)) == MULT)
6505	{
6506	  rtx tmp = XEXP (x, 0);
6507	  XEXP (x, 0) = XEXP (x, 1);
6508	  XEXP (x, 1) = tmp;
6509	  changed = 1;
6510	}
6511
6512      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6513	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6514	 created by virtual register instantiation, register elimination, and
6515	 similar optimizations.  */
6516      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6517	{
6518	  changed = 1;
6519	  x = gen_rtx_PLUS (Pmode,
6520			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6521					  XEXP (XEXP (x, 1), 0)),
6522			    XEXP (XEXP (x, 1), 1));
6523	}
6524
6525      /* Canonicalize
6526	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6527	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6528      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6529	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6530	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6531	       && CONSTANT_P (XEXP (x, 1)))
6532	{
6533	  rtx constant;
6534	  rtx other = NULL_RTX;
6535
6536	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6537	    {
6538	      constant = XEXP (x, 1);
6539	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6540	    }
6541	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6542	    {
6543	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6544	      other = XEXP (x, 1);
6545	    }
6546	  else
6547	    constant = 0;
6548
6549	  if (constant)
6550	    {
6551	      changed = 1;
6552	      x = gen_rtx_PLUS (Pmode,
6553				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6554					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6555				plus_constant (other, INTVAL (constant)));
6556	    }
6557	}
6558
6559      if (changed && legitimate_address_p (mode, x, FALSE))
6560	return x;
6561
6562      if (GET_CODE (XEXP (x, 0)) == MULT)
6563	{
6564	  changed = 1;
6565	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6566	}
6567
6568      if (GET_CODE (XEXP (x, 1)) == MULT)
6569	{
6570	  changed = 1;
6571	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6572	}
6573
6574      if (changed
6575	  && GET_CODE (XEXP (x, 1)) == REG
6576	  && GET_CODE (XEXP (x, 0)) == REG)
6577	return x;
6578
6579      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6580	{
6581	  changed = 1;
6582	  x = legitimize_pic_address (x, 0);
6583	}
6584
6585      if (changed && legitimate_address_p (mode, x, FALSE))
6586	return x;
6587
6588      if (GET_CODE (XEXP (x, 0)) == REG)
6589	{
6590	  rtx temp = gen_reg_rtx (Pmode);
6591	  rtx val  = force_operand (XEXP (x, 1), temp);
6592	  if (val != temp)
6593	    emit_move_insn (temp, val);
6594
6595	  XEXP (x, 1) = temp;
6596	  return x;
6597	}
6598
6599      else if (GET_CODE (XEXP (x, 1)) == REG)
6600	{
6601	  rtx temp = gen_reg_rtx (Pmode);
6602	  rtx val  = force_operand (XEXP (x, 0), temp);
6603	  if (val != temp)
6604	    emit_move_insn (temp, val);
6605
6606	  XEXP (x, 0) = temp;
6607	  return x;
6608	}
6609    }
6610
6611  return x;
6612}
6613
6614/* Print an integer constant expression in assembler syntax.  Addition
6615   and subtraction are the only arithmetic that may appear in these
6616   expressions.  FILE is the stdio stream to write to, X is the rtx, and
6617   CODE is the operand print code from the output string.  */
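/* For instance, a GOT-relative reference prints as "sym@GOTOFF" and a PLT
   call target as "sym@PLT"; the UNSPEC cases below select the relocation
   suffix.  */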
6618
6619static void
6620output_pic_addr_const (FILE *file, rtx x, int code)
6621{
6622  char buf[256];
6623
6624  switch (GET_CODE (x))
6625    {
6626    case PC:
6627      gcc_assert (flag_pic);
6628      putc ('.', file);
6629      break;
6630
6631    case SYMBOL_REF:
6632      output_addr_const (file, x);
6633      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6634	fputs ("@PLT", file);
6635      break;
6636
6637    case LABEL_REF:
6638      x = XEXP (x, 0);
6639      /* FALLTHRU */
6640    case CODE_LABEL:
6641      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6642      assemble_name (asm_out_file, buf);
6643      break;
6644
6645    case CONST_INT:
6646      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6647      break;
6648
6649    case CONST:
6650      /* This used to output parentheses around the expression,
6651	 but that does not work on the 386 (either ATT or BSD assembler).  */
6652      output_pic_addr_const (file, XEXP (x, 0), code);
6653      break;
6654
6655    case CONST_DOUBLE:
6656      if (GET_MODE (x) == VOIDmode)
6657	{
6658	  /* We can use %d if the number is <32 bits and positive.  */
6659	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6660	    fprintf (file, "0x%lx%08lx",
6661		     (unsigned long) CONST_DOUBLE_HIGH (x),
6662		     (unsigned long) CONST_DOUBLE_LOW (x));
6663	  else
6664	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6665	}
6666      else
6667	/* We can't handle floating point constants;
6668	   PRINT_OPERAND must handle them.  */
6669	output_operand_lossage ("floating constant misused");
6670      break;
6671
6672    case PLUS:
6673      /* Some assemblers need integer constants to appear first.  */
6674      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6675	{
6676	  output_pic_addr_const (file, XEXP (x, 0), code);
6677	  putc ('+', file);
6678	  output_pic_addr_const (file, XEXP (x, 1), code);
6679	}
6680      else
6681	{
6682	  gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
6683	  output_pic_addr_const (file, XEXP (x, 1), code);
6684	  putc ('+', file);
6685	  output_pic_addr_const (file, XEXP (x, 0), code);
6686	}
6687      break;
6688
6689    case MINUS:
6690      if (!TARGET_MACHO)
6691	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6692      output_pic_addr_const (file, XEXP (x, 0), code);
6693      putc ('-', file);
6694      output_pic_addr_const (file, XEXP (x, 1), code);
6695      if (!TARGET_MACHO)
6696	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6697      break;
6698
6699     case UNSPEC:
6700       gcc_assert (XVECLEN (x, 0) == 1);
6701       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6702       switch (XINT (x, 1))
6703	{
6704	case UNSPEC_GOT:
6705	  fputs ("@GOT", file);
6706	  break;
6707	case UNSPEC_GOTOFF:
6708	  fputs ("@GOTOFF", file);
6709	  break;
6710	case UNSPEC_GOTPCREL:
6711	  fputs ("@GOTPCREL(%rip)", file);
6712	  break;
6713	case UNSPEC_GOTTPOFF:
6714	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6715	  fputs ("@GOTTPOFF", file);
6716	  break;
6717	case UNSPEC_TPOFF:
6718	  fputs ("@TPOFF", file);
6719	  break;
6720	case UNSPEC_NTPOFF:
6721	  if (TARGET_64BIT)
6722	    fputs ("@TPOFF", file);
6723	  else
6724	    fputs ("@NTPOFF", file);
6725	  break;
6726	case UNSPEC_DTPOFF:
6727	  fputs ("@DTPOFF", file);
6728	  break;
6729	case UNSPEC_GOTNTPOFF:
6730	  if (TARGET_64BIT)
6731	    fputs ("@GOTTPOFF(%rip)", file);
6732	  else
6733	    fputs ("@GOTNTPOFF", file);
6734	  break;
6735	case UNSPEC_INDNTPOFF:
6736	  fputs ("@INDNTPOFF", file);
6737	  break;
6738	default:
6739	  output_operand_lossage ("invalid UNSPEC as operand");
6740	  break;
6741	}
6742       break;
6743
6744    default:
6745      output_operand_lossage ("invalid expression as operand");
6746    }
6747}
6748
6749/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6750   We need to emit DTP-relative relocations.  */
6751
6752static void
6753i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6754{
6755  fputs (ASM_LONG, file);
6756  output_addr_const (file, x);
6757  fputs ("@DTPOFF", file);
6758  switch (size)
6759    {
6760    case 4:
6761      break;
6762    case 8:
6763      fputs (", 0", file);
6764      break;
6765    default:
6766      gcc_unreachable ();
6767   }
6768}
6769
6770/* In the name of slightly smaller debug output, and to cater to
6771   general assembler lossage, recognize PIC+GOTOFF and turn it back
6772   into a direct symbol reference.  */
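/* For example, an address of the form
     (plus (reg %ebx) (const (unspec [sym] UNSPEC_GOTOFF)))
   is turned back into plain SYM; any extra base or index register found
   next to the PIC register is re-added around the result.  */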
6773
6774static rtx
6775ix86_delegitimize_address (rtx orig_x)
6776{
6777  rtx x = orig_x, y;
6778
6779  if (GET_CODE (x) == MEM)
6780    x = XEXP (x, 0);
6781
6782  if (TARGET_64BIT)
6783    {
6784      if (GET_CODE (x) != CONST
6785	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6786	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6787	  || GET_CODE (orig_x) != MEM)
6788	return orig_x;
6789      return XVECEXP (XEXP (x, 0), 0, 0);
6790    }
6791
6792  if (GET_CODE (x) != PLUS
6793      || GET_CODE (XEXP (x, 1)) != CONST)
6794    return orig_x;
6795
6796  if (GET_CODE (XEXP (x, 0)) == REG
6797      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6798    /* %ebx + GOT/GOTOFF */
6799    y = NULL;
6800  else if (GET_CODE (XEXP (x, 0)) == PLUS)
6801    {
6802      /* %ebx + %reg * scale + GOT/GOTOFF */
6803      y = XEXP (x, 0);
6804      if (GET_CODE (XEXP (y, 0)) == REG
6805	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6806	y = XEXP (y, 1);
6807      else if (GET_CODE (XEXP (y, 1)) == REG
6808	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6809	y = XEXP (y, 0);
6810      else
6811	return orig_x;
6812      if (GET_CODE (y) != REG
6813	  && GET_CODE (y) != MULT
6814	  && GET_CODE (y) != ASHIFT)
6815	return orig_x;
6816    }
6817  else
6818    return orig_x;
6819
6820  x = XEXP (XEXP (x, 1), 0);
6821  if (GET_CODE (x) == UNSPEC
6822      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6823	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6824    {
6825      if (y)
6826	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6827      return XVECEXP (x, 0, 0);
6828    }
6829
6830  if (GET_CODE (x) == PLUS
6831      && GET_CODE (XEXP (x, 0)) == UNSPEC
6832      && GET_CODE (XEXP (x, 1)) == CONST_INT
6833      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6834	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6835	      && GET_CODE (orig_x) != MEM)))
6836    {
6837      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6838      if (y)
6839	return gen_rtx_PLUS (Pmode, y, x);
6840      return x;
6841    }
6842
6843  return orig_x;
6844}
6845
6846static void
6847put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
6848		    int fp, FILE *file)
6849{
6850  const char *suffix;
6851
6852  if (mode == CCFPmode || mode == CCFPUmode)
6853    {
6854      enum rtx_code second_code, bypass_code;
6855      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6856      gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
6857      code = ix86_fp_compare_code_to_integer (code);
6858      mode = CCmode;
6859    }
6860  if (reverse)
6861    code = reverse_condition (code);
6862
6863  switch (code)
6864    {
6865    case EQ:
6866      suffix = "e";
6867      break;
6868    case NE:
6869      suffix = "ne";
6870      break;
6871    case GT:
6872      gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
6873      suffix = "g";
6874      break;
6875    case GTU:
6876      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6877	 Those same assemblers have the same but opposite lossage on cmov.  */
6878      gcc_assert (mode == CCmode);
6879      suffix = fp ? "nbe" : "a";
6880      break;
6881    case LT:
6882      switch (mode)
6883	{
6884	case CCNOmode:
6885	case CCGOCmode:
6886	  suffix = "s";
6887	  break;
6888
6889	case CCmode:
6890	case CCGCmode:
6891	  suffix = "l";
6892	  break;
6893
6894	default:
6895	  gcc_unreachable ();
6896	}
6897      break;
6898    case LTU:
6899      gcc_assert (mode == CCmode);
6900      suffix = "b";
6901      break;
6902    case GE:
6903      switch (mode)
6904	{
6905	case CCNOmode:
6906	case CCGOCmode:
6907	  suffix = "ns";
6908	  break;
6909
6910	case CCmode:
6911	case CCGCmode:
6912	  suffix = "ge";
6913	  break;
6914
6915	default:
6916	  gcc_unreachable ();
6917	}
6918      break;
6919    case GEU:
6920      /* ??? As above.  */
6921      gcc_assert (mode == CCmode);
6922      suffix = fp ? "nb" : "ae";
6923      break;
6924    case LE:
6925      gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
6926      suffix = "le";
6927      break;
6928    case LEU:
6929      gcc_assert (mode == CCmode);
6930      suffix = "be";
6931      break;
6932    case UNORDERED:
6933      suffix = fp ? "u" : "p";
6934      break;
6935    case ORDERED:
6936      suffix = fp ? "nu" : "np";
6937      break;
6938    default:
6939      gcc_unreachable ();
6940    }
6941  fputs (suffix, file);
6942}
6943
6944/* Print the name of register X to FILE based on its machine mode and number.
6945   If CODE is 'w', pretend the mode is HImode.
6946   If CODE is 'b', pretend the mode is QImode.
6947   If CODE is 'k', pretend the mode is SImode.
6948   If CODE is 'q', pretend the mode is DImode.
6949   If CODE is 'h', pretend the reg is the 'high' byte register.
6950   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
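/* For example, for (reg:SI 0) this prints "eax" (with a '%' prefix in AT&T
   output); with CODE 'w' it prints "ax", with 'b' "al", with 'h' "ah", and
   with 'q' "rax" on a 64-bit target.  */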
6951
6952void
6953print_reg (rtx x, int code, FILE *file)
6954{
6955  gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
6956	      && REGNO (x) != FRAME_POINTER_REGNUM
6957	      && REGNO (x) != FLAGS_REG
6958	      && REGNO (x) != FPSR_REG);
6959
6960  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6961    putc ('%', file);
6962
6963  if (code == 'w' || MMX_REG_P (x))
6964    code = 2;
6965  else if (code == 'b')
6966    code = 1;
6967  else if (code == 'k')
6968    code = 4;
6969  else if (code == 'q')
6970    code = 8;
6971  else if (code == 'y')
6972    code = 3;
6973  else if (code == 'h')
6974    code = 0;
6975  else
6976    code = GET_MODE_SIZE (GET_MODE (x));
6977
6978  /* Irritatingly, the AMD extended registers use a different naming
6979     convention from the normal registers.  */
6980  if (REX_INT_REG_P (x))
6981    {
6982      gcc_assert (TARGET_64BIT);
6983      switch (code)
6984	{
6985	  case 0:
6986	    error ("extended registers have no high halves");
6987	    break;
6988	  case 1:
6989	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6990	    break;
6991	  case 2:
6992	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6993	    break;
6994	  case 4:
6995	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6996	    break;
6997	  case 8:
6998	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6999	    break;
7000	  default:
7001	    error ("unsupported operand size for extended register");
7002	    break;
7003	}
7004      return;
7005    }
7006  switch (code)
7007    {
7008    case 3:
7009      if (STACK_TOP_P (x))
7010	{
7011	  fputs ("st(0)", file);
7012	  break;
7013	}
7014      /* FALLTHRU */
7015    case 8:
7016    case 4:
7017    case 12:
7018      if (! ANY_FP_REG_P (x))
7019	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7020      /* FALLTHRU */
7021    case 16:
7022    case 2:
7023    normal:
7024      fputs (hi_reg_name[REGNO (x)], file);
7025      break;
7026    case 1:
7027      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7028	goto normal;
7029      fputs (qi_reg_name[REGNO (x)], file);
7030      break;
7031    case 0:
7032      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7033	goto normal;
7034      fputs (qi_high_reg_name[REGNO (x)], file);
7035      break;
7036    default:
7037      gcc_unreachable ();
7038    }
7039}
7040
7041/* Locate some local-dynamic symbol still in use by this function
7042   so that we can print its name in some tls_local_dynamic_base
7043   pattern.  */
7044
7045static const char *
7046get_some_local_dynamic_name (void)
7047{
7048  rtx insn;
7049
7050  if (cfun->machine->some_ld_name)
7051    return cfun->machine->some_ld_name;
7052
7053  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7054    if (INSN_P (insn)
7055	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7056      return cfun->machine->some_ld_name;
7057
7058  gcc_unreachable ();
7059}
7060
7061static int
7062get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7063{
7064  rtx x = *px;
7065
7066  if (GET_CODE (x) == SYMBOL_REF
7067      && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7068    {
7069      cfun->machine->some_ld_name = XSTR (x, 0);
7070      return 1;
7071    }
7072
7073  return 0;
7074}
7075
7076/* Meaning of CODE:
7077   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7078   C -- print opcode suffix for set/cmov insn.
7079   c -- like C, but print reversed condition
7080   F,f -- likewise, but for floating-point.
7081   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7082        otherwise nothing
7083   R -- print the prefix for register names.
7084   z -- print the opcode suffix for the size of the current operand.
7085   * -- print a star (in certain assembler syntax)
7086   A -- print an absolute memory reference.
7087   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7088   s -- print a shift double count, followed by the assembler's argument
7089	delimiter.
7090   b -- print the QImode name of the register for the indicated operand.
7091	%b0 would print %al if operands[0] is reg 0.
7092   w --  likewise, print the HImode name of the register.
7093   k --  likewise, print the SImode name of the register.
7094   q --  likewise, print the DImode name of the register.
7095   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7096   y -- print "st(0)" instead of "st" as a register.
7097   D -- print condition for SSE cmp instruction.
7098   P -- if PIC, print an @PLT suffix.
7099   X -- don't print any sort of PIC '@' suffix for a symbol.
7100   & -- print some in-use local-dynamic symbol name.
7101   H -- print a memory address offset by 8; used for sse high-parts
7102 */
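/* For example, in an output template such as "fadd%z2\t%2" the "%z2"
   expands to the 387 size suffix ('s', 'l' or 't') derived from the mode of
   operand 2, and "%k0" prints the SImode name of the register in
   operand 0.  */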
7103
7104void
7105print_operand (FILE *file, rtx x, int code)
7106{
7107  if (code)
7108    {
7109      switch (code)
7110	{
7111	case '*':
7112	  if (ASSEMBLER_DIALECT == ASM_ATT)
7113	    putc ('*', file);
7114	  return;
7115
7116	case '&':
7117	  assemble_name (file, get_some_local_dynamic_name ());
7118	  return;
7119
7120	case 'A':
7121	  switch (ASSEMBLER_DIALECT)
7122	    {
7123	    case ASM_ATT:
7124	      putc ('*', file);
7125	      break;
7126
7127	    case ASM_INTEL:
7128	      /* Intel syntax.  For absolute addresses, registers should not
7129		 be surrounded by brackets.  */
7130	      if (GET_CODE (x) != REG)
7131		{
7132		  putc ('[', file);
7133		  PRINT_OPERAND (file, x, 0);
7134		  putc (']', file);
7135		  return;
7136		}
7137	      break;
7138
7139	    default:
7140	      gcc_unreachable ();
7141	    }
7142
7143	  PRINT_OPERAND (file, x, 0);
7144	  return;
7145
7146
7147	case 'L':
7148	  if (ASSEMBLER_DIALECT == ASM_ATT)
7149	    putc ('l', file);
7150	  return;
7151
7152	case 'W':
7153	  if (ASSEMBLER_DIALECT == ASM_ATT)
7154	    putc ('w', file);
7155	  return;
7156
7157	case 'B':
7158	  if (ASSEMBLER_DIALECT == ASM_ATT)
7159	    putc ('b', file);
7160	  return;
7161
7162	case 'Q':
7163	  if (ASSEMBLER_DIALECT == ASM_ATT)
7164	    putc ('l', file);
7165	  return;
7166
7167	case 'S':
7168	  if (ASSEMBLER_DIALECT == ASM_ATT)
7169	    putc ('s', file);
7170	  return;
7171
7172	case 'T':
7173	  if (ASSEMBLER_DIALECT == ASM_ATT)
7174	    putc ('t', file);
7175	  return;
7176
7177	case 'z':
7178	  /* 387 opcodes don't get size suffixes if the operands are
7179	     registers.  */
7180	  if (STACK_REG_P (x))
7181	    return;
7182
7183	  /* Likewise if using Intel opcodes.  */
7184	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7185	    return;
7186
7187	  /* This is the size of op from size of operand.  */
7188	  switch (GET_MODE_SIZE (GET_MODE (x)))
7189	    {
7190	    case 2:
7191#ifdef HAVE_GAS_FILDS_FISTS
7192	      putc ('s', file);
7193#endif
7194	      return;
7195
7196	    case 4:
7197	      if (GET_MODE (x) == SFmode)
7198		{
7199		  putc ('s', file);
7200		  return;
7201		}
7202	      else
7203		putc ('l', file);
7204	      return;
7205
7206	    case 12:
7207	    case 16:
7208	      putc ('t', file);
7209	      return;
7210
7211	    case 8:
7212	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7213		{
7214#ifdef GAS_MNEMONICS
7215		  putc ('q', file);
7216#else
7217		  putc ('l', file);
7218		  putc ('l', file);
7219#endif
7220		}
7221	      else
7222	        putc ('l', file);
7223	      return;
7224
7225	    default:
7226	      gcc_unreachable ();
7227	    }
7228
7229	case 'b':
7230	case 'w':
7231	case 'k':
7232	case 'q':
7233	case 'h':
7234	case 'y':
7235	case 'X':
7236	case 'P':
7237	  break;
7238
7239	case 's':
7240	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7241	    {
7242	      PRINT_OPERAND (file, x, 0);
7243	      putc (',', file);
7244	    }
7245	  return;
7246
7247	case 'D':
7248	  /* A little bit of brain damage here.  The SSE compare instructions
7249	     use completely different names for the comparisons than the
7250	     fp conditional moves do.  */
7251	  switch (GET_CODE (x))
7252	    {
7253	    case EQ:
7254	    case UNEQ:
7255	      fputs ("eq", file);
7256	      break;
7257	    case LT:
7258	    case UNLT:
7259	      fputs ("lt", file);
7260	      break;
7261	    case LE:
7262	    case UNLE:
7263	      fputs ("le", file);
7264	      break;
7265	    case UNORDERED:
7266	      fputs ("unord", file);
7267	      break;
7268	    case NE:
7269	    case LTGT:
7270	      fputs ("neq", file);
7271	      break;
7272	    case UNGE:
7273	    case GE:
7274	      fputs ("nlt", file);
7275	      break;
7276	    case UNGT:
7277	    case GT:
7278	      fputs ("nle", file);
7279	      break;
7280	    case ORDERED:
7281	      fputs ("ord", file);
7282	      break;
7283	    default:
7284	      gcc_unreachable ();
7285	    }
7286	  return;
7287	case 'O':
7288#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7289	  if (ASSEMBLER_DIALECT == ASM_ATT)
7290	    {
7291	      switch (GET_MODE (x))
7292		{
7293		case HImode: putc ('w', file); break;
7294		case SImode:
7295		case SFmode: putc ('l', file); break;
7296		case DImode:
7297		case DFmode: putc ('q', file); break;
7298		default: gcc_unreachable ();
7299		}
7300	      putc ('.', file);
7301	    }
7302#endif
7303	  return;
7304	case 'C':
7305	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7306	  return;
7307	case 'F':
7308#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7309	  if (ASSEMBLER_DIALECT == ASM_ATT)
7310	    putc ('.', file);
7311#endif
7312	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7313	  return;
7314
7315	  /* Like above, but reverse condition */
7316	case 'c':
7317	  /* Check to see if argument to %c is really a constant
7318	     and not a condition code which needs to be reversed.  */
7319	  if (!COMPARISON_P (x))
7320	  {
7321	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7322	     return;
7323	  }
7324	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7325	  return;
7326	case 'f':
7327#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7328	  if (ASSEMBLER_DIALECT == ASM_ATT)
7329	    putc ('.', file);
7330#endif
7331	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7332	  return;
7333
7334	case 'H':
7335	  /* It doesn't actually matter what mode we use here, as we're
7336	     only going to use this for printing.  */
7337	  x = adjust_address_nv (x, DImode, 8);
7338	  break;
7339
7340	case '+':
7341	  {
7342	    rtx x;
7343
7344	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7345	      return;
7346
7347	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7348	    if (x)
7349	      {
7350		int pred_val = INTVAL (XEXP (x, 0));
7351
7352		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7353		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7354		  {
7355		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7356		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7357
7358		    /* Emit hints only in the case where the default branch
7359		       prediction heuristics would fail.  */
7360		    if (taken != cputaken)
7361		      {
7362			/* We use the 0x3e (DS) prefix for taken branches and
7363			   the 0x2e (CS) prefix for not-taken branches.  */
7364			if (taken)
7365			  fputs ("ds ; ", file);
7366			else
7367			  fputs ("cs ; ", file);
7368		      }
7369		  }
7370	      }
7371	    return;
7372	  }
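	/* The net effect is that a conditional branch may get a "ds ; "
	   (predict taken) or "cs ; " (predict not taken) prefix, but only
	   when that prediction disagrees with the CPU's static
	   backward-taken / forward-not-taken heuristic.  */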
7373	default:
7374	    output_operand_lossage ("invalid operand code '%c'", code);
7375	}
7376    }
7377
7378  if (GET_CODE (x) == REG)
7379    print_reg (x, code, file);
7380
7381  else if (GET_CODE (x) == MEM)
7382    {
7383      /* No `byte ptr' prefix for call instructions.  */
7384      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7385	{
7386	  const char * size;
7387	  switch (GET_MODE_SIZE (GET_MODE (x)))
7388	    {
7389	    case 1: size = "BYTE"; break;
7390	    case 2: size = "WORD"; break;
7391	    case 4: size = "DWORD"; break;
7392	    case 8: size = "QWORD"; break;
7393	    case 12: size = "XWORD"; break;
7394	    case 16: size = "XMMWORD"; break;
7395	    default:
7396	      gcc_unreachable ();
7397	    }
7398
7399	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7400	  if (code == 'b')
7401	    size = "BYTE";
7402	  else if (code == 'w')
7403	    size = "WORD";
7404	  else if (code == 'k')
7405	    size = "DWORD";
7406
7407	  fputs (size, file);
7408	  fputs (" PTR ", file);
7409	}
7410
7411      x = XEXP (x, 0);
7412      /* Avoid (%rip) for call operands.  */
7413      if (CONSTANT_ADDRESS_P (x) && code == 'P'
7414	       && GET_CODE (x) != CONST_INT)
7415	output_addr_const (file, x);
7416      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7417	output_operand_lossage ("invalid constraints for operand");
7418      else
7419	output_address (x);
7420    }
7421
7422  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7423    {
7424      REAL_VALUE_TYPE r;
7425      long l;
7426
7427      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7428      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7429
7430      if (ASSEMBLER_DIALECT == ASM_ATT)
7431	putc ('$', file);
7432      fprintf (file, "0x%08lx", l);
7433    }
7434
7435  /* These float cases don't actually occur as immediate operands.  */
7436  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7437    {
7438      char dstr[30];
7439
7440      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7441      fprintf (file, "%s", dstr);
7442    }
7443
7444  else if (GET_CODE (x) == CONST_DOUBLE
7445	   && GET_MODE (x) == XFmode)
7446    {
7447      char dstr[30];
7448
7449      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7450      fprintf (file, "%s", dstr);
7451    }
7452
7453  else
7454    {
7455      /* We have patterns that allow zero sets of memory, for instance.
7456	 In 64-bit mode, we should probably support all 8-byte vectors,
7457	 since we can in fact encode that into an immediate.  */
7458      if (GET_CODE (x) == CONST_VECTOR)
7459	{
7460	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
7461	  x = const0_rtx;
7462	}
7463
7464      if (code != 'P')
7465	{
7466	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7467	    {
7468	      if (ASSEMBLER_DIALECT == ASM_ATT)
7469		putc ('$', file);
7470	    }
7471	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7472		   || GET_CODE (x) == LABEL_REF)
7473	    {
7474	      if (ASSEMBLER_DIALECT == ASM_ATT)
7475		putc ('$', file);
7476	      else
7477		fputs ("OFFSET FLAT:", file);
7478	    }
7479	}
7480      if (GET_CODE (x) == CONST_INT)
7481	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7482      else if (flag_pic)
7483	output_pic_addr_const (file, x, code);
7484      else
7485	output_addr_const (file, x);
7486    }
7487}
7488
7489/* Print a memory operand whose address is ADDR.  */
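/* For example, an address with base %ebx, index %esi, scale 4 and
   displacement 16 prints as "16(%ebx,%esi,4)" in AT&T syntax and (modulo
   the register prefix) as "[ebx+16+esi*4]" in Intel syntax.  */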
7490
7491void
7492print_operand_address (FILE *file, rtx addr)
7493{
7494  struct ix86_address parts;
7495  rtx base, index, disp;
7496  int scale;
7497  int ok = ix86_decompose_address (addr, &parts);
7498
7499  gcc_assert (ok);
7500
7501  base = parts.base;
7502  index = parts.index;
7503  disp = parts.disp;
7504  scale = parts.scale;
7505
7506  switch (parts.seg)
7507    {
7508    case SEG_DEFAULT:
7509      break;
7510    case SEG_FS:
7511    case SEG_GS:
7512      if (USER_LABEL_PREFIX[0] == 0)
7513	putc ('%', file);
7514      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7515      break;
7516    default:
7517      gcc_unreachable ();
7518    }
7519
7520  if (!base && !index)
7521    {
7522      /* A displacement-only address requires special attention.  */
7523
7524      if (GET_CODE (disp) == CONST_INT)
7525	{
7526	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7527	    {
7528	      if (USER_LABEL_PREFIX[0] == 0)
7529		putc ('%', file);
7530	      fputs ("ds:", file);
7531	    }
7532	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7533	}
7534      else if (flag_pic)
7535	output_pic_addr_const (file, disp, 0);
7536      else
7537	output_addr_const (file, disp);
7538
7539      /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
7540      if (TARGET_64BIT)
7541	{
7542	  if (GET_CODE (disp) == CONST
7543	      && GET_CODE (XEXP (disp, 0)) == PLUS
7544	      && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7545	    disp = XEXP (XEXP (disp, 0), 0);
7546	  if (GET_CODE (disp) == LABEL_REF
7547	      || (GET_CODE (disp) == SYMBOL_REF
7548		  && SYMBOL_REF_TLS_MODEL (disp) == 0))
7549	    fputs ("(%rip)", file);
7550	}
7551    }
7552  else
7553    {
7554      if (ASSEMBLER_DIALECT == ASM_ATT)
7555	{
7556	  if (disp)
7557	    {
7558	      if (flag_pic)
7559		output_pic_addr_const (file, disp, 0);
7560	      else if (GET_CODE (disp) == LABEL_REF)
7561		output_asm_label (disp);
7562	      else
7563		output_addr_const (file, disp);
7564	    }
7565
7566	  putc ('(', file);
7567	  if (base)
7568	    print_reg (base, 0, file);
7569	  if (index)
7570	    {
7571	      putc (',', file);
7572	      print_reg (index, 0, file);
7573	      if (scale != 1)
7574		fprintf (file, ",%d", scale);
7575	    }
7576	  putc (')', file);
7577	}
7578      else
7579	{
7580	  rtx offset = NULL_RTX;
7581
7582	  if (disp)
7583	    {
7584	      /* Pull out the offset of a symbol; print any symbol itself.  */
7585	      if (GET_CODE (disp) == CONST
7586		  && GET_CODE (XEXP (disp, 0)) == PLUS
7587		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7588		{
7589		  offset = XEXP (XEXP (disp, 0), 1);
7590		  disp = gen_rtx_CONST (VOIDmode,
7591					XEXP (XEXP (disp, 0), 0));
7592		}
7593
7594	      if (flag_pic)
7595		output_pic_addr_const (file, disp, 0);
7596	      else if (GET_CODE (disp) == LABEL_REF)
7597		output_asm_label (disp);
7598	      else if (GET_CODE (disp) == CONST_INT)
7599		offset = disp;
7600	      else
7601		output_addr_const (file, disp);
7602	    }
7603
7604	  putc ('[', file);
7605	  if (base)
7606	    {
7607	      print_reg (base, 0, file);
7608	      if (offset)
7609		{
7610		  if (INTVAL (offset) >= 0)
7611		    putc ('+', file);
7612		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7613		}
7614	    }
7615	  else if (offset)
7616	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7617	  else
7618	    putc ('0', file);
7619
7620	  if (index)
7621	    {
7622	      putc ('+', file);
7623	      print_reg (index, 0, file);
7624	      if (scale != 1)
7625		fprintf (file, "*%d", scale);
7626	    }
7627	  putc (']', file);
7628	}
7629    }
7630}
7631
7632bool
7633output_addr_const_extra (FILE *file, rtx x)
7634{
7635  rtx op;
7636
7637  if (GET_CODE (x) != UNSPEC)
7638    return false;
7639
7640  op = XVECEXP (x, 0, 0);
7641  switch (XINT (x, 1))
7642    {
7643    case UNSPEC_GOTTPOFF:
7644      output_addr_const (file, op);
7645      /* FIXME: This might be @TPOFF in Sun ld.  */
7646      fputs ("@GOTTPOFF", file);
7647      break;
7648    case UNSPEC_TPOFF:
7649      output_addr_const (file, op);
7650      fputs ("@TPOFF", file);
7651      break;
7652    case UNSPEC_NTPOFF:
7653      output_addr_const (file, op);
7654      if (TARGET_64BIT)
7655	fputs ("@TPOFF", file);
7656      else
7657	fputs ("@NTPOFF", file);
7658      break;
7659    case UNSPEC_DTPOFF:
7660      output_addr_const (file, op);
7661      fputs ("@DTPOFF", file);
7662      break;
7663    case UNSPEC_GOTNTPOFF:
7664      output_addr_const (file, op);
7665      if (TARGET_64BIT)
7666	fputs ("@GOTTPOFF(%rip)", file);
7667      else
7668	fputs ("@GOTNTPOFF", file);
7669      break;
7670    case UNSPEC_INDNTPOFF:
7671      output_addr_const (file, op);
7672      fputs ("@INDNTPOFF", file);
7673      break;
7674
7675    default:
7676      return false;
7677    }
7678
7679  return true;
7680}
7681
7682/* Split one or more DImode RTL references into pairs of SImode
7683   references.  The RTL can be REG, offsettable MEM, integer constant, or
7684   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7685   split and "num" is its length.  lo_half and hi_half are output arrays
7686   that parallel "operands".  */
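/* For example, a DImode MEM is split into SImode MEMs at offsets 0 and 4
   (low word first, as the target is little-endian), and a CONST_INT or
   CONST_DOUBLE is split into its low and high 32-bit halves.  */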
7687
7688void
7689split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7690{
7691  while (num--)
7692    {
7693      rtx op = operands[num];
7694
7695      /* simplify_subreg refuses to split volatile memory addresses,
7696         but we still have to handle them.  */
7697      if (GET_CODE (op) == MEM)
7698	{
7699	  lo_half[num] = adjust_address (op, SImode, 0);
7700	  hi_half[num] = adjust_address (op, SImode, 4);
7701	}
7702      else
7703	{
7704	  lo_half[num] = simplify_gen_subreg (SImode, op,
7705					      GET_MODE (op) == VOIDmode
7706					      ? DImode : GET_MODE (op), 0);
7707	  hi_half[num] = simplify_gen_subreg (SImode, op,
7708					      GET_MODE (op) == VOIDmode
7709					      ? DImode : GET_MODE (op), 4);
7710	}
7711    }
7712}
7713/* Split one or more TImode RTL references into pairs of DImode
7714   references.  The RTL can be REG, offsettable MEM, integer constant, or
7715   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7716   split and "num" is its length.  lo_half and hi_half are output arrays
7717   that parallel "operands".  */
7718
7719void
7720split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7721{
7722  while (num--)
7723    {
7724      rtx op = operands[num];
7725
7726      /* simplify_subreg refuses to split volatile memory addresses, but we
7727         still have to handle them.  */
7728      if (GET_CODE (op) == MEM)
7729	{
7730	  lo_half[num] = adjust_address (op, DImode, 0);
7731	  hi_half[num] = adjust_address (op, DImode, 8);
7732	}
7733      else
7734	{
7735	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7736	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7737	}
7738    }
7739}
7740
7741/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7742   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7743   is the expression of the binary operation.  The output may either be
7744   emitted here, or returned to the caller, like all output_* functions.
7745
7746   There is no guarantee that the operands are the same mode, as they
7747   might be within FLOAT or FLOAT_EXTEND expressions.  */
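/* For the SSE case the result is simply, e.g., "addss\t{%2, %0|%0, %2}" or
   "mulsd\t{%2, %0|%0, %2}"; the 387 case additionally has to pick the right
   fadd/fsub/fmul/fdiv variant and pop behavior depending on which stack
   registers die.  */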
7748
7749#ifndef SYSV386_COMPAT
7750/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7751   wants to fix the assemblers because that causes incompatibility
7752   with gcc.  No-one wants to fix gcc because that causes
7753   incompatibility with assemblers...  You can use the option of
7754   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7755#define SYSV386_COMPAT 1
7756#endif
7757
7758const char *
7759output_387_binary_op (rtx insn, rtx *operands)
7760{
7761  static char buf[30];
7762  const char *p;
7763  const char *ssep;
7764  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
7765
7766#ifdef ENABLE_CHECKING
7767  /* Even if we do not want to check the inputs, this documents the input
7768     constraints, which helps in understanding the following code.  */
7769  if (STACK_REG_P (operands[0])
7770      && ((REG_P (operands[1])
7771	   && REGNO (operands[0]) == REGNO (operands[1])
7772	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7773	  || (REG_P (operands[2])
7774	      && REGNO (operands[0]) == REGNO (operands[2])
7775	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7776      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7777    ; /* ok */
7778  else
7779    gcc_assert (is_sse);
7780#endif
7781
7782  switch (GET_CODE (operands[3]))
7783    {
7784    case PLUS:
7785      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7786	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7787	p = "fiadd";
7788      else
7789	p = "fadd";
7790      ssep = "add";
7791      break;
7792
7793    case MINUS:
7794      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7795	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7796	p = "fisub";
7797      else
7798	p = "fsub";
7799      ssep = "sub";
7800      break;
7801
7802    case MULT:
7803      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7804	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7805	p = "fimul";
7806      else
7807	p = "fmul";
7808      ssep = "mul";
7809      break;
7810
7811    case DIV:
7812      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7813	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7814	p = "fidiv";
7815      else
7816	p = "fdiv";
7817      ssep = "div";
7818      break;
7819
7820    default:
7821      gcc_unreachable ();
7822    }
7823
7824  if (is_sse)
7825   {
7826      strcpy (buf, ssep);
7827      if (GET_MODE (operands[0]) == SFmode)
7828	strcat (buf, "ss\t{%2, %0|%0, %2}");
7829      else
7830	strcat (buf, "sd\t{%2, %0|%0, %2}");
7831      return buf;
7832   }
7833  strcpy (buf, p);
7834
7835  switch (GET_CODE (operands[3]))
7836    {
7837    case MULT:
7838    case PLUS:
7839      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7840	{
7841	  rtx temp = operands[2];
7842	  operands[2] = operands[1];
7843	  operands[1] = temp;
7844	}
7845
7846      /* We know operands[0] == operands[1].  */
7847
7848      if (GET_CODE (operands[2]) == MEM)
7849	{
7850	  p = "%z2\t%2";
7851	  break;
7852	}
7853
7854      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7855	{
7856	  if (STACK_TOP_P (operands[0]))
7857	    /* How is it that we are storing to a dead operand[2]?
7858	       Well, presumably operands[1] is dead too.  We can't
7859	       store the result to st(0) as st(0) gets popped on this
7860	       instruction.  Instead store to operands[2] (which I
7861	       think has to be st(1)).  st(1) will be popped later.
7862	       gcc <= 2.8.1 didn't have this check and generated
7863	       assembly code that the Unixware assembler rejected.  */
7864	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7865	  else
7866	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7867	  break;
7868	}
7869
7870      if (STACK_TOP_P (operands[0]))
7871	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7872      else
7873	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7874      break;
7875
7876    case MINUS:
7877    case DIV:
7878      if (GET_CODE (operands[1]) == MEM)
7879	{
7880	  p = "r%z1\t%1";
7881	  break;
7882	}
7883
7884      if (GET_CODE (operands[2]) == MEM)
7885	{
7886	  p = "%z2\t%2";
7887	  break;
7888	}
7889
7890      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7891	{
7892#if SYSV386_COMPAT
7893	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7894	     derived assemblers, confusingly reverse the direction of
7895	     the operation for fsub{r} and fdiv{r} when the
7896	     destination register is not st(0).  The Intel assembler
7897	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
7898	     figure out what the hardware really does.  */
7899	  if (STACK_TOP_P (operands[0]))
7900	    p = "{p\t%0, %2|rp\t%2, %0}";
7901	  else
7902	    p = "{rp\t%2, %0|p\t%0, %2}";
7903#else
7904	  if (STACK_TOP_P (operands[0]))
7905	    /* As above for fmul/fadd, we can't store to st(0).  */
7906	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
7907	  else
7908	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
7909#endif
7910	  break;
7911	}
7912
7913      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
7914	{
7915#if SYSV386_COMPAT
7916	  if (STACK_TOP_P (operands[0]))
7917	    p = "{rp\t%0, %1|p\t%1, %0}";
7918	  else
7919	    p = "{p\t%1, %0|rp\t%0, %1}";
7920#else
7921	  if (STACK_TOP_P (operands[0]))
7922	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
7923	  else
7924	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
7925#endif
7926	  break;
7927	}
7928
7929      if (STACK_TOP_P (operands[0]))
7930	{
7931	  if (STACK_TOP_P (operands[1]))
7932	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
7933	  else
7934	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
7935	  break;
7936	}
7937      else if (STACK_TOP_P (operands[1]))
7938	{
7939#if SYSV386_COMPAT
7940	  p = "{\t%1, %0|r\t%0, %1}";
7941#else
7942	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
7943#endif
7944	}
7945      else
7946	{
7947#if SYSV386_COMPAT
7948	  p = "{r\t%2, %0|\t%0, %2}";
7949#else
7950	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
7951#endif
7952	}
7953      break;
7954
7955    default:
7956      gcc_unreachable ();
7957    }
7958
7959  strcat (buf, p);
7960  return buf;
7961}
7962
7963/* Return needed mode for entity in optimize_mode_switching pass.  */
7964
7965int
7966ix86_mode_needed (int entity, rtx insn)
7967{
7968  enum attr_i387_cw mode;
7969
7970  /* The mode UNINITIALIZED is used to store the control word after a
7971     function call or ASM pattern.  The mode ANY specifies that the function
7972     has no requirements on the control word and makes no changes in the
7973     bits we are interested in.  */
7974
7975  if (CALL_P (insn)
7976      || (NONJUMP_INSN_P (insn)
7977	  && (asm_noperands (PATTERN (insn)) >= 0
7978	      || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
7979    return I387_CW_UNINITIALIZED;
7980
7981  if (recog_memoized (insn) < 0)
7982    return I387_CW_ANY;
7983
7984  mode = get_attr_i387_cw (insn);
7985
7986  switch (entity)
7987    {
7988    case I387_TRUNC:
7989      if (mode == I387_CW_TRUNC)
7990	return mode;
7991      break;
7992
7993    case I387_FLOOR:
7994      if (mode == I387_CW_FLOOR)
7995	return mode;
7996      break;
7997
7998    case I387_CEIL:
7999      if (mode == I387_CW_CEIL)
8000	return mode;
8001      break;
8002
8003    case I387_MASK_PM:
8004      if (mode == I387_CW_MASK_PM)
8005	return mode;
8006      break;
8007
8008    default:
8009      gcc_unreachable ();
8010    }
8011
8012  return I387_CW_ANY;
8013}
8014
8015/* Output code to initialize the control word copies used by the trunc?f?i
8016   and rounding patterns.  MODE selects which rounding mode (I387_CW_*) the
8017   stored control word copy is set up for.  */
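/* The rounding control field occupies bits 10 and 11 of the x87 control
   word (mask 0x0c00): 00 = round to nearest, 01 = round down, 10 = round
   up, 11 = truncate toward zero.  Bit 5 (0x0020) masks the precision
   exception, which is what the nearbyint patterns need.  */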
8018
8019void
8020emit_i387_cw_initialization (int mode)
8021{
8022  rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8023  rtx new_mode;
8024
8025  int slot;
8026
8027  rtx reg = gen_reg_rtx (HImode);
8028
8029  emit_insn (gen_x86_fnstcw_1 (stored_mode));
8030  emit_move_insn (reg, stored_mode);
8031
8032  if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8033    {
8034      switch (mode)
8035	{
8036	case I387_CW_TRUNC:
8037	  /* round toward zero (truncate) */
8038	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8039	  slot = SLOT_CW_TRUNC;
8040	  break;
8041
8042	case I387_CW_FLOOR:
8043	  /* round down toward -oo */
8044	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8045	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8046	  slot = SLOT_CW_FLOOR;
8047	  break;
8048
8049	case I387_CW_CEIL:
8050	  /* round up toward +oo */
8051	  emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8052	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8053	  slot = SLOT_CW_CEIL;
8054	  break;
8055
8056	case I387_CW_MASK_PM:
8057	  /* mask precision exception for nearbyint() */
8058	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8059	  slot = SLOT_CW_MASK_PM;
8060	  break;
8061
8062	default:
8063	  gcc_unreachable ();
8064	}
8065    }
8066  else
8067    {
8068      switch (mode)
8069	{
8070	case I387_CW_TRUNC:
8071	  /* round toward zero (truncate) */
8072	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8073	  slot = SLOT_CW_TRUNC;
8074	  break;
8075
8076	case I387_CW_FLOOR:
8077	  /* round down toward -oo */
8078	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8079	  slot = SLOT_CW_FLOOR;
8080	  break;
8081
8082	case I387_CW_CEIL:
8083	  /* round up toward +oo */
8084	  emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8085	  slot = SLOT_CW_CEIL;
8086	  break;
8087
8088	case I387_CW_MASK_PM:
8089	  /* mask precision exception for nearbyint() */
8090	  emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8091	  slot = SLOT_CW_MASK_PM;
8092	  break;
8093
8094	default:
8095	  gcc_unreachable ();
8096	}
8097    }
8098
8099  gcc_assert (slot < MAX_386_STACK_LOCALS);
8100
8101  new_mode = assign_386_stack_local (HImode, slot);
8102  emit_move_insn (new_mode, reg);
8103}
8104
8105/* Output code for INSN to convert a float to a signed int.  OPERANDS
8106   are the insn operands.  The output may be [HSD]Imode and the input
8107   operand may be [SDX]Fmode.  */
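/* A typical non-SSE3 sequence is "fldcw %3" (switch to the truncating
   control word), "fistp%z0 %0", then "fldcw %2" (restore the original
   control word); with SSE3's fisttp no control word switching is needed,
   since that instruction always truncates.  */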
8108
8109const char *
8110output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8111{
8112  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8113  int dimode_p = GET_MODE (operands[0]) == DImode;
8114  int round_mode = get_attr_i387_cw (insn);
8115
8116  /* Jump through a hoop or two for DImode, since the hardware has no
8117     non-popping instruction.  We used to do this a different way, but
8118     that was somewhat fragile and broke with post-reload splitters.  */
8119  if ((dimode_p || fisttp) && !stack_top_dies)
8120    output_asm_insn ("fld\t%y1", operands);
8121
8122  gcc_assert (STACK_TOP_P (operands[1]));
8123  gcc_assert (GET_CODE (operands[0]) == MEM);
8124
8125  if (fisttp)
8126      output_asm_insn ("fisttp%z0\t%0", operands);
8127  else
8128    {
8129      if (round_mode != I387_CW_ANY)
8130	output_asm_insn ("fldcw\t%3", operands);
8131      if (stack_top_dies || dimode_p)
8132	output_asm_insn ("fistp%z0\t%0", operands);
8133      else
8134	output_asm_insn ("fist%z0\t%0", operands);
8135      if (round_mode != I387_CW_ANY)
8136	output_asm_insn ("fldcw\t%2", operands);
8137    }
8138
8139  return "";
8140}
8141
8142/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8143   should be used.  UNORDERED_P is true when fucom should be used.  */
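/* With EFLAGS_P the result is an fcomi/fucomi-style instruction that sets
   the integer flags directly; otherwise an fcom/fucom-style compare is
   followed by "fnstsw %0" to copy the FPU status word into the output
   operand (normally %ax).  */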
8144
8145const char *
8146output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8147{
8148  int stack_top_dies;
8149  rtx cmp_op0, cmp_op1;
8150  int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
8151
8152  if (eflags_p)
8153    {
8154      cmp_op0 = operands[0];
8155      cmp_op1 = operands[1];
8156    }
8157  else
8158    {
8159      cmp_op0 = operands[1];
8160      cmp_op1 = operands[2];
8161    }
8162
8163  if (is_sse)
8164    {
8165      if (GET_MODE (operands[0]) == SFmode)
8166	if (unordered_p)
8167	  return "ucomiss\t{%1, %0|%0, %1}";
8168	else
8169	  return "comiss\t{%1, %0|%0, %1}";
8170      else
8171	if (unordered_p)
8172	  return "ucomisd\t{%1, %0|%0, %1}";
8173	else
8174	  return "comisd\t{%1, %0|%0, %1}";
8175    }
8176
8177  gcc_assert (STACK_TOP_P (cmp_op0));
8178
8179  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8180
8181  if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
8182    {
8183      if (stack_top_dies)
8184	{
8185	  output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
8186	  return TARGET_USE_FFREEP ? "ffreep\t%y1" : "fstp\t%y1";
8187	}
8188      else
8189	return "ftst\n\tfnstsw\t%0";
8190    }
8191
8192  if (STACK_REG_P (cmp_op1)
8193      && stack_top_dies
8194      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8195      && REGNO (cmp_op1) != FIRST_STACK_REG)
8196    {
8197      /* If both the top of the 387 stack and the other operand (also a
8198	 stack register) die, then this must be a `fcompp' float
8199	 compare.  */
8200
8201      if (eflags_p)
8202	{
8203	  /* There is no double popping fcomi variant.  Fortunately,
8204	     eflags is immune from the fstp's cc clobbering.  */
8205	  if (unordered_p)
8206	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8207	  else
8208	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8209	  return TARGET_USE_FFREEP ? "ffreep\t%y0" : "fstp\t%y0";
8210	}
8211      else
8212	{
8213	  if (unordered_p)
8214	    return "fucompp\n\tfnstsw\t%0";
8215	  else
8216	    return "fcompp\n\tfnstsw\t%0";
8217	}
8218    }
8219  else
8220    {
8221      /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies.  */
8222
8223      static const char * const alt[16] =
8224      {
8225	"fcom%z2\t%y2\n\tfnstsw\t%0",
8226	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8227	"fucom%z2\t%y2\n\tfnstsw\t%0",
8228	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8229
8230	"ficom%z2\t%y2\n\tfnstsw\t%0",
8231	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8232	NULL,
8233	NULL,
8234
8235	"fcomi\t{%y1, %0|%0, %y1}",
8236	"fcomip\t{%y1, %0|%0, %y1}",
8237	"fucomi\t{%y1, %0|%0, %y1}",
8238	"fucomip\t{%y1, %0|%0, %y1}",
8239
8240	NULL,
8241	NULL,
8242	NULL,
8243	NULL
8244      };
8245
8246      int mask;
8247      const char *ret;
8248
8249      mask  = eflags_p << 3;
8250      mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
8251      mask |= unordered_p << 1;
8252      mask |= stack_top_dies;
8253
8254      gcc_assert (mask < 16);
8255      ret = alt[mask];
8256      gcc_assert (ret);
8257
8258      return ret;
8259    }
8260}
8261
8262void
8263ix86_output_addr_vec_elt (FILE *file, int value)
8264{
8265  const char *directive = ASM_LONG;
8266
8267#ifdef ASM_QUAD
8268  if (TARGET_64BIT)
8269    directive = ASM_QUAD;
8270#else
8271  gcc_assert (!TARGET_64BIT);
8272#endif
8273
8274  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8275}
8276
8277void
8278ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8279{
8280  if (TARGET_64BIT)
8281    fprintf (file, "%s%s%d-%s%d\n",
8282	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8283  else if (HAVE_AS_GOTOFF_IN_DATA)
8284    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8285#if TARGET_MACHO
8286  else if (TARGET_MACHO)
8287    {
8288      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8289      machopic_output_function_base_name (file);
8290      fprintf(file, "\n");
8291    }
8292#endif
8293  else
8294    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8295		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8296}
8297
8298/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8299   for the target.  */
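/* "xor reg, reg" is the shorter encoding but clobbers the flags, which is
   why a flags clobber is attached to that pattern below; "mov $0, reg" is
   used only when TARGET_USE_MOV0 asks for it and we are not optimizing for
   size.  */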
8300
8301void
8302ix86_expand_clear (rtx dest)
8303{
8304  rtx tmp;
8305
8306  /* We play register width games, which are only valid after reload.  */
8307  gcc_assert (reload_completed);
8308
8309  /* Avoid HImode and its attendant prefix byte.  */
8310  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8311    dest = gen_rtx_REG (SImode, REGNO (dest));
8312
8313  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8314
8315  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8316  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8317    {
8318      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8319      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8320    }
8321
8322  emit_insn (tmp);
8323}
8324
8325/* X is an unchanging MEM.  If it is a constant pool reference, return
8326   the constant pool rtx, else NULL.  */
8327
8328rtx
8329maybe_get_pool_constant (rtx x)
8330{
8331  x = ix86_delegitimize_address (XEXP (x, 0));
8332
8333  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8334    return get_pool_constant (x);
8335
8336  return NULL_RTX;
8337}
8338
8339void
8340ix86_expand_move (enum machine_mode mode, rtx operands[])
8341{
8342  int strict = (reload_in_progress || reload_completed);
8343  rtx op0, op1;
8344  enum tls_model model;
8345
8346  op0 = operands[0];
8347  op1 = operands[1];
8348
8349  if (GET_CODE (op1) == SYMBOL_REF)
8350    {
8351      model = SYMBOL_REF_TLS_MODEL (op1);
8352      if (model)
8353	{
8354	  op1 = legitimize_tls_address (op1, model, true);
8355	  op1 = force_operand (op1, op0);
8356	  if (op1 == op0)
8357	    return;
8358	}
8359    }
8360  else if (GET_CODE (op1) == CONST
8361	   && GET_CODE (XEXP (op1, 0)) == PLUS
8362	   && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
8363    {
8364      model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
8365      if (model)
8366	{
8367	  rtx addend = XEXP (XEXP (op1, 0), 1);
8368	  op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
8369	  op1 = force_operand (op1, NULL);
8370	  op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
8371				     op0, 1, OPTAB_DIRECT);
8372	  if (op1 == op0)
8373	    return;
8374	}
8375    }
8376
8377  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8378    {
8379#if TARGET_MACHO
8380      if (MACHOPIC_PURE)
8381	{
8382	  rtx temp = ((reload_in_progress
8383		       || ((op0 && GET_CODE (op0) == REG)
8384			   && mode == Pmode))
8385		      ? op0 : gen_reg_rtx (Pmode));
8386	  op1 = machopic_indirect_data_reference (op1, temp);
8387	  op1 = machopic_legitimize_pic_address (op1, mode,
8388						 temp == op1 ? 0 : temp);
8389	}
8390      else if (MACHOPIC_INDIRECT)
8391	op1 = machopic_indirect_data_reference (op1, 0);
8392      if (op0 == op1)
8393	return;
8394#else
8395      if (GET_CODE (op0) == MEM)
8396	op1 = force_reg (Pmode, op1);
8397      else
8398	op1 = legitimize_address (op1, op1, Pmode);
8399#endif /* TARGET_MACHO */
8400    }
8401  else
8402    {
8403      if (GET_CODE (op0) == MEM
8404	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8405	      || !push_operand (op0, mode))
8406	  && GET_CODE (op1) == MEM)
8407	op1 = force_reg (mode, op1);
8408
8409      if (push_operand (op0, mode)
8410	  && ! general_no_elim_operand (op1, mode))
8411	op1 = copy_to_mode_reg (mode, op1);
8412
8413      /* Force large constants in 64-bit compilation into a register
8414	 so that they get CSEed.  */
8415      if (TARGET_64BIT && mode == DImode
8416	  && immediate_operand (op1, mode)
8417	  && !x86_64_zext_immediate_operand (op1, VOIDmode)
8418	  && !register_operand (op0, mode)
8419	  && optimize && !reload_completed && !reload_in_progress)
8420	op1 = copy_to_mode_reg (mode, op1);
8421
8422      if (FLOAT_MODE_P (mode))
8423	{
8424	  /* If we are loading a floating point constant to a register,
8425	     force the value to memory now, since we'll get better code
8426	     out the back end.  */
8427
8428	  if (strict)
8429	    ;
8430	  else if (GET_CODE (op1) == CONST_DOUBLE)
8431	    {
8432	      op1 = validize_mem (force_const_mem (mode, op1));
8433	      if (!register_operand (op0, mode))
8434		{
8435		  rtx temp = gen_reg_rtx (mode);
8436		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8437		  emit_move_insn (op0, temp);
8438		  return;
8439		}
8440	    }
8441	}
8442    }
8443
8444  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8445}
8446
8447void
8448ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8449{
8450  rtx op0 = operands[0], op1 = operands[1];
8451
8452  /* Force constants other than zero into memory.  We do not know how
8453     the instructions used to build constants modify the upper 64 bits
8454     of the register; once we have that information we may be able
8455     to handle some of them more efficiently.  */
8456  if ((reload_in_progress | reload_completed) == 0
8457      && register_operand (op0, mode)
8458      && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
8459    op1 = validize_mem (force_const_mem (mode, op1));
8460
8461  /* If neither operand is a register, copy OP1 to OP0 through a fresh register.  */
8462  if (!no_new_pseudos
8463      && !register_operand (op0, mode)
8464      && !register_operand (op1, mode))
8465    {
8466      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
8467      return;
8468    }
8469
8470  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8471}
8472
8473/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
8474   straight to ix86_expand_vector_move.  */
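/* For example, an unaligned V4SF load becomes a single "movups" when
   optimizing for size, and otherwise a "movlps"/"movhps" pair issued after
   clearing or clobbering the destination to break the dependency on its
   old contents.  */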
8475
8476void
8477ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
8478{
8479  rtx op0, op1, m;
8480
8481  op0 = operands[0];
8482  op1 = operands[1];
8483
8484  if (MEM_P (op1))
8485    {
8486      /* If we're optimizing for size, movups is the smallest.  */
8487      if (optimize_size)
8488	{
8489	  op0 = gen_lowpart (V4SFmode, op0);
8490	  op1 = gen_lowpart (V4SFmode, op1);
8491	  emit_insn (gen_sse_movups (op0, op1));
8492	  return;
8493	}
8494
8495      /* ??? If we have typed data, then it would appear that using
8496	 movdqu is the only way to get unaligned data loaded with
8497	 integer type.  */
8498      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8499	{
8500	  op0 = gen_lowpart (V16QImode, op0);
8501	  op1 = gen_lowpart (V16QImode, op1);
8502	  emit_insn (gen_sse2_movdqu (op0, op1));
8503	  return;
8504	}
8505
8506      if (TARGET_SSE2 && mode == V2DFmode)
8507	{
8508	  rtx zero;
8509
8510	  /* When SSE registers are split into halves, we can avoid
8511	     writing to the top half twice.  */
8512	  if (TARGET_SSE_SPLIT_REGS)
8513	    {
8514	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8515	      zero = op0;
8516	    }
8517	  else
8518	    {
8519	      /* ??? Not sure about the best option for the Intel chips.
8520		 The following would seem to satisfy; the register is
8521		 entirely cleared, breaking the dependency chain.  We
8522		 then store to the upper half, with a dependency depth
8523		 of one.  A rumor has it that Intel recommends two movsd
8524		 followed by an unpacklpd, but this is unconfirmed.  And
8525		 given that the dependency depth of the unpacklpd would
8526		 still be one, I'm not sure why this would be better.  */
8527	      zero = CONST0_RTX (V2DFmode);
8528	    }
8529
8530	  m = adjust_address (op1, DFmode, 0);
8531	  emit_insn (gen_sse2_loadlpd (op0, zero, m));
8532	  m = adjust_address (op1, DFmode, 8);
8533	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
8534	}
8535      else
8536	{
8537	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
8538	    emit_move_insn (op0, CONST0_RTX (mode));
8539	  else
8540	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
8541
8542	  if (mode != V4SFmode)
8543	    op0 = gen_lowpart (V4SFmode, op0);
8544	  m = adjust_address (op1, V2SFmode, 0);
8545	  emit_insn (gen_sse_loadlps (op0, op0, m));
8546	  m = adjust_address (op1, V2SFmode, 8);
8547	  emit_insn (gen_sse_loadhps (op0, op0, m));
8548	}
8549    }
8550  else if (MEM_P (op0))
8551    {
8552      /* If we're optimizing for size, movups is the smallest.  */
8553      if (optimize_size)
8554	{
8555	  op0 = gen_lowpart (V4SFmode, op0);
8556	  op1 = gen_lowpart (V4SFmode, op1);
8557	  emit_insn (gen_sse_movups (op0, op1));
8558	  return;
8559	}
8560
8561      /* ??? Similar to above, only less clear because of "typeless
8562	 stores".  */
8563      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
8564	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
8565        {
8566	  op0 = gen_lowpart (V16QImode, op0);
8567	  op1 = gen_lowpart (V16QImode, op1);
8568	  emit_insn (gen_sse2_movdqu (op0, op1));
8569	  return;
8570	}
8571
8572      if (TARGET_SSE2 && mode == V2DFmode)
8573	{
8574	  m = adjust_address (op0, DFmode, 0);
8575	  emit_insn (gen_sse2_storelpd (m, op1));
8576	  m = adjust_address (op0, DFmode, 8);
8577	  emit_insn (gen_sse2_storehpd (m, op1));
8578	}
8579      else
8580	{
8581	  if (mode != V4SFmode)
8582	    op1 = gen_lowpart (V4SFmode, op1);
8583	  m = adjust_address (op0, V2SFmode, 0);
8584	  emit_insn (gen_sse_storelps (m, op1));
8585	  m = adjust_address (op0, V2SFmode, 8);
8586	  emit_insn (gen_sse_storehps (m, op1));
8587	}
8588    }
8589  else
8590    gcc_unreachable ();
8591}
8592
8593/* Expand a push in MODE.  This is some mode for which we do not support
8594   proper push instructions, at least from the registers that we expect
8595   the value to live in.  */
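/* The expansion simply decrements the stack pointer by the size of MODE and
   then stores X into the newly allocated stack slot with an ordinary move.  */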
8596
8597void
8598ix86_expand_push (enum machine_mode mode, rtx x)
8599{
8600  rtx tmp;
8601
8602  tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
8603			     GEN_INT (-GET_MODE_SIZE (mode)),
8604			     stack_pointer_rtx, 1, OPTAB_DIRECT);
8605  if (tmp != stack_pointer_rtx)
8606    emit_move_insn (stack_pointer_rtx, tmp);
8607
8608  tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
8609  emit_move_insn (tmp, x);
8610}
8611
8612/* Fix up OPERANDS to satisfy ix86_binary_operator_ok.  Return the
8613   destination to use for the operation.  If different from the true
8614   destination in operands[0], a copy operation will be required.  */
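/* In short: at most one source operand may remain in memory, a memory
   destination is used directly only when it matches one of the sources
   (otherwise the result goes through a fresh register), and for
   non-commutative operators the first source may not be a constant or
   non-matching memory.  */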
8615
8616rtx
8617ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
8618			    rtx operands[])
8619{
8620  int matching_memory;
8621  rtx src1, src2, dst;
8622
8623  dst = operands[0];
8624  src1 = operands[1];
8625  src2 = operands[2];
8626
8627  /* Recognize <var1> = <value> <op> <var1> for commutative operators.  */
8628  if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8629      && (rtx_equal_p (dst, src2)
8630	  || immediate_operand (src1, mode)))
8631    {
8632      rtx temp = src1;
8633      src1 = src2;
8634      src2 = temp;
8635    }
8636
8637  /* If the destination is memory, and we do not have matching source
8638     operands, do things in registers.  */
8639  matching_memory = 0;
8640  if (GET_CODE (dst) == MEM)
8641    {
8642      if (rtx_equal_p (dst, src1))
8643	matching_memory = 1;
8644      else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8645	       && rtx_equal_p (dst, src2))
8646	matching_memory = 2;
8647      else
8648	dst = gen_reg_rtx (mode);
8649    }
8650
8651  /* Both source operands cannot be in memory.  */
8652  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8653    {
8654      if (matching_memory != 2)
8655	src2 = force_reg (mode, src2);
8656      else
8657	src1 = force_reg (mode, src1);
8658    }
8659
8660  /* If the operation is not commutative, source 1 cannot be a constant
8661     or non-matching memory.  */
8662  if ((CONSTANT_P (src1)
8663       || (!matching_memory && GET_CODE (src1) == MEM))
8664      && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8665    src1 = force_reg (mode, src1);
8666
8667  operands[1] = src1;
8668  operands[2] = src2;
8669  return dst;
8670}
8671
8672/* Similarly, but assume that the destination has already been
8673   set up properly.  */
8674
8675void
8676ix86_fixup_binary_operands_no_copy (enum rtx_code code,
8677				    enum machine_mode mode, rtx operands[])
8678{
8679  rtx dst = ix86_fixup_binary_operands (code, mode, operands);
8680  gcc_assert (dst == operands[0]);
8681}
8682
8683/* Attempt to expand a binary operator.  Make the expansion closer to the
8684   actual machine than just general_operand, which would allow 3 separate
8685   memory references (one output, two input) in a single insn.  */
8686
8687void
8688ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8689			     rtx operands[])
8690{
8691  rtx src1, src2, dst, op, clob;
8692
8693  dst = ix86_fixup_binary_operands (code, mode, operands);
8694  src1 = operands[1];
8695  src2 = operands[2];
8696
8697  /* Emit the instruction.  */
8698
8699  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8700  if (reload_in_progress)
8701    {
8702      /* Reload doesn't know about the flags register, and doesn't know that
8703         it doesn't want to clobber it.  We can only do this with PLUS.  */
8704      gcc_assert (code == PLUS);
8705      emit_insn (op);
8706    }
8707  else
8708    {
8709      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8710      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8711    }
8712
8713  /* Fix up the destination if needed.  */
8714  if (dst != operands[0])
8715    emit_move_insn (operands[0], dst);
8716}
8717
8718/* Return TRUE or FALSE depending on whether the binary operator meets the
8719   appropriate constraints.  */
8720
8721int
8722ix86_binary_operator_ok (enum rtx_code code,
8723			 enum machine_mode mode ATTRIBUTE_UNUSED,
8724			 rtx operands[3])
8725{
8726  /* Both source operands cannot be in memory.  */
8727  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8728    return 0;
8729  /* If the operation is not commutative, source 1 cannot be a constant.  */
8730  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
8731    return 0;
8732  /* If the destination is memory, we must have a matching source operand.  */
8733  if (GET_CODE (operands[0]) == MEM
8734      && ! (rtx_equal_p (operands[0], operands[1])
8735	    || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
8736		&& rtx_equal_p (operands[0], operands[2]))))
8737    return 0;
8738  /* If the operation is not commutative and source 1 is memory, we must
8739     have a matching destination.  */
8740  if (GET_CODE (operands[1]) == MEM
8741      && GET_RTX_CLASS (code) != RTX_COMM_ARITH
8742      && ! rtx_equal_p (operands[0], operands[1]))
8743    return 0;
8744  return 1;
8745}
8746
8747/* Attempt to expand a unary operator.  Make the expansion closer to the
8748   actual machine than just general_operand, which would allow 2 separate
8749   memory references (one output, one input) in a single insn.  */
8750
8751void
8752ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8753			    rtx operands[])
8754{
8755  int matching_memory;
8756  rtx src, dst, op, clob;
8757
8758  dst = operands[0];
8759  src = operands[1];
8760
8761  /* If the destination is memory, and we do not have matching source
8762     operands, do things in registers.  */
8763  matching_memory = 0;
8764  if (MEM_P (dst))
8765    {
8766      if (rtx_equal_p (dst, src))
8767	matching_memory = 1;
8768      else
8769	dst = gen_reg_rtx (mode);
8770    }
8771
8772  /* When source operand is memory, destination must match.  */
8773  if (MEM_P (src) && !matching_memory)
8774    src = force_reg (mode, src);
8775
8776  /* Emit the instruction.  */
8777
8778  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8779  if (reload_in_progress || code == NOT)
8780    {
8781      /* Reload doesn't know about the flags register, and doesn't know that
8782         it doesn't want to clobber it.  */
8783      gcc_assert (code == NOT);
8784      emit_insn (op);
8785    }
8786  else
8787    {
8788      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8789      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8790    }
8791
8792  /* Fix up the destination if needed.  */
8793  if (dst != operands[0])
8794    emit_move_insn (operands[0], dst);
8795}
8796
8797/* Return TRUE or FALSE depending on whether the unary operator meets the
8798   appropriate constraints.  */
8799
8800int
8801ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8802			enum machine_mode mode ATTRIBUTE_UNUSED,
8803			rtx operands[2] ATTRIBUTE_UNUSED)
8804{
8805  /* If one of operands is memory, source and destination must match.  */
8806  if ((GET_CODE (operands[0]) == MEM
8807       || GET_CODE (operands[1]) == MEM)
8808      && ! rtx_equal_p (operands[0], operands[1]))
8809    return FALSE;
8810  return TRUE;
8811}
8812
8813/* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
8814   Create a mask for the sign bit in MODE for an SSE register.  If VECT is
8815   true, then replicate the mask for all elements of the vector register.
8816   If INVERT is true, then create a mask excluding the sign bit.  */
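/* The result is a vector constant whose first element has only the sign bit
   set (or everything except the sign bit when INVERT), with the remaining
   elements zero, or with that mask replicated into every element when VECT.  */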
8817
8818rtx
8819ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
8820{
8821  enum machine_mode vec_mode;
8822  HOST_WIDE_INT hi, lo;
8823  int shift = 63;
8824  rtvec v;
8825  rtx mask;
8826
8827  /* Find the sign bit, sign extended to 2*HWI.  */
8828  if (mode == SFmode)
8829    lo = 0x80000000, hi = lo < 0;
8830  else if (HOST_BITS_PER_WIDE_INT >= 64)
8831    lo = (HOST_WIDE_INT)1 << shift, hi = -1;
8832  else
8833    lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
8834
8835  if (invert)
8836    lo = ~lo, hi = ~hi;
8837
8838  /* Force this value into the low part of a fp vector constant.  */
8839  mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
8840  mask = gen_lowpart (mode, mask);
8841
8842  if (mode == SFmode)
8843    {
8844      if (vect)
8845	v = gen_rtvec (4, mask, mask, mask, mask);
8846      else
8847	v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
8848		       CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8849      vec_mode = V4SFmode;
8850    }
8851  else
8852    {
8853      if (vect)
8854	v = gen_rtvec (2, mask, mask);
8855      else
8856	v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
8857      vec_mode = V2DFmode;
8858    }
8859
8860  return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
8861}
8862
8863/* Generate code for floating point ABS or NEG.  */
8864
8865void
8866ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
8867				rtx operands[])
8868{
8869  rtx mask, set, use, clob, dst, src;
8870  bool matching_memory;
8871  bool use_sse = false;
8872  bool vector_mode = VECTOR_MODE_P (mode);
8873  enum machine_mode elt_mode = mode;
8874
8875  if (vector_mode)
8876    {
8877      elt_mode = GET_MODE_INNER (mode);
8878      use_sse = true;
8879    }
8880  else if (TARGET_SSE_MATH)
8881    use_sse = SSE_FLOAT_MODE_P (mode);
8882
8883  /* NEG and ABS performed with SSE use bitwise mask operations.
8884     Create the appropriate mask now.  */
8885  if (use_sse)
8886    mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
8887  else
8888    {
8889      /* When not using SSE, we don't use the mask, but prefer to keep the
8890	 same general form of the insn pattern to reduce duplication when
8891	 it comes time to split.  */
8892      mask = const0_rtx;
8893    }
8894
8895  dst = operands[0];
8896  src = operands[1];
8897
8898  /* If the destination is memory, and we don't have matching source
8899     operands, do things in registers.  */
8900  matching_memory = false;
8901  if (MEM_P (dst))
8902    {
8903      if (rtx_equal_p (dst, src))
8904	matching_memory = true;
8905      else
8906	dst = gen_reg_rtx (mode);
8907    }
8908  if (MEM_P (src) && !matching_memory)
8909    src = force_reg (mode, src);
8910
8911  if (vector_mode)
8912    {
8913      set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
8914      set = gen_rtx_SET (VOIDmode, dst, set);
8915      emit_insn (set);
8916    }
8917  else
8918    {
8919      set = gen_rtx_fmt_e (code, mode, src);
8920      set = gen_rtx_SET (VOIDmode, dst, set);
8921      use = gen_rtx_USE (VOIDmode, mask);
8922      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8923      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, set, use, clob)));
8924    }
8925
8926  if (dst != operands[0])
8927    emit_move_insn (operands[0], dst);
8928}
8929
8930/* Expand a copysign operation.  Special case operand 0 being a constant.  */
8931
8932void
8933ix86_expand_copysign (rtx operands[])
8934{
8935  enum machine_mode mode, vmode;
8936  rtx dest, op0, op1, mask, nmask;
8937
8938  dest = operands[0];
8939  op0 = operands[1];
8940  op1 = operands[2];
8941
8942  mode = GET_MODE (dest);
8943  vmode = mode == SFmode ? V4SFmode : V2DFmode;
8944
8945  if (GET_CODE (op0) == CONST_DOUBLE)
8946    {
8947      rtvec v;
8948
8949      if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
8950	op0 = simplify_unary_operation (ABS, mode, op0, mode);
8951
8952      if (op0 == CONST0_RTX (mode))
8953	op0 = CONST0_RTX (vmode);
8954      else
8955        {
8956	  if (mode == SFmode)
8957	    v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
8958                           CONST0_RTX (SFmode), CONST0_RTX (SFmode));
8959	  else
8960	    v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
8961          op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
8962	}
8963
8964      mask = ix86_build_signbit_mask (mode, 0, 0);
8965
8966      if (mode == SFmode)
8967	emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
8968      else
8969	emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
8970    }
8971  else
8972    {
8973      nmask = ix86_build_signbit_mask (mode, 0, 1);
8974      mask = ix86_build_signbit_mask (mode, 0, 0);
8975
8976      if (mode == SFmode)
8977	emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
8978      else
8979	emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
8980    }
8981}
8982
8983/* Deconstruct a copysign operation into bit masks.  Operand 0 is known to
8984   be a constant, and so has already been expanded into a vector constant.  */
8985
8986void
8987ix86_split_copysign_const (rtx operands[])
8988{
8989  enum machine_mode mode, vmode;
8990  rtx dest, op0, op1, mask, x;
8991
8992  dest = operands[0];
8993  op0 = operands[1];
8994  op1 = operands[2];
8995  mask = operands[3];
8996
8997  mode = GET_MODE (dest);
8998  vmode = GET_MODE (mask);
8999
9000  dest = simplify_gen_subreg (vmode, dest, mode, 0);
9001  x = gen_rtx_AND (vmode, dest, mask);
9002  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9003
9004  if (op0 != CONST0_RTX (vmode))
9005    {
9006      x = gen_rtx_IOR (vmode, dest, op0);
9007      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9008    }
9009}
9010
9011/* Deconstruct a copysign operation into bit masks.  Operand 0 is variable,
9012   so we have to do two masks.  */
9013
9014void
9015ix86_split_copysign_var (rtx operands[])
9016{
9017  enum machine_mode mode, vmode;
9018  rtx dest, scratch, op0, op1, mask, nmask, x;
9019
9020  dest = operands[0];
9021  scratch = operands[1];
9022  op0 = operands[2];
9023  op1 = operands[3];
9024  nmask = operands[4];
9025  mask = operands[5];
9026
9027  mode = GET_MODE (dest);
9028  vmode = GET_MODE (mask);
9029
9030  if (rtx_equal_p (op0, op1))
9031    {
9032      /* Shouldn't happen often (it's useless, obviously), but when it does
9033	 we'd generate incorrect code if we continue below.  */
9034      emit_move_insn (dest, op0);
9035      return;
9036    }
9037
9038  if (REG_P (mask) && REGNO (dest) == REGNO (mask))	/* alternative 0 */
9039    {
9040      gcc_assert (REGNO (op1) == REGNO (scratch));
9041
9042      x = gen_rtx_AND (vmode, scratch, mask);
9043      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9044
9045      dest = mask;
9046      op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9047      x = gen_rtx_NOT (vmode, dest);
9048      x = gen_rtx_AND (vmode, x, op0);
9049      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9050    }
9051  else
9052    {
9053      if (REGNO (op1) == REGNO (scratch))		/* alternative 1,3 */
9054	{
9055	  x = gen_rtx_AND (vmode, scratch, mask);
9056	}
9057      else						/* alternative 2,4 */
9058	{
9059          gcc_assert (REGNO (mask) == REGNO (scratch));
9060          op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9061	  x = gen_rtx_AND (vmode, scratch, op1);
9062	}
9063      emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9064
9065      if (REGNO (op0) == REGNO (dest))			/* alternative 1,2 */
9066	{
9067	  dest = simplify_gen_subreg (vmode, op0, mode, 0);
9068	  x = gen_rtx_AND (vmode, dest, nmask);
9069	}
9070      else						/* alternative 3,4 */
9071	{
9072          gcc_assert (REGNO (nmask) == REGNO (dest));
9073	  dest = nmask;
9074	  op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9075	  x = gen_rtx_AND (vmode, dest, op0);
9076	}
9077      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9078    }
9079
9080  x = gen_rtx_IOR (vmode, dest, scratch);
9081  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9082}
9083
9084/* Return TRUE or FALSE depending on whether the first SET in INSN
9085   has source and destination with matching CC modes and the
9086   CC mode is at least as constrained as REQ_MODE.  */
9087
9088int
9089ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9090{
9091  rtx set;
9092  enum machine_mode set_mode;
9093
9094  set = PATTERN (insn);
9095  if (GET_CODE (set) == PARALLEL)
9096    set = XVECEXP (set, 0, 0);
9097  gcc_assert (GET_CODE (set) == SET);
9098  gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9099
9100  set_mode = GET_MODE (SET_DEST (set));
9101  switch (set_mode)
9102    {
9103    case CCNOmode:
9104      if (req_mode != CCNOmode
9105	  && (req_mode != CCmode
9106	      || XEXP (SET_SRC (set), 1) != const0_rtx))
9107	return 0;
9108      break;
9109    case CCmode:
9110      if (req_mode == CCGCmode)
9111	return 0;
9112      /* FALLTHRU */
9113    case CCGCmode:
9114      if (req_mode == CCGOCmode || req_mode == CCNOmode)
9115	return 0;
9116      /* FALLTHRU */
9117    case CCGOCmode:
9118      if (req_mode == CCZmode)
9119	return 0;
9120      /* FALLTHRU */
9121    case CCZmode:
9122      break;
9123
9124    default:
9125      gcc_unreachable ();
9126    }
9127
9128  return (GET_MODE (SET_SRC (set)) == set_mode);
9129}
9130
9131/* Generate insn patterns to do an integer compare of OPERANDS.  */
9132
9133static rtx
9134ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
9135{
9136  enum machine_mode cmpmode;
9137  rtx tmp, flags;
9138
9139  cmpmode = SELECT_CC_MODE (code, op0, op1);
9140  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
9141
9142  /* This is very simple, but making the interface the same as in the
9143     FP case makes the rest of the code easier.  */
9144  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
9145  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
9146
9147  /* Return the test that should be put into the flags user, i.e.
9148     the bcc, scc, or cmov instruction.  */
9149  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
9150}
9151
9152/* Figure out whether to use ordered or unordered fp comparisons.
9153   Return the appropriate mode to use.  */
9154
9155enum machine_mode
9156ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
9157{
9158  /* ??? In order to make all comparisons reversible, we do all comparisons
9159     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
9160     all forms of trapping and nontrapping comparisons, we can make inequality
9161     comparisons trapping again, since it results in better code when using
9162     FCOM based compares.  */
9163  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
9164}
9165
9166enum machine_mode
9167ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
9168{
9169  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9170    return ix86_fp_compare_mode (code);
9171  switch (code)
9172    {
9173      /* Only zero flag is needed.  */
9174    case EQ:			/* ZF=0 */
9175    case NE:			/* ZF!=0 */
9176      return CCZmode;
9177      /* Codes needing carry flag.  */
9178    case GEU:			/* CF=0 */
9179    case GTU:			/* CF=0 & ZF=0 */
9180    case LTU:			/* CF=1 */
9181    case LEU:			/* CF=1 | ZF=1 */
9182      return CCmode;
9183      /* Codes possibly doable only with sign flag when
9184         comparing against zero.  */
9185    case GE:			/* SF=OF   or   SF=0 */
9186    case LT:			/* SF<>OF  or   SF=1 */
9187      if (op1 == const0_rtx)
9188	return CCGOCmode;
9189      else
9190	/* For other cases Carry flag is not required.  */
9191	return CCGCmode;
9192      /* Codes doable only with sign flag when comparing
9193         against zero, but we miss jump instruction for it
9194         so we need to use relational tests against overflow
9195         that thus needs to be zero.  */
9196    case GT:			/* ZF=0 & SF=OF */
9197    case LE:			/* ZF=1 | SF<>OF */
9198      if (op1 == const0_rtx)
9199	return CCNOmode;
9200      else
9201	return CCGCmode;
9202      /* The strcmp pattern does (use flags), and combine may ask us for a
9203	 proper mode.  */
9204    case USE:
9205      return CCmode;
9206    default:
9207      gcc_unreachable ();
9208    }
9209}
9210
9211/* Return the fixed registers used for condition codes.  */
9212
9213static bool
9214ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
9215{
9216  *p1 = FLAGS_REG;
9217  *p2 = FPSR_REG;
9218  return true;
9219}
9220
9221/* If two condition code modes are compatible, return a condition code
9222   mode which is compatible with both.  Otherwise, return
9223   VOIDmode.  */
9224
9225static enum machine_mode
9226ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
9227{
9228  if (m1 == m2)
9229    return m1;
9230
9231  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
9232    return VOIDmode;
9233
9234  if ((m1 == CCGCmode && m2 == CCGOCmode)
9235      || (m1 == CCGOCmode && m2 == CCGCmode))
9236    return CCGCmode;
9237
9238  switch (m1)
9239    {
9240    default:
9241      gcc_unreachable ();
9242
9243    case CCmode:
9244    case CCGCmode:
9245    case CCGOCmode:
9246    case CCNOmode:
9247    case CCZmode:
9248      switch (m2)
9249	{
9250	default:
9251	  return VOIDmode;
9252
9253	case CCmode:
9254	case CCGCmode:
9255	case CCGOCmode:
9256	case CCNOmode:
9257	case CCZmode:
9258	  return CCmode;
9259	}
9260
9261    case CCFPmode:
9262    case CCFPUmode:
9263      /* These are only compatible with themselves, which we already
9264	 checked above.  */
9265      return VOIDmode;
9266    }
9267}
9268
9269/* Return true if we should use an FCOMI instruction for this fp comparison.  */
9270
9271int
9272ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
9273{
9274  enum rtx_code swapped_code = swap_condition (code);
9275  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
9276	  || (ix86_fp_comparison_cost (swapped_code)
9277	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
9278}
9279
9280/* Swap, force into registers, or otherwise massage the two operands
9281   to a fp comparison.  The operands are updated in place; the new
9282   comparison code is returned.  */
9283
9284static enum rtx_code
9285ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
9286{
9287  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
9288  rtx op0 = *pop0, op1 = *pop1;
9289  enum machine_mode op_mode = GET_MODE (op0);
9290  int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
9291
9292  /* All of the unordered compare instructions only work on registers.
9293     The same is true of the fcomi compare instructions.  The XFmode
9294     compare instructions require registers except when comparing
9295     against zero or when converting operand 1 from fixed point to
9296     floating point.  */
9297
9298  if (!is_sse
9299      && (fpcmp_mode == CCFPUmode
9300	  || (op_mode == XFmode
9301	      && ! (standard_80387_constant_p (op0) == 1
9302		    || standard_80387_constant_p (op1) == 1)
9303	      && GET_CODE (op1) != FLOAT)
9304	  || ix86_use_fcomi_compare (code)))
9305    {
9306      op0 = force_reg (op_mode, op0);
9307      op1 = force_reg (op_mode, op1);
9308    }
9309  else
9310    {
9311      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
9312	 things around if they appear profitable, otherwise force op0
9313	 into a register.  */
9314
9315      if (standard_80387_constant_p (op0) == 0
9316	  || (GET_CODE (op0) == MEM
9317	      && ! (standard_80387_constant_p (op1) == 0
9318		    || GET_CODE (op1) == MEM)))
9319	{
9320	  rtx tmp;
9321	  tmp = op0, op0 = op1, op1 = tmp;
9322	  code = swap_condition (code);
9323	}
9324
9325      if (GET_CODE (op0) != REG)
9326	op0 = force_reg (op_mode, op0);
9327
9328      if (CONSTANT_P (op1))
9329	{
9330	  int tmp = standard_80387_constant_p (op1);
9331	  if (tmp == 0)
9332	    op1 = validize_mem (force_const_mem (op_mode, op1));
9333	  else if (tmp == 1)
9334	    {
9335	      if (TARGET_CMOVE)
9336		op1 = force_reg (op_mode, op1);
9337	    }
9338	  else
9339	    op1 = force_reg (op_mode, op1);
9340	}
9341    }
9342
9343  /* Try to rearrange the comparison to make it cheaper.  */
9344  if (ix86_fp_comparison_cost (code)
9345      > ix86_fp_comparison_cost (swap_condition (code))
9346      && (GET_CODE (op1) == REG || !no_new_pseudos))
9347    {
9348      rtx tmp;
9349      tmp = op0, op0 = op1, op1 = tmp;
9350      code = swap_condition (code);
9351      if (GET_CODE (op0) != REG)
9352	op0 = force_reg (op_mode, op0);
9353    }
9354
9355  *pop0 = op0;
9356  *pop1 = op1;
9357  return code;
9358}
9359
9360/* Convert the comparison codes we use to represent FP comparisons to the
9361   integer code that will result in a proper branch.  Return UNKNOWN if no
9362   such code is available.  */
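/* The mapping mirrors the fcomi flag table in ix86_fp_comparison_codes:
   after an FP compare the flags look like those of an unsigned integer
   compare, so e.g. GT becomes GTU and UNLT becomes LTU.  */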
9363
9364enum rtx_code
9365ix86_fp_compare_code_to_integer (enum rtx_code code)
9366{
9367  switch (code)
9368    {
9369    case GT:
9370      return GTU;
9371    case GE:
9372      return GEU;
9373    case ORDERED:
9374    case UNORDERED:
9375      return code;
9376      break;
9377    case UNEQ:
9378      return EQ;
9379      break;
9380    case UNLT:
9381      return LTU;
9382      break;
9383    case UNLE:
9384      return LEU;
9385      break;
9386    case LTGT:
9387      return NE;
9388      break;
9389    default:
9390      return UNKNOWN;
9391    }
9392}
9393
9394/* Split comparison code CODE into comparisons we can do using branch
9395   instructions.  BYPASS_CODE is the comparison code for the branch that
9396   will branch around FIRST_CODE and SECOND_CODE.  If one of the branches
9397   is not required, its code is set to UNKNOWN.
9398   We never require more than two branches.  */
9399
9400void
9401ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
9402			  enum rtx_code *first_code,
9403			  enum rtx_code *second_code)
9404{
9405  *first_code = code;
9406  *bypass_code = UNKNOWN;
9407  *second_code = UNKNOWN;
9408
9409  /* The fcomi comparison sets flags as follows:
9410
9411     cmp    ZF PF CF
9412     >      0  0  0
9413     <      0  0  1
9414     =      1  0  0
9415     un     1  1  1 */
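  /* With TARGET_IEEE_FP, LT, LE and EQ cannot be tested with a single
     branch because their flag patterns also match unordered operands; they
     get an UNORDERED bypass branch below, while NE, UNGE and UNGT need a
     second UNORDERED branch instead.  */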
9416
9417  switch (code)
9418    {
9419    case GT:			/* GTU - CF=0 & ZF=0 */
9420    case GE:			/* GEU - CF=0 */
9421    case ORDERED:		/* PF=0 */
9422    case UNORDERED:		/* PF=1 */
9423    case UNEQ:			/* EQ - ZF=1 */
9424    case UNLT:			/* LTU - CF=1 */
9425    case UNLE:			/* LEU - CF=1 | ZF=1 */
9426    case LTGT:			/* NE - ZF=0 */
9427      break;
9428    case LT:			/* LTU - CF=1 - fails on unordered */
9429      *first_code = UNLT;
9430      *bypass_code = UNORDERED;
9431      break;
9432    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
9433      *first_code = UNLE;
9434      *bypass_code = UNORDERED;
9435      break;
9436    case EQ:			/* EQ - ZF=1 - fails on unordered */
9437      *first_code = UNEQ;
9438      *bypass_code = UNORDERED;
9439      break;
9440    case NE:			/* NE - ZF=0 - fails on unordered */
9441      *first_code = LTGT;
9442      *second_code = UNORDERED;
9443      break;
9444    case UNGE:			/* GEU - CF=0 - fails on unordered */
9445      *first_code = GE;
9446      *second_code = UNORDERED;
9447      break;
9448    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
9449      *first_code = GT;
9450      *second_code = UNORDERED;
9451      break;
9452    default:
9453      gcc_unreachable ();
9454    }
9455  if (!TARGET_IEEE_FP)
9456    {
9457      *second_code = UNKNOWN;
9458      *bypass_code = UNKNOWN;
9459    }
9460}
9461
9462/* Return cost of a comparison done with fcom + arithmetic operations on AX.
9463   All the following functions use the number of instructions as the cost
9464   metric.  In the future this should be tweaked to compute bytes for
9465   optimize_size and take into account per-CPU instruction performance.  */
9466static int
9467ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9468{
9469  if (!TARGET_IEEE_FP)
9470    return 4;
9471  /* The cost of code output by ix86_expand_fp_compare.  */
9472  switch (code)
9473    {
9474    case UNLE:
9475    case UNLT:
9476    case LTGT:
9477    case GT:
9478    case GE:
9479    case UNORDERED:
9480    case ORDERED:
9481    case UNEQ:
9482      return 4;
9483      break;
9484    case LT:
9485    case NE:
9486    case EQ:
9487    case UNGE:
9488      return 5;
9489      break;
9490    case LE:
9491    case UNGT:
9492      return 6;
9493      break;
9494    default:
9495      gcc_unreachable ();
9496    }
9497}
9498
9499/* Return cost of comparison done using fcomi operation.
9500   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9501static int
9502ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9503{
9504  enum rtx_code bypass_code, first_code, second_code;
9505  /* Return an arbitrarily high cost when the instruction is not supported -
9506     this prevents gcc from using it.  */
9507  if (!TARGET_CMOVE)
9508    return 1024;
9509  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9510  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
9511}
9512
9513/* Return cost of comparison done using sahf operation.
9514   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9515static int
9516ix86_fp_comparison_sahf_cost (enum rtx_code code)
9517{
9518  enum rtx_code bypass_code, first_code, second_code;
9519  /* Return an arbitrarily high cost when the instruction is not preferred -
9520     this keeps gcc from using it.  */
9521  if (!TARGET_USE_SAHF && !optimize_size)
9522    return 1024;
9523  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9524  return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
9525}
9526
9527/* Compute cost of the comparison done using any method.
9528   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9529static int
9530ix86_fp_comparison_cost (enum rtx_code code)
9531{
9532  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9533  int min;
9534
9535  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9536  sahf_cost = ix86_fp_comparison_sahf_cost (code);
9537
9538  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9539  if (min > sahf_cost)
9540    min = sahf_cost;
9541  if (min > fcomi_cost)
9542    min = fcomi_cost;
9543  return min;
9544}
9545
9546/* Generate insn patterns to do a floating point compare of OPERANDS.  */
9547
9548static rtx
9549ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9550			rtx *second_test, rtx *bypass_test)
9551{
9552  enum machine_mode fpcmp_mode, intcmp_mode;
9553  rtx tmp, tmp2;
9554  int cost = ix86_fp_comparison_cost (code);
9555  enum rtx_code bypass_code, first_code, second_code;
9556
9557  fpcmp_mode = ix86_fp_compare_mode (code);
9558  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9559
9560  if (second_test)
9561    *second_test = NULL_RTX;
9562  if (bypass_test)
9563    *bypass_test = NULL_RTX;
9564
9565  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9566
9567  /* Do fcomi/sahf based test when profitable.  */
9568  if ((bypass_code == UNKNOWN || bypass_test)
9569      && (second_code == UNKNOWN || second_test)
9570      && ix86_fp_comparison_arithmetics_cost (code) > cost)
9571    {
9572      if (TARGET_CMOVE)
9573	{
9574	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9575	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9576			     tmp);
9577	  emit_insn (tmp);
9578	}
9579      else
9580	{
9581	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9582	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9583	  if (!scratch)
9584	    scratch = gen_reg_rtx (HImode);
9585	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9586	  emit_insn (gen_x86_sahf_1 (scratch));
9587	}
9588
9589      /* The FP codes work out to act like unsigned.  */
9590      intcmp_mode = fpcmp_mode;
9591      code = first_code;
9592      if (bypass_code != UNKNOWN)
9593	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9594				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9595				       const0_rtx);
9596      if (second_code != UNKNOWN)
9597	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9598				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9599				       const0_rtx);
9600    }
9601  else
9602    {
9603      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
9604      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9605      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9606      if (!scratch)
9607	scratch = gen_reg_rtx (HImode);
9608      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9609
9610      /* In the unordered case, we have to check C2 for NaN's, which
9611	 doesn't happen to work out to anything nice combination-wise.
9612	 So do some bit twiddling on the value we've got in AH to come
9613	 up with an appropriate set of condition codes.  */
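      /* After the fnstsw, AH holds the FPU condition bits C0 = 0x01 (below),
	 C2 = 0x04 (unordered) and C3 = 0x40 (equal); the 0x45 mask used
	 below tests all three at once.  */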
9614
9615      intcmp_mode = CCNOmode;
9616      switch (code)
9617	{
9618	case GT:
9619	case UNGT:
9620	  if (code == GT || !TARGET_IEEE_FP)
9621	    {
9622	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9623	      code = EQ;
9624	    }
9625	  else
9626	    {
9627	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9628	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9629	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9630	      intcmp_mode = CCmode;
9631	      code = GEU;
9632	    }
9633	  break;
9634	case LT:
9635	case UNLT:
9636	  if (code == LT && TARGET_IEEE_FP)
9637	    {
9638	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9639	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9640	      intcmp_mode = CCmode;
9641	      code = EQ;
9642	    }
9643	  else
9644	    {
9645	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9646	      code = NE;
9647	    }
9648	  break;
9649	case GE:
9650	case UNGE:
9651	  if (code == GE || !TARGET_IEEE_FP)
9652	    {
9653	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9654	      code = EQ;
9655	    }
9656	  else
9657	    {
9658	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9659	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9660					     GEN_INT (0x01)));
9661	      code = NE;
9662	    }
9663	  break;
9664	case LE:
9665	case UNLE:
9666	  if (code == LE && TARGET_IEEE_FP)
9667	    {
9668	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9669	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9670	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9671	      intcmp_mode = CCmode;
9672	      code = LTU;
9673	    }
9674	  else
9675	    {
9676	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9677	      code = NE;
9678	    }
9679	  break;
9680	case EQ:
9681	case UNEQ:
9682	  if (code == EQ && TARGET_IEEE_FP)
9683	    {
9684	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9685	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9686	      intcmp_mode = CCmode;
9687	      code = EQ;
9688	    }
9689	  else
9690	    {
9691	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9692	      code = NE;
9693	      break;
9694	    }
9695	  break;
9696	case NE:
9697	case LTGT:
9698	  if (code == NE && TARGET_IEEE_FP)
9699	    {
9700	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9701	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9702					     GEN_INT (0x40)));
9703	      code = NE;
9704	    }
9705	  else
9706	    {
9707	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9708	      code = EQ;
9709	    }
9710	  break;
9711
9712	case UNORDERED:
9713	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9714	  code = NE;
9715	  break;
9716	case ORDERED:
9717	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9718	  code = EQ;
9719	  break;
9720
9721	default:
9722	  gcc_unreachable ();
9723	}
9724    }
9725
9726  /* Return the test that should be put into the flags user, i.e.
9727     the bcc, scc, or cmov instruction.  */
9728  return gen_rtx_fmt_ee (code, VOIDmode,
9729			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9730			 const0_rtx);
9731}
9732
9733rtx
9734ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9735{
9736  rtx op0, op1, ret;
9737  op0 = ix86_compare_op0;
9738  op1 = ix86_compare_op1;
9739
9740  if (second_test)
9741    *second_test = NULL_RTX;
9742  if (bypass_test)
9743    *bypass_test = NULL_RTX;
9744
9745  if (ix86_compare_emitted)
9746    {
9747      ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
9748      ix86_compare_emitted = NULL_RTX;
9749    }
9750  else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9751    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9752				  second_test, bypass_test);
9753  else
9754    ret = ix86_expand_int_compare (code, op0, op1);
9755
9756  return ret;
9757}
9758
9759/* Return true if the CODE will result in nontrivial jump sequence.  */
9760bool
9761ix86_fp_jump_nontrivial_p (enum rtx_code code)
9762{
9763  enum rtx_code bypass_code, first_code, second_code;
9764  if (!TARGET_CMOVE)
9765    return true;
9766  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9767  return bypass_code != UNKNOWN || second_code != UNKNOWN;
9768}
9769
9770void
9771ix86_expand_branch (enum rtx_code code, rtx label)
9772{
9773  rtx tmp;
9774
9775  switch (GET_MODE (ix86_compare_op0))
9776    {
9777    case QImode:
9778    case HImode:
9779    case SImode:
9780      simple:
9781      tmp = ix86_expand_compare (code, NULL, NULL);
9782      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9783				  gen_rtx_LABEL_REF (VOIDmode, label),
9784				  pc_rtx);
9785      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9786      return;
9787
9788    case SFmode:
9789    case DFmode:
9790    case XFmode:
9791      {
9792	rtvec vec;
9793	int use_fcomi;
9794	enum rtx_code bypass_code, first_code, second_code;
9795
9796	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9797					     &ix86_compare_op1);
9798
9799	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9800
9801	/* Check whether we will use the natural sequence with one jump.  If
9802	   so, we can expand the jump early.  Otherwise delay expansion by
9803	   creating a compound insn so as not to confuse the optimizers.  */
9804	if (bypass_code == UNKNOWN && second_code == UNKNOWN
9805	    && TARGET_CMOVE)
9806	  {
9807	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9808				  gen_rtx_LABEL_REF (VOIDmode, label),
9809				  pc_rtx, NULL_RTX, NULL_RTX);
9810	  }
9811	else
9812	  {
9813	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
9814				  ix86_compare_op0, ix86_compare_op1);
9815	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9816					gen_rtx_LABEL_REF (VOIDmode, label),
9817					pc_rtx);
9818	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9819
9820	    use_fcomi = ix86_use_fcomi_compare (code);
9821	    vec = rtvec_alloc (3 + !use_fcomi);
9822	    RTVEC_ELT (vec, 0) = tmp;
9823	    RTVEC_ELT (vec, 1)
9824	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9825	    RTVEC_ELT (vec, 2)
9826	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9827	    if (! use_fcomi)
9828	      RTVEC_ELT (vec, 3)
9829		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9830
9831	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9832	  }
9833	return;
9834      }
9835
9836    case DImode:
9837      if (TARGET_64BIT)
9838	goto simple;
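      /* FALLTHRU */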
9839    case TImode:
9840      /* Expand a DImode or TImode branch into multiple compare+branch.  */
9841      {
9842	rtx lo[2], hi[2], label2;
9843	enum rtx_code code1, code2, code3;
9844	enum machine_mode submode;
9845
9846	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9847	  {
9848	    tmp = ix86_compare_op0;
9849	    ix86_compare_op0 = ix86_compare_op1;
9850	    ix86_compare_op1 = tmp;
9851	    code = swap_condition (code);
9852	  }
9853	if (GET_MODE (ix86_compare_op0) == DImode)
9854	  {
9855	    split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9856	    split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9857	    submode = SImode;
9858	  }
9859	else
9860	  {
9861	    split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
9862	    split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
9863	    submode = DImode;
9864	  }
9865
9866	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9867	   avoid two branches.  This costs one extra insn, so disable when
9868	   optimizing for size.  */
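	/* That is, a == b reduces to
	   ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0, with a single final
	   compare of the IOR result against zero.  */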
9869
9870	if ((code == EQ || code == NE)
9871	    && (!optimize_size
9872	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
9873	  {
9874	    rtx xor0, xor1;
9875
9876	    xor1 = hi[0];
9877	    if (hi[1] != const0_rtx)
9878	      xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
9879				   NULL_RTX, 0, OPTAB_WIDEN);
9880
9881	    xor0 = lo[0];
9882	    if (lo[1] != const0_rtx)
9883	      xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
9884				   NULL_RTX, 0, OPTAB_WIDEN);
9885
9886	    tmp = expand_binop (submode, ior_optab, xor1, xor0,
9887				NULL_RTX, 0, OPTAB_WIDEN);
9888
9889	    ix86_compare_op0 = tmp;
9890	    ix86_compare_op1 = const0_rtx;
9891	    ix86_expand_branch (code, label);
9892	    return;
9893	  }
9894
9895	/* Otherwise, if we are doing less-than or greater-or-equal-than,
9896	   and op1 is a constant whose low word is zero, we can just
9897	   examine the high word.  */
9898
9899	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9900	  switch (code)
9901	    {
9902	    case LT: case LTU: case GE: case GEU:
9903	      ix86_compare_op0 = hi[0];
9904	      ix86_compare_op1 = hi[1];
9905	      ix86_expand_branch (code, label);
9906	      return;
9907	    default:
9908	      break;
9909	    }
9910
9911	/* Otherwise, we need two or three jumps.  */
9912
9913	label2 = gen_label_rtx ();
9914
9915	code1 = code;
9916	code2 = swap_condition (code);
9917	code3 = unsigned_condition (code);
9918
9919	switch (code)
9920	  {
9921	  case LT: case GT: case LTU: case GTU:
9922	    break;
9923
9924	  case LE:   code1 = LT;  code2 = GT;  break;
9925	  case GE:   code1 = GT;  code2 = LT;  break;
9926	  case LEU:  code1 = LTU; code2 = GTU; break;
9927	  case GEU:  code1 = GTU; code2 = LTU; break;
9928
9929	  case EQ:   code1 = UNKNOWN; code2 = NE;  break;
9930	  case NE:   code2 = UNKNOWN; break;
9931
9932	  default:
9933	    gcc_unreachable ();
9934	  }
9935
9936	/*
9937	 * a < b =>
9938	 *    if (hi(a) < hi(b)) goto true;
9939	 *    if (hi(a) > hi(b)) goto false;
9940	 *    if (lo(a) < lo(b)) goto true;
9941	 *  false:
9942	 */
9943
9944	ix86_compare_op0 = hi[0];
9945	ix86_compare_op1 = hi[1];
9946
9947	if (code1 != UNKNOWN)
9948	  ix86_expand_branch (code1, label);
9949	if (code2 != UNKNOWN)
9950	  ix86_expand_branch (code2, label2);
9951
9952	ix86_compare_op0 = lo[0];
9953	ix86_compare_op1 = lo[1];
9954	ix86_expand_branch (code3, label);
9955
9956	if (code2 != UNKNOWN)
9957	  emit_label (label2);
9958	return;
9959      }
9960
9961    default:
9962      gcc_unreachable ();
9963    }
9964}
9965
9966/* Split branch based on floating point condition.  */
9967void
9968ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9969		      rtx target1, rtx target2, rtx tmp, rtx pushed)
9970{
9971  rtx second, bypass;
9972  rtx label = NULL_RTX;
9973  rtx condition;
9974  int bypass_probability = -1, second_probability = -1, probability = -1;
9975  rtx i;
9976
9977  if (target2 != pc_rtx)
9978    {
9979      rtx tmp = target2;
9980      code = reverse_condition_maybe_unordered (code);
9981      target2 = target1;
9982      target1 = tmp;
9983    }
9984
9985  condition = ix86_expand_fp_compare (code, op1, op2,
9986				      tmp, &second, &bypass);
9987
9988  /* Remove pushed operand from stack.  */
9989  if (pushed)
9990    ix86_free_from_memory (GET_MODE (pushed));
9991
9992  if (split_branch_probability >= 0)
9993    {
9994      /* Distribute the probabilities across the jumps.
9995	 Assume that BYPASS and SECOND are always tests
9996	 for UNORDERED.  */
9997      probability = split_branch_probability;
9998
9999      /* A value of 1 is low enough that the main branch probability need
10000	 not be updated.  Later we may run some experiments and see
10001	 if unordered values are more frequent in practice.  */
10002      if (bypass)
10003	bypass_probability = 1;
10004      if (second)
10005	second_probability = 1;
10006    }
10007  if (bypass != NULL_RTX)
10008    {
10009      label = gen_label_rtx ();
10010      i = emit_jump_insn (gen_rtx_SET
10011			  (VOIDmode, pc_rtx,
10012			   gen_rtx_IF_THEN_ELSE (VOIDmode,
10013						 bypass,
10014						 gen_rtx_LABEL_REF (VOIDmode,
10015								    label),
10016						 pc_rtx)));
10017      if (bypass_probability >= 0)
10018	REG_NOTES (i)
10019	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10020			       GEN_INT (bypass_probability),
10021			       REG_NOTES (i));
10022    }
10023  i = emit_jump_insn (gen_rtx_SET
10024		      (VOIDmode, pc_rtx,
10025		       gen_rtx_IF_THEN_ELSE (VOIDmode,
10026					     condition, target1, target2)));
10027  if (probability >= 0)
10028    REG_NOTES (i)
10029      = gen_rtx_EXPR_LIST (REG_BR_PROB,
10030			   GEN_INT (probability),
10031			   REG_NOTES (i));
10032  if (second != NULL_RTX)
10033    {
10034      i = emit_jump_insn (gen_rtx_SET
10035			  (VOIDmode, pc_rtx,
10036			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10037						 target2)));
10038      if (second_probability >= 0)
10039	REG_NOTES (i)
10040	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
10041			       GEN_INT (second_probability),
10042			       REG_NOTES (i));
10043    }
10044  if (label != NULL_RTX)
10045    emit_label (label);
10046}
10047
10048int
10049ix86_expand_setcc (enum rtx_code code, rtx dest)
10050{
10051  rtx ret, tmp, tmpreg, equiv;
10052  rtx second_test, bypass_test;
10053
10054  if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10055    return 0; /* FAIL */
10056
10057  gcc_assert (GET_MODE (dest) == QImode);
10058
10059  ret = ix86_expand_compare (code, &second_test, &bypass_test);
10060  PUT_MODE (ret, QImode);
10061
10062  tmp = dest;
10063  tmpreg = dest;
10064
10065  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10066  if (bypass_test || second_test)
10067    {
10068      rtx test = second_test;
10069      int bypass = 0;
10070      rtx tmp2 = gen_reg_rtx (QImode);
10071      if (bypass_test)
10072	{
10073	  gcc_assert (!second_test);
10074	  test = bypass_test;
10075	  bypass = 1;
10076	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10077	}
10078      PUT_MODE (test, QImode);
10079      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10080
10081      if (bypass)
10082	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10083      else
10084	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10085    }
10086
10087  /* Attach a REG_EQUAL note describing the comparison result.  */
10088  if (ix86_compare_op0 && ix86_compare_op1)
10089    {
10090      equiv = simplify_gen_relational (code, QImode,
10091				       GET_MODE (ix86_compare_op0),
10092				       ix86_compare_op0, ix86_compare_op1);
10093      set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10094    }
10095
10096  return 1; /* DONE */
10097}
10098
10099/* Expand comparison setting or clearing carry flag.  Return true when
10100   successful and set pop for the operation.  */
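/* For example, (a == 0) is rewritten below as (unsigned) a < 1 and
   (a >= 0) as (unsigned) a < 0x80000000, so that the result lives entirely
   in the carry flag and can be consumed by an sbb/adc style sequence.  */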
10101static bool
10102ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10103{
10104  enum machine_mode mode =
10105    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10106
10107  /* Do not handle DImode compares that go through the special path.  FP
10108     compares are handled below where possible.  */
10109  if (mode == (TARGET_64BIT ? TImode : DImode))
10110    return false;
10111  if (FLOAT_MODE_P (mode))
10112    {
10113      rtx second_test = NULL, bypass_test = NULL;
10114      rtx compare_op, compare_seq;
10115
10116      /* Shortcut: the following common codes never translate into carry flag compares.  */
10117      if (code == EQ || code == NE || code == UNEQ || code == LTGT
10118	  || code == ORDERED || code == UNORDERED)
10119	return false;
10120
10121      /* These comparisons require the zero flag; swap the operands so they do not.  */
10122      if ((code == GT || code == UNLE || code == LE || code == UNGT)
10123	  && !TARGET_IEEE_FP)
10124	{
10125	  rtx tmp = op0;
10126	  op0 = op1;
10127	  op1 = tmp;
10128	  code = swap_condition (code);
10129	}
10130
10131      /* Try to expand the comparison and verify that we end up with a carry
10132	 flag based comparison.  This fails to be true only when we decide to
10133	 expand the comparison using arithmetic, which is not a common scenario.  */
10134      start_sequence ();
10135      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10136					   &second_test, &bypass_test);
10137      compare_seq = get_insns ();
10138      end_sequence ();
10139
10140      if (second_test || bypass_test)
10141	return false;
10142      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10143	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10144        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
10145      else
10146	code = GET_CODE (compare_op);
10147      if (code != LTU && code != GEU)
10148	return false;
10149      emit_insn (compare_seq);
10150      *pop = compare_op;
10151      return true;
10152    }
10153  if (!INTEGRAL_MODE_P (mode))
10154    return false;
10155  switch (code)
10156    {
10157    case LTU:
10158    case GEU:
10159      break;
10160
10161    /* Convert a==0 into (unsigned)a<1.  */
10162    case EQ:
10163    case NE:
10164      if (op1 != const0_rtx)
10165	return false;
10166      op1 = const1_rtx;
10167      code = (code == EQ ? LTU : GEU);
10168      break;
10169
10170    /* Convert a>b into b<a or a>=b+1.  */
10171    case GTU:
10172    case LEU:
10173      if (GET_CODE (op1) == CONST_INT)
10174	{
10175	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
10176	  /* Bail out on overflow.  We could still swap the operands, but that
10177	     would force loading of the constant into a register.  */
10178	  if (op1 == const0_rtx
10179	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
10180	    return false;
10181	  code = (code == GTU ? GEU : LTU);
10182	}
10183      else
10184	{
10185	  rtx tmp = op1;
10186	  op1 = op0;
10187	  op0 = tmp;
10188	  code = (code == GTU ? LTU : GEU);
10189	}
10190      break;
10191
10192    /* Convert a>=0 into (unsigned)a<0x80000000.  */
10193    case LT:
10194    case GE:
10195      if (mode == DImode || op1 != const0_rtx)
10196	return false;
10197      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10198      code = (code == LT ? GEU : LTU);
10199      break;
10200    case LE:
10201    case GT:
10202      if (mode == DImode || op1 != constm1_rtx)
10203	return false;
10204      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
10205      code = (code == LE ? GEU : LTU);
10206      break;
10207
10208    default:
10209      return false;
10210    }
10211  /* Swapping operands may cause constant to appear as first operand.  */
10212  if (!nonimmediate_operand (op0, VOIDmode))
10213    {
10214      if (no_new_pseudos)
10215	return false;
10216      op0 = force_reg (mode, op0);
10217    }
10218  ix86_compare_op0 = op0;
10219  ix86_compare_op1 = op1;
10220  *pop = ix86_expand_compare (code, NULL, NULL);
10221  gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
10222  return true;
10223}
10224
10225int
10226ix86_expand_int_movcc (rtx operands[])
10227{
10228  enum rtx_code code = GET_CODE (operands[1]), compare_code;
10229  rtx compare_seq, compare_op;
10230  rtx second_test, bypass_test;
10231  enum machine_mode mode = GET_MODE (operands[0]);
10232  bool sign_bit_compare_p = false;
10233
10234  start_sequence ();
10235  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10236  compare_seq = get_insns ();
10237  end_sequence ();
10238
10239  compare_code = GET_CODE (compare_op);
10240
10241  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
10242      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
10243    sign_bit_compare_p = true;
10244
10245  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
10246     HImode insns, we'd be swallowed in word prefix ops.  */
10247
10248  if ((mode != HImode || TARGET_FAST_PREFIX)
10249      && (mode != (TARGET_64BIT ? TImode : DImode))
10250      && GET_CODE (operands[2]) == CONST_INT
10251      && GET_CODE (operands[3]) == CONST_INT)
10252    {
10253      rtx out = operands[0];
10254      HOST_WIDE_INT ct = INTVAL (operands[2]);
10255      HOST_WIDE_INT cf = INTVAL (operands[3]);
10256      HOST_WIDE_INT diff;
10257
10258      diff = ct - cf;
10259      /* Sign bit compares are better done using shifts than by using
10260	 sbb.  */
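      /* For a sign bit compare such as (x < 0 ? ct : cf), emit_store_flag
	 below typically produces a "sar $31" style -1/0 mask directly,
	 avoiding the cmp/sbb pair.  */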
10261      if (sign_bit_compare_p
10262	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10263					     ix86_compare_op1, &compare_op))
10264	{
10265	  /* Detect overlap between destination and compare sources.  */
10266	  rtx tmp = out;
10267
10268          if (!sign_bit_compare_p)
10269	    {
10270	      bool fpcmp = false;
10271
10272	      compare_code = GET_CODE (compare_op);
10273
10274	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10275		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10276		{
10277		  fpcmp = true;
10278		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
10279		}
10280
10281	      /* To simplify the rest of the code, restrict to the GEU case.  */
10282	      if (compare_code == LTU)
10283		{
10284		  HOST_WIDE_INT tmp = ct;
10285		  ct = cf;
10286		  cf = tmp;
10287		  compare_code = reverse_condition (compare_code);
10288		  code = reverse_condition (code);
10289		}
10290	      else
10291		{
10292		  if (fpcmp)
10293		    PUT_CODE (compare_op,
10294			      reverse_condition_maybe_unordered
10295			        (GET_CODE (compare_op)));
10296		  else
10297		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10298		}
10299	      diff = ct - cf;
10300
10301	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
10302		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
10303		tmp = gen_reg_rtx (mode);
10304
10305	      if (mode == DImode)
10306		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
10307	      else
10308		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
10309	    }
10310	  else
10311	    {
10312	      if (code == GT || code == GE)
10313		code = reverse_condition (code);
10314	      else
10315		{
10316		  HOST_WIDE_INT tmp = ct;
10317		  ct = cf;
10318		  cf = tmp;
10319		  diff = ct - cf;
10320		}
10321	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
10322				     ix86_compare_op1, VOIDmode, 0, -1);
10323	    }
10324
10325	  if (diff == 1)
10326	    {
10327	      /*
10328	       * cmpl op0,op1
10329	       * sbbl dest,dest
10330	       * [addl dest, ct]
10331	       *
10332	       * Size 5 - 8.
10333	       */
10334	      if (ct)
10335		tmp = expand_simple_binop (mode, PLUS,
10336					   tmp, GEN_INT (ct),
10337					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10338	    }
10339	  else if (cf == -1)
10340	    {
10341	      /*
10342	       * cmpl op0,op1
10343	       * sbbl dest,dest
10344	       * orl $ct, dest
10345	       *
10346	       * Size 8.
10347	       */
10348	      tmp = expand_simple_binop (mode, IOR,
10349					 tmp, GEN_INT (ct),
10350					 copy_rtx (tmp), 1, OPTAB_DIRECT);
10351	    }
10352	  else if (diff == -1 && ct)
10353	    {
10354	      /*
10355	       * cmpl op0,op1
10356	       * sbbl dest,dest
10357	       * notl dest
10358	       * [addl dest, cf]
10359	       *
10360	       * Size 8 - 11.
10361	       */
10362	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10363	      if (cf)
10364		tmp = expand_simple_binop (mode, PLUS,
10365					   copy_rtx (tmp), GEN_INT (cf),
10366					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10367	    }
10368	  else
10369	    {
10370	      /*
10371	       * cmpl op0,op1
10372	       * sbbl dest,dest
10373	       * [notl dest]
10374	       * andl cf - ct, dest
10375	       * [addl dest, ct]
10376	       *
10377	       * Size 8 - 11.
10378	       */
10379
10380	      if (cf == 0)
10381		{
10382		  cf = ct;
10383		  ct = 0;
10384		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
10385		}
10386
10387	      tmp = expand_simple_binop (mode, AND,
10388					 copy_rtx (tmp),
10389					 gen_int_mode (cf - ct, mode),
10390					 copy_rtx (tmp), 1, OPTAB_DIRECT);
10391	      if (ct)
10392		tmp = expand_simple_binop (mode, PLUS,
10393					   copy_rtx (tmp), GEN_INT (ct),
10394					   copy_rtx (tmp), 1, OPTAB_DIRECT);
10395	    }
10396
10397	  if (!rtx_equal_p (tmp, out))
10398	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
10399
10400	  return 1; /* DONE */
10401	}
10402
10403      if (diff < 0)
10404	{
10405	  HOST_WIDE_INT tmp;
10406	  tmp = ct, ct = cf, cf = tmp;
10407	  diff = -diff;
10408	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10409	    {
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
10414	      compare_code = reverse_condition_maybe_unordered (compare_code);
10415	      code = reverse_condition_maybe_unordered (code);
10416	    }
10417	  else
10418	    {
10419	      compare_code = reverse_condition (compare_code);
10420	      code = reverse_condition (code);
10421	    }
10422	}
10423
10424      compare_code = UNKNOWN;
10425      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
10426	  && GET_CODE (ix86_compare_op1) == CONST_INT)
10427	{
10428	  if (ix86_compare_op1 == const0_rtx
10429	      && (code == LT || code == GE))
10430	    compare_code = code;
10431	  else if (ix86_compare_op1 == constm1_rtx)
10432	    {
10433	      if (code == LE)
10434		compare_code = LT;
10435	      else if (code == GT)
10436		compare_code = GE;
10437	    }
10438	}
10439
10440      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
10441      if (compare_code != UNKNOWN
10442	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10443	  && (cf == -1 || ct == -1))
10444	{
10445	  /* If lea code below could be used, only optimize
10446	     if it results in a 2 insn sequence.  */
10447
10448	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10449		 || diff == 3 || diff == 5 || diff == 9)
10450	      || (compare_code == LT && ct == -1)
10451	      || (compare_code == GE && cf == -1))
10452	    {
10453	      /*
10454	       * notl op1	(if necessary)
10455	       * sarl $31, op1
10456	       * orl cf, op1
10457	       */
10458	      if (ct != -1)
10459		{
10460		  cf = ct;
10461		  ct = -1;
10462		  code = reverse_condition (code);
10463		}
10464
10465	      out = emit_store_flag (out, code, ix86_compare_op0,
10466				     ix86_compare_op1, VOIDmode, 0, -1);
10467
10468	      out = expand_simple_binop (mode, IOR,
10469					 out, GEN_INT (cf),
10470					 out, 1, OPTAB_DIRECT);
10471	      if (out != operands[0])
10472		emit_move_insn (operands[0], out);
10473
10474	      return 1; /* DONE */
10475	    }
10476	}
10477
10478
10479      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10480	   || diff == 3 || diff == 5 || diff == 9)
10481	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10482	  && (mode != DImode
10483	      || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
10484	{
10485	  /*
10486	   * xorl dest,dest
10487	   * cmpl op1,op2
10488	   * setcc dest
10489	   * lea cf(dest*(ct-cf)),dest
10490	   *
10491	   * Size 14.
10492	   *
10493	   * This also catches the degenerate setcc-only case.
10494	   */
10495
10496	  rtx tmp;
10497	  int nops;
10498
10499	  out = emit_store_flag (out, code, ix86_compare_op0,
10500				 ix86_compare_op1, VOIDmode, 0, 1);
10501
10502	  nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
10505	  if (diff == 1)
10506	    tmp = copy_rtx (out);
10507	  else
10508	    {
10509	      rtx out1;
10510	      out1 = copy_rtx (out);
10511	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10512	      nops++;
10513	      if (diff & 1)
10514		{
10515		  tmp = gen_rtx_PLUS (mode, tmp, out1);
10516		  nops++;
10517		}
10518	    }
10519	  if (cf != 0)
10520	    {
10521	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10522	      nops++;
10523	    }
10524	  if (!rtx_equal_p (tmp, out))
10525	    {
10526	      if (nops == 1)
10527		out = force_operand (tmp, copy_rtx (out));
10528	      else
10529		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10530	    }
10531	  if (!rtx_equal_p (out, operands[0]))
10532	    emit_move_insn (operands[0], copy_rtx (out));
10533
10534	  return 1; /* DONE */
10535	}
10536
10537      /*
10538       * General case:			Jumpful:
10539       *   xorl dest,dest		cmpl op1, op2
10540       *   cmpl op1, op2		movl ct, dest
10541       *   setcc dest			jcc 1f
10542       *   decl dest			movl cf, dest
10543       *   andl (cf-ct),dest		1:
10544       *   addl ct,dest
10545       *
10546       * Size 20.			Size 14.
10547       *
10548       * This is reasonably steep, but branch mispredict costs are
10549       * high on modern cpus, so consider failing only if optimizing
10550       * for space.
10551       */
10552
10553      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10554	  && BRANCH_COST >= 2)
10555	{
10556	  if (cf == 0)
10557	    {
10558	      cf = ct;
10559	      ct = 0;
10560	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
		/* We may be reversing an unordered compare to a normal
		   compare, which is not valid in general (we may convert a
		   non-trapping condition to a trapping one); however, on
		   i386 we currently emit all comparisons unordered.  */
10565		code = reverse_condition_maybe_unordered (code);
10566	      else
10567		{
10568		  code = reverse_condition (code);
10569		  if (compare_code != UNKNOWN)
10570		    compare_code = reverse_condition (compare_code);
10571		}
10572	    }
10573
10574	  if (compare_code != UNKNOWN)
10575	    {
10576	      /* notl op1	(if needed)
10577		 sarl $31, op1
10578		 andl (cf-ct), op1
10579		 addl ct, op1
10580
10581		 For x < 0 (resp. x <= -1) there will be no notl,
10582		 so if possible swap the constants to get rid of the
10583		 complement.
10584		 True/false will be -1/0 while code below (store flag
10585		 followed by decrement) is 0/-1, so the constants need
10586		 to be exchanged once more.  */
10587
10588	      if (compare_code == GE || !cf)
10589		{
10590		  code = reverse_condition (code);
10591		  compare_code = LT;
10592		}
10593	      else
10594		{
10595		  HOST_WIDE_INT tmp = cf;
10596		  cf = ct;
10597		  ct = tmp;
10598		}
10599
10600	      out = emit_store_flag (out, code, ix86_compare_op0,
10601				     ix86_compare_op1, VOIDmode, 0, -1);
10602	    }
10603	  else
10604	    {
10605	      out = emit_store_flag (out, code, ix86_compare_op0,
10606				     ix86_compare_op1, VOIDmode, 0, 1);
10607
10608	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10609					 copy_rtx (out), 1, OPTAB_DIRECT);
10610	    }
10611
10612	  out = expand_simple_binop (mode, AND, copy_rtx (out),
10613				     gen_int_mode (cf - ct, mode),
10614				     copy_rtx (out), 1, OPTAB_DIRECT);
10615	  if (ct)
10616	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10617				       copy_rtx (out), 1, OPTAB_DIRECT);
10618	  if (!rtx_equal_p (out, operands[0]))
10619	    emit_move_insn (operands[0], copy_rtx (out));
10620
10621	  return 1; /* DONE */
10622	}
10623    }
10624
10625  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10626    {
      /* Try a few more things with specific constants and a variable.  */
10628
10629      optab op;
10630      rtx var, orig_out, out, tmp;
10631
10632      if (BRANCH_COST <= 2)
10633	return 0; /* FAIL */
10634
      /* If one of the two operands is an interesting constant (0 or -1),
	 recurse to compute the all-constant conditional move with the
	 variable operand replaced by 0 or -1, then mask the variable back
	 in with a logical operation.  */
10637
10638      if (GET_CODE (operands[2]) == CONST_INT)
10639	{
10640	  var = operands[3];
10641	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10642	    operands[3] = constm1_rtx, op = and_optab;
10643	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10644	    operands[3] = const0_rtx, op = ior_optab;
10645	  else
10646	    return 0; /* FAIL */
10647	}
10648      else if (GET_CODE (operands[3]) == CONST_INT)
10649	{
10650	  var = operands[2];
10651	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10652	    operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10654	    operands[2] = const0_rtx, op = ior_optab;
10655	  else
10656	    return 0; /* FAIL */
10657	}
10658      else
10659        return 0; /* FAIL */
10660
10661      orig_out = operands[0];
10662      tmp = gen_reg_rtx (mode);
10663      operands[0] = tmp;
10664
10665      /* Recurse to get the constant loaded.  */
10666      if (ix86_expand_int_movcc (operands) == 0)
10667        return 0; /* FAIL */
10668
10669      /* Mask in the interesting variable.  */
10670      out = expand_binop (mode, op, var, tmp, orig_out, 0,
10671			  OPTAB_WIDEN);
10672      if (!rtx_equal_p (out, orig_out))
10673	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10674
10675      return 1; /* DONE */
10676    }
10677
10678  /*
10679   * For comparison with above,
10680   *
10681   * movl cf,dest
10682   * movl ct,tmp
10683   * cmpl op1,op2
10684   * cmovcc tmp,dest
10685   *
10686   * Size 15.
10687   */
10688
10689  if (! nonimmediate_operand (operands[2], mode))
10690    operands[2] = force_reg (mode, operands[2]);
10691  if (! nonimmediate_operand (operands[3], mode))
10692    operands[3] = force_reg (mode, operands[3]);
10693
10694  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10695    {
10696      rtx tmp = gen_reg_rtx (mode);
10697      emit_move_insn (tmp, operands[3]);
10698      operands[3] = tmp;
10699    }
10700  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10701    {
10702      rtx tmp = gen_reg_rtx (mode);
10703      emit_move_insn (tmp, operands[2]);
10704      operands[2] = tmp;
10705    }
10706
10707  if (! register_operand (operands[2], VOIDmode)
10708      && (mode == QImode
10709          || ! register_operand (operands[3], VOIDmode)))
10710    operands[2] = force_reg (mode, operands[2]);
10711
10712  if (mode == QImode
10713      && ! register_operand (operands[3], VOIDmode))
10714    operands[3] = force_reg (mode, operands[3]);
10715
10716  emit_insn (compare_seq);
10717  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10718			  gen_rtx_IF_THEN_ELSE (mode,
10719						compare_op, operands[2],
10720						operands[3])));
10721  if (bypass_test)
10722    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10723			    gen_rtx_IF_THEN_ELSE (mode,
10724				  bypass_test,
10725				  copy_rtx (operands[3]),
10726				  copy_rtx (operands[0]))));
10727  if (second_test)
10728    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10729			    gen_rtx_IF_THEN_ELSE (mode,
10730				  second_test,
10731				  copy_rtx (operands[2]),
10732				  copy_rtx (operands[0]))));
10733
10734  return 1; /* DONE */
10735}
10736
10737/* Swap, force into registers, or otherwise massage the two operands
10738   to an sse comparison with a mask result.  Thus we differ a bit from
10739   ix86_prepare_fp_compare_args which expects to produce a flags result.
10740
10741   The DEST operand exists to help determine whether to commute commutative
10742   operators.  The POP0/POP1 operands are updated in place.  The new
10743   comparison code is returned, or UNKNOWN if not implementable.  */
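/* As background: the SSE compare instructions (cmpps/cmpss and friends)
   encode only eight predicates - eq, lt, le, unord, neq, nlt (i.e. UNGE),
   nle (UNGT) and ord - so the remaining codes have to be obtained by
   swapping the operands, and LTGT and UNEQ have no single-instruction
   equivalent at all.  */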
10744
10745static enum rtx_code
10746ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
10747				  rtx *pop0, rtx *pop1)
10748{
10749  rtx tmp;
10750
10751  switch (code)
10752    {
10753    case LTGT:
10754    case UNEQ:
10755      /* We have no LTGT as an operator.  We could implement it with
10756	 NE & ORDERED, but this requires an extra temporary.  It's
10757	 not clear that it's worth it.  */
10758      return UNKNOWN;
10759
10760    case LT:
10761    case LE:
10762    case UNGT:
10763    case UNGE:
10764      /* These are supported directly.  */
10765      break;
10766
10767    case EQ:
10768    case NE:
10769    case UNORDERED:
10770    case ORDERED:
10771      /* For commutative operators, try to canonicalize the destination
10772	 operand to be first in the comparison - this helps reload to
10773	 avoid extra moves.  */
10774      if (!dest || !rtx_equal_p (dest, *pop1))
10775	break;
10776      /* FALLTHRU */
10777
10778    case GE:
10779    case GT:
10780    case UNLE:
10781    case UNLT:
10782      /* These are not supported directly.  Swap the comparison operands
10783	 to transform into something that is supported.  */
10784      tmp = *pop0;
10785      *pop0 = *pop1;
10786      *pop1 = tmp;
10787      code = swap_condition (code);
10788      break;
10789
10790    default:
10791      gcc_unreachable ();
10792    }
10793
10794  return code;
10795}
10796
10797/* Detect conditional moves that exactly match min/max operational
10798   semantics.  Note that this is IEEE safe, as long as we don't
10799   interchange the operands.
10800
10801   Returns FALSE if this conditional move doesn't match a MIN/MAX,
10802   and TRUE if the operation is successful and instructions are emitted.  */
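/* Note that the hardware minss/maxss (and their packed forms) compute
   op0 < op1 ? op0 : op1 (mirrored for max) and return the second operand
   whenever that relation does not hold, including NaN operands and equal
   values such as -0.0 vs +0.0; this is why the operand order matters and
   why only LT and UNGE are accepted below.  */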
10803
10804static bool
10805ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
10806			   rtx cmp_op1, rtx if_true, rtx if_false)
10807{
10808  enum machine_mode mode;
10809  bool is_min;
10810  rtx tmp;
10811
10812  if (code == LT)
10813    ;
10814  else if (code == UNGE)
10815    {
10816      tmp = if_true;
10817      if_true = if_false;
10818      if_false = tmp;
10819    }
10820  else
10821    return false;
10822
10823  if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
10824    is_min = true;
10825  else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
10826    is_min = false;
10827  else
10828    return false;
10829
10830  mode = GET_MODE (dest);
10831
10832  /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
10833     but MODE may be a vector mode and thus not appropriate.  */
10834  if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
10835    {
10836      int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
10837      rtvec v;
10838
10839      if_true = force_reg (mode, if_true);
10840      v = gen_rtvec (2, if_true, if_false);
10841      tmp = gen_rtx_UNSPEC (mode, v, u);
10842    }
10843  else
10844    {
10845      code = is_min ? SMIN : SMAX;
10846      tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
10847    }
10848
10849  emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
10850  return true;
10851}
10852
10853/* Expand an sse vector comparison.  Return the register with the result.  */
10854
10855static rtx
10856ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
10857		     rtx op_true, rtx op_false)
10858{
10859  enum machine_mode mode = GET_MODE (dest);
10860  rtx x;
10861
10862  cmp_op0 = force_reg (mode, cmp_op0);
10863  if (!nonimmediate_operand (cmp_op1, mode))
10864    cmp_op1 = force_reg (mode, cmp_op1);
10865
10866  if (optimize
10867      || reg_overlap_mentioned_p (dest, op_true)
10868      || reg_overlap_mentioned_p (dest, op_false))
10869    dest = gen_reg_rtx (mode);
10870
10871  x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
10872  emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10873
10874  return dest;
10875}
10876
10877/* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
10878   operations.  This is used for both scalar and vector conditional moves.  */
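/* With CMP being the all-zeros/all-ones mask produced by an SSE compare,
   this computes, in effect, dest = (cmp & op_true) | (~cmp & op_false);
   when one of the arms is zero a single AND (or AND-NOT) suffices.  */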
10879
10880static void
10881ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
10882{
10883  enum machine_mode mode = GET_MODE (dest);
10884  rtx t2, t3, x;
10885
10886  if (op_false == CONST0_RTX (mode))
10887    {
10888      op_true = force_reg (mode, op_true);
10889      x = gen_rtx_AND (mode, cmp, op_true);
10890      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10891    }
10892  else if (op_true == CONST0_RTX (mode))
10893    {
10894      op_false = force_reg (mode, op_false);
10895      x = gen_rtx_NOT (mode, cmp);
10896      x = gen_rtx_AND (mode, x, op_false);
10897      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10898    }
10899  else
10900    {
10901      op_true = force_reg (mode, op_true);
10902      op_false = force_reg (mode, op_false);
10903
10904      t2 = gen_reg_rtx (mode);
10905      if (optimize)
10906	t3 = gen_reg_rtx (mode);
10907      else
10908	t3 = dest;
10909
10910      x = gen_rtx_AND (mode, op_true, cmp);
10911      emit_insn (gen_rtx_SET (VOIDmode, t2, x));
10912
10913      x = gen_rtx_NOT (mode, cmp);
10914      x = gen_rtx_AND (mode, x, op_false);
10915      emit_insn (gen_rtx_SET (VOIDmode, t3, x));
10916
10917      x = gen_rtx_IOR (mode, t3, t2);
10918      emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10919    }
10920}
10921
10922/* Expand a floating-point conditional move.  Return true if successful.  */
10923
10924int
10925ix86_expand_fp_movcc (rtx operands[])
10926{
10927  enum machine_mode mode = GET_MODE (operands[0]);
10928  enum rtx_code code = GET_CODE (operands[1]);
10929  rtx tmp, compare_op, second_test, bypass_test;
10930
10931  if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
10932    {
10933      enum machine_mode cmode;
10934
10935      /* Since we've no cmove for sse registers, don't force bad register
10936	 allocation just to gain access to it.  Deny movcc when the
10937	 comparison mode doesn't match the move mode.  */
10938      cmode = GET_MODE (ix86_compare_op0);
10939      if (cmode == VOIDmode)
10940	cmode = GET_MODE (ix86_compare_op1);
10941      if (cmode != mode)
10942	return 0;
10943
10944      code = ix86_prepare_sse_fp_compare_args (operands[0], code,
10945					       &ix86_compare_op0,
10946					       &ix86_compare_op1);
10947      if (code == UNKNOWN)
10948	return 0;
10949
10950      if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
10951				     ix86_compare_op1, operands[2],
10952				     operands[3]))
10953	return 1;
10954
10955      tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
10956				 ix86_compare_op1, operands[2], operands[3]);
10957      ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
10958      return 1;
10959    }
10960
10961  /* The floating point conditional move instructions don't directly
10962     support conditions resulting from a signed integer comparison.  */
10963
10964  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10965
10969  if (!fcmov_comparison_operator (compare_op, VOIDmode))
10970    {
10971      gcc_assert (!second_test && !bypass_test);
10972      tmp = gen_reg_rtx (QImode);
10973      ix86_expand_setcc (code, tmp);
10974      code = NE;
10975      ix86_compare_op0 = tmp;
10976      ix86_compare_op1 = const0_rtx;
10977      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
10978    }
10979  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10980    {
10981      tmp = gen_reg_rtx (mode);
10982      emit_move_insn (tmp, operands[3]);
10983      operands[3] = tmp;
10984    }
10985  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10986    {
10987      tmp = gen_reg_rtx (mode);
10988      emit_move_insn (tmp, operands[2]);
10989      operands[2] = tmp;
10990    }
10991
10992  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10993			  gen_rtx_IF_THEN_ELSE (mode, compare_op,
10994						operands[2], operands[3])));
10995  if (bypass_test)
10996    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10997			    gen_rtx_IF_THEN_ELSE (mode, bypass_test,
10998						  operands[3], operands[0])));
10999  if (second_test)
11000    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11001			    gen_rtx_IF_THEN_ELSE (mode, second_test,
11002						  operands[2], operands[0])));
11003
11004  return 1;
11005}
11006
11007/* Expand a floating-point vector conditional move; a vcond operation
11008   rather than a movcc operation.  */
11009
11010bool
11011ix86_expand_fp_vcond (rtx operands[])
11012{
11013  enum rtx_code code = GET_CODE (operands[3]);
11014  rtx cmp;
11015
11016  code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11017					   &operands[4], &operands[5]);
11018  if (code == UNKNOWN)
11019    return false;
11020
11021  if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11022				 operands[5], operands[1], operands[2]))
11023    return true;
11024
11025  cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11026			     operands[1], operands[2]);
11027  ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11028  return true;
11029}
11030
11031/* Expand a signed integral vector conditional move.  */
11032
11033bool
11034ix86_expand_int_vcond (rtx operands[])
11035{
11036  enum machine_mode mode = GET_MODE (operands[0]);
11037  enum rtx_code code = GET_CODE (operands[3]);
11038  bool negate = false;
11039  rtx x, cop0, cop1;
11040
11041  cop0 = operands[4];
11042  cop1 = operands[5];
11043
11044  /* Canonicalize the comparison to EQ, GT, GTU.  */
11045  switch (code)
11046    {
11047    case EQ:
11048    case GT:
11049    case GTU:
11050      break;
11051
11052    case NE:
11053    case LE:
11054    case LEU:
11055      code = reverse_condition (code);
11056      negate = true;
11057      break;
11058
11059    case GE:
11060    case GEU:
11061      code = reverse_condition (code);
11062      negate = true;
11063      /* FALLTHRU */
11064
11065    case LT:
11066    case LTU:
11067      code = swap_condition (code);
11068      x = cop0, cop0 = cop1, cop1 = x;
11069      break;
11070
11071    default:
11072      gcc_unreachable ();
11073    }
11074
11075  /* Unsigned parallel compare is not supported by the hardware.  Play some
11076     tricks to turn this into a signed comparison against 0.  */
11077  if (code == GTU)
11078    {
11079      cop0 = force_reg (mode, cop0);
11080
11081      switch (mode)
11082	{
11083	case V4SImode:
11084	  {
11085	    rtx t1, t2, mask;
11086
11087	    /* Perform a parallel modulo subtraction.  */
11088	    t1 = gen_reg_rtx (mode);
11089	    emit_insn (gen_subv4si3 (t1, cop0, cop1));
11090
11091	    /* Extract the original sign bit of op0.  */
11092	    mask = GEN_INT (-0x80000000);
11093	    mask = gen_rtx_CONST_VECTOR (mode,
11094			gen_rtvec (4, mask, mask, mask, mask));
11095	    mask = force_reg (mode, mask);
11096	    t2 = gen_reg_rtx (mode);
11097	    emit_insn (gen_andv4si3 (t2, cop0, mask));
11098
11099	    /* XOR it back into the result of the subtraction.  This results
11100	       in the sign bit set iff we saw unsigned underflow.  */
11101	    x = gen_reg_rtx (mode);
11102	    emit_insn (gen_xorv4si3 (x, t1, t2));
11103
11104	    code = GT;
11105	  }
11106	  break;
11107
11108	case V16QImode:
11109	case V8HImode:
11110	  /* Perform a parallel unsigned saturating subtraction.  */
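	  /* psubusb/psubusw compute max (a - b, 0), which is zero exactly
	     when a <= b unsigned; comparing the result against zero (EQ)
	     and flipping NEGATE therefore implements the unsigned
	     greater-than selection.  */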
11111	  x = gen_reg_rtx (mode);
11112	  emit_insn (gen_rtx_SET (VOIDmode, x,
11113				  gen_rtx_US_MINUS (mode, cop0, cop1)));
11114
11115	  code = EQ;
11116	  negate = !negate;
11117	  break;
11118
11119	default:
11120	  gcc_unreachable ();
11121	}
11122
11123      cop0 = x;
11124      cop1 = CONST0_RTX (mode);
11125    }
11126
11127  x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
11128			   operands[1+negate], operands[2-negate]);
11129
11130  ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
11131			 operands[2-negate]);
11132  return true;
11133}
11134
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
11138int
11139ix86_expand_int_addcc (rtx operands[])
11140{
11141  enum rtx_code code = GET_CODE (operands[1]);
11142  rtx compare_op;
11143  rtx val = const0_rtx;
11144  bool fpcmp = false;
11145  enum machine_mode mode = GET_MODE (operands[0]);
11146
11147  if (operands[3] != const1_rtx
11148      && operands[3] != constm1_rtx)
11149    return 0;
11150  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11151				       ix86_compare_op1, &compare_op))
11152     return 0;
11153  code = GET_CODE (compare_op);
11154
11155  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11156      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11157    {
11158      fpcmp = true;
11159      code = ix86_fp_compare_code_to_integer (code);
11160    }
11161
11162  if (code != LTU)
11163    {
11164      val = constm1_rtx;
11165      if (fpcmp)
11166	PUT_CODE (compare_op,
11167		  reverse_condition_maybe_unordered
11168		    (GET_CODE (compare_op)));
11169      else
11170	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11171    }
11172  PUT_MODE (compare_op, mode);
11173
11174  /* Construct either adc or sbb insn.  */
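  /* As a quick check of the selection: "adc $0" adds the carry
     (dest += CF) and "sbb $0" subtracts it, while "adc $-1" adds CF - 1
     and "sbb $-1" adds 1 - CF; combined with reversing the condition
     above when it is not already a carry test, this covers a conditional
     +1 or -1 without a branch.  */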
11175  if ((code == LTU) == (operands[3] == constm1_rtx))
11176    {
11177      switch (GET_MODE (operands[0]))
11178	{
11179	  case QImode:
11180            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
11181	    break;
11182	  case HImode:
11183            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
11184	    break;
11185	  case SImode:
11186            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
11187	    break;
11188	  case DImode:
11189            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11190	    break;
11191	  default:
11192	    gcc_unreachable ();
11193	}
11194    }
11195  else
11196    {
11197      switch (GET_MODE (operands[0]))
11198	{
11199	  case QImode:
11200            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
11201	    break;
11202	  case HImode:
11203            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
11204	    break;
11205	  case SImode:
11206            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
11207	    break;
11208	  case DImode:
11209            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
11210	    break;
11211	  default:
11212	    gcc_unreachable ();
11213	}
11214    }
11215  return 1; /* DONE */
11216}
11217
11218
/* Split OPERAND into word-sized parts and store them in PARTS.  Similar to
   split_di, but works for floating-point operands and non-offsettable
   memories.  For pushes, it returns just stack offsets; the values will be
   saved in the right order.  Maximally three parts are generated.  */
11223
11224static int
11225ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
11226{
11227  int size;
11228
11229  if (!TARGET_64BIT)
11230    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
11231  else
11232    size = (GET_MODE_SIZE (mode) + 4) / 8;
11233
11234  gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
11235  gcc_assert (size >= 2 && size <= 3);
11236
11237  /* Optimize constant pool reference to immediates.  This is used by fp
11238     moves, that force all constants to memory to allow combining.  */
11239  if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
11240    {
11241      rtx tmp = maybe_get_pool_constant (operand);
11242      if (tmp)
11243	operand = tmp;
11244    }
11245
11246  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
11247    {
      /* The only non-offsettable memories we handle are pushes.  */
11249      int ok = push_operand (operand, VOIDmode);
11250
11251      gcc_assert (ok);
11252
11253      operand = copy_rtx (operand);
11254      PUT_MODE (operand, Pmode);
11255      parts[0] = parts[1] = parts[2] = operand;
11256      return size;
11257    }
11258
11259  if (GET_CODE (operand) == CONST_VECTOR)
11260    {
11261      enum machine_mode imode = int_mode_for_mode (mode);
11262      /* Caution: if we looked through a constant pool memory above,
11263	 the operand may actually have a different mode now.  That's
11264	 ok, since we want to pun this all the way back to an integer.  */
11265      operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
11266      gcc_assert (operand != NULL);
11267      mode = imode;
11268    }
11269
11270  if (!TARGET_64BIT)
11271    {
11272      if (mode == DImode)
11273	split_di (&operand, 1, &parts[0], &parts[1]);
11274      else
11275	{
11276	  if (REG_P (operand))
11277	    {
11278	      gcc_assert (reload_completed);
11279	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
11280	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
11281	      if (size == 3)
11282		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
11283	    }
11284	  else if (offsettable_memref_p (operand))
11285	    {
11286	      operand = adjust_address (operand, SImode, 0);
11287	      parts[0] = operand;
11288	      parts[1] = adjust_address (operand, SImode, 4);
11289	      if (size == 3)
11290		parts[2] = adjust_address (operand, SImode, 8);
11291	    }
11292	  else if (GET_CODE (operand) == CONST_DOUBLE)
11293	    {
11294	      REAL_VALUE_TYPE r;
11295	      long l[4];
11296
11297	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11298	      switch (mode)
11299		{
11300		case XFmode:
11301		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
11302		  parts[2] = gen_int_mode (l[2], SImode);
11303		  break;
11304		case DFmode:
11305		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
11306		  break;
11307		default:
11308		  gcc_unreachable ();
11309		}
11310	      parts[1] = gen_int_mode (l[1], SImode);
11311	      parts[0] = gen_int_mode (l[0], SImode);
11312	    }
11313	  else
11314	    gcc_unreachable ();
11315	}
11316    }
11317  else
11318    {
11319      if (mode == TImode)
11320	split_ti (&operand, 1, &parts[0], &parts[1]);
11321      if (mode == XFmode || mode == TFmode)
11322	{
11323	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
11324	  if (REG_P (operand))
11325	    {
11326	      gcc_assert (reload_completed);
11327	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
11328	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
11329	    }
11330	  else if (offsettable_memref_p (operand))
11331	    {
11332	      operand = adjust_address (operand, DImode, 0);
11333	      parts[0] = operand;
11334	      parts[1] = adjust_address (operand, upper_mode, 8);
11335	    }
11336	  else if (GET_CODE (operand) == CONST_DOUBLE)
11337	    {
11338	      REAL_VALUE_TYPE r;
11339	      long l[4];
11340
11341	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
11342	      real_to_target (l, &r, mode);
11343
11344	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
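	      /* (Shifting by the full width of the type is undefined in C
		 and draws a warning when HOST_WIDE_INT is only 32 bits
		 wide, hence the double shift.)  */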
11345	      if (HOST_BITS_PER_WIDE_INT >= 64)
11346	        parts[0]
11347		  = gen_int_mode
11348		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
11349		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
11350		       DImode);
11351	      else
11352	        parts[0] = immed_double_const (l[0], l[1], DImode);
11353
11354	      if (upper_mode == SImode)
11355	        parts[1] = gen_int_mode (l[2], SImode);
11356	      else if (HOST_BITS_PER_WIDE_INT >= 64)
11357	        parts[1]
11358		  = gen_int_mode
11359		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
11360		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
11361		       DImode);
11362	      else
11363	        parts[1] = immed_double_const (l[2], l[3], DImode);
11364	    }
11365	  else
11366	    gcc_unreachable ();
11367	}
11368    }
11369
11370  return size;
11371}
11372
/* Emit insns to perform a move or push of DI, DF, and XF values.
   Operands 2-4 are filled with the destination parts in the correct
   order; operands 5-7 with the corresponding source parts.  */
11377
11378void
11379ix86_split_long_move (rtx operands[])
11380{
11381  rtx part[2][3];
11382  int nparts;
11383  int push = 0;
11384  int collisions = 0;
11385  enum machine_mode mode = GET_MODE (operands[0]);
11386
  /* The DFmode expanders may ask us to move a DFmode value.  For a
     64-bit target this is a single move.  By handling it here we
     simplify the i386.md splitters.  */
11390  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
11391    {
11392      /* Optimize constant pool reference to immediates.  This is used by
11393	 fp moves, that force all constants to memory to allow combining.  */
11394
11395      if (GET_CODE (operands[1]) == MEM
11396	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
11397	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
11398	operands[1] = get_pool_constant (XEXP (operands[1], 0));
11399      if (push_operand (operands[0], VOIDmode))
11400	{
11401	  operands[0] = copy_rtx (operands[0]);
11402	  PUT_MODE (operands[0], Pmode);
11403	}
11404      else
11405        operands[0] = gen_lowpart (DImode, operands[0]);
11406      operands[1] = gen_lowpart (DImode, operands[1]);
11407      emit_move_insn (operands[0], operands[1]);
11408      return;
11409    }
11410
11411  /* The only non-offsettable memory we handle is push.  */
11412  if (push_operand (operands[0], VOIDmode))
11413    push = 1;
11414  else
11415    gcc_assert (GET_CODE (operands[0]) != MEM
11416		|| offsettable_memref_p (operands[0]));
11417
11418  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
11419  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
11420
  /* When emitting a push, watch out for source operands on the stack.  */
11422  if (push && GET_CODE (operands[1]) == MEM
11423      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
11424    {
11425      if (nparts == 3)
11426	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
11427				     XEXP (part[1][2], 0));
11428      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
11429				   XEXP (part[1][1], 0));
11430    }
11431
  /* We need to do the copy in the right order in case an address register
     of the source overlaps the destination.  */
11434  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
11435    {
11436      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
11437	collisions++;
11438      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11439	collisions++;
11440      if (nparts == 3
11441	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
11442	collisions++;
11443
11444      /* Collision in the middle part can be handled by reordering.  */
11445      if (collisions == 1 && nparts == 3
11446	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
11447	{
11448	  rtx tmp;
11449	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
11450	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
11451	}
11452
11453      /* If there are more collisions, we can't handle it by reordering.
11454	 Do an lea to the last part and use only one colliding move.  */
11455      else if (collisions > 1)
11456	{
11457	  rtx base;
11458
11459	  collisions = 1;
11460
11461	  base = part[0][nparts - 1];
11462
11463	  /* Handle the case when the last part isn't valid for lea.
11464	     Happens in 64-bit mode storing the 12-byte XFmode.  */
11465	  if (GET_MODE (base) != Pmode)
11466	    base = gen_rtx_REG (Pmode, REGNO (base));
11467
11468	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
11469	  part[1][0] = replace_equiv_address (part[1][0], base);
11470	  part[1][1] = replace_equiv_address (part[1][1],
11471				      plus_constant (base, UNITS_PER_WORD));
11472	  if (nparts == 3)
11473	    part[1][2] = replace_equiv_address (part[1][2],
11474				      plus_constant (base, 8));
11475	}
11476    }
11477
11478  if (push)
11479    {
11480      if (!TARGET_64BIT)
11481	{
11482	  if (nparts == 3)
11483	    {
11484	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
11485                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
11486	      emit_move_insn (part[0][2], part[1][2]);
11487	    }
11488	}
11489      else
11490	{
	  /* In 64-bit mode we don't have a 32-bit push available.  If this
	     is a register, that is OK - we will just use the larger
	     counterpart.  We also retype memory - this comes from an attempt
	     to avoid the REX prefix on moving the second half of a TFmode
	     value.  */
11495	  if (GET_MODE (part[1][1]) == SImode)
11496	    {
11497	      switch (GET_CODE (part[1][1]))
11498		{
11499		case MEM:
11500		  part[1][1] = adjust_address (part[1][1], DImode, 0);
11501		  break;
11502
11503		case REG:
11504		  part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
11505		  break;
11506
11507		default:
11508		  gcc_unreachable ();
11509		}
11510
11511	      if (GET_MODE (part[1][0]) == SImode)
11512		part[1][0] = part[1][1];
11513	    }
11514	}
11515      emit_move_insn (part[0][1], part[1][1]);
11516      emit_move_insn (part[0][0], part[1][0]);
11517      return;
11518    }
11519
  /* Choose the correct order so that the source is not overwritten before
     it is copied.  */
11521  if ((REG_P (part[0][0])
11522       && REG_P (part[1][1])
11523       && (REGNO (part[0][0]) == REGNO (part[1][1])
11524	   || (nparts == 3
11525	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
11526      || (collisions > 0
11527	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
11528    {
11529      if (nparts == 3)
11530	{
11531	  operands[2] = part[0][2];
11532	  operands[3] = part[0][1];
11533	  operands[4] = part[0][0];
11534	  operands[5] = part[1][2];
11535	  operands[6] = part[1][1];
11536	  operands[7] = part[1][0];
11537	}
11538      else
11539	{
11540	  operands[2] = part[0][1];
11541	  operands[3] = part[0][0];
11542	  operands[5] = part[1][1];
11543	  operands[6] = part[1][0];
11544	}
11545    }
11546  else
11547    {
11548      if (nparts == 3)
11549	{
11550	  operands[2] = part[0][0];
11551	  operands[3] = part[0][1];
11552	  operands[4] = part[0][2];
11553	  operands[5] = part[1][0];
11554	  operands[6] = part[1][1];
11555	  operands[7] = part[1][2];
11556	}
11557      else
11558	{
11559	  operands[2] = part[0][0];
11560	  operands[3] = part[0][1];
11561	  operands[5] = part[1][0];
11562	  operands[6] = part[1][1];
11563	}
11564    }
11565
11566  /* If optimizing for size, attempt to locally unCSE nonzero constants.  */
11567  if (optimize_size)
11568    {
11569      if (GET_CODE (operands[5]) == CONST_INT
11570	  && operands[5] != const0_rtx
11571	  && REG_P (operands[2]))
11572	{
11573	  if (GET_CODE (operands[6]) == CONST_INT
11574	      && INTVAL (operands[6]) == INTVAL (operands[5]))
11575	    operands[6] = operands[2];
11576
11577	  if (nparts == 3
11578	      && GET_CODE (operands[7]) == CONST_INT
11579	      && INTVAL (operands[7]) == INTVAL (operands[5]))
11580	    operands[7] = operands[2];
11581	}
11582
11583      if (nparts == 3
11584	  && GET_CODE (operands[6]) == CONST_INT
11585	  && operands[6] != const0_rtx
11586	  && REG_P (operands[3])
11587	  && GET_CODE (operands[7]) == CONST_INT
11588	  && INTVAL (operands[7]) == INTVAL (operands[6]))
11589	operands[7] = operands[3];
11590    }
11591
11592  emit_move_insn (operands[2], operands[5]);
11593  emit_move_insn (operands[3], operands[6]);
11594  if (nparts == 3)
11595    emit_move_insn (operands[4], operands[7]);
11596
11597  return;
11598}
11599
11600/* Helper function of ix86_split_ashl used to generate an SImode/DImode
11601   left shift by a constant, either using a single shift or
11602   a sequence of add instructions.  */
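/* The add-based variant relies on "add reg,reg" doubling the value, i.e.
   shifting it left by one; COUNT such adds give a shift by COUNT and are
   preferred when COUNT * the add cost does not exceed the constant-shift
   cost (and we are not optimizing for size).  */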
11603
11604static void
11605ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
11606{
11607  if (count == 1)
11608    {
11609      emit_insn ((mode == DImode
11610		  ? gen_addsi3
11611		  : gen_adddi3) (operand, operand, operand));
11612    }
11613  else if (!optimize_size
11614	   && count * ix86_cost->add <= ix86_cost->shift_const)
11615    {
11616      int i;
11617      for (i=0; i<count; i++)
11618	{
11619	  emit_insn ((mode == DImode
11620		      ? gen_addsi3
11621		      : gen_adddi3) (operand, operand, operand));
11622	}
11623    }
11624  else
11625    emit_insn ((mode == DImode
11626		? gen_ashlsi3
11627		: gen_ashldi3) (operand, operand, GEN_INT (count)));
11628}
11629
11630void
11631ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
11632{
11633  rtx low[2], high[2];
11634  int count;
11635  const int single_width = mode == DImode ? 32 : 64;
11636
11637  if (GET_CODE (operands[2]) == CONST_INT)
11638    {
11639      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11640      count = INTVAL (operands[2]) & (single_width * 2 - 1);
11641
11642      if (count >= single_width)
11643	{
11644	  emit_move_insn (high[0], low[1]);
11645	  emit_move_insn (low[0], const0_rtx);
11646
11647	  if (count > single_width)
11648	    ix86_expand_ashl_const (high[0], count - single_width, mode);
11649	}
11650      else
11651	{
11652	  if (!rtx_equal_p (operands[0], operands[1]))
11653	    emit_move_insn (operands[0], operands[1]);
11654	  emit_insn ((mode == DImode
11655		     ? gen_x86_shld_1
11656		     : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
11657	  ix86_expand_ashl_const (low[0], count, mode);
11658	}
11659      return;
11660    }
11661
11662  (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11663
11664  if (operands[1] == const1_rtx)
11665    {
      /* Assuming we've chosen QImode-capable registers, 1 << N
	 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
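      /* Roughly: clear both halves, test bit 5 (bit 6 for TImode) of the
	 count, set the low byte of exactly one half to 1 with setcc, and
	 then shift both halves by the count; since the hardware ignores
	 the bits of the count above the operand width, the 1 ends up
	 shifted into the right place.  */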
11668      if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
11669	{
11670	  rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
11671
11672	  ix86_expand_clear (low[0]);
11673	  ix86_expand_clear (high[0]);
11674	  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
11675
11676	  d = gen_lowpart (QImode, low[0]);
11677	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11678	  s = gen_rtx_EQ (QImode, flags, const0_rtx);
11679	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
11680
11681	  d = gen_lowpart (QImode, high[0]);
11682	  d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
11683	  s = gen_rtx_NE (QImode, flags, const0_rtx);
11684	  emit_insn (gen_rtx_SET (VOIDmode, d, s));
11685	}
11686
11687      /* Otherwise, we can get the same results by manually performing
11688	 a bit extract operation on bit 5/6, and then performing the two
11689	 shifts.  The two methods of getting 0/1 into low/high are exactly
11690	 the same size.  Avoiding the shift in the bit extract case helps
11691	 pentium4 a bit; no one else seems to care much either way.  */
11692      else
11693	{
11694	  rtx x;
11695
11696	  if (TARGET_PARTIAL_REG_STALL && !optimize_size)
11697	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
11698	  else
11699	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
11700	  emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
11701
11702	  emit_insn ((mode == DImode
11703		      ? gen_lshrsi3
11704		      : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
11705	  emit_insn ((mode == DImode
11706		      ? gen_andsi3
11707		      : gen_anddi3) (high[0], high[0], GEN_INT (1)));
11708	  emit_move_insn (low[0], high[0]);
11709	  emit_insn ((mode == DImode
11710		      ? gen_xorsi3
11711		      : gen_xordi3) (low[0], low[0], GEN_INT (1)));
11712	}
11713
11714      emit_insn ((mode == DImode
11715		    ? gen_ashlsi3
11716		    : gen_ashldi3) (low[0], low[0], operands[2]));
11717      emit_insn ((mode == DImode
11718		    ? gen_ashlsi3
11719		    : gen_ashldi3) (high[0], high[0], operands[2]));
11720      return;
11721    }
11722
11723  if (operands[1] == constm1_rtx)
11724    {
11725      /* For -1 << N, we can avoid the shld instruction, because we
11726	 know that we're shifting 0...31/63 ones into a -1.  */
11727      emit_move_insn (low[0], constm1_rtx);
11728      if (optimize_size)
11729	emit_move_insn (high[0], low[0]);
11730      else
11731	emit_move_insn (high[0], constm1_rtx);
11732    }
11733  else
11734    {
11735      if (!rtx_equal_p (operands[0], operands[1]))
11736	emit_move_insn (operands[0], operands[1]);
11737
11738      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11739      emit_insn ((mode == DImode
11740		  ? gen_x86_shld_1
11741		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
11742    }
11743
11744  emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
11745
11746  if (TARGET_CMOVE && scratch)
11747    {
11748      ix86_expand_clear (scratch);
11749      emit_insn ((mode == DImode
11750		  ? gen_x86_shift_adj_1
11751		  : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
11752    }
11753  else
11754    emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
11755}
11756
11757void
11758ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
11759{
11760  rtx low[2], high[2];
11761  int count;
11762  const int single_width = mode == DImode ? 32 : 64;
11763
11764  if (GET_CODE (operands[2]) == CONST_INT)
11765    {
11766      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11767      count = INTVAL (operands[2]) & (single_width * 2 - 1);
11768
11769      if (count == single_width * 2 - 1)
11770	{
11771	  emit_move_insn (high[0], high[1]);
11772	  emit_insn ((mode == DImode
11773		      ? gen_ashrsi3
11774		      : gen_ashrdi3) (high[0], high[0],
11775				      GEN_INT (single_width - 1)));
11776	  emit_move_insn (low[0], high[0]);
11777
11778	}
11779      else if (count >= single_width)
11780	{
11781	  emit_move_insn (low[0], high[1]);
11782	  emit_move_insn (high[0], low[0]);
11783	  emit_insn ((mode == DImode
11784		      ? gen_ashrsi3
11785		      : gen_ashrdi3) (high[0], high[0],
11786				      GEN_INT (single_width - 1)));
11787	  if (count > single_width)
11788	    emit_insn ((mode == DImode
11789			? gen_ashrsi3
11790			: gen_ashrdi3) (low[0], low[0],
11791					GEN_INT (count - single_width)));
11792	}
11793      else
11794	{
11795	  if (!rtx_equal_p (operands[0], operands[1]))
11796	    emit_move_insn (operands[0], operands[1]);
11797	  emit_insn ((mode == DImode
11798		      ? gen_x86_shrd_1
11799		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11800	  emit_insn ((mode == DImode
11801		      ? gen_ashrsi3
11802		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
11803	}
11804    }
11805  else
11806    {
11807      if (!rtx_equal_p (operands[0], operands[1]))
11808	emit_move_insn (operands[0], operands[1]);
11809
11810      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11811
11812      emit_insn ((mode == DImode
11813		  ? gen_x86_shrd_1
11814		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11815      emit_insn ((mode == DImode
11816		  ? gen_ashrsi3
11817		  : gen_ashrdi3)  (high[0], high[0], operands[2]));
11818
11819      if (TARGET_CMOVE && scratch)
11820	{
11821	  emit_move_insn (scratch, high[0]);
11822	  emit_insn ((mode == DImode
11823		      ? gen_ashrsi3
11824		      : gen_ashrdi3) (scratch, scratch,
11825				      GEN_INT (single_width - 1)));
11826	  emit_insn ((mode == DImode
11827		      ? gen_x86_shift_adj_1
11828		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11829					 scratch));
11830	}
11831      else
11832	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
11833    }
11834}
11835
11836void
11837ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
11838{
11839  rtx low[2], high[2];
11840  int count;
11841  const int single_width = mode == DImode ? 32 : 64;
11842
11843  if (GET_CODE (operands[2]) == CONST_INT)
11844    {
11845      (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
11846      count = INTVAL (operands[2]) & (single_width * 2 - 1);
11847
11848      if (count >= single_width)
11849	{
11850	  emit_move_insn (low[0], high[1]);
11851	  ix86_expand_clear (high[0]);
11852
11853	  if (count > single_width)
11854	    emit_insn ((mode == DImode
11855			? gen_lshrsi3
11856			: gen_lshrdi3) (low[0], low[0],
11857					GEN_INT (count - single_width)));
11858	}
11859      else
11860	{
11861	  if (!rtx_equal_p (operands[0], operands[1]))
11862	    emit_move_insn (operands[0], operands[1]);
11863	  emit_insn ((mode == DImode
11864		      ? gen_x86_shrd_1
11865		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
11866	  emit_insn ((mode == DImode
11867		      ? gen_lshrsi3
11868		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
11869	}
11870    }
11871  else
11872    {
11873      if (!rtx_equal_p (operands[0], operands[1]))
11874	emit_move_insn (operands[0], operands[1]);
11875
11876      (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
11877
11878      emit_insn ((mode == DImode
11879		  ? gen_x86_shrd_1
11880		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
11881      emit_insn ((mode == DImode
11882		  ? gen_lshrsi3
11883		  : gen_lshrdi3) (high[0], high[0], operands[2]));
11884
11885      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
11886      if (TARGET_CMOVE && scratch)
11887	{
11888	  ix86_expand_clear (scratch);
11889	  emit_insn ((mode == DImode
11890		      ? gen_x86_shift_adj_1
11891		      : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
11892					       scratch));
11893	}
11894      else
11895	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11896    }
11897}
11898
/* Helper function for the string operations below.  Test whether the VALUE
   bits of VARIABLE are clear (i.e. it is suitably aligned); if so, the
   emitted code jumps to the returned label, skipping the caller's
   alignment fix-up code.  */
11901static rtx
11902ix86_expand_aligntest (rtx variable, int value)
11903{
11904  rtx label = gen_label_rtx ();
11905  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11906  if (GET_MODE (variable) == DImode)
11907    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11908  else
11909    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11910  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11911			   1, label);
11912  return label;
11913}
11914
/* Decrease COUNTREG by VALUE.  */
11916static void
11917ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11918{
11919  if (GET_MODE (countreg) == DImode)
11920    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11921  else
11922    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11923}
11924
11925/* Zero extend possibly SImode EXP to Pmode register.  */
11926rtx
11927ix86_zero_extend_to_Pmode (rtx exp)
11928{
11929  rtx r;
11930  if (GET_MODE (exp) == VOIDmode)
11931    return force_reg (Pmode, exp);
11932  if (GET_MODE (exp) == Pmode)
11933    return copy_to_mode_reg (Pmode, exp);
11934  r = gen_reg_rtx (Pmode);
11935  emit_insn (gen_zero_extendsidi2 (r, exp));
11936  return r;
11937}
11938
11939/* Expand string move (memcpy) operation.  Use i386 string operations when
11940   profitable.  expand_clrmem contains similar code.  */
11941int
11942ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11943{
11944  rtx srcreg, destreg, countreg, srcexp, destexp;
11945  enum machine_mode counter_mode;
11946  HOST_WIDE_INT align = 0;
11947  unsigned HOST_WIDE_INT count = 0;
11948
11949  if (GET_CODE (align_exp) == CONST_INT)
11950    align = INTVAL (align_exp);
11951
11952  /* Can't use any of this if the user has appropriated esi or edi.  */
11953  if (global_regs[4] || global_regs[5])
11954    return 0;
11955
11956  /* This simple hack avoids all inlining code and simplifies code below.  */
11957  if (!TARGET_ALIGN_STRINGOPS)
11958    align = 64;
11959
11960  if (GET_CODE (count_exp) == CONST_INT)
11961    {
11962      count = INTVAL (count_exp);
11963      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11964	return 0;
11965    }
11966
11967  /* Figure out proper mode for counter.  For 32bits it is always SImode,
11968     for 64bits use SImode when possible, otherwise DImode.
11969     Set count to number of bytes copied when known at compile time.  */
11970  if (!TARGET_64BIT
11971      || GET_MODE (count_exp) == SImode
11972      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
11973    counter_mode = SImode;
11974  else
11975    counter_mode = DImode;
11976
11977  gcc_assert (counter_mode == SImode || counter_mode == DImode);
11978
11979  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11980  if (destreg != XEXP (dst, 0))
11981    dst = replace_equiv_address_nv (dst, destreg);
11982  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11983  if (srcreg != XEXP (src, 0))
11984    src = replace_equiv_address_nv (src, srcreg);
11985
  /* When optimizing for size, emit a simple rep ; movsb instruction for
     counts not divisible by 4, except when the (movsl;)*(movsw;)?(movsb;)?
     sequence is shorter than mov{b,l} $count, %{ecx,cl}; rep; movsb.
     The size of the (movsl;)*(movsw;)?(movsb;)? sequence is
     count / 4 + (count & 3) bytes, while the other sequence is either 4 or
     7 bytes, but we don't know whether the upper 24 (resp. 56) bits of %ecx
     will be known to be zero or not.  The rep; movsb sequence causes higher
     register pressure though, so take that into account.  */
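  /* For example, with count == 11 the inline sequence is movsl; movsl;
     movsw; movsb, i.e. 2 + 2 + 1 = 5 bytes, versus 7 bytes for
     mov $11, %ecx; rep movsb (or 4 bytes when %cl can be used).  */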
11994
11995  if ((!optimize || optimize_size)
11996      && (count == 0
11997	  || ((count & 0x03)
11998	      && (!optimize_size
11999		  || count > 5 * 4
12000		  || (count & 3) + count / 4 > 6))))
12001    {
12002      emit_insn (gen_cld ());
12003      countreg = ix86_zero_extend_to_Pmode (count_exp);
12004      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12005      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
12006      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
12007			      destexp, srcexp));
12008    }
12009
12010  /* For constant aligned (or small unaligned) copies use rep movsl
12011     followed by code copying the rest.  For PentiumPro ensure 8 byte
12012     alignment to allow rep movsl acceleration.  */
12013
12014  else if (count != 0
12015	   && (align >= 8
12016	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12017	       || optimize_size || count < (unsigned int) 64))
12018    {
12019      unsigned HOST_WIDE_INT offset = 0;
12020      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12021      rtx srcmem, dstmem;
12022
12023      emit_insn (gen_cld ());
12024      if (count & ~(size - 1))
12025	{
12026	  if ((TARGET_SINGLE_STRINGOP || optimize_size) && count < 5 * 4)
12027	    {
12028	      enum machine_mode movs_mode = size == 4 ? SImode : DImode;
12029
12030	      while (offset < (count & ~(size - 1)))
12031		{
12032		  srcmem = adjust_automodify_address_nv (src, movs_mode,
12033							 srcreg, offset);
12034		  dstmem = adjust_automodify_address_nv (dst, movs_mode,
12035							 destreg, offset);
12036		  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12037		  offset += size;
12038		}
12039	    }
12040	  else
12041	    {
12042	      countreg = GEN_INT ((count >> (size == 4 ? 2 : 3))
12043				  & (TARGET_64BIT ? -1 : 0x3fffffff));
12044	      countreg = copy_to_mode_reg (counter_mode, countreg);
12045	      countreg = ix86_zero_extend_to_Pmode (countreg);
12046
12047	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12048					GEN_INT (size == 4 ? 2 : 3));
12049	      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12050	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12051
12052	      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12053				      countreg, destexp, srcexp));
12054	      offset = count & ~(size - 1);
12055	    }
12056	}
12057      if (size == 8 && (count & 0x04))
12058	{
12059	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
12060						 offset);
12061	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
12062						 offset);
12063	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12064	  offset += 4;
12065	}
12066      if (count & 0x02)
12067	{
12068	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
12069						 offset);
12070	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
12071						 offset);
12072	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12073	  offset += 2;
12074	}
12075      if (count & 0x01)
12076	{
12077	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
12078						 offset);
12079	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
12080						 offset);
12081	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12082	}
12083    }
12084  /* The generic code based on the glibc implementation:
12085     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
12086     allowing accelerated copying there)
12087     - copy the data using rep movsl
12088     - copy the rest.  */
12089  else
12090    {
12091      rtx countreg2;
12092      rtx label = NULL;
12093      rtx srcmem, dstmem;
12094      int desired_alignment = (TARGET_PENTIUMPRO
12095			       && (count == 0 || count >= (unsigned int) 260)
12096			       ? 8 : UNITS_PER_WORD);
12097      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
12098      dst = change_address (dst, BLKmode, destreg);
12099      src = change_address (src, BLKmode, srcreg);
12100
12101      /* In case we don't know anything about the alignment, default to
12102         the library version, since it is usually equally fast and results in
12103         shorter code.
12104
12105	 Also emit a call when we know that the count is large and call overhead
12106	 will not be important.  */
12107      if (!TARGET_INLINE_ALL_STRINGOPS
12108	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12109	return 0;
12110
12111      if (TARGET_SINGLE_STRINGOP)
12112	emit_insn (gen_cld ());
12113
12114      countreg2 = gen_reg_rtx (Pmode);
12115      countreg = copy_to_mode_reg (counter_mode, count_exp);
12116
12117      /* We don't use loops to align destination and to copy parts smaller
12118         than 4 bytes, because gcc is able to optimize such code better (in
12119         the case the destination or the count really is aligned, gcc is often
12120         able to predict the branches) and also it is friendlier to the
12121         hardware branch prediction.
12122
12123         Using loops is beneficial for the generic case, because we can
12124         handle small counts using the loops.  Many CPUs (such as Athlon)
12125         have large REP prefix setup costs.
12126
12127         This is quite costly.  Maybe we can revisit this decision later or
12128         add some customizability to this code.  */
12129
12130      if (count == 0 && align < desired_alignment)
12131	{
12132	  label = gen_label_rtx ();
12133	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12134				   LEU, 0, counter_mode, 1, label);
12135	}
12136      if (align <= 1)
12137	{
12138	  rtx label = ix86_expand_aligntest (destreg, 1);
12139	  srcmem = change_address (src, QImode, srcreg);
12140	  dstmem = change_address (dst, QImode, destreg);
12141	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12142	  ix86_adjust_counter (countreg, 1);
12143	  emit_label (label);
12144	  LABEL_NUSES (label) = 1;
12145	}
12146      if (align <= 2)
12147	{
12148	  rtx label = ix86_expand_aligntest (destreg, 2);
12149	  srcmem = change_address (src, HImode, srcreg);
12150	  dstmem = change_address (dst, HImode, destreg);
12151	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12152	  ix86_adjust_counter (countreg, 2);
12153	  emit_label (label);
12154	  LABEL_NUSES (label) = 1;
12155	}
12156      if (align <= 4 && desired_alignment > 4)
12157	{
12158	  rtx label = ix86_expand_aligntest (destreg, 4);
12159	  srcmem = change_address (src, SImode, srcreg);
12160	  dstmem = change_address (dst, SImode, destreg);
12161	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12162	  ix86_adjust_counter (countreg, 4);
12163	  emit_label (label);
12164	  LABEL_NUSES (label) = 1;
12165	}
12166
12167      if (label && desired_alignment > 4 && !TARGET_64BIT)
12168	{
12169	  emit_label (label);
12170	  LABEL_NUSES (label) = 1;
12171	  label = NULL_RTX;
12172	}
12173      if (!TARGET_SINGLE_STRINGOP)
12174	emit_insn (gen_cld ());
12175      if (TARGET_64BIT)
12176	{
12177	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12178				  GEN_INT (3)));
12179	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12180	}
12181      else
12182	{
12183	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12184	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12185	}
12186      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
12187      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12188      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
12189			      countreg2, destexp, srcexp));
12190
12191      if (label)
12192	{
12193	  emit_label (label);
12194	  LABEL_NUSES (label) = 1;
12195	}
12196      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12197	{
12198	  srcmem = change_address (src, SImode, srcreg);
12199	  dstmem = change_address (dst, SImode, destreg);
12200	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12201	}
12202      if ((align <= 4 || count == 0) && TARGET_64BIT)
12203	{
12204	  rtx label = ix86_expand_aligntest (countreg, 4);
12205	  srcmem = change_address (src, SImode, srcreg);
12206	  dstmem = change_address (dst, SImode, destreg);
12207	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12208	  emit_label (label);
12209	  LABEL_NUSES (label) = 1;
12210	}
12211      if (align > 2 && count != 0 && (count & 2))
12212	{
12213	  srcmem = change_address (src, HImode, srcreg);
12214	  dstmem = change_address (dst, HImode, destreg);
12215	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12216	}
12217      if (align <= 2 || count == 0)
12218	{
12219	  rtx label = ix86_expand_aligntest (countreg, 2);
12220	  srcmem = change_address (src, HImode, srcreg);
12221	  dstmem = change_address (dst, HImode, destreg);
12222	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12223	  emit_label (label);
12224	  LABEL_NUSES (label) = 1;
12225	}
12226      if (align > 1 && count != 0 && (count & 1))
12227	{
12228	  srcmem = change_address (src, QImode, srcreg);
12229	  dstmem = change_address (dst, QImode, destreg);
12230	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12231	}
12232      if (align <= 1 || count == 0)
12233	{
12234	  rtx label = ix86_expand_aligntest (countreg, 1);
12235	  srcmem = change_address (src, QImode, srcreg);
12236	  dstmem = change_address (dst, QImode, destreg);
12237	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
12238	  emit_label (label);
12239	  LABEL_NUSES (label) = 1;
12240	}
12241    }
12242
12243  return 1;
12244}
12245
12246/* Expand string clear operation (bzero).  Use i386 string operations when
12247   profitable.  expand_movmem contains similar code.  */
12248int
12249ix86_expand_clrmem (rtx dst, rtx count_exp, rtx align_exp)
12250{
12251  rtx destreg, zeroreg, countreg, destexp;
12252  enum machine_mode counter_mode;
12253  HOST_WIDE_INT align = 0;
12254  unsigned HOST_WIDE_INT count = 0;
12255
12256  if (GET_CODE (align_exp) == CONST_INT)
12257    align = INTVAL (align_exp);
12258
12259  /* Can't use any of this if the user has appropriated esi.  */
12260  if (global_regs[4])
12261    return 0;
12262
12263  /* This simple hack avoids all inlining code and simplifies code below.  */
12264  if (!TARGET_ALIGN_STRINGOPS)
12265    align = 32;
12266
12267  if (GET_CODE (count_exp) == CONST_INT)
12268    {
12269      count = INTVAL (count_exp);
12270      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
12271	return 0;
12272    }
12273  /* Figure out proper mode for counter.  For 32bits it is always SImode,
12274     for 64bits use SImode when possible, otherwise DImode.
12275     Set count to the number of bytes cleared when known at compile time.  */
12276  if (!TARGET_64BIT
12277      || GET_MODE (count_exp) == SImode
12278      || x86_64_zext_immediate_operand (count_exp, VOIDmode))
12279    counter_mode = SImode;
12280  else
12281    counter_mode = DImode;
12282
12283  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
12284  if (destreg != XEXP (dst, 0))
12285    dst = replace_equiv_address_nv (dst, destreg);
12286
12287
12288  /* When optimizing for size emit simple rep ; stosb instruction for
12289     counts not divisible by 4.  The movl $N, %ecx; rep; stosb
12290     sequence is 7 bytes long, so if optimizing for size and count is
12291     small enough that some stosl, stosw and stosb instructions without
12292     rep are shorter, fall back into the next if.  */
12293
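  /* For example, when optimizing for size with count == 11,
     (11 & 3) + (11 >> 2) = 5 is not greater than 7, so we skip the rep stosb
     branch and emit the shorter 5-byte stosl/stosw/stosb sequence instead.  */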
12294  if ((!optimize || optimize_size)
12295      && (count == 0
12296	  || ((count & 0x03)
12297	      && (!optimize_size || (count & 0x03) + (count >> 2) > 7))))
12298    {
12299      emit_insn (gen_cld ());
12300
12301      countreg = ix86_zero_extend_to_Pmode (count_exp);
12302      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
12303      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
12304      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
12305    }
12306  else if (count != 0
12307	   && (align >= 8
12308	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
12309	       || optimize_size || count < (unsigned int) 64))
12310    {
12311      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
12312      unsigned HOST_WIDE_INT offset = 0;
12313
12314      emit_insn (gen_cld ());
12315
12316      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
12317      if (count & ~(size - 1))
12318	{
12319	  unsigned HOST_WIDE_INT repcount;
12320	  unsigned int max_nonrep;
12321
12322	  repcount = count >> (size == 4 ? 2 : 3);
12323	  if (!TARGET_64BIT)
12324	    repcount &= 0x3fffffff;
12325
12326	  /* movl $N, %ecx; rep; stosl is 7 bytes, while N x stosl is N bytes.
12327	     movl $N, %ecx; rep; stosq is 8 bytes, while N x stosq is 2xN
12328	     bytes.  In both cases the latter seems to be faster for small
12329	     values of N.  */
12330	  max_nonrep = size == 4 ? 7 : 4;
12331	  if (!optimize_size)
12332	    switch (ix86_tune)
12333	      {
12334	      case PROCESSOR_PENTIUM4:
12335	      case PROCESSOR_NOCONA:
12336	        max_nonrep = 3;
12337	        break;
12338	      default:
12339	        break;
12340	      }
12341
12342	  if (repcount <= max_nonrep)
12343	    while (repcount-- > 0)
12344	      {
12345		rtx mem = adjust_automodify_address_nv (dst,
12346							GET_MODE (zeroreg),
12347							destreg, offset);
12348		emit_insn (gen_strset (destreg, mem, zeroreg));
12349		offset += size;
12350	      }
12351	  else
12352	    {
12353	      countreg = copy_to_mode_reg (counter_mode, GEN_INT (repcount));
12354	      countreg = ix86_zero_extend_to_Pmode (countreg);
12355	      destexp = gen_rtx_ASHIFT (Pmode, countreg,
12356					GEN_INT (size == 4 ? 2 : 3));
12357	      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12358	      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg,
12359				       destexp));
12360	      offset = count & ~(size - 1);
12361	    }
12362	}
12363      if (size == 8 && (count & 0x04))
12364	{
12365	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
12366						  offset);
12367	  emit_insn (gen_strset (destreg, mem,
12368				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12369	  offset += 4;
12370	}
12371      if (count & 0x02)
12372	{
12373	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
12374						  offset);
12375	  emit_insn (gen_strset (destreg, mem,
12376				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12377	  offset += 2;
12378	}
12379      if (count & 0x01)
12380	{
12381	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
12382						  offset);
12383	  emit_insn (gen_strset (destreg, mem,
12384				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12385	}
12386    }
12387  else
12388    {
12389      rtx countreg2;
12390      rtx label = NULL;
12391      /* Compute desired alignment of the string operation.  */
12392      int desired_alignment = (TARGET_PENTIUMPRO
12393			       && (count == 0 || count >= (unsigned int) 260)
12394			       ? 8 : UNITS_PER_WORD);
12395
12396      /* In case we don't know anything about the alignment, default to
12397         the library version, since it is usually equally fast and results in
12398         shorter code.
12399
12400	 Also emit a call when we know that the count is large and call overhead
12401	 will not be important.  */
12402      if (!TARGET_INLINE_ALL_STRINGOPS
12403	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
12404	return 0;
12405
12406      if (TARGET_SINGLE_STRINGOP)
12407	emit_insn (gen_cld ());
12408
12409      countreg2 = gen_reg_rtx (Pmode);
12410      countreg = copy_to_mode_reg (counter_mode, count_exp);
12411      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
12412      /* Get rid of MEM_OFFSET, it won't be accurate.  */
12413      dst = change_address (dst, BLKmode, destreg);
12414
12415      if (count == 0 && align < desired_alignment)
12416	{
12417	  label = gen_label_rtx ();
12418	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
12419				   LEU, 0, counter_mode, 1, label);
12420	}
12421      if (align <= 1)
12422	{
12423	  rtx label = ix86_expand_aligntest (destreg, 1);
12424	  emit_insn (gen_strset (destreg, dst,
12425				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12426	  ix86_adjust_counter (countreg, 1);
12427	  emit_label (label);
12428	  LABEL_NUSES (label) = 1;
12429	}
12430      if (align <= 2)
12431	{
12432	  rtx label = ix86_expand_aligntest (destreg, 2);
12433	  emit_insn (gen_strset (destreg, dst,
12434				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12435	  ix86_adjust_counter (countreg, 2);
12436	  emit_label (label);
12437	  LABEL_NUSES (label) = 1;
12438	}
12439      if (align <= 4 && desired_alignment > 4)
12440	{
12441	  rtx label = ix86_expand_aligntest (destreg, 4);
12442	  emit_insn (gen_strset (destreg, dst,
12443				 (TARGET_64BIT
12444				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
12445				  : zeroreg)));
12446	  ix86_adjust_counter (countreg, 4);
12447	  emit_label (label);
12448	  LABEL_NUSES (label) = 1;
12449	}
12450
12451      if (label && desired_alignment > 4 && !TARGET_64BIT)
12452	{
12453	  emit_label (label);
12454	  LABEL_NUSES (label) = 1;
12455	  label = NULL_RTX;
12456	}
12457
12458      if (!TARGET_SINGLE_STRINGOP)
12459	emit_insn (gen_cld ());
12460      if (TARGET_64BIT)
12461	{
12462	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
12463				  GEN_INT (3)));
12464	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
12465	}
12466      else
12467	{
12468	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
12469	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
12470	}
12471      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
12472      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
12473
12474      if (label)
12475	{
12476	  emit_label (label);
12477	  LABEL_NUSES (label) = 1;
12478	}
12479
12480      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
12481	emit_insn (gen_strset (destreg, dst,
12482			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
12483      if (TARGET_64BIT && (align <= 4 || count == 0))
12484	{
12485	  rtx label = ix86_expand_aligntest (countreg, 4);
12486	  emit_insn (gen_strset (destreg, dst,
12487				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
12488	  emit_label (label);
12489	  LABEL_NUSES (label) = 1;
12490	}
12491      if (align > 2 && count != 0 && (count & 2))
12492	emit_insn (gen_strset (destreg, dst,
12493			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
12494      if (align <= 2 || count == 0)
12495	{
12496	  rtx label = ix86_expand_aligntest (countreg, 2);
12497	  emit_insn (gen_strset (destreg, dst,
12498				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
12499	  emit_label (label);
12500	  LABEL_NUSES (label) = 1;
12501	}
12502      if (align > 1 && count != 0 && (count & 1))
12503	emit_insn (gen_strset (destreg, dst,
12504			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
12505      if (align <= 1 || count == 0)
12506	{
12507	  rtx label = ix86_expand_aligntest (countreg, 1);
12508	  emit_insn (gen_strset (destreg, dst,
12509				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
12510	  emit_label (label);
12511	  LABEL_NUSES (label) = 1;
12512	}
12513    }
12514  return 1;
12515}
12516
12517/* Expand strlen.  */
12518int
12519ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
12520{
12521  rtx addr, scratch1, scratch2, scratch3, scratch4;
12522
12523  /* The generic case of the strlen expander is long.  Avoid expanding
12524     it unless TARGET_INLINE_ALL_STRINGOPS.  */
12525
12526  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12527      && !TARGET_INLINE_ALL_STRINGOPS
12528      && !optimize_size
12529      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
12530    return 0;
12531
12532  addr = force_reg (Pmode, XEXP (src, 0));
12533  scratch1 = gen_reg_rtx (Pmode);
12534
12535  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
12536      && !optimize_size)
12537    {
12538      /* Well it seems that some optimizer does not combine a call like
12539         foo(strlen(bar), strlen(bar));
12540         when the move and the subtraction are done here.  It does calculate
12541         the length just once when these instructions are done inside
12542         output_strlen_unroll().  But since &bar[strlen(bar)] is often used
12543         and this uses one fewer register for the lifetime of
12544         output_strlen_unroll(), this is better.  */
12545
12546      emit_move_insn (out, addr);
12547
12548      ix86_expand_strlensi_unroll_1 (out, src, align);
12549
12550      /* strlensi_unroll_1 returns the address of the zero at the end of
12551         the string, like memchr(), so compute the length by subtracting
12552         the start address.  */
12553      if (TARGET_64BIT)
12554	emit_insn (gen_subdi3 (out, out, addr));
12555      else
12556	emit_insn (gen_subsi3 (out, out, addr));
12557    }
12558  else
12559    {
12560      rtx unspec;
12561      scratch2 = gen_reg_rtx (Pmode);
12562      scratch3 = gen_reg_rtx (Pmode);
12563      scratch4 = force_reg (Pmode, constm1_rtx);
12564
12565      emit_move_insn (scratch3, addr);
12566      eoschar = force_reg (QImode, eoschar);
12567
12568      emit_insn (gen_cld ());
12569      src = replace_equiv_address_nv (src, scratch3);
12570
12571      /* If .md starts supporting :P, this can be done in .md.  */
12572      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
12573						 scratch4), UNSPEC_SCAS);
12574      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
12575      if (TARGET_64BIT)
12576	{
12577	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
12578	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
12579	}
12580      else
12581	{
12582	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
12583	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
12584	}
12585    }
12586  return 1;
12587}
12588
12589/* Expand the appropriate insns for doing strlen if not just doing
12590   repnz; scasb
12591
12592   out = result, initialized with the start address
12593   align_rtx = alignment of the address.
12594   scratch = scratch register, initialized with the start address when
12595	not aligned, otherwise undefined
12596
12597   This is just the body. It needs the initializations mentioned above and
12598   some address computing at the end.  These things are done in i386.md.  */
12599
12600static void
12601ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
12602{
12603  int align;
12604  rtx tmp;
12605  rtx align_2_label = NULL_RTX;
12606  rtx align_3_label = NULL_RTX;
12607  rtx align_4_label = gen_label_rtx ();
12608  rtx end_0_label = gen_label_rtx ();
12609  rtx mem;
12610  rtx tmpreg = gen_reg_rtx (SImode);
12611  rtx scratch = gen_reg_rtx (SImode);
12612  rtx cmp;
12613
12614  align = 0;
12615  if (GET_CODE (align_rtx) == CONST_INT)
12616    align = INTVAL (align_rtx);
12617
12618  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
12619
12620  /* Is there a known alignment and is it less than 4?  */
12621  if (align < 4)
12622    {
12623      rtx scratch1 = gen_reg_rtx (Pmode);
12624      emit_move_insn (scratch1, out);
12625      /* Is there a known alignment and is it not 2? */
12626      if (align != 2)
12627	{
12628	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
12629	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
12630
12631	  /* Leave just the 3 lower bits.  */
12632	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
12633				    NULL_RTX, 0, OPTAB_WIDEN);
12634
12635	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12636				   Pmode, 1, align_4_label);
12637	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
12638				   Pmode, 1, align_2_label);
12639	  emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
12640				   Pmode, 1, align_3_label);
12641	}
12642      else
12643        {
12644	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
12645	     check whether the pointer is aligned to a 4-byte boundary.  */
12646
12647	  align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
12648				    NULL_RTX, 0, OPTAB_WIDEN);
12649
12650	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
12651				   Pmode, 1, align_4_label);
12652        }
12653
12654      mem = change_address (src, QImode, out);
12655
12656      /* Now compare the bytes.  */
12657
12658      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
12659      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
12660			       QImode, 1, end_0_label);
12661
12662      /* Increment the address.  */
12663      if (TARGET_64BIT)
12664	emit_insn (gen_adddi3 (out, out, const1_rtx));
12665      else
12666	emit_insn (gen_addsi3 (out, out, const1_rtx));
12667
12668      /* Not needed with an alignment of 2.  */
12669      if (align != 2)
12670	{
12671	  emit_label (align_2_label);
12672
12673	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12674				   end_0_label);
12675
12676	  if (TARGET_64BIT)
12677	    emit_insn (gen_adddi3 (out, out, const1_rtx));
12678	  else
12679	    emit_insn (gen_addsi3 (out, out, const1_rtx));
12680
12681	  emit_label (align_3_label);
12682	}
12683
12684      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
12685			       end_0_label);
12686
12687      if (TARGET_64BIT)
12688	emit_insn (gen_adddi3 (out, out, const1_rtx));
12689      else
12690	emit_insn (gen_addsi3 (out, out, const1_rtx));
12691    }
12692
12693  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
12694     align this loop; doing so only enlarges the code and does not help
12695     speed.  */
12696  emit_label (align_4_label);
12697
12698  mem = change_address (src, SImode, out);
12699  emit_move_insn (scratch, mem);
12700  if (TARGET_64BIT)
12701    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
12702  else
12703    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
12704
12705  /* This formula yields a nonzero result iff one of the bytes is zero.
12706     This saves three branches inside the loop and many cycles.  */
12707
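  /* Concretely, the four insns below compute
       tmpreg = (x - 0x01010101) & ~x & 0x80808080
     where x is the word just loaded into SCRATCH.  The result is nonzero
     exactly when some byte of x is zero, and the first (lowest) zero byte
     always has its high bit set in TMPREG, which the code below uses to
     locate it.  */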
12708  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
12709  emit_insn (gen_one_cmplsi2 (scratch, scratch));
12710  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
12711  emit_insn (gen_andsi3 (tmpreg, tmpreg,
12712			 gen_int_mode (0x80808080, SImode)));
12713  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
12714			   align_4_label);
12715
12716  if (TARGET_CMOVE)
12717    {
12718       rtx reg = gen_reg_rtx (SImode);
12719       rtx reg2 = gen_reg_rtx (Pmode);
12720       emit_move_insn (reg, tmpreg);
12721       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
12722
12723       /* If zero is not in the first two bytes, move two bytes forward.  */
12724       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12725       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12726       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12727       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
12728			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
12729						     reg,
12730						     tmpreg)));
12731       /* Emit lea manually to avoid clobbering of flags.  */
12732       emit_insn (gen_rtx_SET (SImode, reg2,
12733			       gen_rtx_PLUS (Pmode, out, const2_rtx)));
12734
12735       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12736       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
12737       emit_insn (gen_rtx_SET (VOIDmode, out,
12738			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
12739						     reg2,
12740						     out)));
12741
12742    }
12743  else
12744    {
12745       rtx end_2_label = gen_label_rtx ();
12746       /* Is zero in the first two bytes? */
12747
12748       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
12749       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
12750       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
12751       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12752                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
12753                            pc_rtx);
12754       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12755       JUMP_LABEL (tmp) = end_2_label;
12756
12757       /* Not in the first two.  Move two bytes forward.  */
12758       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
12759       if (TARGET_64BIT)
12760	 emit_insn (gen_adddi3 (out, out, const2_rtx));
12761       else
12762	 emit_insn (gen_addsi3 (out, out, const2_rtx));
12763
12764       emit_label (end_2_label);
12765
12766    }
12767
12768  /* Avoid branch in fixing the byte.  */
12769  tmpreg = gen_lowpart (QImode, tmpreg);
12770  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
12771  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
12772  if (TARGET_64BIT)
12773    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
12774  else
12775    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
12776
12777  emit_label (end_0_label);
12778}
12779
12780void
12781ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
12782		  rtx callarg2 ATTRIBUTE_UNUSED,
12783		  rtx pop, int sibcall)
12784{
12785  rtx use = NULL, call;
12786
12787  if (pop == const0_rtx)
12788    pop = NULL;
12789  gcc_assert (!TARGET_64BIT || !pop);
12790
12791#if TARGET_MACHO
12792  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
12793    fnaddr = machopic_indirect_call_target (fnaddr);
12794#else
12795  /* Static functions and indirect calls don't need the pic register.  */
12796  if (! TARGET_64BIT && flag_pic
12797      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
12798      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
12799    use_reg (&use, pic_offset_table_rtx);
12800
12801  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
12802    {
12803      rtx al = gen_rtx_REG (QImode, 0);
12804      emit_move_insn (al, callarg2);
12805      use_reg (&use, al);
12806    }
12807#endif /* TARGET_MACHO */
12808
12809  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
12810    {
12811      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12812      fnaddr = gen_rtx_MEM (QImode, fnaddr);
12813    }
12814  if (sibcall && TARGET_64BIT
12815      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
12816    {
12817      rtx addr;
12818      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
12819      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
12820      emit_move_insn (fnaddr, addr);
12821      fnaddr = gen_rtx_MEM (QImode, fnaddr);
12822    }
12823
12824  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
12825  if (retval)
12826    call = gen_rtx_SET (VOIDmode, retval, call);
12827  if (pop)
12828    {
12829      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
12830      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
12831      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
12832    }
12833
12834  call = emit_call_insn (call);
12835  if (use)
12836    CALL_INSN_FUNCTION_USAGE (call) = use;
12837}
12838
12839
12840/* Clear stack slot assignments remembered from previous functions.
12841   This is called from INIT_EXPANDERS once before RTL is emitted for each
12842   function.  */
12843
12844static struct machine_function *
12845ix86_init_machine_status (void)
12846{
12847  struct machine_function *f;
12848
12849  f = ggc_alloc_cleared (sizeof (struct machine_function));
12850  f->use_fast_prologue_epilogue_nregs = -1;
12851
12852  return f;
12853}
12854
12855/* Return a MEM corresponding to a stack slot with mode MODE.
12856   Allocate a new slot if necessary.
12857
12858   The RTL for a function can have several slots available: N is
12859   which slot to use.  */
12860
12861rtx
12862assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
12863{
12864  struct stack_local_entry *s;
12865
12866  gcc_assert (n < MAX_386_STACK_LOCALS);
12867
12868  /* Virtual slot is valid only before vregs are instantiated.  */
12869  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
12870
12871  for (s = ix86_stack_locals; s; s = s->next)
12872    if (s->mode == mode && s->n == n)
12873      return s->rtl;
12874
12875  s = (struct stack_local_entry *)
12876    ggc_alloc (sizeof (struct stack_local_entry));
12877  s->n = n;
12878  s->mode = mode;
12879  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
12880
12881  s->next = ix86_stack_locals;
12882  ix86_stack_locals = s;
12883  return s->rtl;
12884}
12885
12886/* Construct the SYMBOL_REF for the tls_get_addr function.  */
12887
12888static GTY(()) rtx ix86_tls_symbol;
12889rtx
12890ix86_tls_get_addr (void)
12891{
12892
12893  if (!ix86_tls_symbol)
12894    {
12895      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
12896					    (TARGET_GNU_TLS && !TARGET_64BIT)
12897					    ? "___tls_get_addr"
12898					    : "__tls_get_addr");
12899    }
12900
12901  return ix86_tls_symbol;
12902}
12903
12904/* Calculate the length of the memory address in the instruction
12905   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
12906
12907int
12908memory_address_length (rtx addr)
12909{
12910  struct ix86_address parts;
12911  rtx base, index, disp;
12912  int len;
12913  int ok;
12914
12915  if (GET_CODE (addr) == PRE_DEC
12916      || GET_CODE (addr) == POST_INC
12917      || GET_CODE (addr) == PRE_MODIFY
12918      || GET_CODE (addr) == POST_MODIFY)
12919    return 0;
12920
12921  ok = ix86_decompose_address (addr, &parts);
12922  gcc_assert (ok);
12923
12924  if (parts.base && GET_CODE (parts.base) == SUBREG)
12925    parts.base = SUBREG_REG (parts.base);
12926  if (parts.index && GET_CODE (parts.index) == SUBREG)
12927    parts.index = SUBREG_REG (parts.index);
12928
12929  base = parts.base;
12930  index = parts.index;
12931  disp = parts.disp;
12932  len = 0;
12933
12934  /* Rule of thumb:
12935       - esp as the base always wants an index,
12936       - ebp as the base always wants a displacement.  */
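
  /* For example, a plain (%esp) operand needs a SIB byte, and a plain
     (%ebp) operand must be encoded as 0(%ebp) with a one-byte displacement;
     the special cases below account for that extra byte.  */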
12937
12938  /* Register Indirect.  */
12939  if (base && !index && !disp)
12940    {
12941      /* esp (for its index) and ebp (for its displacement) need
12942	 the two-byte modrm form.  */
12943      if (addr == stack_pointer_rtx
12944	  || addr == arg_pointer_rtx
12945	  || addr == frame_pointer_rtx
12946	  || addr == hard_frame_pointer_rtx)
12947	len = 1;
12948    }
12949
12950  /* Direct Addressing.  */
12951  else if (disp && !base && !index)
12952    len = 4;
12953
12954  else
12955    {
12956      /* Find the length of the displacement constant.  */
12957      if (disp)
12958	{
12959	  if (GET_CODE (disp) == CONST_INT
12960	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12961	      && base)
12962	    len = 1;
12963	  else
12964	    len = 4;
12965	}
12966      /* ebp always wants a displacement.  */
12967      else if (base == hard_frame_pointer_rtx)
12968        len = 1;
12969
12970      /* An index requires the two-byte modrm form....  */
12971      if (index
12972	  /* ...like esp, which always wants an index.  */
12973	  || base == stack_pointer_rtx
12974	  || base == arg_pointer_rtx
12975	  || base == frame_pointer_rtx)
12976	len += 1;
12977    }
12978
12979  return len;
12980}
12981
12982/* Compute default value for "length_immediate" attribute.  When SHORTFORM
12983   is set, expect that the insn has an 8-bit immediate alternative.  */
12984int
12985ix86_attr_length_immediate_default (rtx insn, int shortform)
12986{
12987  int len = 0;
12988  int i;
12989  extract_insn_cached (insn);
12990  for (i = recog_data.n_operands - 1; i >= 0; --i)
12991    if (CONSTANT_P (recog_data.operand[i]))
12992      {
12993	gcc_assert (!len);
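	/* CONST_OK_FOR_LETTER_P (..., 'K') accepts signed 8-bit constants on
	   i386, so e.g. addl $100, %eax can use the one-byte immediate form
	   while addl $1000, %eax needs a full four-byte immediate.  */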
12994	if (shortform
12995	    && GET_CODE (recog_data.operand[i]) == CONST_INT
12996	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12997	  len = 1;
12998	else
12999	  {
13000	    switch (get_attr_mode (insn))
13001	      {
13002		case MODE_QI:
13003		  len+=1;
13004		  break;
13005		case MODE_HI:
13006		  len+=2;
13007		  break;
13008		case MODE_SI:
13009		  len+=4;
13010		  break;
13011		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
13012		case MODE_DI:
13013		  len+=4;
13014		  break;
13015		default:
13016		  fatal_insn ("unknown insn mode", insn);
13017	      }
13018	  }
13019      }
13020  return len;
13021}
13022/* Compute default value for "length_address" attribute.  */
13023int
13024ix86_attr_length_address_default (rtx insn)
13025{
13026  int i;
13027
13028  if (get_attr_type (insn) == TYPE_LEA)
13029    {
13030      rtx set = PATTERN (insn);
13031
13032      if (GET_CODE (set) == PARALLEL)
13033	set = XVECEXP (set, 0, 0);
13034
13035      gcc_assert (GET_CODE (set) == SET);
13036
13037      return memory_address_length (SET_SRC (set));
13038    }
13039
13040  extract_insn_cached (insn);
13041  for (i = recog_data.n_operands - 1; i >= 0; --i)
13042    if (GET_CODE (recog_data.operand[i]) == MEM)
13043      {
13044	return memory_address_length (XEXP (recog_data.operand[i], 0));
13046      }
13047  return 0;
13048}
13049
13050/* Return the maximum number of instructions a cpu can issue.  */
13051
13052static int
13053ix86_issue_rate (void)
13054{
13055  switch (ix86_tune)
13056    {
13057    case PROCESSOR_PENTIUM:
13058    case PROCESSOR_K6:
13059      return 2;
13060
13061    case PROCESSOR_PENTIUMPRO:
13062    case PROCESSOR_PENTIUM4:
13063    case PROCESSOR_ATHLON:
13064    case PROCESSOR_K8:
13065    case PROCESSOR_NOCONA:
13066      return 3;
13067
13068    default:
13069      return 1;
13070    }
13071}
13072
13073  /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
13074     set by DEP_INSN and no other register set by DEP_INSN.  */
13075
13076static int
13077ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13078{
13079  rtx set, set2;
13080
13081  /* Simplify the test for uninteresting insns.  */
13082  if (insn_type != TYPE_SETCC
13083      && insn_type != TYPE_ICMOV
13084      && insn_type != TYPE_FCMOV
13085      && insn_type != TYPE_IBR)
13086    return 0;
13087
13088  if ((set = single_set (dep_insn)) != 0)
13089    {
13090      set = SET_DEST (set);
13091      set2 = NULL_RTX;
13092    }
13093  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
13094	   && XVECLEN (PATTERN (dep_insn), 0) == 2
13095	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
13096	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
13097    {
13098      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
13099      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
13100    }
13101  else
13102    return 0;
13103
13104  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
13105    return 0;
13106
13107  /* This test is true if the dependent insn reads the flags but
13108     not any other potentially set register.  */
13109  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
13110    return 0;
13111
13112  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
13113    return 0;
13114
13115  return 1;
13116}
13117
13118/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
13119   address with operands set by DEP_INSN.  */
13120
13121static int
13122ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
13123{
13124  rtx addr;
13125
13126  if (insn_type == TYPE_LEA
13127      && TARGET_PENTIUM)
13128    {
13129      addr = PATTERN (insn);
13130
13131      if (GET_CODE (addr) == PARALLEL)
13132	addr = XVECEXP (addr, 0, 0);
13133
13134      gcc_assert (GET_CODE (addr) == SET);
13135
13136      addr = SET_SRC (addr);
13137    }
13138  else
13139    {
13140      int i;
13141      extract_insn_cached (insn);
13142      for (i = recog_data.n_operands - 1; i >= 0; --i)
13143	if (GET_CODE (recog_data.operand[i]) == MEM)
13144	  {
13145	    addr = XEXP (recog_data.operand[i], 0);
13146	    goto found;
13147	  }
13148      return 0;
13149    found:;
13150    }
13151
13152  return modified_in_p (addr, dep_insn);
13153}
13154
13155static int
13156ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
13157{
13158  enum attr_type insn_type, dep_insn_type;
13159  enum attr_memory memory;
13160  rtx set, set2;
13161  int dep_insn_code_number;
13162
13163  /* Anti and output dependencies have zero cost on all CPUs.  */
13164  if (REG_NOTE_KIND (link) != 0)
13165    return 0;
13166
13167  dep_insn_code_number = recog_memoized (dep_insn);
13168
13169  /* If we can't recognize the insns, we can't really do anything.  */
13170  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
13171    return cost;
13172
13173  insn_type = get_attr_type (insn);
13174  dep_insn_type = get_attr_type (dep_insn);
13175
13176  switch (ix86_tune)
13177    {
13178    case PROCESSOR_PENTIUM:
13179      /* Address Generation Interlock adds a cycle of latency.  */
13180      if (ix86_agi_dependant (insn, dep_insn, insn_type))
13181	cost += 1;
13182
13183      /* ??? Compares pair with jump/setcc.  */
13184      if (ix86_flags_dependant (insn, dep_insn, insn_type))
13185	cost = 0;
13186
13187      /* Floating point stores require value to be ready one cycle earlier.  */
13188      if (insn_type == TYPE_FMOV
13189	  && get_attr_memory (insn) == MEMORY_STORE
13190	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
13191	cost += 1;
13192      break;
13193
13194    case PROCESSOR_PENTIUMPRO:
13195      memory = get_attr_memory (insn);
13196
13197      /* INT->FP conversion is expensive.  */
13198      if (get_attr_fp_int_src (dep_insn))
13199	cost += 5;
13200
13201      /* There is one cycle extra latency between an FP op and a store.  */
13202      if (insn_type == TYPE_FMOV
13203	  && (set = single_set (dep_insn)) != NULL_RTX
13204	  && (set2 = single_set (insn)) != NULL_RTX
13205	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
13206	  && GET_CODE (SET_DEST (set2)) == MEM)
13207	cost += 1;
13208
13209      /* Show ability of reorder buffer to hide latency of load by executing
13210	 in parallel with previous instruction in case
13211	 previous instruction is not needed to compute the address.  */
13212      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13213	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
13214	{
13215	  /* Claim moves to take one cycle, as the core can issue one load
13216	     at a time and the next load can start a cycle later.  */
13217	  if (dep_insn_type == TYPE_IMOV
13218	      || dep_insn_type == TYPE_FMOV)
13219	    cost = 1;
13220	  else if (cost > 1)
13221	    cost--;
13222	}
13223      break;
13224
13225    case PROCESSOR_K6:
13226      memory = get_attr_memory (insn);
13227
13228      /* The esp dependency is resolved before the instruction is really
13229         finished.  */
13230      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
13231	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
13232	return 1;
13233
13234      /* INT->FP conversion is expensive.  */
13235      if (get_attr_fp_int_src (dep_insn))
13236	cost += 5;
13237
13238      /* Show ability of reorder buffer to hide latency of load by executing
13239	 in parallel with previous instruction in case
13240	 previous instruction is not needed to compute the address.  */
13241      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13242	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
13243	{
13244	  /* Claim moves to take one cycle, as the core can issue one load
13245	     at a time and the next load can start a cycle later.  */
13246	  if (dep_insn_type == TYPE_IMOV
13247	      || dep_insn_type == TYPE_FMOV)
13248	    cost = 1;
13249	  else if (cost > 2)
13250	    cost -= 2;
13251	  else
13252	    cost = 1;
13253	}
13254      break;
13255
13256    case PROCESSOR_ATHLON:
13257    case PROCESSOR_K8:
13258      memory = get_attr_memory (insn);
13259
13260      /* Show ability of reorder buffer to hide latency of load by executing
13261	 in parallel with previous instruction in case
13262	 previous instruction is not needed to compute the address.  */
13263      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
13264	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
13265	{
13266	  enum attr_unit unit = get_attr_unit (insn);
13267	  int loadcost = 3;
13268
13269	  /* Because of the difference between the length of integer and
13270	     floating unit pipeline preparation stages, the memory operands
13271	     for floating point are cheaper.
13272
13273	     ??? For Athlon the difference is most probably 2.  */
13274	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
13275	    loadcost = 3;
13276	  else
13277	    loadcost = TARGET_ATHLON ? 2 : 0;
13278
13279	  if (cost >= loadcost)
13280	    cost -= loadcost;
13281	  else
13282	    cost = 0;
13283	}
13284
13285    default:
13286      break;
13287    }
13288
13289  return cost;
13290}
13291
13292/* How many alternative schedules to try.  This should be as wide as the
13293   scheduling freedom in the DFA, but no wider.  Making this value too
13294     large results in extra work for the scheduler.  */
13295
13296static int
13297ia32_multipass_dfa_lookahead (void)
13298{
13299  if (ix86_tune == PROCESSOR_PENTIUM)
13300    return 2;
13301
13302  if (ix86_tune == PROCESSOR_PENTIUMPRO
13303      || ix86_tune == PROCESSOR_K6)
13304    return 1;
13305
13306  else
13307    return 0;
13308}
13309
13310
13311/* Compute the alignment given to a constant that is being placed in memory.
13312   EXP is the constant and ALIGN is the alignment that the object would
13313   ordinarily have.
13314   The value of this function is used instead of that alignment to align
13315   the object.  */
13316
13317int
13318ix86_constant_alignment (tree exp, int align)
13319{
13320  if (TREE_CODE (exp) == REAL_CST)
13321    {
13322      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
13323	return 64;
13324      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
13325	return 128;
13326    }
13327  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
13328	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
13329    return BITS_PER_WORD;
13330
13331  return align;
13332}
13333
13334/* Compute the alignment for a static variable.
13335   TYPE is the data type, and ALIGN is the alignment that
13336   the object would ordinarily have.  The value of this function is used
13337   instead of that alignment to align the object.  */
13338
13339int
13340ix86_data_alignment (tree type, int align)
13341{
13342  if (AGGREGATE_TYPE_P (type)
13343       && TYPE_SIZE (type)
13344       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13345       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
13346	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
13347    return 256;
13348
13349  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13350     to a 16-byte boundary.  */
13351  if (TARGET_64BIT)
13352    {
13353      if (AGGREGATE_TYPE_P (type)
13354	   && TYPE_SIZE (type)
13355	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13356	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
13357	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13358	return 128;
13359    }
13360
13361  if (TREE_CODE (type) == ARRAY_TYPE)
13362    {
13363      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13364	return 64;
13365      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13366	return 128;
13367    }
13368  else if (TREE_CODE (type) == COMPLEX_TYPE)
13369    {
13370
13371      if (TYPE_MODE (type) == DCmode && align < 64)
13372	return 64;
13373      if (TYPE_MODE (type) == XCmode && align < 128)
13374	return 128;
13375    }
13376  else if ((TREE_CODE (type) == RECORD_TYPE
13377	    || TREE_CODE (type) == UNION_TYPE
13378	    || TREE_CODE (type) == QUAL_UNION_TYPE)
13379	   && TYPE_FIELDS (type))
13380    {
13381      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13382	return 64;
13383      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13384	return 128;
13385    }
13386  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13387	   || TREE_CODE (type) == INTEGER_TYPE)
13388    {
13389      if (TYPE_MODE (type) == DFmode && align < 64)
13390	return 64;
13391      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13392	return 128;
13393    }
13394
13395  return align;
13396}
13397
13398/* Compute the alignment for a local variable.
13399   TYPE is the data type, and ALIGN is the alignment that
13400   the object would ordinarily have.  The value of this macro is used
13401   instead of that alignment to align the object.  */
13402
13403int
13404ix86_local_alignment (tree type, int align)
13405{
13406  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
13407     to a 16-byte boundary.  */
13408  if (TARGET_64BIT)
13409    {
13410      if (AGGREGATE_TYPE_P (type)
13411	   && TYPE_SIZE (type)
13412	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
13413	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
13414	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
13415	return 128;
13416    }
13417  if (TREE_CODE (type) == ARRAY_TYPE)
13418    {
13419      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
13420	return 64;
13421      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
13422	return 128;
13423    }
13424  else if (TREE_CODE (type) == COMPLEX_TYPE)
13425    {
13426      if (TYPE_MODE (type) == DCmode && align < 64)
13427	return 64;
13428      if (TYPE_MODE (type) == XCmode && align < 128)
13429	return 128;
13430    }
13431  else if ((TREE_CODE (type) == RECORD_TYPE
13432	    || TREE_CODE (type) == UNION_TYPE
13433	    || TREE_CODE (type) == QUAL_UNION_TYPE)
13434	   && TYPE_FIELDS (type))
13435    {
13436      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
13437	return 64;
13438      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
13439	return 128;
13440    }
13441  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
13442	   || TREE_CODE (type) == INTEGER_TYPE)
13443    {
13444
13445      if (TYPE_MODE (type) == DFmode && align < 64)
13446	return 64;
13447      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
13448	return 128;
13449    }
13450  return align;
13451}
13452
13453/* Emit RTL insns to initialize the variable parts of a trampoline.
13454   FNADDR is an RTX for the address of the function's pure code.
13455   CXT is an RTX for the static chain value for the function.  */
13456void
13457x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
13458{
13459  if (!TARGET_64BIT)
13460    {
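      /* The 32-bit trampoline emitted below is
	   b9 <cxt>	movl $CXT, %ecx
	   e9 <disp>	jmp  <fnaddr>
	 where <disp> is the pc-relative displacement computed just below.  */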
13461      /* Compute offset from the end of the jmp to the target function.  */
13462      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
13463			       plus_constant (tramp, 10),
13464			       NULL_RTX, 1, OPTAB_DIRECT);
13465      emit_move_insn (gen_rtx_MEM (QImode, tramp),
13466		      gen_int_mode (0xb9, QImode));
13467      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
13468      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
13469		      gen_int_mode (0xe9, QImode));
13470      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
13471    }
13472  else
13473    {
13474      int offset = 0;
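      /* The 64-bit trampoline emitted below is
	   41 bb <imm32>   movl    $<fnaddr>, %r11d   (or 49 bb <imm64>, movabsq)
	   49 ba <imm64>   movabsq $CXT, %r10
	   49 ff e3	   jmpq    *%r11  */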
13475      /* Try to load the address using the shorter movl instead of movabs.
13476         We may want to support movq for kernel mode, but the kernel does not use
13477         trampolines at the moment.  */
13478      if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
13479	{
13480	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
13481	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13482			  gen_int_mode (0xbb41, HImode));
13483	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
13484			  gen_lowpart (SImode, fnaddr));
13485	  offset += 6;
13486	}
13487      else
13488	{
13489	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13490			  gen_int_mode (0xbb49, HImode));
13491	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13492			  fnaddr);
13493	  offset += 10;
13494	}
13495      /* Load static chain using movabs to r10.  */
13496      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13497		      gen_int_mode (0xba49, HImode));
13498      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
13499		      cxt);
13500      offset += 10;
13501      /* Jump to r11.  */
13502      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
13503		      gen_int_mode (0xff49, HImode));
13504      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
13505		      gen_int_mode (0xe3, QImode));
13506      offset += 3;
13507      gcc_assert (offset <= TRAMPOLINE_SIZE);
13508    }
13509
13510#ifdef ENABLE_EXECUTE_STACK
13511  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
13512		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
13513#endif
13514}
13515
13516/* Codes for all the SSE/MMX builtins.  */
13517enum ix86_builtins
13518{
13519  IX86_BUILTIN_ADDPS,
13520  IX86_BUILTIN_ADDSS,
13521  IX86_BUILTIN_DIVPS,
13522  IX86_BUILTIN_DIVSS,
13523  IX86_BUILTIN_MULPS,
13524  IX86_BUILTIN_MULSS,
13525  IX86_BUILTIN_SUBPS,
13526  IX86_BUILTIN_SUBSS,
13527
13528  IX86_BUILTIN_CMPEQPS,
13529  IX86_BUILTIN_CMPLTPS,
13530  IX86_BUILTIN_CMPLEPS,
13531  IX86_BUILTIN_CMPGTPS,
13532  IX86_BUILTIN_CMPGEPS,
13533  IX86_BUILTIN_CMPNEQPS,
13534  IX86_BUILTIN_CMPNLTPS,
13535  IX86_BUILTIN_CMPNLEPS,
13536  IX86_BUILTIN_CMPNGTPS,
13537  IX86_BUILTIN_CMPNGEPS,
13538  IX86_BUILTIN_CMPORDPS,
13539  IX86_BUILTIN_CMPUNORDPS,
13540  IX86_BUILTIN_CMPNEPS,
13541  IX86_BUILTIN_CMPEQSS,
13542  IX86_BUILTIN_CMPLTSS,
13543  IX86_BUILTIN_CMPLESS,
13544  IX86_BUILTIN_CMPNEQSS,
13545  IX86_BUILTIN_CMPNLTSS,
13546  IX86_BUILTIN_CMPNLESS,
13547  IX86_BUILTIN_CMPNGTSS,
13548  IX86_BUILTIN_CMPNGESS,
13549  IX86_BUILTIN_CMPORDSS,
13550  IX86_BUILTIN_CMPUNORDSS,
13551  IX86_BUILTIN_CMPNESS,
13552
13553  IX86_BUILTIN_COMIEQSS,
13554  IX86_BUILTIN_COMILTSS,
13555  IX86_BUILTIN_COMILESS,
13556  IX86_BUILTIN_COMIGTSS,
13557  IX86_BUILTIN_COMIGESS,
13558  IX86_BUILTIN_COMINEQSS,
13559  IX86_BUILTIN_UCOMIEQSS,
13560  IX86_BUILTIN_UCOMILTSS,
13561  IX86_BUILTIN_UCOMILESS,
13562  IX86_BUILTIN_UCOMIGTSS,
13563  IX86_BUILTIN_UCOMIGESS,
13564  IX86_BUILTIN_UCOMINEQSS,
13565
13566  IX86_BUILTIN_CVTPI2PS,
13567  IX86_BUILTIN_CVTPS2PI,
13568  IX86_BUILTIN_CVTSI2SS,
13569  IX86_BUILTIN_CVTSI642SS,
13570  IX86_BUILTIN_CVTSS2SI,
13571  IX86_BUILTIN_CVTSS2SI64,
13572  IX86_BUILTIN_CVTTPS2PI,
13573  IX86_BUILTIN_CVTTSS2SI,
13574  IX86_BUILTIN_CVTTSS2SI64,
13575
13576  IX86_BUILTIN_MAXPS,
13577  IX86_BUILTIN_MAXSS,
13578  IX86_BUILTIN_MINPS,
13579  IX86_BUILTIN_MINSS,
13580
13581  IX86_BUILTIN_LOADUPS,
13582  IX86_BUILTIN_STOREUPS,
13583  IX86_BUILTIN_MOVSS,
13584
13585  IX86_BUILTIN_MOVHLPS,
13586  IX86_BUILTIN_MOVLHPS,
13587  IX86_BUILTIN_LOADHPS,
13588  IX86_BUILTIN_LOADLPS,
13589  IX86_BUILTIN_STOREHPS,
13590  IX86_BUILTIN_STORELPS,
13591
13592  IX86_BUILTIN_MASKMOVQ,
13593  IX86_BUILTIN_MOVMSKPS,
13594  IX86_BUILTIN_PMOVMSKB,
13595
13596  IX86_BUILTIN_MOVNTPS,
13597  IX86_BUILTIN_MOVNTQ,
13598
13599  IX86_BUILTIN_LOADDQU,
13600  IX86_BUILTIN_STOREDQU,
13601
13602  IX86_BUILTIN_PACKSSWB,
13603  IX86_BUILTIN_PACKSSDW,
13604  IX86_BUILTIN_PACKUSWB,
13605
13606  IX86_BUILTIN_PADDB,
13607  IX86_BUILTIN_PADDW,
13608  IX86_BUILTIN_PADDD,
13609  IX86_BUILTIN_PADDQ,
13610  IX86_BUILTIN_PADDSB,
13611  IX86_BUILTIN_PADDSW,
13612  IX86_BUILTIN_PADDUSB,
13613  IX86_BUILTIN_PADDUSW,
13614  IX86_BUILTIN_PSUBB,
13615  IX86_BUILTIN_PSUBW,
13616  IX86_BUILTIN_PSUBD,
13617  IX86_BUILTIN_PSUBQ,
13618  IX86_BUILTIN_PSUBSB,
13619  IX86_BUILTIN_PSUBSW,
13620  IX86_BUILTIN_PSUBUSB,
13621  IX86_BUILTIN_PSUBUSW,
13622
13623  IX86_BUILTIN_PAND,
13624  IX86_BUILTIN_PANDN,
13625  IX86_BUILTIN_POR,
13626  IX86_BUILTIN_PXOR,
13627
13628  IX86_BUILTIN_PAVGB,
13629  IX86_BUILTIN_PAVGW,
13630
13631  IX86_BUILTIN_PCMPEQB,
13632  IX86_BUILTIN_PCMPEQW,
13633  IX86_BUILTIN_PCMPEQD,
13634  IX86_BUILTIN_PCMPGTB,
13635  IX86_BUILTIN_PCMPGTW,
13636  IX86_BUILTIN_PCMPGTD,
13637
13638  IX86_BUILTIN_PMADDWD,
13639
13640  IX86_BUILTIN_PMAXSW,
13641  IX86_BUILTIN_PMAXUB,
13642  IX86_BUILTIN_PMINSW,
13643  IX86_BUILTIN_PMINUB,
13644
13645  IX86_BUILTIN_PMULHUW,
13646  IX86_BUILTIN_PMULHW,
13647  IX86_BUILTIN_PMULLW,
13648
13649  IX86_BUILTIN_PSADBW,
13650  IX86_BUILTIN_PSHUFW,
13651
13652  IX86_BUILTIN_PSLLW,
13653  IX86_BUILTIN_PSLLD,
13654  IX86_BUILTIN_PSLLQ,
13655  IX86_BUILTIN_PSRAW,
13656  IX86_BUILTIN_PSRAD,
13657  IX86_BUILTIN_PSRLW,
13658  IX86_BUILTIN_PSRLD,
13659  IX86_BUILTIN_PSRLQ,
13660  IX86_BUILTIN_PSLLWI,
13661  IX86_BUILTIN_PSLLDI,
13662  IX86_BUILTIN_PSLLQI,
13663  IX86_BUILTIN_PSRAWI,
13664  IX86_BUILTIN_PSRADI,
13665  IX86_BUILTIN_PSRLWI,
13666  IX86_BUILTIN_PSRLDI,
13667  IX86_BUILTIN_PSRLQI,
13668
13669  IX86_BUILTIN_PUNPCKHBW,
13670  IX86_BUILTIN_PUNPCKHWD,
13671  IX86_BUILTIN_PUNPCKHDQ,
13672  IX86_BUILTIN_PUNPCKLBW,
13673  IX86_BUILTIN_PUNPCKLWD,
13674  IX86_BUILTIN_PUNPCKLDQ,
13675
13676  IX86_BUILTIN_SHUFPS,
13677
13678  IX86_BUILTIN_RCPPS,
13679  IX86_BUILTIN_RCPSS,
13680  IX86_BUILTIN_RSQRTPS,
13681  IX86_BUILTIN_RSQRTSS,
13682  IX86_BUILTIN_SQRTPS,
13683  IX86_BUILTIN_SQRTSS,
13684
13685  IX86_BUILTIN_UNPCKHPS,
13686  IX86_BUILTIN_UNPCKLPS,
13687
13688  IX86_BUILTIN_ANDPS,
13689  IX86_BUILTIN_ANDNPS,
13690  IX86_BUILTIN_ORPS,
13691  IX86_BUILTIN_XORPS,
13692
13693  IX86_BUILTIN_EMMS,
13694  IX86_BUILTIN_LDMXCSR,
13695  IX86_BUILTIN_STMXCSR,
13696  IX86_BUILTIN_SFENCE,
13697
13698  /* 3DNow! Original */
13699  IX86_BUILTIN_FEMMS,
13700  IX86_BUILTIN_PAVGUSB,
13701  IX86_BUILTIN_PF2ID,
13702  IX86_BUILTIN_PFACC,
13703  IX86_BUILTIN_PFADD,
13704  IX86_BUILTIN_PFCMPEQ,
13705  IX86_BUILTIN_PFCMPGE,
13706  IX86_BUILTIN_PFCMPGT,
13707  IX86_BUILTIN_PFMAX,
13708  IX86_BUILTIN_PFMIN,
13709  IX86_BUILTIN_PFMUL,
13710  IX86_BUILTIN_PFRCP,
13711  IX86_BUILTIN_PFRCPIT1,
13712  IX86_BUILTIN_PFRCPIT2,
13713  IX86_BUILTIN_PFRSQIT1,
13714  IX86_BUILTIN_PFRSQRT,
13715  IX86_BUILTIN_PFSUB,
13716  IX86_BUILTIN_PFSUBR,
13717  IX86_BUILTIN_PI2FD,
13718  IX86_BUILTIN_PMULHRW,
13719
13720  /* 3DNow! Athlon Extensions */
13721  IX86_BUILTIN_PF2IW,
13722  IX86_BUILTIN_PFNACC,
13723  IX86_BUILTIN_PFPNACC,
13724  IX86_BUILTIN_PI2FW,
13725  IX86_BUILTIN_PSWAPDSI,
13726  IX86_BUILTIN_PSWAPDSF,
13727
13728  /* SSE2 */
13729  IX86_BUILTIN_ADDPD,
13730  IX86_BUILTIN_ADDSD,
13731  IX86_BUILTIN_DIVPD,
13732  IX86_BUILTIN_DIVSD,
13733  IX86_BUILTIN_MULPD,
13734  IX86_BUILTIN_MULSD,
13735  IX86_BUILTIN_SUBPD,
13736  IX86_BUILTIN_SUBSD,
13737
13738  IX86_BUILTIN_CMPEQPD,
13739  IX86_BUILTIN_CMPLTPD,
13740  IX86_BUILTIN_CMPLEPD,
13741  IX86_BUILTIN_CMPGTPD,
13742  IX86_BUILTIN_CMPGEPD,
13743  IX86_BUILTIN_CMPNEQPD,
13744  IX86_BUILTIN_CMPNLTPD,
13745  IX86_BUILTIN_CMPNLEPD,
13746  IX86_BUILTIN_CMPNGTPD,
13747  IX86_BUILTIN_CMPNGEPD,
13748  IX86_BUILTIN_CMPORDPD,
13749  IX86_BUILTIN_CMPUNORDPD,
13750  IX86_BUILTIN_CMPNEPD,
13751  IX86_BUILTIN_CMPEQSD,
13752  IX86_BUILTIN_CMPLTSD,
13753  IX86_BUILTIN_CMPLESD,
13754  IX86_BUILTIN_CMPNEQSD,
13755  IX86_BUILTIN_CMPNLTSD,
13756  IX86_BUILTIN_CMPNLESD,
13757  IX86_BUILTIN_CMPORDSD,
13758  IX86_BUILTIN_CMPUNORDSD,
13759  IX86_BUILTIN_CMPNESD,
13760
13761  IX86_BUILTIN_COMIEQSD,
13762  IX86_BUILTIN_COMILTSD,
13763  IX86_BUILTIN_COMILESD,
13764  IX86_BUILTIN_COMIGTSD,
13765  IX86_BUILTIN_COMIGESD,
13766  IX86_BUILTIN_COMINEQSD,
13767  IX86_BUILTIN_UCOMIEQSD,
13768  IX86_BUILTIN_UCOMILTSD,
13769  IX86_BUILTIN_UCOMILESD,
13770  IX86_BUILTIN_UCOMIGTSD,
13771  IX86_BUILTIN_UCOMIGESD,
13772  IX86_BUILTIN_UCOMINEQSD,
13773
13774  IX86_BUILTIN_MAXPD,
13775  IX86_BUILTIN_MAXSD,
13776  IX86_BUILTIN_MINPD,
13777  IX86_BUILTIN_MINSD,
13778
13779  IX86_BUILTIN_ANDPD,
13780  IX86_BUILTIN_ANDNPD,
13781  IX86_BUILTIN_ORPD,
13782  IX86_BUILTIN_XORPD,
13783
13784  IX86_BUILTIN_SQRTPD,
13785  IX86_BUILTIN_SQRTSD,
13786
13787  IX86_BUILTIN_UNPCKHPD,
13788  IX86_BUILTIN_UNPCKLPD,
13789
13790  IX86_BUILTIN_SHUFPD,
13791
13792  IX86_BUILTIN_LOADUPD,
13793  IX86_BUILTIN_STOREUPD,
13794  IX86_BUILTIN_MOVSD,
13795
13796  IX86_BUILTIN_LOADHPD,
13797  IX86_BUILTIN_LOADLPD,
13798
13799  IX86_BUILTIN_CVTDQ2PD,
13800  IX86_BUILTIN_CVTDQ2PS,
13801
13802  IX86_BUILTIN_CVTPD2DQ,
13803  IX86_BUILTIN_CVTPD2PI,
13804  IX86_BUILTIN_CVTPD2PS,
13805  IX86_BUILTIN_CVTTPD2DQ,
13806  IX86_BUILTIN_CVTTPD2PI,
13807
13808  IX86_BUILTIN_CVTPI2PD,
13809  IX86_BUILTIN_CVTSI2SD,
13810  IX86_BUILTIN_CVTSI642SD,
13811
13812  IX86_BUILTIN_CVTSD2SI,
13813  IX86_BUILTIN_CVTSD2SI64,
13814  IX86_BUILTIN_CVTSD2SS,
13815  IX86_BUILTIN_CVTSS2SD,
13816  IX86_BUILTIN_CVTTSD2SI,
13817  IX86_BUILTIN_CVTTSD2SI64,
13818
13819  IX86_BUILTIN_CVTPS2DQ,
13820  IX86_BUILTIN_CVTPS2PD,
13821  IX86_BUILTIN_CVTTPS2DQ,
13822
13823  IX86_BUILTIN_MOVNTI,
13824  IX86_BUILTIN_MOVNTPD,
13825  IX86_BUILTIN_MOVNTDQ,
13826
13827  /* SSE2 MMX */
13828  IX86_BUILTIN_MASKMOVDQU,
13829  IX86_BUILTIN_MOVMSKPD,
13830  IX86_BUILTIN_PMOVMSKB128,
13831
13832  IX86_BUILTIN_PACKSSWB128,
13833  IX86_BUILTIN_PACKSSDW128,
13834  IX86_BUILTIN_PACKUSWB128,
13835
13836  IX86_BUILTIN_PADDB128,
13837  IX86_BUILTIN_PADDW128,
13838  IX86_BUILTIN_PADDD128,
13839  IX86_BUILTIN_PADDQ128,
13840  IX86_BUILTIN_PADDSB128,
13841  IX86_BUILTIN_PADDSW128,
13842  IX86_BUILTIN_PADDUSB128,
13843  IX86_BUILTIN_PADDUSW128,
13844  IX86_BUILTIN_PSUBB128,
13845  IX86_BUILTIN_PSUBW128,
13846  IX86_BUILTIN_PSUBD128,
13847  IX86_BUILTIN_PSUBQ128,
13848  IX86_BUILTIN_PSUBSB128,
13849  IX86_BUILTIN_PSUBSW128,
13850  IX86_BUILTIN_PSUBUSB128,
13851  IX86_BUILTIN_PSUBUSW128,
13852
13853  IX86_BUILTIN_PAND128,
13854  IX86_BUILTIN_PANDN128,
13855  IX86_BUILTIN_POR128,
13856  IX86_BUILTIN_PXOR128,
13857
13858  IX86_BUILTIN_PAVGB128,
13859  IX86_BUILTIN_PAVGW128,
13860
13861  IX86_BUILTIN_PCMPEQB128,
13862  IX86_BUILTIN_PCMPEQW128,
13863  IX86_BUILTIN_PCMPEQD128,
13864  IX86_BUILTIN_PCMPGTB128,
13865  IX86_BUILTIN_PCMPGTW128,
13866  IX86_BUILTIN_PCMPGTD128,
13867
13868  IX86_BUILTIN_PMADDWD128,
13869
13870  IX86_BUILTIN_PMAXSW128,
13871  IX86_BUILTIN_PMAXUB128,
13872  IX86_BUILTIN_PMINSW128,
13873  IX86_BUILTIN_PMINUB128,
13874
13875  IX86_BUILTIN_PMULUDQ,
13876  IX86_BUILTIN_PMULUDQ128,
13877  IX86_BUILTIN_PMULHUW128,
13878  IX86_BUILTIN_PMULHW128,
13879  IX86_BUILTIN_PMULLW128,
13880
13881  IX86_BUILTIN_PSADBW128,
13882  IX86_BUILTIN_PSHUFHW,
13883  IX86_BUILTIN_PSHUFLW,
13884  IX86_BUILTIN_PSHUFD,
13885
13886  IX86_BUILTIN_PSLLW128,
13887  IX86_BUILTIN_PSLLD128,
13888  IX86_BUILTIN_PSLLQ128,
13889  IX86_BUILTIN_PSRAW128,
13890  IX86_BUILTIN_PSRAD128,
13891  IX86_BUILTIN_PSRLW128,
13892  IX86_BUILTIN_PSRLD128,
13893  IX86_BUILTIN_PSRLQ128,
13894  IX86_BUILTIN_PSLLDQI128,
13895  IX86_BUILTIN_PSLLWI128,
13896  IX86_BUILTIN_PSLLDI128,
13897  IX86_BUILTIN_PSLLQI128,
13898  IX86_BUILTIN_PSRAWI128,
13899  IX86_BUILTIN_PSRADI128,
13900  IX86_BUILTIN_PSRLDQI128,
13901  IX86_BUILTIN_PSRLWI128,
13902  IX86_BUILTIN_PSRLDI128,
13903  IX86_BUILTIN_PSRLQI128,
13904
13905  IX86_BUILTIN_PUNPCKHBW128,
13906  IX86_BUILTIN_PUNPCKHWD128,
13907  IX86_BUILTIN_PUNPCKHDQ128,
13908  IX86_BUILTIN_PUNPCKHQDQ128,
13909  IX86_BUILTIN_PUNPCKLBW128,
13910  IX86_BUILTIN_PUNPCKLWD128,
13911  IX86_BUILTIN_PUNPCKLDQ128,
13912  IX86_BUILTIN_PUNPCKLQDQ128,
13913
13914  IX86_BUILTIN_CLFLUSH,
13915  IX86_BUILTIN_MFENCE,
13916  IX86_BUILTIN_LFENCE,
13917
13918  /* SSE3 (Prescott New Instructions).  */
13919  IX86_BUILTIN_ADDSUBPS,
13920  IX86_BUILTIN_HADDPS,
13921  IX86_BUILTIN_HSUBPS,
13922  IX86_BUILTIN_MOVSHDUP,
13923  IX86_BUILTIN_MOVSLDUP,
13924  IX86_BUILTIN_ADDSUBPD,
13925  IX86_BUILTIN_HADDPD,
13926  IX86_BUILTIN_HSUBPD,
13927  IX86_BUILTIN_LDDQU,
13928
13929  IX86_BUILTIN_MONITOR,
13930  IX86_BUILTIN_MWAIT,
13931
13932  IX86_BUILTIN_VEC_INIT_V2SI,
13933  IX86_BUILTIN_VEC_INIT_V4HI,
13934  IX86_BUILTIN_VEC_INIT_V8QI,
13935  IX86_BUILTIN_VEC_EXT_V2DF,
13936  IX86_BUILTIN_VEC_EXT_V2DI,
13937  IX86_BUILTIN_VEC_EXT_V4SF,
13938  IX86_BUILTIN_VEC_EXT_V4SI,
13939  IX86_BUILTIN_VEC_EXT_V8HI,
13940  IX86_BUILTIN_VEC_EXT_V2SI,
13941  IX86_BUILTIN_VEC_EXT_V4HI,
13942  IX86_BUILTIN_VEC_SET_V8HI,
13943  IX86_BUILTIN_VEC_SET_V4HI,
13944
13945  IX86_BUILTIN_MAX
13946};
13947
13948#define def_builtin(MASK, NAME, TYPE, CODE)				\
13949do {									\
13950  if ((MASK) & target_flags						\
13951      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))			\
13952    lang_hooks.builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
13953				 NULL, NULL_TREE);			\
13954} while (0)
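
/* A minimal illustration of how the macro is used (the call shown is one
   of the SSE2 registrations made further down in this file):

     def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64",
		  int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);

   The builtin is registered only when MASK overlaps target_flags and,
   because MASK_64BIT is present in this example, only when compiling for
   a 64-bit target; otherwise the call does nothing.  */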
13955
13956/* Bits for builtin_description.flag.  */
13957
13958/* Set when we don't support the comparison natively, and must swap the
13959   operands of the comparison in order to support it.  */
13960#define BUILTIN_DESC_SWAP_OPERANDS	1
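
/* For example, the bdesc_2arg entry for "__builtin_ia32_cmpgtps" below
   pairs the comparison code LT with BUILTIN_DESC_SWAP_OPERANDS: the SSE
   compare patterns only provide the less-than form, so a > b is expanded
   as b < a by exchanging the operands.  */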
13961
13962struct builtin_description
13963{
13964  const unsigned int mask;
13965  const enum insn_code icode;
13966  const char *const name;
13967  const enum ix86_builtins code;
13968  const enum rtx_code comparison;
13969  const unsigned int flag;
13970};
13971
13972static const struct builtin_description bdesc_comi[] =
13973{
13974  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
13975  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
13976  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
13977  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
13978  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
13979  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
13980  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
13981  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
13982  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
13983  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
13984  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
13985  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
13986  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
13987  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
13988  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
13989  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
13990  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
13991  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
13992  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
13993  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
13994  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
13995  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
13996  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
13997  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
13998};
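
/* Rough usage sketch for the comi/ucomi builtins above (the __v4sf
   typedef shown is the 16-byte float vector type assumed by the intrinsic
   headers; the user-visible wrappers live in xmmintrin.h/emmintrin.h):

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));
     __v4sf a, b;
     int r = __builtin_ia32_comieq (a, b);	compares the low elements

   Each entry supplies the insn pattern plus the rtx comparison code used
   when the builtin call is expanded.  */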
13999
14000static const struct builtin_description bdesc_2arg[] =
14001{
14002  /* SSE */
14003  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
14004  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
14005  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
14006  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
14007  { MASK_SSE, CODE_FOR_sse_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
14008  { MASK_SSE, CODE_FOR_sse_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
14009  { MASK_SSE, CODE_FOR_sse_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
14010  { MASK_SSE, CODE_FOR_sse_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
14011
14012  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
14013  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
14014  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
14015  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
14016    BUILTIN_DESC_SWAP_OPERANDS },
14017  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
14018    BUILTIN_DESC_SWAP_OPERANDS },
14019  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
14020  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
14021  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
14022  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
14023  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
14024    BUILTIN_DESC_SWAP_OPERANDS },
14025  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
14026    BUILTIN_DESC_SWAP_OPERANDS },
14027  { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
14028  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
14029  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
14030  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
14031  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
14032  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
14033  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
14034  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
14035  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
14036    BUILTIN_DESC_SWAP_OPERANDS },
14037  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
14038    BUILTIN_DESC_SWAP_OPERANDS },
14039  { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
14040
14041  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
14042  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
14043  { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
14044  { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
14045
14046  { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
14047  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
14048  { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
14049  { MASK_SSE, CODE_FOR_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
14050
14051  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
14052  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
14053  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
14054  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
14055  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
14056
14057  /* MMX */
14058  { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
14059  { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
14060  { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
14061  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
14062  { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
14063  { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
14064  { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
14065  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
14066
14067  { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
14068  { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
14069  { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
14070  { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
14071  { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
14072  { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
14073  { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
14074  { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
14075
14076  { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
14077  { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
14078  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
14079
14080  { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
14081  { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
14082  { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
14083  { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
14084
14085  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
14086  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
14087
14088  { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
14089  { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
14090  { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
14091  { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
14092  { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
14093  { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
14094
14095  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
14096  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
14097  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
14098  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
14099
14100  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
14101  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
14102  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
14103  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
14104  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
14105  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
14106
14107  /* Special.  */
14108  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
14109  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
14110  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
14111
14112  { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
14113  { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
14114  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
14115
14116  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
14117  { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
14118  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
14119  { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
14120  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
14121  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
14122
14123  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
14124  { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
14125  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
14126  { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
14127  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
14128  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
14129
14130  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
14131  { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
14132  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
14133  { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
14134
14135  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
14136  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
14137
14138  /* SSE2 */
14139  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
14140  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
14141  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
14142  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
14143  { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
14144  { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
14145  { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
14146  { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
14147
14148  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
14149  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
14150  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
14151  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
14152    BUILTIN_DESC_SWAP_OPERANDS },
14153  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
14154    BUILTIN_DESC_SWAP_OPERANDS },
14155  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
14156  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
14157  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
14158  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
14159  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
14160    BUILTIN_DESC_SWAP_OPERANDS },
14161  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
14162    BUILTIN_DESC_SWAP_OPERANDS },
14163  { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
14164  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
14165  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
14166  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
14167  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
14168  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
14169  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
14170  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
14171  { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
14172
14173  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
14174  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
14175  { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
14176  { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
14177
14178  { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
14179  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
14180  { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
14181  { MASK_SSE2, CODE_FOR_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
14182
14183  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
14184  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
14185  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
14186
14187  /* SSE2 MMX */
14188  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
14189  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
14190  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
14191  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
14192  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
14193  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
14194  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
14195  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
14196
14197  { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
14198  { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
14199  { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
14200  { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
14201  { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
14202  { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
14203  { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
14204  { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
14205
14206  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
14207  { MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
14208
14209  { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
14210  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
14211  { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
14212  { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
14213
14214  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
14215  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
14216
14217  { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
14218  { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
14219  { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
14220  { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
14221  { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
14222  { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
14223
14224  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
14225  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
14226  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
14227  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
14228
14229  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
14230  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
14231  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
14232  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
14233  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
14234  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
14235  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
14236  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
14237
14238  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
14239  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
14240  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
14241
14242  { MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
14243  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
14244
14245  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
14246  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
14247
14248  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
14249  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
14250  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
14251
14252  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
14253  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
14254  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
14255
14256  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
14257  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
14258
14259  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
14260
14261  { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
14262  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
14263  { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
14264  { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
14265
14266  /* SSE3 MMX */
14267  { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
14268  { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
14269  { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
14270  { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
14271  { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
14272  { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
14273};
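
/* Entries above whose name field is 0 (the pack, convert, shift, psadbw
   and pmaddwd rows) are skipped by the generic two-operand registration
   loop in ix86_init_mmx_sse_builtins; they receive hand-written
   prototypes from explicit def_builtin calls instead, e.g.:

     def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di,
		  IX86_BUILTIN_PSLLW);  */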
14274
14275static const struct builtin_description bdesc_1arg[] =
14276{
14277  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
14278  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
14279
14280  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
14281  { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
14282  { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
14283
14284  { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
14285  { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
14286  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
14287  { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
14288  { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
14289  { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
14290
14291  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
14292  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
14293
14294  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
14295
14296  { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
14297  { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
14298
14299  { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
14300  { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
14301  { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
14302  { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
14303  { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
14304
14305  { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
14306
14307  { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
14308  { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
14309  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
14310  { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
14311
14312  { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
14313  { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
14314  { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
14315
14316  /* SSE3 */
14317  { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
14318  { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
14319};
14320
14321static void
14322ix86_init_builtins (void)
14323{
14324  if (TARGET_MMX)
14325    ix86_init_mmx_sse_builtins ();
14326}
14327
14328/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
14329   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
14330   builtins.  */
14331static void
14332ix86_init_mmx_sse_builtins (void)
14333{
14334  const struct builtin_description * d;
14335  size_t i;
14336
14337  tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
14338  tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
14339  tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
14340  tree V2DI_type_node
14341    = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
14342  tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
14343  tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
14344  tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
14345  tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
14346  tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
14347  tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
14348
14349  tree pchar_type_node = build_pointer_type (char_type_node);
14350  tree pcchar_type_node = build_pointer_type (
14351			     build_type_variant (char_type_node, 1, 0));
14352  tree pfloat_type_node = build_pointer_type (float_type_node);
14353  tree pcfloat_type_node = build_pointer_type (
14354			     build_type_variant (float_type_node, 1, 0));
14355  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
14356  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
14357  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
14358
14359  /* Comparisons.  */
14360  tree int_ftype_v4sf_v4sf
14361    = build_function_type_list (integer_type_node,
14362				V4SF_type_node, V4SF_type_node, NULL_TREE);
14363  tree v4si_ftype_v4sf_v4sf
14364    = build_function_type_list (V4SI_type_node,
14365				V4SF_type_node, V4SF_type_node, NULL_TREE);
14366  /* MMX/SSE/integer conversions.  */
14367  tree int_ftype_v4sf
14368    = build_function_type_list (integer_type_node,
14369				V4SF_type_node, NULL_TREE);
14370  tree int64_ftype_v4sf
14371    = build_function_type_list (long_long_integer_type_node,
14372				V4SF_type_node, NULL_TREE);
14373  tree int_ftype_v8qi
14374    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
14375  tree v4sf_ftype_v4sf_int
14376    = build_function_type_list (V4SF_type_node,
14377				V4SF_type_node, integer_type_node, NULL_TREE);
14378  tree v4sf_ftype_v4sf_int64
14379    = build_function_type_list (V4SF_type_node,
14380				V4SF_type_node, long_long_integer_type_node,
14381				NULL_TREE);
14382  tree v4sf_ftype_v4sf_v2si
14383    = build_function_type_list (V4SF_type_node,
14384				V4SF_type_node, V2SI_type_node, NULL_TREE);
14385
14386  /* Miscellaneous.  */
14387  tree v8qi_ftype_v4hi_v4hi
14388    = build_function_type_list (V8QI_type_node,
14389				V4HI_type_node, V4HI_type_node, NULL_TREE);
14390  tree v4hi_ftype_v2si_v2si
14391    = build_function_type_list (V4HI_type_node,
14392				V2SI_type_node, V2SI_type_node, NULL_TREE);
14393  tree v4sf_ftype_v4sf_v4sf_int
14394    = build_function_type_list (V4SF_type_node,
14395				V4SF_type_node, V4SF_type_node,
14396				integer_type_node, NULL_TREE);
14397  tree v2si_ftype_v4hi_v4hi
14398    = build_function_type_list (V2SI_type_node,
14399				V4HI_type_node, V4HI_type_node, NULL_TREE);
14400  tree v4hi_ftype_v4hi_int
14401    = build_function_type_list (V4HI_type_node,
14402				V4HI_type_node, integer_type_node, NULL_TREE);
14403  tree v4hi_ftype_v4hi_di
14404    = build_function_type_list (V4HI_type_node,
14405				V4HI_type_node, long_long_unsigned_type_node,
14406				NULL_TREE);
14407  tree v2si_ftype_v2si_di
14408    = build_function_type_list (V2SI_type_node,
14409				V2SI_type_node, long_long_unsigned_type_node,
14410				NULL_TREE);
14411  tree void_ftype_void
14412    = build_function_type (void_type_node, void_list_node);
14413  tree void_ftype_unsigned
14414    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
14415  tree void_ftype_unsigned_unsigned
14416    = build_function_type_list (void_type_node, unsigned_type_node,
14417				unsigned_type_node, NULL_TREE);
14418  tree void_ftype_pcvoid_unsigned_unsigned
14419    = build_function_type_list (void_type_node, const_ptr_type_node,
14420				unsigned_type_node, unsigned_type_node,
14421				NULL_TREE);
14422  tree unsigned_ftype_void
14423    = build_function_type (unsigned_type_node, void_list_node);
14424  tree v2si_ftype_v4sf
14425    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
14426  /* Loads/stores.  */
14427  tree void_ftype_v8qi_v8qi_pchar
14428    = build_function_type_list (void_type_node,
14429				V8QI_type_node, V8QI_type_node,
14430				pchar_type_node, NULL_TREE);
14431  tree v4sf_ftype_pcfloat
14432    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
14433  /* @@@ the type is bogus */
14434  tree v4sf_ftype_v4sf_pv2si
14435    = build_function_type_list (V4SF_type_node,
14436				V4SF_type_node, pv2si_type_node, NULL_TREE);
14437  tree void_ftype_pv2si_v4sf
14438    = build_function_type_list (void_type_node,
14439				pv2si_type_node, V4SF_type_node, NULL_TREE);
14440  tree void_ftype_pfloat_v4sf
14441    = build_function_type_list (void_type_node,
14442				pfloat_type_node, V4SF_type_node, NULL_TREE);
14443  tree void_ftype_pdi_di
14444    = build_function_type_list (void_type_node,
14445				pdi_type_node, long_long_unsigned_type_node,
14446				NULL_TREE);
14447  tree void_ftype_pv2di_v2di
14448    = build_function_type_list (void_type_node,
14449				pv2di_type_node, V2DI_type_node, NULL_TREE);
14450  /* Normal vector unops.  */
14451  tree v4sf_ftype_v4sf
14452    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14453
14454  /* Normal vector binops.  */
14455  tree v4sf_ftype_v4sf_v4sf
14456    = build_function_type_list (V4SF_type_node,
14457				V4SF_type_node, V4SF_type_node, NULL_TREE);
14458  tree v8qi_ftype_v8qi_v8qi
14459    = build_function_type_list (V8QI_type_node,
14460				V8QI_type_node, V8QI_type_node, NULL_TREE);
14461  tree v4hi_ftype_v4hi_v4hi
14462    = build_function_type_list (V4HI_type_node,
14463				V4HI_type_node, V4HI_type_node, NULL_TREE);
14464  tree v2si_ftype_v2si_v2si
14465    = build_function_type_list (V2SI_type_node,
14466				V2SI_type_node, V2SI_type_node, NULL_TREE);
14467  tree di_ftype_di_di
14468    = build_function_type_list (long_long_unsigned_type_node,
14469				long_long_unsigned_type_node,
14470				long_long_unsigned_type_node, NULL_TREE);
14471
14472  tree v2si_ftype_v2sf
14473    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
14474  tree v2sf_ftype_v2si
14475    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
14476  tree v2si_ftype_v2si
14477    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
14478  tree v2sf_ftype_v2sf
14479    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
14480  tree v2sf_ftype_v2sf_v2sf
14481    = build_function_type_list (V2SF_type_node,
14482				V2SF_type_node, V2SF_type_node, NULL_TREE);
14483  tree v2si_ftype_v2sf_v2sf
14484    = build_function_type_list (V2SI_type_node,
14485				V2SF_type_node, V2SF_type_node, NULL_TREE);
14486  tree pint_type_node    = build_pointer_type (integer_type_node);
14487  tree pdouble_type_node = build_pointer_type (double_type_node);
14488  tree pcdouble_type_node = build_pointer_type (
14489				build_type_variant (double_type_node, 1, 0));
14490  tree int_ftype_v2df_v2df
14491    = build_function_type_list (integer_type_node,
14492				V2DF_type_node, V2DF_type_node, NULL_TREE);
14493
14494  tree ti_ftype_ti_ti
14495    = build_function_type_list (intTI_type_node,
14496				intTI_type_node, intTI_type_node, NULL_TREE);
14497  tree void_ftype_pcvoid
14498    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
14499  tree v4sf_ftype_v4si
14500    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
14501  tree v4si_ftype_v4sf
14502    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
14503  tree v2df_ftype_v4si
14504    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
14505  tree v4si_ftype_v2df
14506    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
14507  tree v2si_ftype_v2df
14508    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
14509  tree v4sf_ftype_v2df
14510    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
14511  tree v2df_ftype_v2si
14512    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
14513  tree v2df_ftype_v4sf
14514    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
14515  tree int_ftype_v2df
14516    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
14517  tree int64_ftype_v2df
14518    = build_function_type_list (long_long_integer_type_node,
14519				V2DF_type_node, NULL_TREE);
14520  tree v2df_ftype_v2df_int
14521    = build_function_type_list (V2DF_type_node,
14522				V2DF_type_node, integer_type_node, NULL_TREE);
14523  tree v2df_ftype_v2df_int64
14524    = build_function_type_list (V2DF_type_node,
14525				V2DF_type_node, long_long_integer_type_node,
14526				NULL_TREE);
14527  tree v4sf_ftype_v4sf_v2df
14528    = build_function_type_list (V4SF_type_node,
14529				V4SF_type_node, V2DF_type_node, NULL_TREE);
14530  tree v2df_ftype_v2df_v4sf
14531    = build_function_type_list (V2DF_type_node,
14532				V2DF_type_node, V4SF_type_node, NULL_TREE);
14533  tree v2df_ftype_v2df_v2df_int
14534    = build_function_type_list (V2DF_type_node,
14535				V2DF_type_node, V2DF_type_node,
14536				integer_type_node,
14537				NULL_TREE);
14538  tree v2df_ftype_v2df_pcdouble
14539    = build_function_type_list (V2DF_type_node,
14540				V2DF_type_node, pcdouble_type_node, NULL_TREE);
14541  tree void_ftype_pdouble_v2df
14542    = build_function_type_list (void_type_node,
14543				pdouble_type_node, V2DF_type_node, NULL_TREE);
14544  tree void_ftype_pint_int
14545    = build_function_type_list (void_type_node,
14546				pint_type_node, integer_type_node, NULL_TREE);
14547  tree void_ftype_v16qi_v16qi_pchar
14548    = build_function_type_list (void_type_node,
14549				V16QI_type_node, V16QI_type_node,
14550				pchar_type_node, NULL_TREE);
14551  tree v2df_ftype_pcdouble
14552    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
14553  tree v2df_ftype_v2df_v2df
14554    = build_function_type_list (V2DF_type_node,
14555				V2DF_type_node, V2DF_type_node, NULL_TREE);
14556  tree v16qi_ftype_v16qi_v16qi
14557    = build_function_type_list (V16QI_type_node,
14558				V16QI_type_node, V16QI_type_node, NULL_TREE);
14559  tree v8hi_ftype_v8hi_v8hi
14560    = build_function_type_list (V8HI_type_node,
14561				V8HI_type_node, V8HI_type_node, NULL_TREE);
14562  tree v4si_ftype_v4si_v4si
14563    = build_function_type_list (V4SI_type_node,
14564				V4SI_type_node, V4SI_type_node, NULL_TREE);
14565  tree v2di_ftype_v2di_v2di
14566    = build_function_type_list (V2DI_type_node,
14567				V2DI_type_node, V2DI_type_node, NULL_TREE);
14568  tree v2di_ftype_v2df_v2df
14569    = build_function_type_list (V2DI_type_node,
14570				V2DF_type_node, V2DF_type_node, NULL_TREE);
14571  tree v2df_ftype_v2df
14572    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14573  tree v2di_ftype_v2di_int
14574    = build_function_type_list (V2DI_type_node,
14575				V2DI_type_node, integer_type_node, NULL_TREE);
14576  tree v4si_ftype_v4si_int
14577    = build_function_type_list (V4SI_type_node,
14578				V4SI_type_node, integer_type_node, NULL_TREE);
14579  tree v8hi_ftype_v8hi_int
14580    = build_function_type_list (V8HI_type_node,
14581				V8HI_type_node, integer_type_node, NULL_TREE);
14582  tree v4si_ftype_v8hi_v8hi
14583    = build_function_type_list (V4SI_type_node,
14584				V8HI_type_node, V8HI_type_node, NULL_TREE);
14585  tree di_ftype_v8qi_v8qi
14586    = build_function_type_list (long_long_unsigned_type_node,
14587				V8QI_type_node, V8QI_type_node, NULL_TREE);
14588  tree di_ftype_v2si_v2si
14589    = build_function_type_list (long_long_unsigned_type_node,
14590				V2SI_type_node, V2SI_type_node, NULL_TREE);
14591  tree v2di_ftype_v16qi_v16qi
14592    = build_function_type_list (V2DI_type_node,
14593				V16QI_type_node, V16QI_type_node, NULL_TREE);
14594  tree v2di_ftype_v4si_v4si
14595    = build_function_type_list (V2DI_type_node,
14596				V4SI_type_node, V4SI_type_node, NULL_TREE);
14597  tree int_ftype_v16qi
14598    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
14599  tree v16qi_ftype_pcchar
14600    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
14601  tree void_ftype_pchar_v16qi
14602    = build_function_type_list (void_type_node,
14603			        pchar_type_node, V16QI_type_node, NULL_TREE);
14604
14605  tree float80_type;
14606  tree float128_type;
14607  tree ftype;
14608
14609  /* The __float80 type.  */
14610  if (TYPE_MODE (long_double_type_node) == XFmode)
14611    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
14612					       "__float80");
14613  else
14614    {
14615      /* The __float80 type.  */
14616      float80_type = make_node (REAL_TYPE);
14617      TYPE_PRECISION (float80_type) = 80;
14618      layout_type (float80_type);
14619      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
14620    }
14621
14622  if (TARGET_64BIT)
14623    {
14624      float128_type = make_node (REAL_TYPE);
14625      TYPE_PRECISION (float128_type) = 128;
14626      layout_type (float128_type);
14627      (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
14628    }
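
  /* User-level sketch of the two types registered above (assuming a
     64-bit target, so that __float128 is available):

       __float80  e = 1.0;		80-bit extended precision
       __float128 q = 1.0;		128-bit precision

     __float80 is always registered; it simply aliases long double when
     long double is already XFmode.  */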
14629
14630  /* Add all builtins that are more or less simple operations on two
14631     operands.  */
14632  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14633    {
14634      /* Use one of the operands; the target can have a different mode for
14635	 mask-generating compares.  */
14636      enum machine_mode mode;
14637      tree type;
14638
14639      if (d->name == 0)
14640	continue;
14641      mode = insn_data[d->icode].operand[1].mode;
14642
14643      switch (mode)
14644	{
14645	case V16QImode:
14646	  type = v16qi_ftype_v16qi_v16qi;
14647	  break;
14648	case V8HImode:
14649	  type = v8hi_ftype_v8hi_v8hi;
14650	  break;
14651	case V4SImode:
14652	  type = v4si_ftype_v4si_v4si;
14653	  break;
14654	case V2DImode:
14655	  type = v2di_ftype_v2di_v2di;
14656	  break;
14657	case V2DFmode:
14658	  type = v2df_ftype_v2df_v2df;
14659	  break;
14660	case TImode:
14661	  type = ti_ftype_ti_ti;
14662	  break;
14663	case V4SFmode:
14664	  type = v4sf_ftype_v4sf_v4sf;
14665	  break;
14666	case V8QImode:
14667	  type = v8qi_ftype_v8qi_v8qi;
14668	  break;
14669	case V4HImode:
14670	  type = v4hi_ftype_v4hi_v4hi;
14671	  break;
14672	case V2SImode:
14673	  type = v2si_ftype_v2si_v2si;
14674	  break;
14675	case DImode:
14676	  type = di_ftype_di_di;
14677	  break;
14678
14679	default:
14680	  gcc_unreachable ();
14681	}
14682
14683      /* Override for comparisons.  */
14684      if (d->icode == CODE_FOR_sse_maskcmpv4sf3
14685	  || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
14686	type = v4si_ftype_v4sf_v4sf;
14687
14688      if (d->icode == CODE_FOR_sse2_maskcmpv2df3
14689	  || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
14690	type = v2di_ftype_v2df_v2df;
14691
14692      def_builtin (d->mask, d->name, type, d->code);
14693    }
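
  /* Concrete illustration of one pass through the loop above: the
     bdesc_2arg entry for "__builtin_ia32_addps" uses CODE_FOR_addv4sf3,
     whose operand 1 has mode V4SFmode, so the switch selects
     v4sf_ftype_v4sf_v4sf and the entry is registered roughly as if by

       def_builtin (MASK_SSE, "__builtin_ia32_addps", v4sf_ftype_v4sf_v4sf,
		    IX86_BUILTIN_ADDPS);  */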
14694
14695  /* Add the remaining MMX insns with somewhat more complicated types.  */
14696  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
14697  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
14698  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
14699  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
14700
14701  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
14702  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
14703  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
14704
14705  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
14706  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
14707
14708  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
14709  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
14710
14711  /* comi/ucomi insns.  */
14712  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14713    if (d->mask == MASK_SSE2)
14714      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
14715    else
14716      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
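
  /* E.g. "__builtin_ia32_comisdeq" carries MASK_SSE2 and therefore gets
     the int (v2df, v2df) prototype above, while the single-precision
     comi/ucomi builtins fall through to int (v4sf, v4sf).  */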
14717
14718  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
14719  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
14720  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
14721
14722  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
14723  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
14724  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
14725  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
14726  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
14727  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
14728  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
14729  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
14730  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
14731  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
14732  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
14733
14734  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
14735
14736  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
14737  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
14738
14739  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
14740  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
14741  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
14742  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
14743
14744  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
14745  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
14746  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
14747  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
14748
14749  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
14750
14751  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
14752
14753  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
14754  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
14755  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
14756  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
14757  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
14758  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
14759
14760  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
14761
14762  /* Original 3DNow!  */
14763  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
14764  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
14765  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
14766  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
14767  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
14768  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
14769  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
14770  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
14771  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
14772  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
14773  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
14774  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
14775  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
14776  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
14777  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
14778  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
14779  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
14780  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
14781  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
14782  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
14783
14784  /* 3DNow! extension as used in the Athlon CPU.  */
14785  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
14786  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
14787  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
14788  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
14789  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
14790  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
14791
14792  /* SSE2 */
14793  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
14794
14795  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
14796  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
14797
14798  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
14799  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
14800
14801  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
14802  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
14803  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
14804  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
14805  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
14806
14807  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
14808  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
14809  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
14810  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
14811
14812  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
14813  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
14814
14815  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
14816
14817  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
14818  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
14819
14820  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
14821  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
14822  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
14823  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
14824  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
14825
14826  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
14827
14828  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
14829  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
14830  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
14831  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
14832
14833  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
14834  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
14835  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
14836
14837  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
14838  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
14839  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
14840  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
14841
14842  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
14843  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
14844  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
14845
14846  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
14847  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
14848
14849  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
14850  def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
14851
14852  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
14853  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
14854  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
14855
14856  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
14857  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
14858  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
14859
14860  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
14861  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
14862
14863  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
14864  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
14865  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
14866  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
14867
14868  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
14869  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
14870  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
14871  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
14872
14873  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
14874  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
14875
14876  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
14877
14878  /* Prescott New Instructions.  */
14879  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
14880	       void_ftype_pcvoid_unsigned_unsigned,
14881	       IX86_BUILTIN_MONITOR);
14882  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
14883	       void_ftype_unsigned_unsigned,
14884	       IX86_BUILTIN_MWAIT);
14885  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
14886	       v4sf_ftype_v4sf,
14887	       IX86_BUILTIN_MOVSHDUP);
14888  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
14889	       v4sf_ftype_v4sf,
14890	       IX86_BUILTIN_MOVSLDUP);
14891  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
14892	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
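
  /* For reference, __builtin_ia32_monitor and __builtin_ia32_mwait back the
     _mm_monitor and _mm_mwait intrinsics in pmmintrin.h; the signatures above
     mirror the register operands of the MONITOR and MWAIT instructions.  */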
14893
14894  /* Access to the vec_init patterns.  */
14895  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
14896				    integer_type_node, NULL_TREE);
14897  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
14898	       ftype, IX86_BUILTIN_VEC_INIT_V2SI);
14899
14900  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
14901				    short_integer_type_node,
14902				    short_integer_type_node,
14903				    short_integer_type_node, NULL_TREE);
14904  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
14905	       ftype, IX86_BUILTIN_VEC_INIT_V4HI);
14906
14907  ftype = build_function_type_list (V8QI_type_node, char_type_node,
14908				    char_type_node, char_type_node,
14909				    char_type_node, char_type_node,
14910				    char_type_node, char_type_node,
14911				    char_type_node, NULL_TREE);
14912  def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
14913	       ftype, IX86_BUILTIN_VEC_INIT_V8QI);
14914
14915  /* Access to the vec_extract patterns.  */
14916  ftype = build_function_type_list (double_type_node, V2DF_type_node,
14917				    integer_type_node, NULL_TREE);
14918  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
14919	       ftype, IX86_BUILTIN_VEC_EXT_V2DF);
14920
14921  ftype = build_function_type_list (long_long_integer_type_node,
14922				    V2DI_type_node, integer_type_node,
14923				    NULL_TREE);
14924  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
14925	       ftype, IX86_BUILTIN_VEC_EXT_V2DI);
14926
14927  ftype = build_function_type_list (float_type_node, V4SF_type_node,
14928				    integer_type_node, NULL_TREE);
14929  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
14930	       ftype, IX86_BUILTIN_VEC_EXT_V4SF);
14931
14932  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14933				    integer_type_node, NULL_TREE);
14934  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
14935	       ftype, IX86_BUILTIN_VEC_EXT_V4SI);
14936
14937  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14938				    integer_type_node, NULL_TREE);
14939  def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
14940	       ftype, IX86_BUILTIN_VEC_EXT_V8HI);
14941
14942  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
14943				    integer_type_node, NULL_TREE);
14944  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
14945	       ftype, IX86_BUILTIN_VEC_EXT_V4HI);
14946
14947  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
14948				    integer_type_node, NULL_TREE);
14949  def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
14950	       ftype, IX86_BUILTIN_VEC_EXT_V2SI);
14951
14952  /* Access to the vec_set patterns.  */
14953  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14954				    intHI_type_node,
14955				    integer_type_node, NULL_TREE);
14956  def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
14957	       ftype, IX86_BUILTIN_VEC_SET_V8HI);
14958
14959  ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
14960				    intHI_type_node,
14961				    integer_type_node, NULL_TREE);
14962  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
14963	       ftype, IX86_BUILTIN_VEC_SET_V4HI);
14964}
14965
14966/* Errors in the source file can cause expand_expr to return const0_rtx
14967   where we expect a vector.  To avoid crashing, use one of the vector
14968   clear instructions.  */
14969static rtx
14970safe_vector_operand (rtx x, enum machine_mode mode)
14971{
14972  if (x == const0_rtx)
14973    x = CONST0_RTX (mode);
14974  return x;
14975}
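
/* For example, if a user passes an erroneous expression to
   __builtin_ia32_addps, expand_expr may hand back const0_rtx instead of a
   V4SFmode value; safe_vector_operand substitutes CONST0_RTX (V4SFmode) so
   that the expanders below still see an operand of the expected mode.  */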
14976
14977/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
14978
14979static rtx
14980ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
14981{
14982  rtx pat, xops[3];
14983  tree arg0 = TREE_VALUE (arglist);
14984  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14985  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14986  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14987  enum machine_mode tmode = insn_data[icode].operand[0].mode;
14988  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
14989  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
14990
14991  if (VECTOR_MODE_P (mode0))
14992    op0 = safe_vector_operand (op0, mode0);
14993  if (VECTOR_MODE_P (mode1))
14994    op1 = safe_vector_operand (op1, mode1);
14995
14996  if (optimize || !target
14997      || GET_MODE (target) != tmode
14998      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14999    target = gen_reg_rtx (tmode);
15000
15001  if (GET_MODE (op1) == SImode && mode1 == TImode)
15002    {
15003      rtx x = gen_reg_rtx (V4SImode);
15004      emit_insn (gen_sse2_loadd (x, op1));
15005      op1 = gen_lowpart (TImode, x);
15006    }
15007
15008  /* Each input operand must already be in the mode the insn expects;
15009     VOIDmode constants are also acceptable.  */
15010  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
15011	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
15012
15013  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
15014    op0 = copy_to_mode_reg (mode0, op0);
15015  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
15016    op1 = copy_to_mode_reg (mode1, op1);
15017
15018  /* ??? Using ix86_fixup_binary_operands is problematic when
15019     we've got mismatched modes.  Fake it.  */
15020
15021  xops[0] = target;
15022  xops[1] = op0;
15023  xops[2] = op1;
15024
15025  if (tmode == mode0 && tmode == mode1)
15026    {
15027      target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
15028      op0 = xops[1];
15029      op1 = xops[2];
15030    }
15031  else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
15032    {
15033      op0 = force_reg (mode0, op0);
15034      op1 = force_reg (mode1, op1);
15035      target = gen_reg_rtx (tmode);
15036    }
15037
15038  pat = GEN_FCN (icode) (target, op0, op1);
15039  if (! pat)
15040    return 0;
15041  emit_insn (pat);
15042  return target;
15043}
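
/* As an illustrative sketch (not an exact trace): a call such as
   __builtin_ia32_paddw128 (a, b) reaches ix86_expand_binop_builtin with an
   icode like CODE_FOR_addv8hi3; both arguments are expanded, copied into
   V8HImode registers if the operand predicates reject them, and the insn is
   emitted with GEN_FCN (icode) (target, op0, op1).  */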
15044
15045/* Subroutine of ix86_expand_builtin to take care of stores.  */
15046
15047static rtx
15048ix86_expand_store_builtin (enum insn_code icode, tree arglist)
15049{
15050  rtx pat;
15051  tree arg0 = TREE_VALUE (arglist);
15052  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15053  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15054  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15055  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
15056  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
15057
15058  if (VECTOR_MODE_P (mode1))
15059    op1 = safe_vector_operand (op1, mode1);
15060
15061  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15062  op1 = copy_to_mode_reg (mode1, op1);
15063
15064  pat = GEN_FCN (icode) (op0, op1);
15065  if (pat)
15066    emit_insn (pat);
15067  return 0;
15068}
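
/* Store builtins such as __builtin_ia32_storeups take a (pointer, vector)
   argument list: op0 is wrapped in a MEM of the insn's first operand mode at
   that address, op1 is copied into a register, and the store pattern is
   emitted.  Stores produce no value, so the expander always returns 0.  */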
15069
15070/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
15071
15072static rtx
15073ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
15074			  rtx target, int do_load)
15075{
15076  rtx pat;
15077  tree arg0 = TREE_VALUE (arglist);
15078  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15079  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15080  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15081
15082  if (optimize || !target
15083      || GET_MODE (target) != tmode
15084      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15085    target = gen_reg_rtx (tmode);
15086  if (do_load)
15087    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15088  else
15089    {
15090      if (VECTOR_MODE_P (mode0))
15091	op0 = safe_vector_operand (op0, mode0);
15092
15093      if ((optimize && !register_operand (op0, mode0))
15094	  || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15095	op0 = copy_to_mode_reg (mode0, op0);
15096    }
15097
15098  pat = GEN_FCN (icode) (target, op0);
15099  if (! pat)
15100    return 0;
15101  emit_insn (pat);
15102  return target;
15103}
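
/* DO_LOAD distinguishes plain unary operations (e.g. __builtin_ia32_sqrtps,
   whose argument is already a vector) from load builtins
   (e.g. __builtin_ia32_loadups, whose argument is a pointer that must be
   wrapped in a MEM before the move pattern is emitted).  */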
15104
15105/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
15106   sqrtss, rsqrtss, rcpss.  */
15107
15108static rtx
15109ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
15110{
15111  rtx pat;
15112  tree arg0 = TREE_VALUE (arglist);
15113  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15114  enum machine_mode tmode = insn_data[icode].operand[0].mode;
15115  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
15116
15117  if (optimize || !target
15118      || GET_MODE (target) != tmode
15119      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15120    target = gen_reg_rtx (tmode);
15121
15122  if (VECTOR_MODE_P (mode0))
15123    op0 = safe_vector_operand (op0, mode0);
15124
15125  if ((optimize && !register_operand (op0, mode0))
15126      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15127    op0 = copy_to_mode_reg (mode0, op0);
15128
15129  op1 = op0;
15130  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
15131    op1 = copy_to_mode_reg (mode0, op1);
15132
15133  pat = GEN_FCN (icode) (target, op0, op1);
15134  if (! pat)
15135    return 0;
15136  emit_insn (pat);
15137  return target;
15138}
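
/* Although these builtins are unary at the source level, the vm* patterns
   (e.g. sse_vmsqrtv4sf2) take two vector operands: element 0 of the result
   is computed from the first operand and the remaining elements are copied
   from the second.  Duplicating op0 into op1 therefore leaves the upper
   elements of the argument unchanged in the result.  */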
15139
15140/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
15141
15142static rtx
15143ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
15144			 rtx target)
15145{
15146  rtx pat;
15147  tree arg0 = TREE_VALUE (arglist);
15148  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15149  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15150  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15151  rtx op2;
15152  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
15153  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
15154  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
15155  enum rtx_code comparison = d->comparison;
15156
15157  if (VECTOR_MODE_P (mode0))
15158    op0 = safe_vector_operand (op0, mode0);
15159  if (VECTOR_MODE_P (mode1))
15160    op1 = safe_vector_operand (op1, mode1);
15161
15162  /* Swap operands if we have a comparison that isn't available in
15163     hardware.  */
15164  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15165    {
15166      rtx tmp = gen_reg_rtx (mode1);
15167      emit_move_insn (tmp, op1);
15168      op1 = op0;
15169      op0 = tmp;
15170    }
15171
15172  if (optimize || !target
15173      || GET_MODE (target) != tmode
15174      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
15175    target = gen_reg_rtx (tmode);
15176
15177  if ((optimize && !register_operand (op0, mode0))
15178      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
15179    op0 = copy_to_mode_reg (mode0, op0);
15180  if ((optimize && !register_operand (op1, mode1))
15181      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
15182    op1 = copy_to_mode_reg (mode1, op1);
15183
15184  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15185  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
15186  if (! pat)
15187    return 0;
15188  emit_insn (pat);
15189  return target;
15190}
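
/* For example, cmpps has no direct "greater than" encoding, so bdesc_2arg
   describes __builtin_ia32_cmpgtps with the LT comparison code and the
   BUILTIN_DESC_SWAP_OPERANDS flag; the swap above then yields the requested
   comparison with the operands reversed.  (Illustrative -- see the table for
   the exact entries.)  */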
15191
15192/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
15193
15194static rtx
15195ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
15196		      rtx target)
15197{
15198  rtx pat;
15199  tree arg0 = TREE_VALUE (arglist);
15200  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15201  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15202  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15203  rtx op2;
15204  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
15205  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
15206  enum rtx_code comparison = d->comparison;
15207
15208  if (VECTOR_MODE_P (mode0))
15209    op0 = safe_vector_operand (op0, mode0);
15210  if (VECTOR_MODE_P (mode1))
15211    op1 = safe_vector_operand (op1, mode1);
15212
15213  /* Swap operands if we have a comparison that isn't available in
15214     hardware.  */
15215  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
15216    {
15217      rtx tmp = op1;
15218      op1 = op0;
15219      op0 = tmp;
15220    }
15221
15222  target = gen_reg_rtx (SImode);
15223  emit_move_insn (target, const0_rtx);
15224  target = gen_rtx_SUBREG (QImode, target, 0);
15225
15226  if ((optimize && !register_operand (op0, mode0))
15227      || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
15228    op0 = copy_to_mode_reg (mode0, op0);
15229  if ((optimize && !register_operand (op1, mode1))
15230      || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
15231    op1 = copy_to_mode_reg (mode1, op1);
15232
15233  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
15234  pat = GEN_FCN (d->icode) (op0, op1);
15235  if (! pat)
15236    return 0;
15237  emit_insn (pat);
15238  emit_insn (gen_rtx_SET (VOIDmode,
15239			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
15240			  gen_rtx_fmt_ee (comparison, QImode,
15241					  SET_DEST (pat),
15242					  const0_rtx)));
15243
15244  return SUBREG_REG (target);
15245}
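
/* The comi builtins (e.g. __builtin_ia32_comilt) emit a flag-setting scalar
   compare and then materialize the boolean result: a fresh SImode pseudo is
   zeroed, its low QImode part is set from the condition code through a
   STRICT_LOW_PART assignment, and the full SImode register is returned, so
   the caller sees 0 or 1.  */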
15246
15247/* Return the integer constant in ARG.  Constrain it to be in the range
15248   of the subparts of VEC_TYPE; issue an error if not.  */
15249
15250static int
15251get_element_number (tree vec_type, tree arg)
15252{
15253  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
15254
15255  if (!host_integerp (arg, 1)
15256      || (elt = tree_low_cst (arg, 1), elt > max))
15257    {
15258      error ("selector must be an integer constant in the range 0..%wi", max);
15259      return 0;
15260    }
15261
15262  return elt;
15263}
15264
15265/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15266   ix86_expand_vector_init.  We DO have language-level syntax for this, in
15267   the form of  (type){ init-list }.  Except that since we can't place emms
15268   instructions from inside the compiler, we can't allow the use of MMX
15269   registers unless the user explicitly asks for it.  So we do *not* define
15270   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
15271   we have builtins invoked by mmintrin.h that give us license to emit
15272   these sorts of instructions.  */
15273
15274static rtx
15275ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
15276{
15277  enum machine_mode tmode = TYPE_MODE (type);
15278  enum machine_mode inner_mode = GET_MODE_INNER (tmode);
15279  int i, n_elt = GET_MODE_NUNITS (tmode);
15280  rtvec v = rtvec_alloc (n_elt);
15281
15282  gcc_assert (VECTOR_MODE_P (tmode));
15283
15284  for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
15285    {
15286      rtx x = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15287      RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
15288    }
15289
15290  gcc_assert (arglist == NULL);
15291
15292  if (!target || !register_operand (target, tmode))
15293    target = gen_reg_rtx (tmode);
15294
15295  ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
15296  return target;
15297}
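
/* As a rough example, __builtin_ia32_vec_init_v2si (1, 2) expands each
   argument, takes its SImode lowpart, collects the parts into a PARALLEL,
   and hands the whole thing to ix86_expand_vector_init, which chooses the
   actual MMX insn sequence.  */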
15298
15299/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15300   ix86_expand_vector_extract.  They would be redundant (for non-MMX) if we
15301   had a language-level syntax for referencing vector elements.  */
15302
15303static rtx
15304ix86_expand_vec_ext_builtin (tree arglist, rtx target)
15305{
15306  enum machine_mode tmode, mode0;
15307  tree arg0, arg1;
15308  int elt;
15309  rtx op0;
15310
15311  arg0 = TREE_VALUE (arglist);
15312  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15313
15314  op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15315  elt = get_element_number (TREE_TYPE (arg0), arg1);
15316
15317  tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15318  mode0 = TYPE_MODE (TREE_TYPE (arg0));
15319  gcc_assert (VECTOR_MODE_P (mode0));
15320
15321  op0 = force_reg (mode0, op0);
15322
15323  if (optimize || !target || !register_operand (target, tmode))
15324    target = gen_reg_rtx (tmode);
15325
15326  ix86_expand_vector_extract (true, target, op0, elt);
15327
15328  return target;
15329}
15330
15331/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
15332   ix86_expand_vector_set.  They would be redundant (for non-MMX) if we had
15333   a language-level syntax for referencing vector elements.  */
15334
15335static rtx
15336ix86_expand_vec_set_builtin (tree arglist)
15337{
15338  enum machine_mode tmode, mode1;
15339  tree arg0, arg1, arg2;
15340  int elt;
15341  rtx op0, op1, target;
15342
15343  arg0 = TREE_VALUE (arglist);
15344  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15345  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15346
15347  tmode = TYPE_MODE (TREE_TYPE (arg0));
15348  mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
15349  gcc_assert (VECTOR_MODE_P (tmode));
15350
15351  op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
15352  op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
15353  elt = get_element_number (TREE_TYPE (arg0), arg2);
15354
15355  if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
15356    op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
15357
15358  op0 = force_reg (tmode, op0);
15359  op1 = force_reg (mode1, op1);
15360
15361  /* OP0 is the source operand of these builtin functions and shouldn't be
15362     modified.  Create a copy, use it and return it as the target.  */
15363  target = gen_reg_rtx (tmode);
15364  emit_move_insn (target, op0);
15365  ix86_expand_vector_set (true, target, op1, elt);
15366
15367  return target;
15368}
15369
15370/* Expand an expression EXP that calls a built-in function,
15371   with result going to TARGET if that's convenient
15372   (and in mode MODE if that's convenient).
15373   SUBTARGET may be used as the target for computing one of EXP's operands.
15374   IGNORE is nonzero if the value is to be ignored.  */
15375
15376static rtx
15377ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
15378		     enum machine_mode mode ATTRIBUTE_UNUSED,
15379		     int ignore ATTRIBUTE_UNUSED)
15380{
15381  const struct builtin_description *d;
15382  size_t i;
15383  enum insn_code icode;
15384  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
15385  tree arglist = TREE_OPERAND (exp, 1);
15386  tree arg0, arg1, arg2;
15387  rtx op0, op1, op2, pat;
15388  enum machine_mode tmode, mode0, mode1, mode2;
15389  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
15390
15391  switch (fcode)
15392    {
15393    case IX86_BUILTIN_EMMS:
15394      emit_insn (gen_mmx_emms ());
15395      return 0;
15396
15397    case IX86_BUILTIN_SFENCE:
15398      emit_insn (gen_sse_sfence ());
15399      return 0;
15400
15401    case IX86_BUILTIN_MASKMOVQ:
15402    case IX86_BUILTIN_MASKMOVDQU:
15403      icode = (fcode == IX86_BUILTIN_MASKMOVQ
15404	       ? CODE_FOR_mmx_maskmovq
15405	       : CODE_FOR_sse2_maskmovdqu);
15406      /* Note the arg order is different from the operand order.  */
15407      arg1 = TREE_VALUE (arglist);
15408      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
15409      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15410      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15411      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15412      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15413      mode0 = insn_data[icode].operand[0].mode;
15414      mode1 = insn_data[icode].operand[1].mode;
15415      mode2 = insn_data[icode].operand[2].mode;
15416
15417      op0 = force_reg (Pmode, op0);
15418      op0 = gen_rtx_MEM (mode1, op0);
15419
15420      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
15421	op0 = copy_to_mode_reg (mode0, op0);
15422      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
15423	op1 = copy_to_mode_reg (mode1, op1);
15424      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
15425	op2 = copy_to_mode_reg (mode2, op2);
15426      pat = GEN_FCN (icode) (op0, op1, op2);
15427      if (! pat)
15428	return 0;
15429      emit_insn (pat);
15430      return 0;
15431
15432    case IX86_BUILTIN_SQRTSS:
15433      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
15434    case IX86_BUILTIN_RSQRTSS:
15435      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
15436    case IX86_BUILTIN_RCPSS:
15437      return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
15438
15439    case IX86_BUILTIN_LOADUPS:
15440      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
15441
15442    case IX86_BUILTIN_STOREUPS:
15443      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
15444
15445    case IX86_BUILTIN_LOADHPS:
15446    case IX86_BUILTIN_LOADLPS:
15447    case IX86_BUILTIN_LOADHPD:
15448    case IX86_BUILTIN_LOADLPD:
15449      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
15450	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
15451	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
15452	       : CODE_FOR_sse2_loadlpd);
15453      arg0 = TREE_VALUE (arglist);
15454      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15455      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15456      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15457      tmode = insn_data[icode].operand[0].mode;
15458      mode0 = insn_data[icode].operand[1].mode;
15459      mode1 = insn_data[icode].operand[2].mode;
15460
15461      op0 = force_reg (mode0, op0);
15462      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
15463      if (optimize || target == 0
15464	  || GET_MODE (target) != tmode
15465	  || !register_operand (target, tmode))
15466	target = gen_reg_rtx (tmode);
15467      pat = GEN_FCN (icode) (target, op0, op1);
15468      if (! pat)
15469	return 0;
15470      emit_insn (pat);
15471      return target;
15472
15473    case IX86_BUILTIN_STOREHPS:
15474    case IX86_BUILTIN_STORELPS:
15475      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
15476	       : CODE_FOR_sse_storelps);
15477      arg0 = TREE_VALUE (arglist);
15478      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15479      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15480      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15481      mode0 = insn_data[icode].operand[0].mode;
15482      mode1 = insn_data[icode].operand[1].mode;
15483
15484      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
15485      op1 = force_reg (mode1, op1);
15486
15487      pat = GEN_FCN (icode) (op0, op1);
15488      if (! pat)
15489	return 0;
15490      emit_insn (pat);
15491      return const0_rtx;
15492
15493    case IX86_BUILTIN_MOVNTPS:
15494      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
15495    case IX86_BUILTIN_MOVNTQ:
15496      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
15497
15498    case IX86_BUILTIN_LDMXCSR:
15499      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
15500      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
15501      emit_move_insn (target, op0);
15502      emit_insn (gen_sse_ldmxcsr (target));
15503      return 0;
15504
15505    case IX86_BUILTIN_STMXCSR:
15506      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
15507      emit_insn (gen_sse_stmxcsr (target));
15508      return copy_to_mode_reg (SImode, target);
15509
15510    case IX86_BUILTIN_SHUFPS:
15511    case IX86_BUILTIN_SHUFPD:
15512      icode = (fcode == IX86_BUILTIN_SHUFPS
15513	       ? CODE_FOR_sse_shufps
15514	       : CODE_FOR_sse2_shufpd);
15515      arg0 = TREE_VALUE (arglist);
15516      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15517      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15518      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15519      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15520      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15521      tmode = insn_data[icode].operand[0].mode;
15522      mode0 = insn_data[icode].operand[1].mode;
15523      mode1 = insn_data[icode].operand[2].mode;
15524      mode2 = insn_data[icode].operand[3].mode;
15525
15526      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
15527	op0 = copy_to_mode_reg (mode0, op0);
15528      if ((optimize && !register_operand (op1, mode1))
15529	  || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
15530	op1 = copy_to_mode_reg (mode1, op1);
15531      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
15532	{
15533	  /* @@@ better error message */
15534	  error ("mask must be an immediate");
15535	  return gen_reg_rtx (tmode);
15536	}
15537      if (optimize || target == 0
15538	  || GET_MODE (target) != tmode
15539	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15540	target = gen_reg_rtx (tmode);
15541      pat = GEN_FCN (icode) (target, op0, op1, op2);
15542      if (! pat)
15543	return 0;
15544      emit_insn (pat);
15545      return target;
15546
15547    case IX86_BUILTIN_PSHUFW:
15548    case IX86_BUILTIN_PSHUFD:
15549    case IX86_BUILTIN_PSHUFHW:
15550    case IX86_BUILTIN_PSHUFLW:
15551      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
15552	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
15553	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
15554	       : CODE_FOR_mmx_pshufw);
15555      arg0 = TREE_VALUE (arglist);
15556      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15557      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15558      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15559      tmode = insn_data[icode].operand[0].mode;
15560      mode1 = insn_data[icode].operand[1].mode;
15561      mode2 = insn_data[icode].operand[2].mode;
15562
15563      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15564	op0 = copy_to_mode_reg (mode1, op0);
15565      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15566	{
15567	  /* @@@ better error message */
15568	  error ("mask must be an immediate");
15569	  return const0_rtx;
15570	}
15571      if (target == 0
15572	  || GET_MODE (target) != tmode
15573	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
15574	target = gen_reg_rtx (tmode);
15575      pat = GEN_FCN (icode) (target, op0, op1);
15576      if (! pat)
15577	return 0;
15578      emit_insn (pat);
15579      return target;
15580
15581    case IX86_BUILTIN_PSLLWI128:
15582      icode = CODE_FOR_ashlv8hi3;
15583      goto do_pshifti;
15584    case IX86_BUILTIN_PSLLDI128:
15585      icode = CODE_FOR_ashlv4si3;
15586      goto do_pshifti;
15587    case IX86_BUILTIN_PSLLQI128:
15588      icode = CODE_FOR_ashlv2di3;
15589      goto do_pshifti;
15590    case IX86_BUILTIN_PSRAWI128:
15591      icode = CODE_FOR_ashrv8hi3;
15592      goto do_pshifti;
15593    case IX86_BUILTIN_PSRADI128:
15594      icode = CODE_FOR_ashrv4si3;
15595      goto do_pshifti;
15596    case IX86_BUILTIN_PSRLWI128:
15597      icode = CODE_FOR_lshrv8hi3;
15598      goto do_pshifti;
15599    case IX86_BUILTIN_PSRLDI128:
15600      icode = CODE_FOR_lshrv4si3;
15601      goto do_pshifti;
15602    case IX86_BUILTIN_PSRLQI128:
15603      icode = CODE_FOR_lshrv2di3;
15604      goto do_pshifti;
15605    do_pshifti:
15606      arg0 = TREE_VALUE (arglist);
15607      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15608      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15609      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15610
15611      if (GET_CODE (op1) != CONST_INT)
15612	{
15613	  error ("shift must be an immediate");
15614	  return const0_rtx;
15615	}
15616      if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
15617	op1 = GEN_INT (255);
15618
15619      tmode = insn_data[icode].operand[0].mode;
15620      mode1 = insn_data[icode].operand[1].mode;
15621      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15622	op0 = copy_to_reg (op0);
15623
15624      target = gen_reg_rtx (tmode);
15625      pat = GEN_FCN (icode) (target, op0, op1);
15626      if (!pat)
15627	return 0;
15628      emit_insn (pat);
15629      return target;
15630
15631    case IX86_BUILTIN_PSLLW128:
15632      icode = CODE_FOR_ashlv8hi3;
15633      goto do_pshift;
15634    case IX86_BUILTIN_PSLLD128:
15635      icode = CODE_FOR_ashlv4si3;
15636      goto do_pshift;
15637    case IX86_BUILTIN_PSLLQ128:
15638      icode = CODE_FOR_ashlv2di3;
15639      goto do_pshift;
15640    case IX86_BUILTIN_PSRAW128:
15641      icode = CODE_FOR_ashrv8hi3;
15642      goto do_pshift;
15643    case IX86_BUILTIN_PSRAD128:
15644      icode = CODE_FOR_ashrv4si3;
15645      goto do_pshift;
15646    case IX86_BUILTIN_PSRLW128:
15647      icode = CODE_FOR_lshrv8hi3;
15648      goto do_pshift;
15649    case IX86_BUILTIN_PSRLD128:
15650      icode = CODE_FOR_lshrv4si3;
15651      goto do_pshift;
15652    case IX86_BUILTIN_PSRLQ128:
15653      icode = CODE_FOR_lshrv2di3;
15654      goto do_pshift;
15655    do_pshift:
15656      arg0 = TREE_VALUE (arglist);
15657      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15658      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15659      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15660
15661      tmode = insn_data[icode].operand[0].mode;
15662      mode1 = insn_data[icode].operand[1].mode;
15663
15664      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15665	op0 = copy_to_reg (op0);
15666
15667      op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
15668      if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
15669	op1 = copy_to_reg (op1);
15670
15671      target = gen_reg_rtx (tmode);
15672      pat = GEN_FCN (icode) (target, op0, op1);
15673      if (!pat)
15674	return 0;
15675      emit_insn (pat);
15676      return target;
15677
15678    case IX86_BUILTIN_PSLLDQI128:
15679    case IX86_BUILTIN_PSRLDQI128:
15680      icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
15681	       : CODE_FOR_sse2_lshrti3);
15682      arg0 = TREE_VALUE (arglist);
15683      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15684      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15685      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15686      tmode = insn_data[icode].operand[0].mode;
15687      mode1 = insn_data[icode].operand[1].mode;
15688      mode2 = insn_data[icode].operand[2].mode;
15689
15690      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
15691	{
15692	  op0 = copy_to_reg (op0);
15693	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
15694	}
15695      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
15696	{
15697	  error ("shift must be an immediate");
15698	  return const0_rtx;
15699	}
15700      target = gen_reg_rtx (V2DImode);
15701      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
15702			     op0, op1);
15703      if (! pat)
15704	return 0;
15705      emit_insn (pat);
15706      return target;
15707
15708    case IX86_BUILTIN_FEMMS:
15709      emit_insn (gen_mmx_femms ());
15710      return NULL_RTX;
15711
15712    case IX86_BUILTIN_PAVGUSB:
15713      return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
15714
15715    case IX86_BUILTIN_PF2ID:
15716      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
15717
15718    case IX86_BUILTIN_PFACC:
15719      return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
15720
15721    case IX86_BUILTIN_PFADD:
15722     return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
15723
15724    case IX86_BUILTIN_PFCMPEQ:
15725      return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
15726
15727    case IX86_BUILTIN_PFCMPGE:
15728      return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
15729
15730    case IX86_BUILTIN_PFCMPGT:
15731      return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
15732
15733    case IX86_BUILTIN_PFMAX:
15734      return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
15735
15736    case IX86_BUILTIN_PFMIN:
15737      return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
15738
15739    case IX86_BUILTIN_PFMUL:
15740      return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
15741
15742    case IX86_BUILTIN_PFRCP:
15743      return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
15744
15745    case IX86_BUILTIN_PFRCPIT1:
15746      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
15747
15748    case IX86_BUILTIN_PFRCPIT2:
15749      return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
15750
15751    case IX86_BUILTIN_PFRSQIT1:
15752      return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
15753
15754    case IX86_BUILTIN_PFRSQRT:
15755      return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
15756
15757    case IX86_BUILTIN_PFSUB:
15758      return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
15759
15760    case IX86_BUILTIN_PFSUBR:
15761      return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
15762
15763    case IX86_BUILTIN_PI2FD:
15764      return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
15765
15766    case IX86_BUILTIN_PMULHRW:
15767      return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
15768
15769    case IX86_BUILTIN_PF2IW:
15770      return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
15771
15772    case IX86_BUILTIN_PFNACC:
15773      return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
15774
15775    case IX86_BUILTIN_PFPNACC:
15776      return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
15777
15778    case IX86_BUILTIN_PI2FW:
15779      return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
15780
15781    case IX86_BUILTIN_PSWAPDSI:
15782      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
15783
15784    case IX86_BUILTIN_PSWAPDSF:
15785      return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
15786
15787    case IX86_BUILTIN_SQRTSD:
15788      return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
15789    case IX86_BUILTIN_LOADUPD:
15790      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
15791    case IX86_BUILTIN_STOREUPD:
15792      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
15793
15794    case IX86_BUILTIN_MFENCE:
15795	emit_insn (gen_sse2_mfence ());
15796	return 0;
15797    case IX86_BUILTIN_LFENCE:
15798	emit_insn (gen_sse2_lfence ());
15799	return 0;
15800
15801    case IX86_BUILTIN_CLFLUSH:
15802	arg0 = TREE_VALUE (arglist);
15803	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15804	icode = CODE_FOR_sse2_clflush;
15805	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
15806	    op0 = copy_to_mode_reg (Pmode, op0);
15807
15808	emit_insn (gen_sse2_clflush (op0));
15809	return 0;
15810
15811    case IX86_BUILTIN_MOVNTPD:
15812      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
15813    case IX86_BUILTIN_MOVNTDQ:
15814      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
15815    case IX86_BUILTIN_MOVNTI:
15816      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
15817
15818    case IX86_BUILTIN_LOADDQU:
15819      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
15820    case IX86_BUILTIN_STOREDQU:
15821      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
15822
15823    case IX86_BUILTIN_MONITOR:
15824      arg0 = TREE_VALUE (arglist);
15825      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15826      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
15827      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15828      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15829      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
15830      if (!REG_P (op0))
15831	op0 = copy_to_mode_reg (SImode, op0);
15832      if (!REG_P (op1))
15833	op1 = copy_to_mode_reg (SImode, op1);
15834      if (!REG_P (op2))
15835	op2 = copy_to_mode_reg (SImode, op2);
15836      emit_insn (gen_sse3_monitor (op0, op1, op2));
15837      return 0;
15838
15839    case IX86_BUILTIN_MWAIT:
15840      arg0 = TREE_VALUE (arglist);
15841      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
15842      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
15843      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
15844      if (!REG_P (op0))
15845	op0 = copy_to_mode_reg (SImode, op0);
15846      if (!REG_P (op1))
15847	op1 = copy_to_mode_reg (SImode, op1);
15848      emit_insn (gen_sse3_mwait (op0, op1));
15849      return 0;
15850
15851    case IX86_BUILTIN_LDDQU:
15852      return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
15853				       target, 1);
15854
15855    case IX86_BUILTIN_VEC_INIT_V2SI:
15856    case IX86_BUILTIN_VEC_INIT_V4HI:
15857    case IX86_BUILTIN_VEC_INIT_V8QI:
15858      return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
15859
15860    case IX86_BUILTIN_VEC_EXT_V2DF:
15861    case IX86_BUILTIN_VEC_EXT_V2DI:
15862    case IX86_BUILTIN_VEC_EXT_V4SF:
15863    case IX86_BUILTIN_VEC_EXT_V4SI:
15864    case IX86_BUILTIN_VEC_EXT_V8HI:
15865    case IX86_BUILTIN_VEC_EXT_V2SI:
15866    case IX86_BUILTIN_VEC_EXT_V4HI:
15867      return ix86_expand_vec_ext_builtin (arglist, target);
15868
15869    case IX86_BUILTIN_VEC_SET_V8HI:
15870    case IX86_BUILTIN_VEC_SET_V4HI:
15871      return ix86_expand_vec_set_builtin (arglist);
15872
15873    default:
15874      break;
15875    }
15876
15877  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15878    if (d->code == fcode)
15879      {
15880	/* Compares are treated specially.  */
15881	if (d->icode == CODE_FOR_sse_maskcmpv4sf3
15882	    || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
15883	    || d->icode == CODE_FOR_sse2_maskcmpv2df3
15884	    || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
15885	  return ix86_expand_sse_compare (d, arglist, target);
15886
15887	return ix86_expand_binop_builtin (d->icode, arglist, target);
15888      }
15889
15890  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15891    if (d->code == fcode)
15892      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
15893
15894  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
15895    if (d->code == fcode)
15896      return ix86_expand_sse_comi (d, arglist, target);
15897
15898  gcc_unreachable ();
15899}
15900
15901/* Store OPERAND to memory after reload is completed.  This means
15902   that we can't easily use assign_stack_local.  */
15903rtx
15904ix86_force_to_memory (enum machine_mode mode, rtx operand)
15905{
15906  rtx result;
15907
15908  gcc_assert (reload_completed);
15909  if (TARGET_RED_ZONE)
15910    {
15911      result = gen_rtx_MEM (mode,
15912			    gen_rtx_PLUS (Pmode,
15913					  stack_pointer_rtx,
15914					  GEN_INT (-RED_ZONE_SIZE)));
15915      emit_move_insn (result, operand);
15916    }
15917  else if (!TARGET_RED_ZONE && TARGET_64BIT)
15918    {
15919      switch (mode)
15920	{
15921	case HImode:
15922	case SImode:
15923	  operand = gen_lowpart (DImode, operand);
15924	  /* FALLTHRU */
15925	case DImode:
15926	  emit_insn (
15927		      gen_rtx_SET (VOIDmode,
15928				   gen_rtx_MEM (DImode,
15929						gen_rtx_PRE_DEC (DImode,
15930							stack_pointer_rtx)),
15931				   operand));
15932	  break;
15933	default:
15934	  gcc_unreachable ();
15935	}
15936      result = gen_rtx_MEM (mode, stack_pointer_rtx);
15937    }
15938  else
15939    {
15940      switch (mode)
15941	{
15942	case DImode:
15943	  {
15944	    rtx operands[2];
15945	    split_di (&operand, 1, operands, operands + 1);
15946	    emit_insn (
15947			gen_rtx_SET (VOIDmode,
15948				     gen_rtx_MEM (SImode,
15949						  gen_rtx_PRE_DEC (Pmode,
15950							stack_pointer_rtx)),
15951				     operands[1]));
15952	    emit_insn (
15953			gen_rtx_SET (VOIDmode,
15954				     gen_rtx_MEM (SImode,
15955						  gen_rtx_PRE_DEC (Pmode,
15956							stack_pointer_rtx)),
15957				     operands[0]));
15958	  }
15959	  break;
15960	case HImode:
15961	  /* Store HImodes as SImodes.  */
15962	  operand = gen_lowpart (SImode, operand);
15963	  /* FALLTHRU */
15964	case SImode:
15965	  emit_insn (
15966		      gen_rtx_SET (VOIDmode,
15967				   gen_rtx_MEM (GET_MODE (operand),
15968						gen_rtx_PRE_DEC (SImode,
15969							stack_pointer_rtx)),
15970				   operand));
15971	  break;
15972	default:
15973	  gcc_unreachable ();
15974	}
15975      result = gen_rtx_MEM (mode, stack_pointer_rtx);
15976    }
15977  return result;
15978}
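
/* Note that outside the red zone this works by pushing onto the stack
   (PRE_DEC of the stack pointer), so each call is expected to be paired with
   a later ix86_free_from_memory of the same mode to release the slot.  */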
15979
15980/* Free the stack slot allocated by ix86_force_to_memory.  */
15981void
15982ix86_free_from_memory (enum machine_mode mode)
15983{
15984  if (!TARGET_RED_ZONE)
15985    {
15986      int size;
15987
15988      if (mode == DImode || TARGET_64BIT)
15989	size = 8;
15990      else
15991	size = 4;
15992      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
15993         to a pop or add instruction if registers are available.  */
15994      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
15995			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
15996					    GEN_INT (size))));
15997    }
15998}
15999
16000/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
16001   QImode must go into class Q_REGS.
16002   Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and movdf to do
16003   mem-to-mem moves through integer regs.  */
16004enum reg_class
16005ix86_preferred_reload_class (rtx x, enum reg_class class)
16006{
16007  /* We're only allowed to return a subclass of CLASS.  Many of the
16008     following checks fail for NO_REGS, so eliminate that early.  */
16009  if (class == NO_REGS)
16010    return NO_REGS;
16011
16012  /* All classes can load zeros.  */
16013  if (x == CONST0_RTX (GET_MODE (x)))
16014    return class;
16015
16016  /* Floating-point constants need more complex checks.  */
16017  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
16018    {
16019      /* General regs can load everything.  */
16020      if (reg_class_subset_p (class, GENERAL_REGS))
16021        return class;
16022
16023      /* Floats can load 0 and 1 plus some others.  Note that we eliminated
16024	 zero above.  We only want to wind up preferring 80387 registers if
16025	 we plan on doing computation with them.  */
16026      if (TARGET_80387
16027	  && (TARGET_MIX_SSE_I387
16028	      || !(TARGET_SSE_MATH && SSE_FLOAT_MODE_P (GET_MODE (x))))
16029	  && standard_80387_constant_p (x))
16030	{
16031	  /* Limit class to non-sse.  */
16032	  if (class == FLOAT_SSE_REGS)
16033	    return FLOAT_REGS;
16034	  if (class == FP_TOP_SSE_REGS)
16035	    return FP_TOP_REG;
16036	  if (class == FP_SECOND_SSE_REGS)
16037	    return FP_SECOND_REG;
16038	  if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
16039	    return class;
16040	}
16041
16042      return NO_REGS;
16043    }
16044  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
16045    return NO_REGS;
16046  if (MAYBE_SSE_CLASS_P (class) && CONSTANT_P (x))
16047    return NO_REGS;
16048
16049  /* Generally when we see PLUS here, it's the function invariant
16050     (plus soft-fp const_int), which can only be computed into general
16051     regs.  */
16052  if (GET_CODE (x) == PLUS)
16053    return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
16054
16055  /* QImode constants are easy to load, but non-constant QImode data
16056     must go into Q_REGS.  */
16057  if (GET_MODE (x) == QImode && !CONSTANT_P (x))
16058    {
16059      if (reg_class_subset_p (class, Q_REGS))
16060	return class;
16061      if (reg_class_subset_p (Q_REGS, class))
16062	return Q_REGS;
16063      return NO_REGS;
16064    }
16065
16066  return class;
16067}
16068
16069/* If we are copying between general and FP registers, we need a memory
16070   location.  The same is true for SSE and MMX registers.
16071
16072   The macro can't work reliably when one of the CLASSES is a class containing
16073   registers from multiple units (SSE, MMX, integer).  We avoid this by never
16074   combining those units in a single alternative in the machine description.
16075   Ensure that this constraint holds to avoid unexpected surprises.
16076
16077   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
16078   enforce these sanity checks.  */
16079
16080int
16081ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
16082			      enum machine_mode mode, int strict)
16083{
16084  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
16085      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
16086      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
16087      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
16088      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
16089      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
16090    {
16091      gcc_assert (!strict);
16092      return true;
16093    }
16094
16095  if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
16096    return true;
16097
16098  /* ??? This is a lie.  We do have moves between mmx and general regs,
16099     and between mmx and sse2 regs.  But by saying we need secondary memory
16100     we discourage the register allocator from using mmx registers unless needed.  */
16101  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
16102    return true;
16103
16104  if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16105    {
16106      /* SSE1 doesn't have any direct moves from other classes.  */
16107      if (!TARGET_SSE2)
16108	return true;
16109
16110      /* If the target says that inter-unit moves are more expensive
16111	 than moving through memory, then don't generate them.  */
16112      if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
16113	return true;
16114
16115      /* Between SSE and general, we have moves no larger than word size.  */
16116      if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
16117	return true;
16118
16119      /* ??? For the cost of one register reformat penalty, we could use
16120	 the same instructions to move SFmode and DFmode data, but the
16121	 relevant move patterns don't support those alternatives.  */
16122      if (mode == SFmode || mode == DFmode)
16123	return true;
16124    }
16125
16126  return false;
16127}
16128
16129/* Return true if the registers in CLASS cannot represent the change from
16130   modes FROM to TO.  */
16131
16132bool
16133ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
16134			       enum reg_class class)
16135{
16136  if (from == to)
16137    return false;
16138
16139  /* x87 registers can't do subreg at all, as all values are reformatted
16140     to extended precision.  */
16141  if (MAYBE_FLOAT_CLASS_P (class))
16142    return true;
16143
16144  if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
16145    {
16146      /* Vector registers do not support QI or HImode loads.  If we don't
16147	 disallow a change to these modes, reload will assume it's ok to
16148	 drop the subreg from (subreg:SI (reg:HI 100) 0).  This affects
16149	 the vec_dupv4hi pattern.  */
16150      if (GET_MODE_SIZE (from) < 4)
16151	return true;
16152
16153      /* Vector registers do not support subreg with nonzero offsets, which
16154	 are otherwise valid for integer registers.  Since we can't see
16155	 whether we have a nonzero offset from here, prohibit all
16156         nonparadoxical subregs changing size.  */
16157      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
16158	return true;
16159    }
16160
16161  return false;
16162}
16163
16164/* Return the cost of moving data from a register in class CLASS1 to
16165   one in class CLASS2.
16166
16167   It is not required that the cost always equal 2 when FROM is the same as TO;
16168   on some machines it is expensive to move between registers if they are not
16169   general registers.  */
16170
16171int
16172ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
16173			 enum reg_class class2)
16174{
16175  /* In case we require secondary memory, compute the cost of the store
16176     followed by the load.  To avoid bad register allocation choices, we need
16177     this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
16178
16179  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
16180    {
16181      int cost = 1;
16182
16183      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
16184		   MEMORY_MOVE_COST (mode, class1, 1));
16185      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
16186		   MEMORY_MOVE_COST (mode, class2, 1));
16187
16188      /* In the case of copying from a general purpose register we may emit
16189         multiple stores followed by a single load, causing a memory size
16190         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
16191      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
16192	cost += 20;
16193
16194      /* In the case of FP/MMX moves, the registers actually overlap, and we
16195	 have to switch modes in order to treat them differently.  */
16196      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
16197          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
16198	cost += 20;
16199
16200      return cost;
16201    }
16202
16203  /* Moves between the SSE/MMX units and the integer unit are expensive.  */
16204  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
16205      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
16206    return ix86_cost->mmxsse_to_integer;
16207  if (MAYBE_FLOAT_CLASS_P (class1))
16208    return ix86_cost->fp_move;
16209  if (MAYBE_SSE_CLASS_P (class1))
16210    return ix86_cost->sse_move;
16211  if (MAYBE_MMX_CLASS_P (class1))
16212    return ix86_cost->mmx_move;
16213  return 2;
16214}
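
/* As a worked example (exact numbers depend on the active cost table):
   copying XFmode between FLOAT_REGS and GENERAL_REGS needs secondary memory,
   so the cost is 1 plus the larger of the in/out MEMORY_MOVE_COST values for
   each class, plus the extra penalties above when they apply; a plain
   GENERAL_REGS to GENERAL_REGS copy falls through to the final return and
   costs 2.  */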
16215
16216/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
16217
16218bool
16219ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
16220{
  /* Only the flags registers can hold CCmode values, and they can hold
     nothing else.  */
16222  if (CC_REGNO_P (regno))
16223    return GET_MODE_CLASS (mode) == MODE_CC;
16224  if (GET_MODE_CLASS (mode) == MODE_CC
16225      || GET_MODE_CLASS (mode) == MODE_RANDOM
16226      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
16227    return 0;
16228  if (FP_REGNO_P (regno))
16229    return VALID_FP_MODE_P (mode);
16230  if (SSE_REGNO_P (regno))
16231    {
16232      /* We implement the move patterns for all vector modes into and
16233	 out of SSE registers, even when no operation instructions
16234	 are available.  */
16235      return (VALID_SSE_REG_MODE (mode)
16236	      || VALID_SSE2_REG_MODE (mode)
16237	      || VALID_MMX_REG_MODE (mode)
16238	      || VALID_MMX_REG_MODE_3DNOW (mode));
16239    }
16240  if (MMX_REGNO_P (regno))
16241    {
16242      /* We implement the move patterns for 3DNOW modes even in MMX mode,
16243	 so if the register is available at all, then we can move data of
16244	 the given mode into or out of it.  */
16245      return (VALID_MMX_REG_MODE (mode)
16246	      || VALID_MMX_REG_MODE_3DNOW (mode));
16247    }
16248
16249  if (mode == QImode)
16250    {
      /* Take care with QImode values - they can live in non-QI regs,
	 but then they cause partial register stalls.  */
16253      if (regno < 4 || TARGET_64BIT)
16254	return 1;
16255      if (!TARGET_PARTIAL_REG_STALL)
16256	return 1;
16257      return reload_in_progress || reload_completed;
16258    }
  /* We handle both integer and float values in the general purpose
     registers.  */
16260  else if (VALID_INT_MODE_P (mode))
16261    return 1;
16262  else if (VALID_FP_MODE_P (mode))
16263    return 1;
16264  /* Lots of MMX code casts 8 byte vector modes to DImode.  If we then go
16265     on to use that value in smaller contexts, this can easily force a
16266     pseudo to be allocated to GENERAL_REGS.  Since this is no worse than
16267     supporting DImode, allow it.  */
16268  else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
16269    return 1;
16270
16271  return 0;
16272}
16273
16274/* A subroutine of ix86_modes_tieable_p.  Return true if MODE is a
16275   tieable integer mode.  */
16276
16277static bool
16278ix86_tieable_integer_mode_p (enum machine_mode mode)
16279{
16280  switch (mode)
16281    {
16282    case HImode:
16283    case SImode:
16284      return true;
16285
16286    case QImode:
16287      return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
16288
16289    case DImode:
16290      return TARGET_64BIT;
16291
16292    default:
16293      return false;
16294    }
16295}
16296
16297/* Return true if MODE1 is accessible in a register that can hold MODE2
16298   without copying.  That is, all register classes that can hold MODE2
16299   can also hold MODE1.  */
16300
16301bool
16302ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
16303{
16304  if (mode1 == mode2)
16305    return true;
16306
16307  if (ix86_tieable_integer_mode_p (mode1)
16308      && ix86_tieable_integer_mode_p (mode2))
16309    return true;
16310
16311  /* MODE2 being XFmode implies fp stack or general regs, which means we
16312     can tie any smaller floating point modes to it.  Note that we do not
16313     tie this with TFmode.  */
16314  if (mode2 == XFmode)
16315    return mode1 == SFmode || mode1 == DFmode;
16316
16317  /* MODE2 being DFmode implies fp stack, general or sse regs, which means
16318     that we can tie it with SFmode.  */
16319  if (mode2 == DFmode)
16320    return mode1 == SFmode;
16321
16322  /* If MODE2 is only appropriate for an SSE register, then tie with
16323     any other mode acceptable to SSE registers.  */
16324  if (GET_MODE_SIZE (mode2) >= 8
16325      && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
16326    return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
16327
16328  /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
16329     with any other mode acceptable to MMX registers.  */
16330  if (GET_MODE_SIZE (mode2) == 8
16331      && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
16332    return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
16333
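  /* Otherwise we cannot guarantee that every register class able to hold
     MODE2 can also hold MODE1.  */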
16334  return false;
16335}
16336
16337/* Return the cost of moving data of mode M between a
16338   register and memory.  A value of 2 is the default; this cost is
16339   relative to those in `REGISTER_MOVE_COST'.
16340
16341   If moving between registers and memory is more expensive than
16342   between two registers, you should define this macro to express the
16343   relative cost.
16344
   Also model the increased cost of moving QImode values in and out of
   non-Q_REGS classes.  */
16348int
16349ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
16350{
16351  if (FLOAT_CLASS_P (class))
16352    {
16353      int index;
16354      switch (mode)
16355	{
16356	  case SFmode:
16357	    index = 0;
16358	    break;
16359	  case DFmode:
16360	    index = 1;
16361	    break;
16362	  case XFmode:
16363	    index = 2;
16364	    break;
16365	  default:
16366	    return 100;
16367	}
16368      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
16369    }
16370  if (SSE_CLASS_P (class))
16371    {
16372      int index;
16373      switch (GET_MODE_SIZE (mode))
16374	{
16375	  case 4:
16376	    index = 0;
16377	    break;
16378	  case 8:
16379	    index = 1;
16380	    break;
16381	  case 16:
16382	    index = 2;
16383	    break;
16384	  default:
16385	    return 100;
16386	}
16387      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
16388    }
16389  if (MMX_CLASS_P (class))
16390    {
16391      int index;
16392      switch (GET_MODE_SIZE (mode))
16393	{
16394	  case 4:
16395	    index = 0;
16396	    break;
16397	  case 8:
16398	    index = 1;
16399	    break;
16400	  default:
16401	    return 100;
16402	}
16403      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
16404    }
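  /* Integer moves through the general registers; the cost tables are indexed
     by size (QImode, HImode, word), and wider modes are costed as the
     corresponding number of word sized moves.  */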
16405  switch (GET_MODE_SIZE (mode))
16406    {
16407      case 1:
16408	if (in)
16409	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
16410		  : ix86_cost->movzbl_load);
16411	else
16412	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
16413		  : ix86_cost->int_store[0] + 4);
16414	break;
16415      case 2:
16416	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
16417      default:
16418	/* Compute number of 32bit moves needed.  TFmode is moved as XFmode.  */
16419	if (mode == TFmode)
16420	  mode = XFmode;
16421	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
16422		* (((int) GET_MODE_SIZE (mode)
16423		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
16424    }
16425}
16426
16427/* Compute a (partial) cost for rtx X.  Return true if the complete
16428   cost has been computed, and false if subexpressions should be
16429   scanned.  In either case, *TOTAL contains the cost result.  */
16430
16431static bool
16432ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
16433{
16434  enum machine_mode mode = GET_MODE (x);
16435
16436  switch (code)
16437    {
16438    case CONST_INT:
16439    case CONST:
16440    case LABEL_REF:
16441    case SYMBOL_REF:
16442      if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
16443	*total = 3;
16444      else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
16445	*total = 2;
16446      else if (flag_pic && SYMBOLIC_CONST (x)
16447	       && (!TARGET_64BIT
		   || (GET_CODE (x) != LABEL_REF
16449		       && (GET_CODE (x) != SYMBOL_REF
16450		           || !SYMBOL_REF_LOCAL_P (x)))))
16451	*total = 1;
16452      else
16453	*total = 0;
16454      return true;
16455
16456    case CONST_DOUBLE:
16457      if (mode == VOIDmode)
16458	*total = 0;
16459      else
16460	switch (standard_80387_constant_p (x))
16461	  {
16462	  case 1: /* 0.0 */
16463	    *total = 1;
16464	    break;
16465	  default: /* Other constants */
16466	    *total = 2;
16467	    break;
16468	  case 0:
16469	  case -1:
16470	    /* Start with (MEM (SYMBOL_REF)), since that's where
16471	       it'll probably end up.  Add a penalty for size.  */
16472	    *total = (COSTS_N_INSNS (1)
16473		      + (flag_pic != 0 && !TARGET_64BIT)
16474		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
16475	    break;
16476	  }
16477      return true;
16478
16479    case ZERO_EXTEND:
      /* The zero extension is often completely free on x86_64, so make
	 it as cheap as possible.  */
16482      if (TARGET_64BIT && mode == DImode
16483	  && GET_MODE (XEXP (x, 0)) == SImode)
16484	*total = 1;
16485      else if (TARGET_ZERO_EXTEND_WITH_AND)
16486	*total = COSTS_N_INSNS (ix86_cost->add);
16487      else
16488	*total = COSTS_N_INSNS (ix86_cost->movzx);
16489      return false;
16490
16491    case SIGN_EXTEND:
16492      *total = COSTS_N_INSNS (ix86_cost->movsx);
16493      return false;
16494
16495    case ASHIFT:
16496      if (GET_CODE (XEXP (x, 1)) == CONST_INT
16497	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
16498	{
16499	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16500	  if (value == 1)
16501	    {
16502	      *total = COSTS_N_INSNS (ix86_cost->add);
16503	      return false;
16504	    }
16505	  if ((value == 2 || value == 3)
16506	      && ix86_cost->lea <= ix86_cost->shift_const)
16507	    {
16508	      *total = COSTS_N_INSNS (ix86_cost->lea);
16509	      return false;
16510	    }
16511	}
16512      /* FALLTHRU */
16513
16514    case ROTATE:
16515    case ASHIFTRT:
16516    case LSHIFTRT:
16517    case ROTATERT:
16518      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
16519	{
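	  /* Without native 64-bit shifts, a DImode shift is synthesized from
	     a pair of SImode shifts plus fixup code, hence the roughly
	     doubled costs; variable shift counts are considerably worse.  */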
16520	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16521	    {
	      if (INTVAL (XEXP (x, 1)) > 32)
		*total = COSTS_N_INSNS (ix86_cost->shift_const + 2);
	      else
		*total = COSTS_N_INSNS (ix86_cost->shift_const * 2);
16526	    }
16527	  else
16528	    {
	      if (GET_CODE (XEXP (x, 1)) == AND)
		*total = COSTS_N_INSNS (ix86_cost->shift_var * 2);
	      else
		*total = COSTS_N_INSNS (ix86_cost->shift_var * 6 + 2);
16533	    }
16534	}
16535      else
16536	{
16537	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16538	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
16539	  else
16540	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
16541	}
16542      return false;
16543
16544    case MULT:
16545      if (FLOAT_MODE_P (mode))
16546	{
16547	  *total = COSTS_N_INSNS (ix86_cost->fmul);
16548	  return false;
16549	}
16550      else
16551	{
16552	  rtx op0 = XEXP (x, 0);
16553	  rtx op1 = XEXP (x, 1);
16554	  int nbits;
16555	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16556	    {
16557	      unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
16558	      for (nbits = 0; value != 0; value &= value - 1)
16559	        nbits++;
16560	    }
16561	  else
16562	    /* This is arbitrary.  */
16563	    nbits = 7;
16564
16565	  /* Compute costs correctly for widening multiplication.  */
	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
16567	      && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
16568	         == GET_MODE_SIZE (mode))
16569	    {
16570	      int is_mulwiden = 0;
16571	      enum machine_mode inner_mode = GET_MODE (op0);
16572
16573	      if (GET_CODE (op0) == GET_CODE (op1))
16574		is_mulwiden = 1, op1 = XEXP (op1, 0);
16575	      else if (GET_CODE (op1) == CONST_INT)
16576		{
16577		  if (GET_CODE (op0) == SIGN_EXTEND)
16578		    is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
16579			          == INTVAL (op1);
16580		  else
16581		    is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
16582	        }
16583
16584	      if (is_mulwiden)
16585	        op0 = XEXP (op0, 0), mode = GET_MODE (op0);
16586	    }
16587
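	  /* E.g. (mult:DI (sign_extend:DI (reg:SI)) (sign_extend:DI (reg:SI)))
	     is now costed as an SImode multiply rather than a DImode one.  */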
16588  	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
16589			          + nbits * ix86_cost->mult_bit)
16590	           + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code);
16591
16592          return true;
16593	}
16594
16595    case DIV:
16596    case UDIV:
16597    case MOD:
16598    case UMOD:
16599      if (FLOAT_MODE_P (mode))
16600	*total = COSTS_N_INSNS (ix86_cost->fdiv);
16601      else
16602	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
16603      return false;
16604
16605    case PLUS:
16606      if (FLOAT_MODE_P (mode))
16607	*total = COSTS_N_INSNS (ix86_cost->fadd);
16608      else if (GET_MODE_CLASS (mode) == MODE_INT
16609	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
16610	{
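	  /* Address-like shapes - a base plus an index scaled by 2, 4 or 8,
	     possibly with a displacement - can be carried out by a single
	     lea, so cost them as one lea plus the cost of their operands.  */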
16611	  if (GET_CODE (XEXP (x, 0)) == PLUS
16612	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
16613	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
16614	      && CONSTANT_P (XEXP (x, 1)))
16615	    {
16616	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
16617	      if (val == 2 || val == 4 || val == 8)
16618		{
16619		  *total = COSTS_N_INSNS (ix86_cost->lea);
16620		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16621		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
16622				      outer_code);
16623		  *total += rtx_cost (XEXP (x, 1), outer_code);
16624		  return true;
16625		}
16626	    }
16627	  else if (GET_CODE (XEXP (x, 0)) == MULT
16628		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
16629	    {
16630	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
16631	      if (val == 2 || val == 4 || val == 8)
16632		{
16633		  *total = COSTS_N_INSNS (ix86_cost->lea);
16634		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16635		  *total += rtx_cost (XEXP (x, 1), outer_code);
16636		  return true;
16637		}
16638	    }
16639	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
16640	    {
16641	      *total = COSTS_N_INSNS (ix86_cost->lea);
16642	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
16643	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
16644	      *total += rtx_cost (XEXP (x, 1), outer_code);
16645	      return true;
16646	    }
16647	}
16648      /* FALLTHRU */
16649
16650    case MINUS:
16651      if (FLOAT_MODE_P (mode))
16652	{
16653	  *total = COSTS_N_INSNS (ix86_cost->fadd);
16654	  return false;
16655	}
16656      /* FALLTHRU */
16657
16658    case AND:
16659    case IOR:
16660    case XOR:
16661      if (!TARGET_64BIT && mode == DImode)
16662	{
16663	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
16664		    + (rtx_cost (XEXP (x, 0), outer_code)
16665		       << (GET_MODE (XEXP (x, 0)) != DImode))
16666		    + (rtx_cost (XEXP (x, 1), outer_code)
16667	               << (GET_MODE (XEXP (x, 1)) != DImode)));
16668	  return true;
16669	}
16670      /* FALLTHRU */
16671
16672    case NEG:
16673      if (FLOAT_MODE_P (mode))
16674	{
16675	  *total = COSTS_N_INSNS (ix86_cost->fchs);
16676	  return false;
16677	}
16678      /* FALLTHRU */
16679
16680    case NOT:
16681      if (!TARGET_64BIT && mode == DImode)
16682	*total = COSTS_N_INSNS (ix86_cost->add * 2);
16683      else
16684	*total = COSTS_N_INSNS (ix86_cost->add);
16685      return false;
16686
16687    case COMPARE:
16688      if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
16689	  && XEXP (XEXP (x, 0), 1) == const1_rtx
16690	  && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
16691	  && XEXP (x, 1) == const0_rtx)
16692	{
16693	  /* This kind of construct is implemented using test[bwl].
16694	     Treat it as if we had an AND.  */
16695	  *total = (COSTS_N_INSNS (ix86_cost->add)
16696		    + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
16697		    + rtx_cost (const1_rtx, outer_code));
16698	  return true;
16699	}
16700      return false;
16701
16702    case FLOAT_EXTEND:
16703      if (!TARGET_SSE_MATH
16704	  || mode == XFmode
16705	  || (mode == DFmode && !TARGET_SSE2))
16706	*total = 0;
16707      return false;
16708
16709    case ABS:
16710      if (FLOAT_MODE_P (mode))
16711	*total = COSTS_N_INSNS (ix86_cost->fabs);
16712      return false;
16713
16714    case SQRT:
16715      if (FLOAT_MODE_P (mode))
16716	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
16717      return false;
16718
16719    case UNSPEC:
16720      if (XINT (x, 1) == UNSPEC_TP)
16721	*total = 0;
16722      return false;
16723
16724    default:
16725      return false;
16726    }
16727}
16728
16729#if TARGET_MACHO
16730
16731static int current_machopic_label_num;
16732
16733/* Given a symbol name and its associated stub, write out the
16734   definition of the stub.  */
16735
16736void
16737machopic_output_stub (FILE *file, const char *symb, const char *stub)
16738{
16739  unsigned int length;
16740  char *binder_name, *symbol_name, lazy_ptr_name[32];
16741  int label = ++current_machopic_label_num;
16742
16743  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
16744  symb = (*targetm.strip_name_encoding) (symb);
16745
16746  length = strlen (stub);
16747  binder_name = alloca (length + 32);
16748  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
16749
16750  length = strlen (symb);
16751  symbol_name = alloca (length + 32);
16752  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
16753
16754  sprintf (lazy_ptr_name, "L%d$lz", label);
16755
16756  if (MACHOPIC_PURE)
16757    machopic_picsymbol_stub_section ();
16758  else
16759    machopic_symbol_stub_section ();
16760
16761  fprintf (file, "%s:\n", stub);
16762  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16763
16764  if (MACHOPIC_PURE)
16765    {
16766      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
16767      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
16768      fprintf (file, "\tjmp %%edx\n");
16769    }
16770  else
16771    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
16772
16773  fprintf (file, "%s:\n", binder_name);
16774
16775  if (MACHOPIC_PURE)
16776    {
16777      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
16778      fprintf (file, "\tpushl %%eax\n");
16779    }
16780  else
16781    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
16782
16783  fprintf (file, "\tjmp dyld_stub_binding_helper\n");
16784
16785  machopic_lazy_symbol_ptr_section ();
16786  fprintf (file, "%s:\n", lazy_ptr_name);
16787  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
16788  fprintf (file, "\t.long %s\n", binder_name);
16789}
16790#endif /* TARGET_MACHO */
16791
16792/* Order the registers for register allocator.  */
16793
16794void
16795x86_order_regs_for_local_alloc (void)
16796{
16797   int pos = 0;
16798   int i;
16799
16800   /* First allocate the local general purpose registers.  */
16801   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16802     if (GENERAL_REGNO_P (i) && call_used_regs[i])
16803	reg_alloc_order [pos++] = i;
16804
16805   /* Global general purpose registers.  */
16806   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
16807     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
16808	reg_alloc_order [pos++] = i;
16809
16810   /* x87 registers come first in case we are doing FP math
16811      using them.  */
16812   if (!TARGET_SSE_MATH)
16813     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16814       reg_alloc_order [pos++] = i;
16815
16816   /* SSE registers.  */
16817   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
16818     reg_alloc_order [pos++] = i;
16819   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
16820     reg_alloc_order [pos++] = i;
16821
16822   /* x87 registers.  */
16823   if (TARGET_SSE_MATH)
16824     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
16825       reg_alloc_order [pos++] = i;
16826
16827   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
16828     reg_alloc_order [pos++] = i;
16829
16830   /* Initialize the rest of array as we do not allocate some registers
16831      at all.  */
16832   while (pos < FIRST_PSEUDO_REGISTER)
16833     reg_alloc_order [pos++] = 0;
16834}
16835
16836/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
16837   struct attribute_spec.handler.  */
16838static tree
16839ix86_handle_struct_attribute (tree *node, tree name,
16840			      tree args ATTRIBUTE_UNUSED,
16841			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
16842{
16843  tree *type = NULL;
16844  if (DECL_P (*node))
16845    {
16846      if (TREE_CODE (*node) == TYPE_DECL)
16847	type = &TREE_TYPE (*node);
16848    }
16849  else
16850    type = node;
16851
16852  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
16853		 || TREE_CODE (*type) == UNION_TYPE)))
16854    {
16855      warning (OPT_Wattributes, "%qs attribute ignored",
16856	       IDENTIFIER_POINTER (name));
16857      *no_add_attrs = true;
16858    }
16859
16860  else if ((is_attribute_p ("ms_struct", name)
16861	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
16862	   || ((is_attribute_p ("gcc_struct", name)
16863		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
16864    {
16865      warning (OPT_Wattributes, "%qs incompatible attribute ignored",
16866               IDENTIFIER_POINTER (name));
16867      *no_add_attrs = true;
16868    }
16869
16870  return NULL_TREE;
16871}
16872
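/* Return true if RECORD_TYPE should be laid out following the Microsoft
   bitfield layout rules.  */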
16873static bool
16874ix86_ms_bitfield_layout_p (tree record_type)
16875{
  return ((TARGET_MS_BITFIELD_LAYOUT
	   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
	  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
16879}
16880
16881/* Returns an expression indicating where the this parameter is
16882   located on entry to the FUNCTION.  */
16883
16884static rtx
16885x86_this_parameter (tree function)
16886{
16887  tree type = TREE_TYPE (function);
16888
16889  if (TARGET_64BIT)
16890    {
16891      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
16892      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
16893    }
16894
16895  if (ix86_function_regparm (type, function) > 0)
16896    {
16897      tree parm;
16898
16899      parm = TYPE_ARG_TYPES (type);
16900      /* Figure out whether or not the function has a variable number of
16901	 arguments.  */
16902      for (; parm; parm = TREE_CHAIN (parm))
16903	if (TREE_VALUE (parm) == void_type_node)
16904	  break;
16905      /* If not, the this parameter is in the first argument.  */
16906      if (parm)
16907	{
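	  /* The this pointer is passed like an ordinary first argument:
	     in %eax for regparm functions, or in %ecx for fastcall.  */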
16908	  int regno = 0;
16909	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
16910	    regno = 2;
16911	  return gen_rtx_REG (SImode, regno);
16912	}
16913    }
16914
16915  if (aggregate_value_p (TREE_TYPE (type), type))
16916    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
16917  else
16918    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
16919}
16920
16921/* Determine whether x86_output_mi_thunk can succeed.  */
16922
16923static bool
16924x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
16925			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
16926			 HOST_WIDE_INT vcall_offset, tree function)
16927{
16928  /* 64-bit can handle anything.  */
16929  if (TARGET_64BIT)
16930    return true;
16931
16932  /* For 32-bit, everything's fine if we have one free register.  */
16933  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
16934    return true;
16935
16936  /* Need a free register for vcall_offset.  */
16937  if (vcall_offset)
16938    return false;
16939
16940  /* Need a free register for GOT references.  */
16941  if (flag_pic && !(*targetm.binds_local_p) (function))
16942    return false;
16943
16944  /* Otherwise ok.  */
16945  return true;
16946}
16947
16948/* Output the assembler code for a thunk function.  THUNK_DECL is the
16949   declaration for the thunk function itself, FUNCTION is the decl for
16950   the target function.  DELTA is an immediate constant offset to be
16951   added to THIS.  If VCALL_OFFSET is nonzero, the word at
16952   *(*this + vcall_offset) should be added to THIS.  */
16953
16954static void
16955x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
16956		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
16957		     HOST_WIDE_INT vcall_offset, tree function)
16958{
16959  rtx xops[3];
16960  rtx this = x86_this_parameter (function);
16961  rtx this_reg, tmp;
16962
16963  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
16964     pull it in now and let DELTA benefit.  */
16965  if (REG_P (this))
16966    this_reg = this;
16967  else if (vcall_offset)
16968    {
16969      /* Put the this parameter into %eax.  */
16970      xops[0] = this;
16971      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
16972      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
16973    }
16974  else
16975    this_reg = NULL_RTX;
16976
16977  /* Adjust the this parameter by a fixed constant.  */
16978  if (delta)
16979    {
16980      xops[0] = GEN_INT (delta);
16981      xops[1] = this_reg ? this_reg : this;
16982      if (TARGET_64BIT)
16983	{
16984	  if (!x86_64_general_operand (xops[0], DImode))
16985	    {
16986	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
16987	      xops[1] = tmp;
16988	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
16989	      xops[0] = tmp;
16990	      xops[1] = this;
16991	    }
16992	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
16993	}
16994      else
16995	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
16996    }
16997
16998  /* Adjust the this parameter by a value stored in the vtable.  */
16999  if (vcall_offset)
17000    {
17001      if (TARGET_64BIT)
17002	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
17003      else
17004	{
17005	  int tmp_regno = 2 /* ECX */;
17006	  if (lookup_attribute ("fastcall",
17007	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
17008	    tmp_regno = 0 /* EAX */;
17009	  tmp = gen_rtx_REG (SImode, tmp_regno);
17010	}
17011
17012      xops[0] = gen_rtx_MEM (Pmode, this_reg);
17013      xops[1] = tmp;
17014      if (TARGET_64BIT)
17015	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17016      else
17017	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17018
17019      /* Adjust the this parameter.  */
17020      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
17021      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
17022	{
17023	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
17024	  xops[0] = GEN_INT (vcall_offset);
17025	  xops[1] = tmp2;
17026	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
17027	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
17028	}
17029      xops[1] = this_reg;
17030      if (TARGET_64BIT)
17031	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
17032      else
17033	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
17034    }
17035
17036  /* If necessary, drop THIS back to its stack slot.  */
17037  if (this_reg && this_reg != this)
17038    {
17039      xops[0] = this_reg;
17040      xops[1] = this;
17041      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
17042    }
17043
17044  xops[0] = XEXP (DECL_RTL (function), 0);
17045  if (TARGET_64BIT)
17046    {
17047      if (!flag_pic || (*targetm.binds_local_p) (function))
17048	output_asm_insn ("jmp\t%P0", xops);
17049      else
17050	{
17051	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
17052	  tmp = gen_rtx_CONST (Pmode, tmp);
17053	  tmp = gen_rtx_MEM (QImode, tmp);
17054	  xops[0] = tmp;
17055	  output_asm_insn ("jmp\t%A0", xops);
17056	}
17057    }
17058  else
17059    {
17060      if (!flag_pic || (*targetm.binds_local_p) (function))
17061	output_asm_insn ("jmp\t%P0", xops);
17062      else
17063#if TARGET_MACHO
17064	if (TARGET_MACHO)
17065	  {
17066	    rtx sym_ref = XEXP (DECL_RTL (function), 0);
17067	    tmp = (gen_rtx_SYMBOL_REF
17068		   (Pmode,
17069		    machopic_indirection_name (sym_ref, /*stub_p=*/true)));
17070	    tmp = gen_rtx_MEM (QImode, tmp);
17071	    xops[0] = tmp;
17072	    output_asm_insn ("jmp\t%0", xops);
17073	  }
17074	else
17075#endif /* TARGET_MACHO */
17076	{
17077	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
17078	  output_set_got (tmp);
17079
17080	  xops[1] = tmp;
17081	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
17082	  output_asm_insn ("jmp\t{*}%1", xops);
17083	}
17084    }
17085}
17086
17087static void
17088x86_file_start (void)
17089{
17090  default_file_start ();
17091  if (X86_FILE_START_VERSION_DIRECTIVE)
17092    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
17093  if (X86_FILE_START_FLTUSED)
17094    fputs ("\t.global\t__fltused\n", asm_out_file);
17095  if (ix86_asm_dialect == ASM_INTEL)
17096    fputs ("\t.intel_syntax\n", asm_out_file);
17097}
17098
17099int
17100x86_field_alignment (tree field, int computed)
17101{
17102  enum machine_mode mode;
17103  tree type = TREE_TYPE (field);
17104
17105  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
17106    return computed;
17107  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
17108		    ? get_inner_array_type (type) : type);
17109  if (mode == DFmode || mode == DCmode
17110      || GET_MODE_CLASS (mode) == MODE_INT
17111      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
17112    return MIN (32, computed);
17113  return computed;
17114}
17115
17116/* Output assembler code to FILE to increment profiler label # LABELNO
17117   for profiling a function entry.  */
17118void
17119x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
17120{
17121  if (TARGET_64BIT)
17122    if (flag_pic)
17123      {
17124#ifndef NO_PROFILE_COUNTERS
17125	fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
17126#endif
17127	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
17128      }
17129    else
17130      {
17131#ifndef NO_PROFILE_COUNTERS
17132	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
17133#endif
17134	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17135      }
17136  else if (flag_pic)
17137    {
17138#ifndef NO_PROFILE_COUNTERS
17139      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
17140	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
17141#endif
17142      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
17143    }
17144  else
17145    {
17146#ifndef NO_PROFILE_COUNTERS
17147      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
17148	       PROFILE_COUNT_REGISTER);
17149#endif
17150      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
17151    }
17152}
17153
/* We don't have exact information about the insn sizes, but we may quite
   safely assume that we know about all 1 byte insns and about the sizes of
   memory addresses.  This is enough to eliminate unnecessary padding in
   99% of cases.  */
17158
17159static int
17160min_insn_size (rtx insn)
17161{
17162  int l = 0;
17163
17164  if (!INSN_P (insn) || !active_insn_p (insn))
17165    return 0;
17166
  /* Discard alignments we've emitted, as well as jump tables.  */
17168  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
17169      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
17170    return 0;
17171  if (GET_CODE (insn) == JUMP_INSN
17172      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
17173	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
17174    return 0;
17175
  /* Important case - calls are always 5 bytes.
     It is common to have many calls in a row.  */
17178  if (GET_CODE (insn) == CALL_INSN
17179      && symbolic_reference_mentioned_p (PATTERN (insn))
17180      && !SIBLING_CALL_P (insn))
17181    return 5;
17182  if (get_attr_length (insn) <= 1)
17183    return 1;
17184
  /* For normal instructions we rely on the sizes of addresses and on the
     presence of a symbol to require 4 bytes of encoding.  This is not the
     case for jumps, where references are PC relative.  */
17188  if (GET_CODE (insn) != JUMP_INSN)
17189    {
17190      l = get_attr_length_address (insn);
17191      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
17192	l = 4;
17193    }
17194  if (l)
17195    return 1+l;
17196  else
17197    return 2;
17198}
17199
/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window.  */
17202
17203static void
17204ix86_avoid_jump_misspredicts (void)
17205{
17206  rtx insn, start = get_insns ();
17207  int nbytes = 0, njumps = 0;
17208  int isjump = 0;
17209
  /* Look for all minimal intervals of instructions containing 4 jumps.
     The intervals are bounded by START and INSN.  NBYTES is the total
     size of the instructions in the interval, including INSN and not
     including START.  When NBYTES is smaller than 16, it is possible
     that the ends of START and INSN fall into the same 16 byte window.

     The smallest offset in the window at which INSN can start is the case
     where START ends at offset 0.  The offset of INSN is then
     NBYTES - sizeof (INSN).  We add a p2align to a 16 byte window with
     maxskip 17 - NBYTES + sizeof (INSN).  */
17220  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
17221    {
17222
17223      nbytes += min_insn_size (insn);
17224      if (dump_file)
17225        fprintf(dump_file, "Insn %i estimated to %i bytes\n",
17226		INSN_UID (insn), min_insn_size (insn));
17227      if ((GET_CODE (insn) == JUMP_INSN
17228	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
17229	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
17230	  || GET_CODE (insn) == CALL_INSN)
17231	njumps++;
17232      else
17233	continue;
17234
17235      while (njumps > 3)
17236	{
17237	  start = NEXT_INSN (start);
17238	  if ((GET_CODE (start) == JUMP_INSN
17239	       && GET_CODE (PATTERN (start)) != ADDR_VEC
17240	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
17241	      || GET_CODE (start) == CALL_INSN)
17242	    njumps--, isjump = 1;
17243	  else
17244	    isjump = 0;
17245	  nbytes -= min_insn_size (start);
17246	}
17247      gcc_assert (njumps >= 0);
17248      if (dump_file)
17249        fprintf (dump_file, "Interval %i to %i has %i bytes\n",
17250		INSN_UID (start), INSN_UID (insn), nbytes);
17251
17252      if (njumps == 3 && isjump && nbytes < 16)
17253	{
17254	  int padsize = 15 - nbytes + min_insn_size (insn);
17255
17256	  if (dump_file)
17257	    fprintf (dump_file, "Padding insn %i by %i bytes!\n",
17258		     INSN_UID (insn), padsize);
17259          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
17260	}
17261    }
17262}
17263
/* The AMD Athlon works faster when RET is not the destination of a
   conditional jump and is not directly preceded by another jump
   instruction.  We avoid the penalty by inserting a NOP just before
   the RET instructions in such cases.  */
17268static void
17269ix86_pad_returns (void)
17270{
17271  edge e;
17272  edge_iterator ei;
17273
17274  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
17275    {
17276      basic_block bb = e->src;
17277      rtx ret = BB_END (bb);
17278      rtx prev;
17279      bool replace = false;
17280
17281      if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
17282	  || !maybe_hot_bb_p (bb))
17283	continue;
17284      for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
17285	if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
17286	  break;
17287      if (prev && GET_CODE (prev) == CODE_LABEL)
17288	{
17289	  edge e;
17290	  edge_iterator ei;
17291
17292	  FOR_EACH_EDGE (e, ei, bb->preds)
17293	    if (EDGE_FREQUENCY (e) && e->src->index >= 0
17294		&& !(e->flags & EDGE_FALLTHRU))
17295	      replace = true;
17296	}
17297      if (!replace)
17298	{
17299	  prev = prev_active_insn (ret);
17300	  if (prev
17301	      && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
17302		  || GET_CODE (prev) == CALL_INSN))
17303	    replace = true;
	  /* Empty functions get a branch mispredict even when the jump
	     destination is not visible to us.  */
17306	  if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
17307	    replace = true;
17308	}
17309      if (replace)
17310	{
17311	  emit_insn_before (gen_return_internal_long (), ret);
17312	  delete_insn (ret);
17313	}
17314    }
17315}
17316
/* Implement machine specific optimizations.  We implement padding of returns
   for K8 CPUs and a pass to avoid placing 4 jumps in a single 16 byte
   window.  */
17319static void
17320ix86_reorg (void)
17321{
17322  if (TARGET_ATHLON_K8 && optimize && !optimize_size)
17323    ix86_pad_returns ();
17324  if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
17325    ix86_avoid_jump_misspredicts ();
17326}
17327
/* Return nonzero when a QImode register that must be represented via a REX
   prefix is used.  */
17330bool
17331x86_extended_QIreg_mentioned_p (rtx insn)
17332{
17333  int i;
17334  extract_insn_cached (insn);
17335  for (i = 0; i < recog_data.n_operands; i++)
17336    if (REG_P (recog_data.operand[i])
17337	&& REGNO (recog_data.operand[i]) >= 4)
17338       return true;
17339  return false;
17340}
17341
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx.  */
17344static int
17345extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
17346{
17347   unsigned int regno;
17348   if (!REG_P (*p))
17349     return 0;
17350   regno = REGNO (*p);
17351   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
17352}
17353
17354/* Return true when INSN mentions register that must be encoded using REX
17355   prefix.  */
17356bool
17357x86_extended_reg_mentioned_p (rtx insn)
17358{
17359  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
17360}
17361
17362/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
17363   optabs would emit if we didn't have TFmode patterns.  */
17364
17365void
17366x86_emit_floatuns (rtx operands[2])
17367{
17368  rtx neglab, donelab, i0, i1, f0, in, out;
17369  enum machine_mode mode, inmode;
17370
17371  inmode = GET_MODE (operands[1]);
17372  gcc_assert (inmode == SImode || inmode == DImode);
17373
17374  out = operands[0];
17375  in = force_reg (inmode, operands[1]);
17376  mode = GET_MODE (out);
17377  neglab = gen_label_rtx ();
17378  donelab = gen_label_rtx ();
17379  i1 = gen_reg_rtx (Pmode);
17380  f0 = gen_reg_rtx (mode);
17381
17382  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
17383
17384  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
17385  emit_jump_insn (gen_jump (donelab));
17386  emit_barrier ();
17387
17388  emit_label (neglab);
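  /* The value has its sign bit set, so a signed conversion would come out
     negative.  Convert (in >> 1) | (in & 1) instead - the low bit is kept
     so rounding is unaffected - and double the result.  */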
17389
17390  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17391  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
17392  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
17393  expand_float (f0, i0, 0);
17394  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
17395
17396  emit_label (donelab);
17397}
17398
17399/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
17400   with all elements equal to VAR.  Return true if successful.  */
17401
17402static bool
17403ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
17404				   rtx target, rtx val)
17405{
17406  enum machine_mode smode, wsmode, wvmode;
17407  rtx x;
17408
17409  switch (mode)
17410    {
17411    case V2SImode:
17412    case V2SFmode:
17413      if (!mmx_ok)
17414	return false;
17415      /* FALLTHRU */
17416
17417    case V2DFmode:
17418    case V2DImode:
17419    case V4SFmode:
17420    case V4SImode:
17421      val = force_reg (GET_MODE_INNER (mode), val);
17422      x = gen_rtx_VEC_DUPLICATE (mode, val);
17423      emit_insn (gen_rtx_SET (VOIDmode, target, x));
17424      return true;
17425
17426    case V4HImode:
17427      if (!mmx_ok)
17428	return false;
17429      if (TARGET_SSE || TARGET_3DNOW_A)
17430	{
17431	  val = gen_lowpart (SImode, val);
17432	  x = gen_rtx_TRUNCATE (HImode, val);
17433	  x = gen_rtx_VEC_DUPLICATE (mode, x);
17434	  emit_insn (gen_rtx_SET (VOIDmode, target, x));
17435	  return true;
17436	}
17437      else
17438	{
17439	  smode = HImode;
17440	  wsmode = SImode;
17441	  wvmode = V2SImode;
17442	  goto widen;
17443	}
17444
17445    case V8QImode:
17446      if (!mmx_ok)
17447	return false;
17448      smode = QImode;
17449      wsmode = HImode;
17450      wvmode = V4HImode;
17451      goto widen;
17452    case V8HImode:
17453      smode = HImode;
17454      wsmode = SImode;
17455      wvmode = V4SImode;
17456      goto widen;
17457    case V16QImode:
17458      smode = QImode;
17459      wsmode = HImode;
17460      wvmode = V8HImode;
17461      goto widen;
17462    widen:
17463      /* Replicate the value once into the next wider mode and recurse.  */
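      /* For example, for V8HImode this packs two copies of the HImode value
	 into an SImode word, builds a V4SImode duplicate of that word, and
	 reinterprets the result as V8HImode.  */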
17464      val = convert_modes (wsmode, smode, val, true);
17465      x = expand_simple_binop (wsmode, ASHIFT, val,
17466			       GEN_INT (GET_MODE_BITSIZE (smode)),
17467			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
17468      val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
17469
17470      x = gen_reg_rtx (wvmode);
17471      if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
17472	gcc_unreachable ();
17473      emit_move_insn (target, gen_lowpart (mode, x));
17474      return true;
17475
17476    default:
17477      return false;
17478    }
17479}
17480
17481/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
17482   whose low element is VAR, and other elements are zero.  Return true
17483   if successful.  */
17484
17485static bool
17486ix86_expand_vector_init_low_nonzero (bool mmx_ok, enum machine_mode mode,
17487				     rtx target, rtx var)
17488{
17489  enum machine_mode vsimode;
17490  rtx x;
17491
17492  switch (mode)
17493    {
17494    case V2SFmode:
17495    case V2SImode:
17496      if (!mmx_ok)
17497	return false;
17498      /* FALLTHRU */
17499
17500    case V2DFmode:
17501    case V2DImode:
17502      var = force_reg (GET_MODE_INNER (mode), var);
17503      x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
17504      emit_insn (gen_rtx_SET (VOIDmode, target, x));
17505      return true;
17506
17507    case V4SFmode:
17508    case V4SImode:
17509      var = force_reg (GET_MODE_INNER (mode), var);
17510      x = gen_rtx_VEC_DUPLICATE (mode, var);
17511      x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
17512      emit_insn (gen_rtx_SET (VOIDmode, target, x));
17513      return true;
17514
17515    case V8HImode:
17516    case V16QImode:
17517      vsimode = V4SImode;
17518      goto widen;
17519    case V4HImode:
17520    case V8QImode:
17521      if (!mmx_ok)
17522	return false;
17523      vsimode = V2SImode;
17524      goto widen;
17525    widen:
17526      /* Zero extend the variable element to SImode and recurse.  */
17527      var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
17528
17529      x = gen_reg_rtx (vsimode);
17530      if (!ix86_expand_vector_init_low_nonzero (mmx_ok, vsimode, x, var))
17531	gcc_unreachable ();
17532
17533      emit_move_insn (target, gen_lowpart (mode, x));
17534      return true;
17535
17536    default:
17537      return false;
17538    }
17539}
17540
17541/* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
17542   consisting of the values in VALS.  It is known that all elements
17543   except ONE_VAR are constants.  Return true if successful.  */
17544
17545static bool
17546ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
17547				 rtx target, rtx vals, int one_var)
17548{
17549  rtx var = XVECEXP (vals, 0, one_var);
17550  enum machine_mode wmode;
17551  rtx const_vec, x;
17552
17553  const_vec = copy_rtx (vals);
17554  XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
17555  const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
17556
17557  switch (mode)
17558    {
17559    case V2DFmode:
17560    case V2DImode:
17561    case V2SFmode:
17562    case V2SImode:
17563      /* For the two element vectors, it's just as easy to use
17564	 the general case.  */
17565      return false;
17566
17567    case V4SFmode:
17568    case V4SImode:
17569    case V8HImode:
17570    case V4HImode:
17571      break;
17572
17573    case V16QImode:
17574      wmode = V8HImode;
17575      goto widen;
17576    case V8QImode:
17577      wmode = V4HImode;
17578      goto widen;
17579    widen:
17580      /* There's no way to set one QImode entry easily.  Combine
17581	 the variable value with its adjacent constant value, and
17582	 promote to an HImode set.  */
17583      x = XVECEXP (vals, 0, one_var ^ 1);
17584      if (one_var & 1)
17585	{
17586	  var = convert_modes (HImode, QImode, var, true);
17587	  var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
17588				     NULL_RTX, 1, OPTAB_LIB_WIDEN);
17589	  x = GEN_INT (INTVAL (x) & 0xff);
17590	}
17591      else
17592	{
17593	  var = convert_modes (HImode, QImode, var, true);
17594	  x = gen_int_mode (INTVAL (x) << 8, HImode);
17595	}
17596      if (x != const0_rtx)
17597	var = expand_simple_binop (HImode, IOR, var, x, var,
17598				   1, OPTAB_LIB_WIDEN);
17599
17600      x = gen_reg_rtx (wmode);
17601      emit_move_insn (x, gen_lowpart (wmode, const_vec));
17602      ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
17603
17604      emit_move_insn (target, gen_lowpart (mode, x));
17605      return true;
17606
17607    default:
17608      return false;
17609    }
17610
17611  emit_move_insn (target, const_vec);
17612  ix86_expand_vector_set (mmx_ok, target, var, one_var);
17613  return true;
17614}
17615
17616/* A subroutine of ix86_expand_vector_init.  Handle the most general case:
17617   all values variable, and none identical.  */
17618
17619static void
17620ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
17621				 rtx target, rtx vals)
17622{
17623  enum machine_mode half_mode = GET_MODE_INNER (mode);
17624  rtx op0 = NULL, op1 = NULL;
17625  bool use_vec_concat = false;
17626
17627  switch (mode)
17628    {
17629    case V2SFmode:
17630    case V2SImode:
17631      if (!mmx_ok && !TARGET_SSE)
17632	break;
17633      /* FALLTHRU */
17634
17635    case V2DFmode:
17636    case V2DImode:
17637      /* For the two element vectors, we always implement VEC_CONCAT.  */
17638      op0 = XVECEXP (vals, 0, 0);
17639      op1 = XVECEXP (vals, 0, 1);
17640      use_vec_concat = true;
17641      break;
17642
17643    case V4SFmode:
17644      half_mode = V2SFmode;
17645      goto half;
17646    case V4SImode:
17647      half_mode = V2SImode;
17648      goto half;
17649    half:
17650      {
17651	rtvec v;
17652
17653	/* For V4SF and V4SI, we implement a concat of two V2 vectors.
17654	   Recurse to load the two halves.  */
17655
17656	op0 = gen_reg_rtx (half_mode);
17657	v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
17658	ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
17659
17660	op1 = gen_reg_rtx (half_mode);
17661	v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
17662	ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
17663
17664	use_vec_concat = true;
17665      }
17666      break;
17667
17668    case V8HImode:
17669    case V16QImode:
17670    case V4HImode:
17671    case V8QImode:
17672      break;
17673
17674    default:
17675      gcc_unreachable ();
17676    }
17677
17678  if (use_vec_concat)
17679    {
17680      if (!register_operand (op0, half_mode))
17681	op0 = force_reg (half_mode, op0);
17682      if (!register_operand (op1, half_mode))
17683	op1 = force_reg (half_mode, op1);
17684
17685      emit_insn (gen_rtx_SET (VOIDmode, target,
17686			      gen_rtx_VEC_CONCAT (mode, op0, op1)));
17687    }
17688  else
17689    {
17690      int i, j, n_elts, n_words, n_elt_per_word;
17691      enum machine_mode inner_mode;
17692      rtx words[4], shift;
17693
17694      inner_mode = GET_MODE_INNER (mode);
17695      n_elts = GET_MODE_NUNITS (mode);
17696      n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
17697      n_elt_per_word = n_elts / n_words;
17698      shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
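      /* Build each word from its elements, combining from the highest index
	 down so that the lowest indexed element lands in the least
	 significant bits.  */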
17699
17700      for (i = 0; i < n_words; ++i)
17701	{
17702	  rtx word = NULL_RTX;
17703
17704	  for (j = 0; j < n_elt_per_word; ++j)
17705	    {
17706	      rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
17707	      elt = convert_modes (word_mode, inner_mode, elt, true);
17708
17709	      if (j == 0)
17710		word = elt;
17711	      else
17712		{
17713		  word = expand_simple_binop (word_mode, ASHIFT, word, shift,
17714					      word, 1, OPTAB_LIB_WIDEN);
17715		  word = expand_simple_binop (word_mode, IOR, word, elt,
17716					      word, 1, OPTAB_LIB_WIDEN);
17717		}
17718	    }
17719
17720	  words[i] = word;
17721	}
17722
17723      if (n_words == 1)
17724	emit_move_insn (target, gen_lowpart (mode, words[0]));
17725      else if (n_words == 2)
17726	{
17727	  rtx tmp = gen_reg_rtx (mode);
17728	  emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
17729	  emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
17730	  emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
17731	  emit_move_insn (target, tmp);
17732	}
17733      else if (n_words == 4)
17734	{
17735	  rtx tmp = gen_reg_rtx (V4SImode);
17736	  vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
17737	  ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
17738	  emit_move_insn (target, gen_lowpart (mode, tmp));
17739	}
17740      else
17741	gcc_unreachable ();
17742    }
17743}
17744
17745/* Initialize vector TARGET via VALS.  Suppress the use of MMX
17746   instructions unless MMX_OK is true.  */
17747
17748void
17749ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
17750{
17751  enum machine_mode mode = GET_MODE (target);
17752  enum machine_mode inner_mode = GET_MODE_INNER (mode);
17753  int n_elts = GET_MODE_NUNITS (mode);
17754  int n_var = 0, one_var = -1;
17755  bool all_same = true, all_const_zero = true;
17756  int i;
17757  rtx x;
17758
17759  for (i = 0; i < n_elts; ++i)
17760    {
17761      x = XVECEXP (vals, 0, i);
17762      if (!CONSTANT_P (x))
17763	n_var++, one_var = i;
17764      else if (x != CONST0_RTX (inner_mode))
17765	all_const_zero = false;
17766      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
17767	all_same = false;
17768    }
17769
17770  /* Constants are best loaded from the constant pool.  */
17771  if (n_var == 0)
17772    {
17773      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
17774      return;
17775    }
17776
17777  /* If all values are identical, broadcast the value.  */
17778  if (all_same
17779      && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
17780					    XVECEXP (vals, 0, 0)))
17781    return;
17782
17783  /* Values where only one field is non-constant are best loaded from
17784     the pool and overwritten via move later.  */
17785  if (n_var == 1)
17786    {
17787      if (all_const_zero && one_var == 0
17788	  && ix86_expand_vector_init_low_nonzero (mmx_ok, mode, target,
17789						  XVECEXP (vals, 0, 0)))
17790	return;
17791
17792      if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
17793	return;
17794    }
17795
17796  ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
17797}
17798
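/* Store VAL into element ELT of vector register TARGET.  Use MMX
   instructions only if MMX_OK.  */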
17799void
17800ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
17801{
17802  enum machine_mode mode = GET_MODE (target);
17803  enum machine_mode inner_mode = GET_MODE_INNER (mode);
17804  bool use_vec_merge = false;
17805  rtx tmp;
17806
17807  switch (mode)
17808    {
17809    case V2SFmode:
17810    case V2SImode:
17811      if (mmx_ok)
17812	{
17813	  tmp = gen_reg_rtx (GET_MODE_INNER (mode));
17814	  ix86_expand_vector_extract (true, tmp, target, 1 - elt);
17815	  if (elt == 0)
17816	    tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
17817	  else
17818	    tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
17819	  emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17820	  return;
17821	}
17822      break;
17823
17824    case V2DFmode:
17825    case V2DImode:
17826      {
17827	rtx op0, op1;
17828
17829	/* For the two element vectors, we implement a VEC_CONCAT with
17830	   the extraction of the other element.  */
17831
17832	tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
17833	tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
17834
17835	if (elt == 0)
17836	  op0 = val, op1 = tmp;
17837	else
17838	  op0 = tmp, op1 = val;
17839
17840	tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
17841	emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17842      }
17843      return;
17844
17845    case V4SFmode:
17846      switch (elt)
17847	{
17848	case 0:
17849	  use_vec_merge = true;
17850	  break;
17851
17852	case 1:
17853	  /* tmp = target = A B C D */
17854	  tmp = copy_to_reg (target);
17855	  /* target = A A B B */
17856	  emit_insn (gen_sse_unpcklps (target, target, target));
17857	  /* target = X A B B */
17858	  ix86_expand_vector_set (false, target, val, 0);
17859	  /* target = A X C D  */
17860	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
17861				       GEN_INT (1), GEN_INT (0),
17862				       GEN_INT (2+4), GEN_INT (3+4)));
17863	  return;
17864
17865	case 2:
17866	  /* tmp = target = A B C D */
17867	  tmp = copy_to_reg (target);
17868	  /* tmp = X B C D */
17869	  ix86_expand_vector_set (false, tmp, val, 0);
17870	  /* target = A B X D */
17871	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
17872				       GEN_INT (0), GEN_INT (1),
17873				       GEN_INT (0+4), GEN_INT (3+4)));
17874	  return;
17875
17876	case 3:
17877	  /* tmp = target = A B C D */
17878	  tmp = copy_to_reg (target);
17879	  /* tmp = X B C D */
17880	  ix86_expand_vector_set (false, tmp, val, 0);
	  /* target = A B C X */
17882	  emit_insn (gen_sse_shufps_1 (target, target, tmp,
17883				       GEN_INT (0), GEN_INT (1),
17884				       GEN_INT (2+4), GEN_INT (0+4)));
17885	  return;
17886
17887	default:
17888	  gcc_unreachable ();
17889	}
17890      break;
17891
17892    case V4SImode:
17893      /* Element 0 handled by vec_merge below.  */
17894      if (elt == 0)
17895	{
17896	  use_vec_merge = true;
17897	  break;
17898	}
17899
17900      if (TARGET_SSE2)
17901	{
17902	  /* With SSE2, use integer shuffles to swap element 0 and ELT,
17903	     store into element 0, then shuffle them back.  */
17904
17905	  rtx order[4];
17906
17907	  order[0] = GEN_INT (elt);
17908	  order[1] = const1_rtx;
17909	  order[2] = const2_rtx;
17910	  order[3] = GEN_INT (3);
17911	  order[elt] = const0_rtx;
17912
17913	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17914					order[1], order[2], order[3]));
17915
17916	  ix86_expand_vector_set (false, target, val, 0);
17917
17918	  emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
17919					order[1], order[2], order[3]));
17920	}
17921      else
17922	{
17923	  /* For SSE1, we have to reuse the V4SF code.  */
17924	  ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
17925				  gen_lowpart (SFmode, val), elt);
17926	}
17927      return;
17928
17929    case V8HImode:
17930      use_vec_merge = TARGET_SSE2;
17931      break;
17932    case V4HImode:
17933      use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
17934      break;
17935
17936    case V16QImode:
17937    case V8QImode:
17938    default:
17939      break;
17940    }
17941
17942  if (use_vec_merge)
17943    {
17944      tmp = gen_rtx_VEC_DUPLICATE (mode, val);
17945      tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
17946      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
17947    }
17948  else
17949    {
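      /* No direct insertion pattern is available: spill the vector to a
	 stack temporary, store the element into it, and reload.  */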
17950      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
17951
17952      emit_move_insn (mem, target);
17953
17954      tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
17955      emit_move_insn (tmp, val);
17956
17957      emit_move_insn (target, mem);
17958    }
17959}
17960
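/* Extract element ELT of vector VEC into TARGET.  Use MMX instructions
   only if MMX_OK.  */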
17961void
17962ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
17963{
17964  enum machine_mode mode = GET_MODE (vec);
17965  enum machine_mode inner_mode = GET_MODE_INNER (mode);
17966  bool use_vec_extr = false;
17967  rtx tmp;
17968
17969  switch (mode)
17970    {
17971    case V2SImode:
17972    case V2SFmode:
17973      if (!mmx_ok)
17974	break;
17975      /* FALLTHRU */
17976
17977    case V2DFmode:
17978    case V2DImode:
17979      use_vec_extr = true;
17980      break;
17981
17982    case V4SFmode:
17983      switch (elt)
17984	{
17985	case 0:
17986	  tmp = vec;
17987	  break;
17988
17989	case 1:
17990	case 3:
17991	  tmp = gen_reg_rtx (mode);
17992	  emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
17993				       GEN_INT (elt), GEN_INT (elt),
17994				       GEN_INT (elt+4), GEN_INT (elt+4)));
17995	  break;
17996
17997	case 2:
17998	  tmp = gen_reg_rtx (mode);
17999	  emit_insn (gen_sse_unpckhps (tmp, vec, vec));
18000	  break;
18001
18002	default:
18003	  gcc_unreachable ();
18004	}
18005      vec = tmp;
18006      use_vec_extr = true;
18007      elt = 0;
18008      break;
18009
18010    case V4SImode:
18011      if (TARGET_SSE2)
18012	{
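	  /* Same strategy as for V4SFmode above, using the SSE2 integer
	     shuffles.  */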
18013	  switch (elt)
18014	    {
18015	    case 0:
18016	      tmp = vec;
18017	      break;
18018
18019	    case 1:
18020	    case 3:
18021	      tmp = gen_reg_rtx (mode);
18022	      emit_insn (gen_sse2_pshufd_1 (tmp, vec,
18023					    GEN_INT (elt), GEN_INT (elt),
18024					    GEN_INT (elt), GEN_INT (elt)));
18025	      break;
18026
18027	    case 2:
18028	      tmp = gen_reg_rtx (mode);
18029	      emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
18030	      break;
18031
18032	    default:
18033	      gcc_unreachable ();
18034	    }
18035	  vec = tmp;
18036	  use_vec_extr = true;
18037	  elt = 0;
18038	}
18039      else
18040	{
18041	  /* For SSE1, we have to reuse the V4SF code.  */
18042	  ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
18043				      gen_lowpart (V4SFmode, vec), elt);
18044	  return;
18045	}
18046      break;
18047
18048    case V8HImode:
18049      use_vec_extr = TARGET_SSE2;
18050      break;
18051    case V4HImode:
18052      use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
18053      break;
18054
18055    case V16QImode:
18056    case V8QImode:
18057      /* ??? Could extract the appropriate HImode element and shift.  */
18058    default:
18059      break;
18060    }
18061
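  /* Express the extraction directly as
     (vec_select:INNER_MODE VEC (parallel [ELT])).  */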
18062  if (use_vec_extr)
18063    {
18064      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
18065      tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
18066
18067      /* Let the rtl optimizers know about the zero extension performed.  */
18068      if (inner_mode == HImode)
18069	{
18070	  tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
18071	  target = gen_lowpart (SImode, target);
18072	}
18073
18074      emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
18075    }
18076  else
18077    {
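      /* No direct extraction pattern; spill the vector to a stack slot
	 and load the ELT'th element back from memory.  */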
18078      rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
18079
18080      emit_move_insn (mem, vec);
18081
      tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
18083      emit_move_insn (target, tmp);
18084    }
18085}
18086
18087/* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
18088   pattern to reduce; DEST is the destination; IN is the input vector.  */
18089
18090void
18091ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
18092{
18093  rtx tmp1, tmp2, tmp3;
18094
18095  tmp1 = gen_reg_rtx (V4SFmode);
18096  tmp2 = gen_reg_rtx (V4SFmode);
18097  tmp3 = gen_reg_rtx (V4SFmode);
18098
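  /* Bring the upper two elements of IN down into the lower half of tmp1
     (movhlps), then combine the two halves of IN pairwise with FN.  */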
18099  emit_insn (gen_sse_movhlps (tmp1, in, in));
18100  emit_insn (fn (tmp2, tmp1, in));
18101
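  /* Broadcast element 1 of the partial result and combine once more;
     the full reduction ends up in element 0 of DEST.  */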
18102  emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
18103			       GEN_INT (1), GEN_INT (1),
18104			       GEN_INT (1+4), GEN_INT (1+4)));
18105  emit_insn (fn (dest, tmp2, tmp3));
18106}
18107
18108/* Implements target hook vector_mode_supported_p.  */
18109static bool
18110ix86_vector_mode_supported_p (enum machine_mode mode)
18111{
18112  if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
18113    return true;
18114  if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
18115    return true;
18116  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
18117    return true;
18118  if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
18119    return true;
18120  return false;
18121}
18122
18123/* Worker function for TARGET_MD_ASM_CLOBBERS.
18124
18125   We do this in the new i386 backend to maintain source compatibility
18126   with the old cc0-based compiler.  */
18127
18128static tree
18129ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
18130		      tree inputs ATTRIBUTE_UNUSED,
18131		      tree clobbers)
18132{
18133  clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
18134			clobbers);
18135  clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
18136			clobbers);
18137  clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
18138			clobbers);
18139  return clobbers;
18140}
18141
/* Return true if this goes in large data/bss.  */
18143
18144static bool
18145ix86_in_large_data_p (tree exp)
18146{
18147  if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
18148    return false;
18149
18150  /* Functions are never large data.  */
18151  if (TREE_CODE (exp) == FUNCTION_DECL)
18152    return false;
18153
18154  if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
18155    {
18156      const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
18157      if (strcmp (section, ".ldata") == 0
18158	  || strcmp (section, ".lbss") == 0)
18159	return true;
18160      return false;
18161    }
18162  else
18163    {
18164      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
18165
18166      /* If this is an incomplete type with size 0, then we can't put it
18167	 in data because it might be too big when completed.  */
18168      if (!size || size > ix86_section_threshold)
18169	return true;
18170    }
18171
18172  return false;
18173}
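
/* If DECL is a variable that ends up in the large data section under
   the medium code model, mark its SYMBOL_REF with SYMBOL_FLAG_FAR_ADDR.  */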
18174static void
18175ix86_encode_section_info (tree decl, rtx rtl, int first)
18176{
18177  default_encode_section_info (decl, rtl, first);
18178
18179  if (TREE_CODE (decl) == VAR_DECL
18180      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
18181      && ix86_in_large_data_p (decl))
18182    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
18183}
18184
18185/* Worker function for REVERSE_CONDITION.  */
18186
18187enum rtx_code
18188ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
18189{
18190  return (mode != CCFPmode && mode != CCFPUmode
18191	  ? reverse_condition (code)
18192	  : reverse_condition_maybe_unordered (code));
18193}
18194
18195/* Output code to perform an x87 FP register move, from OPERANDS[1]
18196   to OPERANDS[0].  */
18197
18198const char *
18199output_387_reg_move (rtx insn, rtx *operands)
18200{
18201  if (REG_P (operands[1])
18202      && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
18203    {
18204      if (REGNO (operands[0]) == FIRST_STACK_REG
18205	  && TARGET_USE_FFREEP)
18206	return "ffreep\t%y0";
18207      return "fstp\t%y0";
18208    }
18209  if (STACK_TOP_P (operands[0]))
18210    return "fld%z1\t%y1";
18211  return "fst\t%y0";
18212}
18213
/* Output code to perform a conditional jump to LABEL if the C2 flag in
   the FP status register is set.  */
18216
18217void
18218ix86_emit_fp_unordered_jump (rtx label)
18219{
18220  rtx reg = gen_reg_rtx (HImode);
18221  rtx temp;
18222
18223  emit_insn (gen_x86_fnstsw_1 (reg));
18224
18225  if (TARGET_USE_SAHF)
18226    {
18227      emit_insn (gen_x86_sahf_1 (reg));
18228
18229      temp = gen_rtx_REG (CCmode, FLAGS_REG);
18230      temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
18231    }
18232  else
18233    {
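      /* Without SAHF, test the C2 bit (bit 2 of the status word's high
	 byte, hence the 0x04 mask) directly.  */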
18234      emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
18235
18236      temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18237      temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
18238    }
18239
18240  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
18241			      gen_rtx_LABEL_REF (VOIDmode, label),
18242			      pc_rtx);
18243  temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
18244  emit_jump_insn (temp);
18245}
18246
18247/* Output code to perform a log1p XFmode calculation.  */
18248
void
ix86_emit_i387_log1p (rtx op0, rtx op1)
18250{
18251  rtx label1 = gen_label_rtx ();
18252  rtx label2 = gen_label_rtx ();
18253
18254  rtx tmp = gen_reg_rtx (XFmode);
18255  rtx tmp2 = gen_reg_rtx (XFmode);
18256
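  /* fyl2xp1 is only accurate for |op1| < 1 - sqrt (2) / 2 (about
     0.2928932); compare |op1| against that threshold and branch to the
     generic fyl2x sequence when it is exceeded.  */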
18257  emit_insn (gen_absxf2 (tmp, op1));
18258  emit_insn (gen_cmpxf (tmp,
18259    CONST_DOUBLE_FROM_REAL_VALUE (
18260       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
18261       XFmode)));
18262  emit_jump_insn (gen_bge (label1));
18263
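  /* Small argument: op0 = ln (2) * log2 (op1 + 1) via fyl2xp1.  */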
18264  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18265  emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
18266  emit_jump (label2);
18267
18268  emit_label (label1);
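  /* Large argument: op0 = ln (2) * log2 (1 + op1) via fyl2x.  */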
18269  emit_move_insn (tmp, CONST1_RTX (XFmode));
18270  emit_insn (gen_addxf3 (tmp, op1, tmp));
18271  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
18272  emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
18273
18274  emit_label (label2);
18275}
18276
18277/* Solaris named-section hook.  Parameters are as for
18278   named_section_real.  */
18279
18280static void
18281i386_solaris_elf_named_section (const char *name, unsigned int flags,
18282				tree decl)
18283{
18284  /* With Binutils 2.15, the "@unwind" marker must be specified on
18285     every occurrence of the ".eh_frame" section, not just the first
18286     one.  */
18287  if (TARGET_64BIT
18288      && strcmp (name, ".eh_frame") == 0)
18289    {
18290      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
18291	       flags & SECTION_WRITE ? "aw" : "a");
18292      return;
18293    }
18294  default_elf_asm_named_section (name, flags, decl);
18295}
18296
18297/* Return the mangling of TYPE if it is an extended fundamental type.  */
18298
18299static const char *
18300ix86_mangle_fundamental_type (tree type)
18301{
18302  switch (TYPE_MODE (type))
18303    {
18304    case TFmode:
18305      /* __float128 is "g".  */
18306      return "g";
18307    case XFmode:
18308      /* "long double" or __float80 is "e".  */
18309      return "e";
18310    default:
18311      return NULL;
18312    }
18313}
18314
/* For 32-bit code we can avoid the PIC register setup by calling the
   hidden function __stack_chk_fail_local instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */
18319
18320static tree
18321ix86_stack_protect_fail (void)
18322{
18323  return TARGET_64BIT
18324	 ? default_external_stack_protect_fail ()
18325	 : default_hidden_stack_protect_fail ();
18326}
18327
18328/* Select a format to encode pointers in exception handling data.  CODE
18329   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
18330   true if the symbol may be affected by dynamic relocations.
18331
18332   ??? All x86 object file formats are capable of representing this.
18333   After all, the relocation needed is the same as for the call insn.
18334   Whether or not a particular assembler allows us to enter such, I
18335   guess we'll have to see.  */
18336int
18337asm_preferred_eh_data_format (int code, int global)
18338{
18339  if (flag_pic)
18340    {
      int type = DW_EH_PE_sdata8;
18342      if (!TARGET_64BIT
18343	  || ix86_cmodel == CM_SMALL_PIC
18344	  || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
18345	type = DW_EH_PE_sdata4;
18346      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
18347    }
18348  if (ix86_cmodel == CM_SMALL
18349      || (ix86_cmodel == CM_MEDIUM && code))
18350    return DW_EH_PE_udata4;
18351  return DW_EH_PE_absptr;
18352}
18353
18354#include "gt-i386.h"
18355