1/* Subroutines used for code generation on IA-32.
2   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3   2002, 2003, 2004 Free Software Foundation, Inc.
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 2, or (at your option)
10any later version.
11
12GCC is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING.  If not, write to
19the Free Software Foundation, 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA.  */
21
22
23/* $FreeBSD: head/contrib/gcc/config/i386/i386.c 146908 2005-06-03 04:02:20Z kan $ */
24
25
26#include "config.h"
27#include "system.h"
28#include "coretypes.h"
29#include "tm.h"
30#include "rtl.h"
31#include "tree.h"
32#include "tm_p.h"
33#include "regs.h"
34#include "hard-reg-set.h"
35#include "real.h"
36#include "insn-config.h"
37#include "conditions.h"
38#include "output.h"
39#include "insn-attr.h"
40#include "flags.h"
41#include "except.h"
42#include "function.h"
43#include "recog.h"
44#include "expr.h"
45#include "optabs.h"
46#include "toplev.h"
47#include "basic-block.h"
48#include "ggc.h"
49#include "target.h"
50#include "target-def.h"
51#include "langhooks.h"
52#include "cgraph.h"
53
54#ifndef CHECK_STACK_LIMIT
55#define CHECK_STACK_LIMIT (-1)
56#endif
57
58/* Return index of given mode in mult and division cost tables.  */
59#define MODE_INDEX(mode)					\
60  ((mode) == QImode ? 0						\
61   : (mode) == HImode ? 1					\
62   : (mode) == SImode ? 2					\
63   : (mode) == DImode ? 3					\
64   : 4)
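/* Illustrative note (not part of the original sources): MODE_INDEX (SImode)
   evaluates to 2, so the SImode multiply and divide costs of the active
   tuning are read roughly as ix86_cost->mult_init[MODE_INDEX (SImode)] and
   ix86_cost->divide[MODE_INDEX (SImode)], assuming the mult_init/divide
   field names declared for struct processor_costs in i386.h.  */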
65
66/* Processor costs (relative to an add) */
67static const
68struct processor_costs size_cost = {	/* costs for tuning for size */
69  2,					/* cost of an add instruction */
70  3,					/* cost of a lea instruction */
71  2,					/* variable shift costs */
72  3,					/* constant shift costs */
73  {3, 3, 3, 3, 5},			/* cost of starting a multiply */
74  0,					/* cost of multiply per each bit set */
75  {3, 3, 3, 3, 5},			/* cost of a divide/mod */
76  3,					/* cost of movsx */
77  3,					/* cost of movzx */
78  0,					/* "large" insn */
79  2,					/* MOVE_RATIO */
80  2,					/* cost for loading QImode using movzbl */
81  {2, 2, 2},				/* cost of loading integer registers
82					   in QImode, HImode and SImode.
83					   Relative to reg-reg move (2).  */
84  {2, 2, 2},				/* cost of storing integer registers */
85  2,					/* cost of reg,reg fld/fst */
86  {2, 2, 2},				/* cost of loading fp registers
87					   in SFmode, DFmode and XFmode */
88  {2, 2, 2},				/* cost of storing fp registers */
89  3,					/* cost of moving MMX register */
90  {3, 3},				/* cost of loading MMX registers
91					   in SImode and DImode */
92  {3, 3},				/* cost of storing MMX registers
93					   in SImode and DImode */
94  3,					/* cost of moving SSE register */
95  {3, 3, 3},				/* cost of loading SSE registers
96					   in SImode, DImode and TImode */
97  {3, 3, 3},				/* cost of storing SSE registers
98					   in SImode, DImode and TImode */
99  3,					/* MMX or SSE register to integer */
100  0,					/* size of prefetch block */
101  0,					/* number of parallel prefetches */
102  1,					/* Branch cost */
103  2,					/* cost of FADD and FSUB insns.  */
104  2,					/* cost of FMUL instruction.  */
105  2,					/* cost of FDIV instruction.  */
106  2,					/* cost of FABS instruction.  */
107  2,					/* cost of FCHS instruction.  */
108  2,					/* cost of FSQRT instruction.  */
109};
110
111/* Processor costs (relative to an add) */
112static const
113struct processor_costs i386_cost = {	/* 386 specific costs */
114  1,					/* cost of an add instruction */
115  1,					/* cost of a lea instruction */
116  3,					/* variable shift costs */
117  2,					/* constant shift costs */
118  {6, 6, 6, 6, 6},			/* cost of starting a multiply */
119  1,					/* cost of multiply per each bit set */
120  {23, 23, 23, 23, 23},			/* cost of a divide/mod */
121  3,					/* cost of movsx */
122  2,					/* cost of movzx */
123  15,					/* "large" insn */
124  3,					/* MOVE_RATIO */
125  4,					/* cost for loading QImode using movzbl */
126  {2, 4, 2},				/* cost of loading integer registers
127					   in QImode, HImode and SImode.
128					   Relative to reg-reg move (2).  */
129  {2, 4, 2},				/* cost of storing integer registers */
130  2,					/* cost of reg,reg fld/fst */
131  {8, 8, 8},				/* cost of loading fp registers
132					   in SFmode, DFmode and XFmode */
133  {8, 8, 8},				/* cost of storing fp registers */
134  2,					/* cost of moving MMX register */
135  {4, 8},				/* cost of loading MMX registers
136					   in SImode and DImode */
137  {4, 8},				/* cost of storing MMX registers
138					   in SImode and DImode */
139  2,					/* cost of moving SSE register */
140  {4, 8, 16},				/* cost of loading SSE registers
141					   in SImode, DImode and TImode */
142  {4, 8, 16},				/* cost of storing SSE registers
143					   in SImode, DImode and TImode */
144  3,					/* MMX or SSE register to integer */
145  0,					/* size of prefetch block */
146  0,					/* number of parallel prefetches */
147  1,					/* Branch cost */
148  23,					/* cost of FADD and FSUB insns.  */
149  27,					/* cost of FMUL instruction.  */
150  88,					/* cost of FDIV instruction.  */
151  22,					/* cost of FABS instruction.  */
152  24,					/* cost of FCHS instruction.  */
153  122,					/* cost of FSQRT instruction.  */
154};
155
156static const
157struct processor_costs i486_cost = {	/* 486 specific costs */
158  1,					/* cost of an add instruction */
159  1,					/* cost of a lea instruction */
160  3,					/* variable shift costs */
161  2,					/* constant shift costs */
162  {12, 12, 12, 12, 12},			/* cost of starting a multiply */
163  1,					/* cost of multiply per each bit set */
164  {40, 40, 40, 40, 40},			/* cost of a divide/mod */
165  3,					/* cost of movsx */
166  2,					/* cost of movzx */
167  15,					/* "large" insn */
168  3,					/* MOVE_RATIO */
169  4,					/* cost for loading QImode using movzbl */
170  {2, 4, 2},				/* cost of loading integer registers
171					   in QImode, HImode and SImode.
172					   Relative to reg-reg move (2).  */
173  {2, 4, 2},				/* cost of storing integer registers */
174  2,					/* cost of reg,reg fld/fst */
175  {8, 8, 8},				/* cost of loading fp registers
176					   in SFmode, DFmode and XFmode */
177  {8, 8, 8},				/* cost of storing fp registers */
178  2,					/* cost of moving MMX register */
179  {4, 8},				/* cost of loading MMX registers
180					   in SImode and DImode */
181  {4, 8},				/* cost of storing MMX registers
182					   in SImode and DImode */
183  2,					/* cost of moving SSE register */
184  {4, 8, 16},				/* cost of loading SSE registers
185					   in SImode, DImode and TImode */
186  {4, 8, 16},				/* cost of storing SSE registers
187					   in SImode, DImode and TImode */
188  3,					/* MMX or SSE register to integer */
189  0,					/* size of prefetch block */
190  0,					/* number of parallel prefetches */
191  1,					/* Branch cost */
192  8,					/* cost of FADD and FSUB insns.  */
193  16,					/* cost of FMUL instruction.  */
194  73,					/* cost of FDIV instruction.  */
195  3,					/* cost of FABS instruction.  */
196  3,					/* cost of FCHS instruction.  */
197  83,					/* cost of FSQRT instruction.  */
198};
199
200static const
201struct processor_costs pentium_cost = {
202  1,					/* cost of an add instruction */
203  1,					/* cost of a lea instruction */
204  4,					/* variable shift costs */
205  1,					/* constant shift costs */
206  {11, 11, 11, 11, 11},			/* cost of starting a multiply */
207  0,					/* cost of multiply per each bit set */
208  {25, 25, 25, 25, 25},			/* cost of a divide/mod */
209  3,					/* cost of movsx */
210  2,					/* cost of movzx */
211  8,					/* "large" insn */
212  6,					/* MOVE_RATIO */
213  6,					/* cost for loading QImode using movzbl */
214  {2, 4, 2},				/* cost of loading integer registers
215					   in QImode, HImode and SImode.
216					   Relative to reg-reg move (2).  */
217  {2, 4, 2},				/* cost of storing integer registers */
218  2,					/* cost of reg,reg fld/fst */
219  {2, 2, 6},				/* cost of loading fp registers
220					   in SFmode, DFmode and XFmode */
221  {4, 4, 6},				/* cost of storing fp registers */
222  8,					/* cost of moving MMX register */
223  {8, 8},				/* cost of loading MMX registers
224					   in SImode and DImode */
225  {8, 8},				/* cost of storing MMX registers
226					   in SImode and DImode */
227  2,					/* cost of moving SSE register */
228  {4, 8, 16},				/* cost of loading SSE registers
229					   in SImode, DImode and TImode */
230  {4, 8, 16},				/* cost of storing SSE registers
231					   in SImode, DImode and TImode */
232  3,					/* MMX or SSE register to integer */
233  0,					/* size of prefetch block */
234  0,					/* number of parallel prefetches */
235  2,					/* Branch cost */
236  3,					/* cost of FADD and FSUB insns.  */
237  3,					/* cost of FMUL instruction.  */
238  39,					/* cost of FDIV instruction.  */
239  1,					/* cost of FABS instruction.  */
240  1,					/* cost of FCHS instruction.  */
241  70,					/* cost of FSQRT instruction.  */
242};
243
244static const
245struct processor_costs pentiumpro_cost = {
246  1,					/* cost of an add instruction */
247  1,					/* cost of a lea instruction */
248  1,					/* variable shift costs */
249  1,					/* constant shift costs */
250  {4, 4, 4, 4, 4},			/* cost of starting a multiply */
251  0,					/* cost of multiply per each bit set */
252  {17, 17, 17, 17, 17},			/* cost of a divide/mod */
253  1,					/* cost of movsx */
254  1,					/* cost of movzx */
255  8,					/* "large" insn */
256  6,					/* MOVE_RATIO */
257  2,					/* cost for loading QImode using movzbl */
258  {4, 4, 4},				/* cost of loading integer registers
259					   in QImode, HImode and SImode.
260					   Relative to reg-reg move (2).  */
261  {2, 2, 2},				/* cost of storing integer registers */
262  2,					/* cost of reg,reg fld/fst */
263  {2, 2, 6},				/* cost of loading fp registers
264					   in SFmode, DFmode and XFmode */
265  {4, 4, 6},				/* cost of storing fp registers */
266  2,					/* cost of moving MMX register */
267  {2, 2},				/* cost of loading MMX registers
268					   in SImode and DImode */
269  {2, 2},				/* cost of storing MMX registers
270					   in SImode and DImode */
271  2,					/* cost of moving SSE register */
272  {2, 2, 8},				/* cost of loading SSE registers
273					   in SImode, DImode and TImode */
274  {2, 2, 8},				/* cost of storing SSE registers
275					   in SImode, DImode and TImode */
276  3,					/* MMX or SSE register to integer */
277  32,					/* size of prefetch block */
278  6,					/* number of parallel prefetches */
279  2,					/* Branch cost */
280  3,					/* cost of FADD and FSUB insns.  */
281  5,					/* cost of FMUL instruction.  */
282  56,					/* cost of FDIV instruction.  */
283  2,					/* cost of FABS instruction.  */
284  2,					/* cost of FCHS instruction.  */
285  56,					/* cost of FSQRT instruction.  */
286};
287
288static const
289struct processor_costs k6_cost = {
290  1,					/* cost of an add instruction */
291  2,					/* cost of a lea instruction */
292  1,					/* variable shift costs */
293  1,					/* constant shift costs */
294  {3, 3, 3, 3, 3},			/* cost of starting a multiply */
295  0,					/* cost of multiply per each bit set */
296  {18, 18, 18, 18, 18},			/* cost of a divide/mod */
297  2,					/* cost of movsx */
298  2,					/* cost of movzx */
299  8,					/* "large" insn */
300  4,					/* MOVE_RATIO */
301  3,					/* cost for loading QImode using movzbl */
302  {4, 5, 4},				/* cost of loading integer registers
303					   in QImode, HImode and SImode.
304					   Relative to reg-reg move (2).  */
305  {2, 3, 2},				/* cost of storing integer registers */
306  4,					/* cost of reg,reg fld/fst */
307  {6, 6, 6},				/* cost of loading fp registers
308					   in SFmode, DFmode and XFmode */
309  {4, 4, 4},				/* cost of storing fp registers */
310  2,					/* cost of moving MMX register */
311  {2, 2},				/* cost of loading MMX registers
312					   in SImode and DImode */
313  {2, 2},				/* cost of storing MMX registers
314					   in SImode and DImode */
315  2,					/* cost of moving SSE register */
316  {2, 2, 8},				/* cost of loading SSE registers
317					   in SImode, DImode and TImode */
318  {2, 2, 8},				/* cost of storing SSE registers
319					   in SImode, DImode and TImode */
320  6,					/* MMX or SSE register to integer */
321  32,					/* size of prefetch block */
322  1,					/* number of parallel prefetches */
323  1,					/* Branch cost */
324  2,					/* cost of FADD and FSUB insns.  */
325  2,					/* cost of FMUL instruction.  */
326  56,					/* cost of FDIV instruction.  */
327  2,					/* cost of FABS instruction.  */
328  2,					/* cost of FCHS instruction.  */
329  56,					/* cost of FSQRT instruction.  */
330};
331
332static const
333struct processor_costs athlon_cost = {
334  1,					/* cost of an add instruction */
335  2,					/* cost of a lea instruction */
336  1,					/* variable shift costs */
337  1,					/* constant shift costs */
338  {5, 5, 5, 5, 5},			/* cost of starting a multiply */
339  0,					/* cost of multiply per each bit set */
340  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
341  1,					/* cost of movsx */
342  1,					/* cost of movzx */
343  8,					/* "large" insn */
344  9,					/* MOVE_RATIO */
345  4,					/* cost for loading QImode using movzbl */
346  {3, 4, 3},				/* cost of loading integer registers
347					   in QImode, HImode and SImode.
348					   Relative to reg-reg move (2).  */
349  {3, 4, 3},				/* cost of storing integer registers */
350  4,					/* cost of reg,reg fld/fst */
351  {4, 4, 12},				/* cost of loading fp registers
352					   in SFmode, DFmode and XFmode */
353  {6, 6, 8},				/* cost of storing fp registers */
354  2,					/* cost of moving MMX register */
355  {4, 4},				/* cost of loading MMX registers
356					   in SImode and DImode */
357  {4, 4},				/* cost of storing MMX registers
358					   in SImode and DImode */
359  2,					/* cost of moving SSE register */
360  {4, 4, 6},				/* cost of loading SSE registers
361					   in SImode, DImode and TImode */
362  {4, 4, 5},				/* cost of storing SSE registers
363					   in SImode, DImode and TImode */
364  5,					/* MMX or SSE register to integer */
365  64,					/* size of prefetch block */
366  6,					/* number of parallel prefetches */
367  2,					/* Branch cost */
368  4,					/* cost of FADD and FSUB insns.  */
369  4,					/* cost of FMUL instruction.  */
370  24,					/* cost of FDIV instruction.  */
371  2,					/* cost of FABS instruction.  */
372  2,					/* cost of FCHS instruction.  */
373  35,					/* cost of FSQRT instruction.  */
374};
375
376static const
377struct processor_costs k8_cost = {
378  1,					/* cost of an add instruction */
379  2,					/* cost of a lea instruction */
380  1,					/* variable shift costs */
381  1,					/* constant shift costs */
382  {3, 4, 3, 4, 5},			/* cost of starting a multiply */
383  0,					/* cost of multiply per each bit set */
384  {18, 26, 42, 74, 74},			/* cost of a divide/mod */
385  1,					/* cost of movsx */
386  1,					/* cost of movzx */
387  8,					/* "large" insn */
388  9,					/* MOVE_RATIO */
389  4,					/* cost for loading QImode using movzbl */
390  {3, 4, 3},				/* cost of loading integer registers
391					   in QImode, HImode and SImode.
392					   Relative to reg-reg move (2).  */
393  {3, 4, 3},				/* cost of storing integer registers */
394  4,					/* cost of reg,reg fld/fst */
395  {4, 4, 12},				/* cost of loading fp registers
396					   in SFmode, DFmode and XFmode */
397  {6, 6, 8},				/* cost of storing fp registers */
398  2,					/* cost of moving MMX register */
399  {3, 3},				/* cost of loading MMX registers
400					   in SImode and DImode */
401  {4, 4},				/* cost of storing MMX registers
402					   in SImode and DImode */
403  2,					/* cost of moving SSE register */
404  {4, 3, 6},				/* cost of loading SSE registers
405					   in SImode, DImode and TImode */
406  {4, 4, 5},				/* cost of storing SSE registers
407					   in SImode, DImode and TImode */
408  5,					/* MMX or SSE register to integer */
409  64,					/* size of prefetch block */
410  6,					/* number of parallel prefetches */
411  2,					/* Branch cost */
412  4,					/* cost of FADD and FSUB insns.  */
413  4,					/* cost of FMUL instruction.  */
414  19,					/* cost of FDIV instruction.  */
415  2,					/* cost of FABS instruction.  */
416  2,					/* cost of FCHS instruction.  */
417  35,					/* cost of FSQRT instruction.  */
418};
419
420static const
421struct processor_costs pentium4_cost = {
422  1,					/* cost of an add instruction */
423  1,					/* cost of a lea instruction */
424  4,					/* variable shift costs */
425  4,					/* constant shift costs */
426  {15, 15, 15, 15, 15},			/* cost of starting a multiply */
427  0,					/* cost of multiply per each bit set */
428  {56, 56, 56, 56, 56},			/* cost of a divide/mod */
429  1,					/* cost of movsx */
430  1,					/* cost of movzx */
431  16,					/* "large" insn */
432  6,					/* MOVE_RATIO */
433  2,					/* cost for loading QImode using movzbl */
434  {4, 5, 4},				/* cost of loading integer registers
435					   in QImode, HImode and SImode.
436					   Relative to reg-reg move (2).  */
437  {2, 3, 2},				/* cost of storing integer registers */
438  2,					/* cost of reg,reg fld/fst */
439  {2, 2, 6},				/* cost of loading fp registers
440					   in SFmode, DFmode and XFmode */
441  {4, 4, 6},				/* cost of storing fp registers */
442  2,					/* cost of moving MMX register */
443  {2, 2},				/* cost of loading MMX registers
444					   in SImode and DImode */
445  {2, 2},				/* cost of storing MMX registers
446					   in SImode and DImode */
447  12,					/* cost of moving SSE register */
448  {12, 12, 12},				/* cost of loading SSE registers
449					   in SImode, DImode and TImode */
450  {2, 2, 8},				/* cost of storing SSE registers
451					   in SImode, DImode and TImode */
452  10,					/* MMX or SSE register to integer */
453  64,					/* size of prefetch block */
454  6,					/* number of parallel prefetches */
455  2,					/* Branch cost */
456  5,					/* cost of FADD and FSUB insns.  */
457  7,					/* cost of FMUL instruction.  */
458  43,					/* cost of FDIV instruction.  */
459  2,					/* cost of FABS instruction.  */
460  2,					/* cost of FCHS instruction.  */
461  43,					/* cost of FSQRT instruction.  */
462};
463
464const struct processor_costs *ix86_cost = &pentium_cost;
465
466/* Processor feature/optimization bitmasks.  */
467#define m_386 (1<<PROCESSOR_I386)
468#define m_486 (1<<PROCESSOR_I486)
469#define m_PENT (1<<PROCESSOR_PENTIUM)
470#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
471#define m_K6  (1<<PROCESSOR_K6)
472#define m_ATHLON  (1<<PROCESSOR_ATHLON)
473#define m_PENT4  (1<<PROCESSOR_PENTIUM4)
474#define m_K8  (1<<PROCESSOR_K8)
475#define m_ATHLON_K8  (m_K8 | m_ATHLON)
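/* Illustrative note (not part of the original sources): each bitmask below
   is tested against the mask of the processor being tuned for, roughly
   (x86_use_leave & (1 << ix86_tune)) != 0; i386.h typically wraps such tests
   in TARGET_* macros (e.g. a name like TARGET_USE_LEAVE, assumed here).  */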
476
477const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
478const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
479const int x86_zero_extend_with_and = m_486 | m_PENT;
480const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
481const int x86_double_with_add = ~m_386;
482const int x86_use_bit_test = m_386;
483const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
484const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
485const int x86_3dnow_a = m_ATHLON_K8;
486const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
487const int x86_branch_hints = m_PENT4;
488const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
489const int x86_partial_reg_stall = m_PPRO;
490const int x86_use_loop = m_K6;
491const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
492const int x86_use_mov0 = m_K6;
493const int x86_use_cltd = ~(m_PENT | m_K6);
494const int x86_read_modify_write = ~m_PENT;
495const int x86_read_modify = ~(m_PENT | m_PPRO);
496const int x86_split_long_moves = m_PPRO;
497const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
498const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
499const int x86_single_stringop = m_386 | m_PENT4;
500const int x86_qimode_math = ~(0);
501const int x86_promote_qi_regs = 0;
502const int x86_himode_math = ~(m_PPRO);
503const int x86_promote_hi_regs = m_PPRO;
504const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
505const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
506const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
507const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
508const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
509const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
510const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
511const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
512const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
513const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
514const int x86_decompose_lea = m_PENT4;
515const int x86_shift1 = ~m_486;
516const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
517const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
518/* Set for machines where the type and dependencies are resolved on SSE register
519   parts instead of whole registers, so we may maintain just the lower part of
520   scalar values in the proper format, leaving the upper part undefined.  */
521const int x86_sse_partial_regs = m_ATHLON_K8;
522/* Athlon optimizes the partial-register FPS special case, thus avoiding the
523   need for extra instructions beforehand.  */
524const int x86_sse_partial_regs_for_cvtsd2ss = 0;
525const int x86_sse_typeless_stores = m_ATHLON_K8;
526const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
527const int x86_use_ffreep = m_ATHLON_K8;
528const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
529
530/* ??? HACK!  The following is a lie.  SSE can hold e.g. SImode, and
531   indeed *must* be able to hold SImode so that SSE2 shifts are able
532   to work right.  But this can result in some mighty surprising
533   register allocation when building kernels.  Turning this off should
534   make us less likely to all of a sudden select an SSE register.  */
535const int x86_inter_unit_moves = 0;  /* ~(m_ATHLON_K8) */
536
537const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
538
539/* If the average insn count for a single function invocation is lower
540   than this constant, emit fast (but longer) prologue and
541   epilogue code.  */
542#define FAST_PROLOGUE_INSN_COUNT 20
543
544/* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
545static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
546static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
547static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
548
549/* Array of the smallest class containing reg number REGNO, indexed by
550   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */
551
552enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
553{
554  /* ax, dx, cx, bx */
555  AREG, DREG, CREG, BREG,
556  /* si, di, bp, sp */
557  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
558  /* FP registers */
559  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
560  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
561  /* arg pointer */
562  NON_Q_REGS,
563  /* flags, fpsr, dirflag, frame */
564  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
565  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
566  SSE_REGS, SSE_REGS,
567  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
568  MMX_REGS, MMX_REGS,
569  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
570  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
571  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
572  SSE_REGS, SSE_REGS,
573};
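/* Illustrative note (not part of the original sources): with the table above,
   REGNO_REG_CLASS (0) is AREG (%eax), REGNO_REG_CLASS (1) is DREG (%edx), and
   REGNO_REG_CLASS (7), the stack pointer, is NON_Q_REGS.  */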
574
575/* The "default" register map used in 32bit mode.  */
576
577int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
578{
579  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
580  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
581  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
582  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
583  29, 30, 31, 32, 33, 34, 35, 36,       /* MMX */
584  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
585  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
586};
587
588static int const x86_64_int_parameter_registers[6] =
589{
590  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
591  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
592};
593
594static int const x86_64_int_return_registers[4] =
595{
596  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
597};
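/* Illustrative note (not part of the original sources): per the two tables
   above, the first integer argument is passed in gcc regno 5 (%rdi), and an
   integer return value comes back first in regno 0 (%rax) and then in
   regno 1 (%rdx).  */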
598
599/* The "default" register map used in 64bit mode.  */
600int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
601{
602  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
603  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
604  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
605  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
606  41, 42, 43, 44, 45, 46, 47, 48,       /* MMX */
607  8,9,10,11,12,13,14,15,		/* extended integer registers */
608  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
609};
610
611/* Define the register numbers to be used in Dwarf debugging information.
612   The SVR4 reference port C compiler uses the following register numbers
613   in its Dwarf output code:
614	0 for %eax (gcc regno = 0)
615	1 for %ecx (gcc regno = 2)
616	2 for %edx (gcc regno = 1)
617	3 for %ebx (gcc regno = 3)
618	4 for %esp (gcc regno = 7)
619	5 for %ebp (gcc regno = 6)
620	6 for %esi (gcc regno = 4)
621	7 for %edi (gcc regno = 5)
622   The following three DWARF register numbers are never generated by
623   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
624   believes these numbers have these meanings.
625	8  for %eip    (no gcc equivalent)
626	9  for %eflags (gcc regno = 17)
627	10 for %trapno (no gcc equivalent)
628   It is not at all clear how we should number the FP stack registers
629   for the x86 architecture.  If the version of SDB on x86/svr4 were
630   a bit less brain dead with respect to floating-point then we would
631   have a precedent to follow with respect to DWARF register numbers
632   for x86 FP registers, but the SDB on x86/svr4 is so completely
633   broken with respect to FP registers that it is hardly worth thinking
634   of it as something to strive for compatibility with.
635   The version of x86/svr4 SDB I have at the moment does (partially)
636   seem to believe that DWARF register number 11 is associated with
637   the x86 register %st(0), but that's about all.  Higher DWARF
638   register numbers don't seem to be associated with anything in
639   particular, and even for DWARF regno 11, SDB only seems to under-
640   stand that it should say that a variable lives in %st(0) (when
641   asked via an `=' command) if we said it was in DWARF regno 11,
642   but SDB still prints garbage when asked for the value of the
643   variable in question (via a `/' command).
644   (Also note that the labels SDB prints for various FP stack regs
645   when doing an `x' command are all wrong.)
646   Note that these problems generally don't affect the native SVR4
647   C compiler because it doesn't allow the use of -O with -g and
648   because when it is *not* optimizing, it allocates a memory
649   location for each floating-point variable, and the memory
650   location is what gets described in the DWARF AT_location
651   attribute for the variable in question.
652   Regardless of the severe mental illness of the x86/svr4 SDB, we
653   do something sensible here and we use the following DWARF
654   register numbers.  Note that these are all stack-top-relative
655   numbers.
656	11 for %st(0) (gcc regno = 8)
657	12 for %st(1) (gcc regno = 9)
658	13 for %st(2) (gcc regno = 10)
659	14 for %st(3) (gcc regno = 11)
660	15 for %st(4) (gcc regno = 12)
661	16 for %st(5) (gcc regno = 13)
662	17 for %st(6) (gcc regno = 14)
663	18 for %st(7) (gcc regno = 15)
664*/
665int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
666{
667  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
668  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
669  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
670  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
671  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
672  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
673  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
674};
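/* Illustrative note (not part of the original sources): reading the table
   above, gcc regno 6 (%ebp) maps to DWARF register 5 and gcc regno 8
   (%st(0)) maps to DWARF register 11, matching the numbering described in
   the comment before the table.  */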
675
676/* Test and compare insns in i386.md store the information needed to
677   generate branch and scc insns here.  */
678
679rtx ix86_compare_op0 = NULL_RTX;
680rtx ix86_compare_op1 = NULL_RTX;
681
682#define MAX_386_STACK_LOCALS 3
683/* Size of the register save area.  */
684#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
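/* Illustrative note (not part of the original sources): with the usual
   64-bit values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8 (values assumed from i386.h), this register save
   area is 6*8 + 8*16 = 176 bytes.  */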
685
686/* Define the structure for the machine field in struct function.  */
687
688struct stack_local_entry GTY(())
689{
690  unsigned short mode;
691  unsigned short n;
692  rtx rtl;
693  struct stack_local_entry *next;
694};
695
696/* Structure describing stack frame layout.
697   Stack grows downward:
698
699   [arguments]
700					      <- ARG_POINTER
701   saved pc
702
703   saved frame pointer if frame_pointer_needed
704					      <- HARD_FRAME_POINTER
705   [saved regs]
706
707   [padding1]          \
708		        )
709   [va_arg registers]  (
710		        > to_allocate	      <- FRAME_POINTER
711   [frame]	       (
712		        )
713   [padding2]	       /
714  */
715struct ix86_frame
716{
717  int nregs;
718  int padding1;
719  int va_arg_size;
720  HOST_WIDE_INT frame;
721  int padding2;
722  int outgoing_arguments_size;
723  int red_zone_size;
724
725  HOST_WIDE_INT to_allocate;
726  /* The offsets relative to ARG_POINTER.  */
727  HOST_WIDE_INT frame_pointer_offset;
728  HOST_WIDE_INT hard_frame_pointer_offset;
729  HOST_WIDE_INT stack_pointer_offset;
730
731  /* When save_regs_using_mov is set, emit prologue using
732     move instead of push instructions.  */
733  bool save_regs_using_mov;
734};
735
736/* Used to enable/disable debugging features.  */
737const char *ix86_debug_arg_string, *ix86_debug_addr_string;
738/* Code model option as passed by user.  */
739const char *ix86_cmodel_string;
740/* Parsed value.  */
741enum cmodel ix86_cmodel;
742/* Asm dialect.  */
743const char *ix86_asm_string;
744enum asm_dialect ix86_asm_dialect = ASM_ATT;
745/* TLS dialect.  */
746const char *ix86_tls_dialect_string;
747enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
748
749/* Which unit we are generating floating point math for.  */
750enum fpmath_unit ix86_fpmath;
751
752/* Which cpu we are scheduling for.  */
753enum processor_type ix86_tune;
754/* Which instruction set architecture to use.  */
755enum processor_type ix86_arch;
756
757/* Strings to hold which cpu and instruction set architecture  to use.  */
758const char *ix86_tune_string;		/* for -mtune=<xxx> */
759const char *ix86_arch_string;		/* for -march=<xxx> */
760const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */
761
762/* # of registers to use to pass arguments.  */
763const char *ix86_regparm_string;
764
765/* True if the SSE prefetch instruction is not a NOP.  */
766int x86_prefetch_sse;
767
768/* ix86_regparm_string as a number */
769int ix86_regparm;
770
771/* Alignment to use for loops and jumps:  */
772
773/* Power of two alignment for loops.  */
774const char *ix86_align_loops_string;
775
776/* Power of two alignment for non-loop jumps.  */
777const char *ix86_align_jumps_string;
778
779/* Power of two alignment for stack boundary in bytes.  */
780const char *ix86_preferred_stack_boundary_string;
781
782/* Preferred alignment for stack boundary in bits.  */
783int ix86_preferred_stack_boundary;
784
785/* Values 1-5: see jump.c */
786int ix86_branch_cost;
787const char *ix86_branch_cost_string;
788
789/* Power of two alignment for functions.  */
790const char *ix86_align_funcs_string;
791
792/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
793static char internal_label_prefix[16];
794static int internal_label_prefix_len;
795
796static int local_symbolic_operand (rtx, enum machine_mode);
797static int tls_symbolic_operand_1 (rtx, enum tls_model);
798static void output_pic_addr_const (FILE *, rtx, int);
799static void put_condition_code (enum rtx_code, enum machine_mode,
800				int, int, FILE *);
801static const char *get_some_local_dynamic_name (void);
802static int get_some_local_dynamic_name_1 (rtx *, void *);
803static rtx maybe_get_pool_constant (rtx);
804static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
805static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
806						   rtx *);
807static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
808static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
809						   enum machine_mode);
810static rtx get_thread_pointer (int);
811static rtx legitimize_tls_address (rtx, enum tls_model, int);
812static void get_pc_thunk_name (char [32], unsigned int);
813static rtx gen_push (rtx);
814static int memory_address_length (rtx addr);
815static int ix86_flags_dependant (rtx, rtx, enum attr_type);
816static int ix86_agi_dependant (rtx, rtx, enum attr_type);
817static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
818static void ix86_dump_ppro_packet (FILE *);
819static void ix86_reorder_insn (rtx *, rtx *);
820static struct machine_function * ix86_init_machine_status (void);
821static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
822static int ix86_nsaved_regs (void);
823static void ix86_emit_save_regs (void);
824static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
825static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
826static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
827static void ix86_sched_reorder_ppro (rtx *, rtx *);
828static HOST_WIDE_INT ix86_GOT_alias_set (void);
829static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
830static rtx ix86_expand_aligntest (rtx, int);
831static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
832static int ix86_issue_rate (void);
833static int ix86_adjust_cost (rtx, rtx, rtx, int);
834static void ix86_sched_init (FILE *, int, int);
835static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
836static int ix86_variable_issue (FILE *, int, rtx, int);
837static int ia32_use_dfa_pipeline_interface (void);
838static int ia32_multipass_dfa_lookahead (void);
839static void ix86_init_mmx_sse_builtins (void);
840static rtx x86_this_parameter (tree);
841static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
842				 HOST_WIDE_INT, tree);
843static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
844static void x86_file_start (void);
845static void ix86_reorg (void);
846static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
847static tree ix86_build_builtin_va_list (void);
848
849struct ix86_address
850{
851  rtx base, index, disp;
852  HOST_WIDE_INT scale;
853  enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
854};
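/* Illustrative sketch (hypothetical example, not taken from this file): for
   an operand such as 12(%esi,%ecx,4), ix86_decompose_address is expected to
   fill in base = %esi, index = %ecx, scale = 4, disp = (const_int 12) and
   seg = SEG_DEFAULT.  */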
855
856static int ix86_decompose_address (rtx, struct ix86_address *);
857static int ix86_address_cost (rtx);
858static bool ix86_cannot_force_const_mem (rtx);
859static rtx ix86_delegitimize_address (rtx);
860
861struct builtin_description;
862static rtx ix86_expand_sse_comi (const struct builtin_description *,
863				 tree, rtx);
864static rtx ix86_expand_sse_compare (const struct builtin_description *,
865				    tree, rtx);
866static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
867static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
868static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
869static rtx ix86_expand_store_builtin (enum insn_code, tree);
870static rtx safe_vector_operand (rtx, enum machine_mode);
871static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
872static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
873				      enum rtx_code *, enum rtx_code *);
874static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
875static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
876static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
877static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
878static int ix86_fp_comparison_cost (enum rtx_code code);
879static unsigned int ix86_select_alt_pic_regnum (void);
880static int ix86_save_reg (unsigned int, int);
881static void ix86_compute_frame_layout (struct ix86_frame *);
882static int ix86_comp_type_attributes (tree, tree);
883static int ix86_function_regparm (tree, tree);
884const struct attribute_spec ix86_attribute_table[];
885static bool ix86_function_ok_for_sibcall (tree, tree);
886static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
887static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
888static int ix86_value_regno (enum machine_mode);
889static bool contains_128bit_aligned_vector_p (tree);
890static bool ix86_ms_bitfield_layout_p (tree);
891static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
892static int extended_reg_mentioned_1 (rtx *, void *);
893static bool ix86_rtx_costs (rtx, int, int, int *);
894static int min_insn_size (rtx);
895static void k8_avoid_jump_misspredicts (void);
896
897#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
898static void ix86_svr3_asm_out_constructor (rtx, int);
899#endif
900
901/* Register class used for passing a given 64-bit part of the argument.
902   These represent classes as documented by the psABI, except for the SSESF
903   and SSEDF classes, which are basically the SSE class; gcc just uses SFmode
904   or DFmode moves instead of DImode moves to avoid reformatting penalties.
905
906   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
907   whenever possible (upper half does contain padding).
908 */
909enum x86_64_reg_class
910  {
911    X86_64_NO_CLASS,
912    X86_64_INTEGER_CLASS,
913    X86_64_INTEGERSI_CLASS,
914    X86_64_SSE_CLASS,
915    X86_64_SSESF_CLASS,
916    X86_64_SSEDF_CLASS,
917    X86_64_SSEUP_CLASS,
918    X86_64_X87_CLASS,
919    X86_64_X87UP_CLASS,
920    X86_64_MEMORY_CLASS
921  };
922static const char * const x86_64_reg_class_name[] =
923   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
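/* Illustrative note (a hypothetical example, not a case spelled out in this
   file): under the rules sketched above, a structure containing a double
   followed by an int would occupy two eightbytes classified as
   X86_64_SSEDF_CLASS and X86_64_INTEGERSI_CLASS respectively, i.e. the psABI
   classification with the SF/DF and SImode refinements applied.  */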
924
925#define MAX_CLASSES 4
926static int classify_argument (enum machine_mode, tree,
927			      enum x86_64_reg_class [MAX_CLASSES], int);
928static int examine_argument (enum machine_mode, tree, int, int *, int *);
929static rtx construct_container (enum machine_mode, tree, int, int, int,
930				const int *, int);
931static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
932					    enum x86_64_reg_class);
933
934/* Table of constants used by fldpi, fldln2, etc.  */
935static REAL_VALUE_TYPE ext_80387_constants_table [5];
936static bool ext_80387_constants_init = 0;
937static void init_ext_80387_constants (void);
938
939/* Initialize the GCC target structure.  */
940#undef TARGET_ATTRIBUTE_TABLE
941#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
942#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
943#  undef TARGET_MERGE_DECL_ATTRIBUTES
944#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
945#endif
946
947#undef TARGET_COMP_TYPE_ATTRIBUTES
948#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
949
950#undef TARGET_INIT_BUILTINS
951#define TARGET_INIT_BUILTINS ix86_init_builtins
952
953#undef TARGET_EXPAND_BUILTIN
954#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
955
956#undef TARGET_ASM_FUNCTION_EPILOGUE
957#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
958
959#undef TARGET_ASM_OPEN_PAREN
960#define TARGET_ASM_OPEN_PAREN ""
961#undef TARGET_ASM_CLOSE_PAREN
962#define TARGET_ASM_CLOSE_PAREN ""
963
964#undef TARGET_ASM_ALIGNED_HI_OP
965#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
966#undef TARGET_ASM_ALIGNED_SI_OP
967#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
968#ifdef ASM_QUAD
969#undef TARGET_ASM_ALIGNED_DI_OP
970#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
971#endif
972
973#undef TARGET_ASM_UNALIGNED_HI_OP
974#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
975#undef TARGET_ASM_UNALIGNED_SI_OP
976#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
977#undef TARGET_ASM_UNALIGNED_DI_OP
978#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
979
980#undef TARGET_SCHED_ADJUST_COST
981#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
982#undef TARGET_SCHED_ISSUE_RATE
983#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
984#undef TARGET_SCHED_VARIABLE_ISSUE
985#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
986#undef TARGET_SCHED_INIT
987#define TARGET_SCHED_INIT ix86_sched_init
988#undef TARGET_SCHED_REORDER
989#define TARGET_SCHED_REORDER ix86_sched_reorder
990#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
991#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
992  ia32_use_dfa_pipeline_interface
993#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
994#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
995  ia32_multipass_dfa_lookahead
996
997#undef TARGET_FUNCTION_OK_FOR_SIBCALL
998#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
999
1000#ifdef HAVE_AS_TLS
1001#undef TARGET_HAVE_TLS
1002#define TARGET_HAVE_TLS true
1003#endif
1004#undef TARGET_CANNOT_FORCE_CONST_MEM
1005#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1006
1007#undef TARGET_DELEGITIMIZE_ADDRESS
1008#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1009
1010#undef TARGET_MS_BITFIELD_LAYOUT_P
1011#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1012
1013#undef TARGET_ASM_OUTPUT_MI_THUNK
1014#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1015#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1016#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1017
1018#undef TARGET_ASM_FILE_START
1019#define TARGET_ASM_FILE_START x86_file_start
1020
1021#undef TARGET_RTX_COSTS
1022#define TARGET_RTX_COSTS ix86_rtx_costs
1023#undef TARGET_ADDRESS_COST
1024#define TARGET_ADDRESS_COST ix86_address_cost
1025
1026#undef TARGET_FIXED_CONDITION_CODE_REGS
1027#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1028#undef TARGET_CC_MODES_COMPATIBLE
1029#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1030
1031#undef TARGET_MACHINE_DEPENDENT_REORG
1032#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1033
1034#undef TARGET_BUILD_BUILTIN_VA_LIST
1035#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1036
1037struct gcc_target targetm = TARGET_INITIALIZER;
1038
1039/* The svr4 ABI for the i386 says that records and unions are returned
1040   in memory.  */
1041#ifndef DEFAULT_PCC_STRUCT_RETURN
1042#define DEFAULT_PCC_STRUCT_RETURN 1
1043#endif
1044
1045/* Sometimes certain combinations of command options do not make
1046   sense on a particular target machine.  You can define a macro
1047   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
1048   defined, is executed once just after all the command options have
1049   been parsed.
1050
1051   Don't use this macro to turn on various extra optimizations for
1052   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
1053
1054void
1055override_options (void)
1056{
1057  int i;
1058  /* Comes from final.c -- no real reason to change it.  */
1059#define MAX_CODE_ALIGN 16
1060
1061  static struct ptt
1062    {
1063      const struct processor_costs *cost;	/* Processor costs */
1064      const int target_enable;			/* Target flags to enable.  */
1065      const int target_disable;			/* Target flags to disable.  */
1066      const int align_loop;			/* Default alignments.  */
1067      const int align_loop_max_skip;
1068      const int align_jump;
1069      const int align_jump_max_skip;
1070      const int align_func;
1071    }
1072  const processor_target_table[PROCESSOR_max] =
1073    {
1074      {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1075      {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1076      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1077      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1078      {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1079      {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1080      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1081      {&k8_cost, 0, 0, 16, 7, 16, 7, 16}
1082    };
1083
1084  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1085  static struct pta
1086    {
1087      const char *const name;		/* processor name or nickname.  */
1088      const enum processor_type processor;
1089      const enum pta_flags
1090	{
1091	  PTA_SSE = 1,
1092	  PTA_SSE2 = 2,
1093	  PTA_SSE3 = 4,
1094	  PTA_MMX = 8,
1095	  PTA_PREFETCH_SSE = 16,
1096	  PTA_3DNOW = 32,
1097	  PTA_3DNOW_A = 64,
1098	  PTA_64BIT = 128
1099	} flags;
1100    }
1101  const processor_alias_table[] =
1102    {
1103      {"i386", PROCESSOR_I386, 0},
1104      {"i486", PROCESSOR_I486, 0},
1105      {"i586", PROCESSOR_PENTIUM, 0},
1106      {"pentium", PROCESSOR_PENTIUM, 0},
1107      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1108      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1109      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1110      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1111      {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1112      {"i686", PROCESSOR_PENTIUMPRO, 0},
1113      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1114      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1115      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1116      {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1117      {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1118      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1119				       | PTA_MMX | PTA_PREFETCH_SSE},
1120      {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1121				        | PTA_MMX | PTA_PREFETCH_SSE},
1122      {"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
1123				        | PTA_MMX | PTA_PREFETCH_SSE},
1124      {"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1125				     | PTA_MMX | PTA_PREFETCH_SSE},
1126      {"k6", PROCESSOR_K6, PTA_MMX},
1127      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1128      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1129      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1130				   | PTA_3DNOW_A},
1131      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1132					 | PTA_3DNOW | PTA_3DNOW_A},
1133      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1134				    | PTA_3DNOW_A | PTA_SSE},
1135      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1136				      | PTA_3DNOW_A | PTA_SSE},
1137      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1138				      | PTA_3DNOW_A | PTA_SSE},
1139      {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1140			       | PTA_SSE | PTA_SSE2 },
1141      {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1142				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1143      {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1144				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1145      {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1146				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1147      {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1148				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1149    };
1150
1151  int const pta_size = ARRAY_SIZE (processor_alias_table);
1152
1153  /* Set the default values for switches whose default depends on TARGET_64BIT
1154     in case they weren't overridden by command-line options.  */
1155  if (TARGET_64BIT)
1156    {
1157      if (flag_omit_frame_pointer == 2)
1158	flag_omit_frame_pointer = 1;
1159      if (flag_asynchronous_unwind_tables == 2)
1160	flag_asynchronous_unwind_tables = 1;
1161      if (flag_pcc_struct_return == 2)
1162	flag_pcc_struct_return = 0;
1163    }
1164  else
1165    {
1166      if (flag_omit_frame_pointer == 2)
1167	flag_omit_frame_pointer = 0;
1168      if (flag_asynchronous_unwind_tables == 2)
1169	flag_asynchronous_unwind_tables = 0;
1170      if (flag_pcc_struct_return == 2)
1171	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1172    }
1173
1174#ifdef SUBTARGET_OVERRIDE_OPTIONS
1175  SUBTARGET_OVERRIDE_OPTIONS;
1176#endif
1177
1178  if (!ix86_tune_string && ix86_arch_string)
1179    ix86_tune_string = ix86_arch_string;
1180  if (!ix86_tune_string)
1181    ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1182  if (!ix86_arch_string)
1183    ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1184
1185  if (ix86_cmodel_string != 0)
1186    {
1187      if (!strcmp (ix86_cmodel_string, "small"))
1188	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1189      else if (flag_pic)
1190	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1191      else if (!strcmp (ix86_cmodel_string, "32"))
1192	ix86_cmodel = CM_32;
1193      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1194	ix86_cmodel = CM_KERNEL;
1195      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
1196	ix86_cmodel = CM_MEDIUM;
1197      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1198	ix86_cmodel = CM_LARGE;
1199      else
1200	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1201    }
1202  else
1203    {
1204      ix86_cmodel = CM_32;
1205      if (TARGET_64BIT)
1206	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1207    }
1208  if (ix86_asm_string != 0)
1209    {
1210      if (!strcmp (ix86_asm_string, "intel"))
1211	ix86_asm_dialect = ASM_INTEL;
1212      else if (!strcmp (ix86_asm_string, "att"))
1213	ix86_asm_dialect = ASM_ATT;
1214      else
1215	error ("bad value (%s) for -masm= switch", ix86_asm_string);
1216    }
1217  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1218    error ("code model `%s' not supported in the %s bit mode",
1219	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1220  if (ix86_cmodel == CM_LARGE)
1221    sorry ("code model `large' not supported yet");
1222  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1223    sorry ("%i-bit mode not compiled in",
1224	   (target_flags & MASK_64BIT) ? 64 : 32);
1225
1226  for (i = 0; i < pta_size; i++)
1227    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1228      {
1229	ix86_arch = processor_alias_table[i].processor;
1230	/* Default cpu tuning to the architecture.  */
1231	ix86_tune = ix86_arch;
1232	if (processor_alias_table[i].flags & PTA_MMX
1233	    && !(target_flags_explicit & MASK_MMX))
1234	  target_flags |= MASK_MMX;
1235	if (processor_alias_table[i].flags & PTA_3DNOW
1236	    && !(target_flags_explicit & MASK_3DNOW))
1237	  target_flags |= MASK_3DNOW;
1238	if (processor_alias_table[i].flags & PTA_3DNOW_A
1239	    && !(target_flags_explicit & MASK_3DNOW_A))
1240	  target_flags |= MASK_3DNOW_A;
1241	if (processor_alias_table[i].flags & PTA_SSE
1242	    && !(target_flags_explicit & MASK_SSE))
1243	  target_flags |= MASK_SSE;
1244	if (processor_alias_table[i].flags & PTA_SSE2
1245	    && !(target_flags_explicit & MASK_SSE2))
1246	  target_flags |= MASK_SSE2;
1247	if (processor_alias_table[i].flags & PTA_SSE3
1248	    && !(target_flags_explicit & MASK_SSE3))
1249	  target_flags |= MASK_SSE3;
1250	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1251	  x86_prefetch_sse = true;
1252	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1253	  error ("CPU you selected does not support x86-64 instruction set");
1254	break;
1255      }
1256
1257  if (i == pta_size)
1258    error ("bad value (%s) for -march= switch", ix86_arch_string);
1259
1260  for (i = 0; i < pta_size; i++)
1261    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1262      {
1263	ix86_tune = processor_alias_table[i].processor;
1264	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1265	  error ("CPU you selected does not support x86-64 instruction set");
1266
1267	/* Intel CPUs have always interpreted SSE prefetch instructions as
1268	   NOPs; so, we can enable SSE prefetch instructions even when
1269	   -mtune (rather than -march) points us to a processor that has them.
1270	   However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1271	   higher processors.  */
1272	if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1273	  x86_prefetch_sse = true;
1274	break;
1275      }
1276  if (i == pta_size)
1277    error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1278
1279  if (optimize_size)
1280    ix86_cost = &size_cost;
1281  else
1282    ix86_cost = processor_target_table[ix86_tune].cost;
1283  target_flags |= processor_target_table[ix86_tune].target_enable;
1284  target_flags &= ~processor_target_table[ix86_tune].target_disable;
1285
1286  /* Arrange to set up i386_stack_locals for all functions.  */
1287  init_machine_status = ix86_init_machine_status;
1288
1289  /* Validate -mregparm= value.  */
1290  if (ix86_regparm_string)
1291    {
1292      i = atoi (ix86_regparm_string);
1293      if (i < 0 || i > REGPARM_MAX)
1294	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1295      else
1296	ix86_regparm = i;
1297    }
1298  else
1299   if (TARGET_64BIT)
1300     ix86_regparm = REGPARM_MAX;
1301
1302  /* If the user has provided any of the -malign-* options,
1303     warn and use that value only if -falign-* is not set.
1304     Remove this code in GCC 3.2 or later.  */
1305  if (ix86_align_loops_string)
1306    {
1307      warning ("-malign-loops is obsolete, use -falign-loops");
1308      if (align_loops == 0)
1309	{
1310	  i = atoi (ix86_align_loops_string);
1311	  if (i < 0 || i > MAX_CODE_ALIGN)
1312	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1313	  else
1314	    align_loops = 1 << i;
1315	}
1316    }
1317
1318  if (ix86_align_jumps_string)
1319    {
1320      warning ("-malign-jumps is obsolete, use -falign-jumps");
1321      if (align_jumps == 0)
1322	{
1323	  i = atoi (ix86_align_jumps_string);
1324	  if (i < 0 || i > MAX_CODE_ALIGN)
1325	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1326	  else
1327	    align_jumps = 1 << i;
1328	}
1329    }
1330
1331  if (ix86_align_funcs_string)
1332    {
1333      warning ("-malign-functions is obsolete, use -falign-functions");
1334      if (align_functions == 0)
1335	{
1336	  i = atoi (ix86_align_funcs_string);
1337	  if (i < 0 || i > MAX_CODE_ALIGN)
1338	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1339	  else
1340	    align_functions = 1 << i;
1341	}
1342    }
1343
1344  /* Default align_* from the processor table.  */
1345  if (align_loops == 0)
1346    {
1347      align_loops = processor_target_table[ix86_tune].align_loop;
1348      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
1349    }
1350  if (align_jumps == 0)
1351    {
1352      align_jumps = processor_target_table[ix86_tune].align_jump;
1353      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
1354    }
1355  if (align_functions == 0)
1356    {
1357      align_functions = processor_target_table[ix86_tune].align_func;
1358    }
1359
1360  /* Validate -mpreferred-stack-boundary= value, or provide default.
1361     The default of 128 bits is for Pentium III's SSE __m128, but we
1362     don't want additional code to keep the stack aligned when
1363     optimizing for code size.  */
1364  ix86_preferred_stack_boundary = (optimize_size
1365				   ? TARGET_64BIT ? 128 : 32
1366				   : 128);
1367  if (ix86_preferred_stack_boundary_string)
1368    {
1369      i = atoi (ix86_preferred_stack_boundary_string);
1370      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
1371	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
1372	       TARGET_64BIT ? 4 : 2);
1373      else
1374	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
1375    }
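  /* Illustrative note (not part of the original sources):
     -mpreferred-stack-boundary=4 therefore yields
     (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. 16-byte stack alignment.  */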
1376
1377  /* Validate -mbranch-cost= value, or provide default.  */
1378  ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
1379  if (ix86_branch_cost_string)
1380    {
1381      i = atoi (ix86_branch_cost_string);
1382      if (i < 0 || i > 5)
1383	error ("-mbranch-cost=%d is not between 0 and 5", i);
1384      else
1385	ix86_branch_cost = i;
1386    }
1387
1388  if (ix86_tls_dialect_string)
1389    {
1390      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
1391	ix86_tls_dialect = TLS_DIALECT_GNU;
1392      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
1393	ix86_tls_dialect = TLS_DIALECT_SUN;
1394      else
1395	error ("bad value (%s) for -mtls-dialect= switch",
1396	       ix86_tls_dialect_string);
1397    }
1398
1399  /* Keep nonleaf frame pointers.  */
1400  if (TARGET_OMIT_LEAF_FRAME_POINTER)
1401    flag_omit_frame_pointer = 1;
1402
1403  /* If we're doing fast math, we don't care about comparison order
1404     wrt NaNs.  This lets us use a shorter comparison sequence.  */
1405  if (flag_unsafe_math_optimizations)
1406    target_flags &= ~MASK_IEEE_FP;
1407
1408  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
1409     since the insns won't need emulation.  */
1410  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
1411    target_flags &= ~MASK_NO_FANCY_MATH_387;
1412
1413  /* Turn on SSE2 builtins for -msse3.  */
1414  if (TARGET_SSE3)
1415    target_flags |= MASK_SSE2;
1416
1417  /* Turn on SSE builtins for -msse2.  */
1418  if (TARGET_SSE2)
1419    target_flags |= MASK_SSE;
1420
1421  if (TARGET_64BIT)
1422    {
1423      if (TARGET_ALIGN_DOUBLE)
1424	error ("-malign-double makes no sense in 64-bit mode");
1425      if (TARGET_RTD)
1426	error ("-mrtd calling convention not supported in 64-bit mode");
1427      /* Enable by default the SSE and MMX builtins.  */
1428      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
1429      ix86_fpmath = FPMATH_SSE;
1430     }
1431  else
1432    {
1433      ix86_fpmath = FPMATH_387;
1434      /* The i386 ABI does not specify a red zone.  It still makes sense to use
1435         one when the programmer takes care to keep the stack from being destroyed.  */
1436      if (!(target_flags_explicit & MASK_NO_RED_ZONE))
1437        target_flags |= MASK_NO_RED_ZONE;
1438    }
1439
1440  if (ix86_fpmath_string != 0)
1441    {
1442      if (! strcmp (ix86_fpmath_string, "387"))
1443	ix86_fpmath = FPMATH_387;
1444      else if (! strcmp (ix86_fpmath_string, "sse"))
1445	{
1446	  if (!TARGET_SSE)
1447	    {
1448	      warning ("SSE instruction set disabled, using 387 arithmetics");
1449	      ix86_fpmath = FPMATH_387;
1450	    }
1451	  else
1452	    ix86_fpmath = FPMATH_SSE;
1453	}
1454      else if (! strcmp (ix86_fpmath_string, "387,sse")
1455	       || ! strcmp (ix86_fpmath_string, "sse,387"))
1456	{
1457	  if (!TARGET_SSE)
1458	    {
1459	      warning ("SSE instruction set disabled, using 387 arithmetics");
1460	      ix86_fpmath = FPMATH_387;
1461	    }
1462	  else if (!TARGET_80387)
1463	    {
1464	      warning ("387 instruction set disabled, using SSE arithmetics");
1465	      ix86_fpmath = FPMATH_SSE;
1466	    }
1467	  else
1468	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
1469	}
1470      else
1471	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
1472    }
1473
1474  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
1475     on by -msse.  */
1476  if (TARGET_SSE)
1477    {
1478      target_flags |= MASK_MMX;
1479      x86_prefetch_sse = true;
1480    }
1481
1482  /* If it has 3DNow!, it also has MMX, so MMX is also turned on by -m3dnow.  */
1483  if (TARGET_3DNOW)
1484    {
1485      target_flags |= MASK_MMX;
1486      /* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
1487	 extensions it adds.  */
1488      if (x86_3dnow_a & (1 << ix86_arch))
1489	target_flags |= MASK_3DNOW_A;
1490    }
1491  if ((x86_accumulate_outgoing_args & TUNEMASK)
1492      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
1493      && !optimize_size)
1494    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
1495
1496  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
1497  {
1498    char *p;
1499    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
1500    p = strchr (internal_label_prefix, 'X');
1501    internal_label_prefix_len = p - internal_label_prefix;
1502    *p = '\0';
1503  }
1504}
1505
1506void
1507optimization_options (int level, int size ATTRIBUTE_UNUSED)
1508{
1509  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
1510     make the problem with not enough registers even worse.  */
1511#ifdef INSN_SCHEDULING
1512  if (level > 1)
1513    flag_schedule_insns = 0;
1514#endif
1515
1516  /* The default values of these switches depend on TARGET_64BIT,
1517     which is not known at this moment.  Mark these values with 2 and
1518     let the user override them.  If there is no command line option
1519     specifying them, we will set the defaults in override_options.  */
1520  if (optimize >= 1)
1521    flag_omit_frame_pointer = 2;
1522  flag_pcc_struct_return = 2;
1523  flag_asynchronous_unwind_tables = 2;
1524}
1525
1526/* Table of valid machine attributes.  */
1527const struct attribute_spec ix86_attribute_table[] =
1528{
1529  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
1530  /* Stdcall attribute says callee is responsible for popping arguments
1531     if they are not variable.  */
1532  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1533  /* Fastcall attribute says callee pops non-variable arguments, and that the
1534     first two integer (DWORD or smaller) arguments are passed in ECX and EDX.  */
1535  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1536  /* Cdecl attribute says the callee is a normal C declaration */
1537  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cdecl_attribute },
1538  /* Regparm attribute specifies how many integer arguments are to be
1539     passed in registers.  */
1540  { "regparm",   1, 1, false, true,  true,  ix86_handle_regparm_attribute },
1541#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
1542  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1543  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
1544  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
1545#endif
1546  { "ms_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1547  { "gcc_struct", 0, 0, false, false,  false, ix86_handle_struct_attribute },
1548  { NULL,        0, 0, false, false, false, NULL }
1549};
1550
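/* Illustration only (this comment is not used by the compiler): the
   calling-convention attributes above are written on function declarations,
   e.g.

       int __attribute__ ((stdcall))    f (int a, int b);   callee pops 8 bytes
       int __attribute__ ((fastcall))   g (int a, int b);   a in ECX, b in EDX, callee pops
       int __attribute__ ((regparm(3))) h (int, int, int);  up to 3 args in EAX/EDX/ECX, caller pops

   As the handlers below enforce, fastcall cannot be combined with stdcall or
   regparm, and on TARGET_64BIT the stdcall/fastcall/cdecl forms are ignored
   with a warning.  */
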
1551/* Decide whether we can make a sibling call to a function.  DECL is the
1552   declaration of the function being targeted by the call and EXP is the
1553   CALL_EXPR representing the call.  */
1554
1555static bool
1556ix86_function_ok_for_sibcall (tree decl, tree exp)
1557{
1558  /* If we are generating position-independent code, we cannot sibcall
1559     optimize any indirect call, or a direct call to a global function,
1560     as the PLT requires %ebx be live.  */
1561  if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
1562    return false;
1563
1564  /* If we are returning floats on the 80387 register stack, we cannot
1565     make a sibcall from a function that doesn't return a float to a
1566     function that does or, conversely, from a function that does return
1567     a float to a function that doesn't; the necessary stack adjustment
1568     would not be executed.  */
1569  if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
1570      != STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
1571    return false;
1572
1573  /* If this call is indirect, we'll need to be able to use a call-clobbered
1574     register for the address of the target function.  Make sure that all
1575     such registers are not used for passing parameters.  */
1576  if (!decl && !TARGET_64BIT)
1577    {
1578      tree type;
1579
1580      /* We're looking at the CALL_EXPR, we need the type of the function.  */
1581      type = TREE_OPERAND (exp, 0);		/* pointer expression */
1582      type = TREE_TYPE (type);			/* pointer type */
1583      type = TREE_TYPE (type);			/* function type */
1584
1585      if (ix86_function_regparm (type, NULL) >= 3)
1586	{
1587	  /* ??? Need to count the actual number of registers to be used,
1588	     not the possible number of registers.  Fix later.  */
1589	  return false;
1590	}
1591    }
1592
1593  /* Otherwise okay.  That also includes certain types of indirect calls.  */
1594  return true;
1595}
1596
1597/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
1598   arguments as in struct attribute_spec.handler.  */
1599static tree
1600ix86_handle_cdecl_attribute (tree *node, tree name,
1601			     tree args ATTRIBUTE_UNUSED,
1602			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1603{
1604  if (TREE_CODE (*node) != FUNCTION_TYPE
1605      && TREE_CODE (*node) != METHOD_TYPE
1606      && TREE_CODE (*node) != FIELD_DECL
1607      && TREE_CODE (*node) != TYPE_DECL)
1608    {
1609      warning ("`%s' attribute only applies to functions",
1610	       IDENTIFIER_POINTER (name));
1611      *no_add_attrs = true;
1612    }
1613  else
1614    {
1615      if (is_attribute_p ("fastcall", name))
1616        {
1617          if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
1618            {
1619              error ("fastcall and stdcall attributes are not compatible");
1620            }
1621           else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
1622            {
1623              error ("fastcall and regparm attributes are not compatible");
1624            }
1625        }
1626      else if (is_attribute_p ("stdcall", name))
1627        {
1628          if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1629            {
1630              error ("fastcall and stdcall attributes are not compatible");
1631            }
1632        }
1633    }
1634
1635  if (TARGET_64BIT)
1636    {
1637      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
1638      *no_add_attrs = true;
1639    }
1640
1641  return NULL_TREE;
1642}
1643
1644/* Handle a "regparm" attribute;
1645   arguments as in struct attribute_spec.handler.  */
1646static tree
1647ix86_handle_regparm_attribute (tree *node, tree name, tree args,
1648			       int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1649{
1650  if (TREE_CODE (*node) != FUNCTION_TYPE
1651      && TREE_CODE (*node) != METHOD_TYPE
1652      && TREE_CODE (*node) != FIELD_DECL
1653      && TREE_CODE (*node) != TYPE_DECL)
1654    {
1655      warning ("`%s' attribute only applies to functions",
1656	       IDENTIFIER_POINTER (name));
1657      *no_add_attrs = true;
1658    }
1659  else
1660    {
1661      tree cst;
1662
1663      cst = TREE_VALUE (args);
1664      if (TREE_CODE (cst) != INTEGER_CST)
1665	{
1666	  warning ("`%s' attribute requires an integer constant argument",
1667		   IDENTIFIER_POINTER (name));
1668	  *no_add_attrs = true;
1669	}
1670      else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1671	{
1672	  warning ("argument to `%s' attribute larger than %d",
1673		   IDENTIFIER_POINTER (name), REGPARM_MAX);
1674	  *no_add_attrs = true;
1675	}
1676
1677      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
1678	{
1679	  error ("fastcall and regparm attributes are not compatible");
1680	}
1681    }
1682
1683  return NULL_TREE;
1684}
1685
1686/* Return 0 if the attributes for two types are incompatible, 1 if they
1687   are compatible, and 2 if they are nearly compatible (which causes a
1688   warning to be generated).  */
1689
1690static int
1691ix86_comp_type_attributes (tree type1, tree type2)
1692{
1693  /* Check for mismatch of non-default calling convention.  */
1694  const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
1695
1696  if (TREE_CODE (type1) != FUNCTION_TYPE)
1697    return 1;
1698
1699  /*  Check for mismatched fastcall types */
1700  if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
1701      != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
1702    return 0;
1703
1704  /* Check for mismatched return types (cdecl vs stdcall).  */
1705  if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1706      != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
1707    return 0;
1708  if (ix86_function_regparm (type1, NULL)
1709      != ix86_function_regparm (type2, NULL))
1710    return 0;
1711  return 1;
1712}
1713
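
/* For instance (illustration only), source such as

       void __attribute__ ((stdcall)) f (int);
       void (*p) (int) = f;

   gets a diagnostic about incompatible pointer types, because the stdcall
   and default (cdecl) function types compare unequal here; the same applies
   to a fastcall/non-fastcall mismatch or to differing regparm values.  */
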
1714/* Return the regparm value for a function with the indicated TYPE and DECL.
1715   DECL may be NULL when calling the function indirectly
1716   or considering a libcall.  */
1717
1718static int
1719ix86_function_regparm (tree type, tree decl)
1720{
1721  tree attr;
1722  int regparm = ix86_regparm;
1723  bool user_convention = false;
1724
1725  if (!TARGET_64BIT)
1726    {
1727      attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
1728      if (attr)
1729	{
1730	  regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1731	  user_convention = true;
1732	}
1733
1734      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
1735	{
1736	  regparm = 2;
1737	  user_convention = true;
1738	}
1739
1740      /* Use register calling convention for local functions when possible.  */
1741      if (!TARGET_64BIT && !user_convention && decl
1742	  && flag_unit_at_a_time && !profile_flag)
1743	{
1744	  struct cgraph_local_info *i = cgraph_local_info (decl);
1745	  if (i && i->local)
1746	    {
1747	      /* We can't use regparm(3) for nested functions as these use the
1748		 static chain pointer in the third argument register.  */
1749	      if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
1750		regparm = 2;
1751	      else
1752		regparm = 3;
1753	    }
1754	}
1755    }
1756  return regparm;
1757}
1758
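/* Worked example (informational only): with the default -mregparm setting a
   plain function gets regparm 0 and all arguments are pushed on the stack.
   __attribute__ ((regparm(3))) yields regparm 3 (EAX, EDX, ECX on IA-32) and
   __attribute__ ((fastcall)) forces regparm 2 (ECX, EDX).  With
   -funit-at-a-time, a static function that is used only within its unit and
   whose address is never taken may be promoted to regparm 3 here without any
   attribute (or to 2 when it is nested and needs the static chain).  */
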
1759/* Return true if EAX is live at the start of the function.  Used by
1760   ix86_expand_prologue to determine if we need special help before
1761   calling allocate_stack_worker.  */
1762
1763static bool
1764ix86_eax_live_at_start_p (void)
1765{
1766  /* Cheat.  Don't bother working forward from ix86_function_regparm
1767     to the function type to whether an actual argument is located in
1768     eax.  Instead just look at cfg info, which is still close enough
1769     to correct at this point.  This gives false positives for broken
1770     functions that might use uninitialized data that happens to be
1771     allocated in eax, but who cares?  */
1772  return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
1773}
1774
1775/* Value is the number of bytes of arguments automatically
1776   popped when returning from a subroutine call.
1777   FUNDECL is the declaration node of the function (as a tree),
1778   FUNTYPE is the data type of the function (as a tree),
1779   or for a library call it is an identifier node for the subroutine name.
1780   SIZE is the number of bytes of arguments passed on the stack.
1781
1782   On the 80386, the RTD insn may be used to pop them if the number
1783     of args is fixed, but if the number is variable then the caller
1784     must pop them all.  RTD can't be used for library calls now
1785     because the library is compiled with the Unix compiler.
1786   Use of RTD is a selectable option, since it is incompatible with
1787   standard Unix calling sequences.  If the option is not selected,
1788   the caller must always pop the args.
1789
1790   The attribute stdcall is equivalent to RTD on a per module basis.  */
1791
1792int
1793ix86_return_pops_args (tree fundecl, tree funtype, int size)
1794{
1795  int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
1796
1797  /* Cdecl functions override -mrtd, and never pop the stack.  */
1798  if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
1799
1800    /* Stdcall and fastcall functions will pop the stack if not
1801       variable args.  */
1802    if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
1803        || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
1804      rtd = 1;
1805
1806    if (rtd
1807        && (TYPE_ARG_TYPES (funtype) == NULL_TREE
1808	    || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1809		== void_type_node)))
1810      return size;
1811  }
1812
1813  /* Lose any fake structure return argument if it is passed on the stack.  */
1814  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
1815      && !TARGET_64BIT)
1816    {
1817      int nregs = ix86_function_regparm (funtype, fundecl);
1818
1819      if (!nregs)
1820	return GET_MODE_SIZE (Pmode);
1821    }
1822
1823  return 0;
1824}
1825
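/* Illustrative example (not compiled): for

       void __attribute__ ((stdcall)) f (int a, int b);

   ix86_return_pops_args returns 8, so f returns with `ret $8' and callers do
   not adjust the stack.  A varargs stdcall function, or a cdecl function,
   yields 0 and the caller pops.  The final case covers the hidden
   aggregate-return pointer on IA-32: when a structure is returned in memory
   and no argument registers are used, the callee pops that single pointer
   (GET_MODE_SIZE (Pmode), i.e. 4 bytes).  */
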
1826/* Argument support functions.  */
1827
1828/* Return true when register may be used to pass function parameters.  */
1829bool
1830ix86_function_arg_regno_p (int regno)
1831{
1832  int i;
1833  if (!TARGET_64BIT)
1834    return (regno < REGPARM_MAX
1835	    || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
1836  if (SSE_REGNO_P (regno) && TARGET_SSE)
1837    return true;
1838  /* RAX is used as hidden argument to va_arg functions.  */
1839  if (!regno)
1840    return true;
1841  for (i = 0; i < REGPARM_MAX; i++)
1842    if (regno == x86_64_int_parameter_registers[i])
1843      return true;
1844  return false;
1845}
1846
1847/* Initialize a variable CUM of type CUMULATIVE_ARGS
1848   for a call to a function whose data type is FNTYPE.
1849   For a library call, FNTYPE is 0.  */
1850
1851void
1852init_cumulative_args (CUMULATIVE_ARGS *cum,  /* Argument info to initialize */
1853		      tree fntype,	/* tree ptr for function decl */
1854		      rtx libname,	/* SYMBOL_REF of library name or 0 */
1855		      tree fndecl)
1856{
1857  static CUMULATIVE_ARGS zero_cum;
1858  tree param, next_param;
1859
1860  if (TARGET_DEBUG_ARG)
1861    {
1862      fprintf (stderr, "\ninit_cumulative_args (");
1863      if (fntype)
1864	fprintf (stderr, "fntype code = %s, ret code = %s",
1865		 tree_code_name[(int) TREE_CODE (fntype)],
1866		 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
1867      else
1868	fprintf (stderr, "no fntype");
1869
1870      if (libname)
1871	fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1872    }
1873
1874  *cum = zero_cum;
1875
1876  /* Set up the number of registers to use for passing arguments.  */
1877  if (fntype)
1878    cum->nregs = ix86_function_regparm (fntype, fndecl);
1879  else
1880    cum->nregs = ix86_regparm;
1881  cum->sse_nregs = SSE_REGPARM_MAX;
1882  cum->mmx_nregs = MMX_REGPARM_MAX;
1883  cum->warn_sse = true;
1884  cum->warn_mmx = true;
1885  cum->maybe_vaarg = false;
1886
1887  /* Use ecx and edx registers if function has fastcall attribute */
1888  if (fntype && !TARGET_64BIT)
1889    {
1890      if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
1891	{
1892	  cum->nregs = 2;
1893	  cum->fastcall = 1;
1894	}
1895    }
1896
1897
1898  /* Determine if this function has variable arguments.  This is
1899     indicated by the last argument being 'void_type_node' if there
1900     are no variable arguments.  If there are variable arguments, then
1901     we won't pass anything in registers.  */
1902
1903  if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
1904    {
1905      for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
1906	   param != 0; param = next_param)
1907	{
1908	  next_param = TREE_CHAIN (param);
1909	  if (next_param == 0 && TREE_VALUE (param) != void_type_node)
1910	    {
1911	      if (!TARGET_64BIT)
1912		{
1913		  cum->nregs = 0;
1914		  cum->sse_nregs = 0;
1915		  cum->mmx_nregs = 0;
1916		  cum->warn_sse = 0;
1917		  cum->warn_mmx = 0;
1918		  cum->fastcall = 0;
1919		}
1920	      cum->maybe_vaarg = true;
1921	    }
1922	}
1923    }
1924  if ((!fntype && !libname)
1925      || (fntype && !TYPE_ARG_TYPES (fntype)))
1926    cum->maybe_vaarg = 1;
1927
1928  if (TARGET_DEBUG_ARG)
1929    fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1930
1931  return;
1932}
1933
1934/* x86-64 register passing implementation.  See the x86-64 ABI for details.
1935   The goal of this code is to classify each 8-byte chunk of an incoming
1936   argument by register class and assign registers accordingly.  */
1937
1938/* Return the union class of CLASS1 and CLASS2.
1939   See the x86-64 PS ABI for details.  */
1940
1941static enum x86_64_reg_class
1942merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
1943{
1944  /* Rule #1: If both classes are equal, this is the resulting class.  */
1945  if (class1 == class2)
1946    return class1;
1947
1948  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1949     the other class.  */
1950  if (class1 == X86_64_NO_CLASS)
1951    return class2;
1952  if (class2 == X86_64_NO_CLASS)
1953    return class1;
1954
1955  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
1956  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1957    return X86_64_MEMORY_CLASS;
1958
1959  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
1960  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1961      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1962    return X86_64_INTEGERSI_CLASS;
1963  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1964      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1965    return X86_64_INTEGER_CLASS;
1966
1967  /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used.  */
1968  if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1969      || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1970    return X86_64_MEMORY_CLASS;
1971
1972  /* Rule #6: Otherwise class SSE is used.  */
1973  return X86_64_SSE_CLASS;
1974}
1975
1976/* Classify the argument of type TYPE and mode MODE.
1977   CLASSES will be filled by the register class used to pass each word
1978   of the operand.  The number of words is returned.  In case the parameter
1979   should be passed in memory, 0 is returned. As a special case for zero
1980   sized containers, classes[0] will be NO_CLASS and 1 is returned.
1981
1982   BIT_OFFSET is used internally for handling records; it specifies the
1983   offset, in bits modulo 256, to avoid overflow cases.
1984
1985   See the x86-64 PS ABI for details.
1986*/
1987
1988static int
1989classify_argument (enum machine_mode mode, tree type,
1990		   enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
1991{
1992  HOST_WIDE_INT bytes =
1993    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1994  int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
1995
1996  /* Variable sized entities are always passed/returned in memory.  */
1997  if (bytes < 0)
1998    return 0;
1999
2000  if (mode != VOIDmode
2001      && MUST_PASS_IN_STACK (mode, type))
2002    return 0;
2003
2004  if (type && AGGREGATE_TYPE_P (type))
2005    {
2006      int i;
2007      tree field;
2008      enum x86_64_reg_class subclasses[MAX_CLASSES];
2009
2010      /* On x86-64 we pass structures larger than 16 bytes on the stack.  */
2011      if (bytes > 16)
2012	return 0;
2013
2014      for (i = 0; i < words; i++)
2015	classes[i] = X86_64_NO_CLASS;
2016
2017      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
2018	 signal the memory class, so handle this as a special case.  */
2019      if (!words)
2020	{
2021	  classes[0] = X86_64_NO_CLASS;
2022	  return 1;
2023	}
2024
2025      /* Classify each field of record and merge classes.  */
2026      if (TREE_CODE (type) == RECORD_TYPE)
2027	{
2028	  /* For classes first merge in the field of the subclasses.  */
2029	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2030	    {
2031	      tree bases = TYPE_BINFO_BASETYPES (type);
2032	      int n_bases = TREE_VEC_LENGTH (bases);
2033	      int i;
2034
2035	      for (i = 0; i < n_bases; ++i)
2036		{
2037		   tree binfo = TREE_VEC_ELT (bases, i);
2038		   int num;
2039		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2040		   tree type = BINFO_TYPE (binfo);
2041
2042		   num = classify_argument (TYPE_MODE (type),
2043					    type, subclasses,
2044					    (offset + bit_offset) % 256);
2045		   if (!num)
2046		     return 0;
2047		   for (i = 0; i < num; i++)
2048		     {
2049		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2050		       classes[i + pos] =
2051			 merge_classes (subclasses[i], classes[i + pos]);
2052		     }
2053		}
2054	    }
2055	  /* And now merge the fields of structure.  */
2056	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2057	    {
2058	      if (TREE_CODE (field) == FIELD_DECL)
2059		{
2060		  int num;
2061
2062		  /* Bitfields are always classified as integer.  Handle them
2063		     early, since later code would consider them to be
2064		     misaligned integers.  */
2065		  if (DECL_BIT_FIELD (field))
2066		    {
2067		      for (i = int_bit_position (field) / 8 / 8;
2068			   i < (int_bit_position (field)
2069			        + tree_low_cst (DECL_SIZE (field), 0)
2070				+ 63) / 8 / 8; i++)
2071			classes[i] =
2072			  merge_classes (X86_64_INTEGER_CLASS,
2073					 classes[i]);
2074		    }
2075		  else
2076		    {
2077		      num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2078					       TREE_TYPE (field), subclasses,
2079					       (int_bit_position (field)
2080						+ bit_offset) % 256);
2081		      if (!num)
2082			return 0;
2083		      for (i = 0; i < num; i++)
2084			{
2085			  int pos =
2086			    (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
2087			  classes[i + pos] =
2088			    merge_classes (subclasses[i], classes[i + pos]);
2089			}
2090		    }
2091		}
2092	    }
2093	}
2094      /* Arrays are handled as small records.  */
2095      else if (TREE_CODE (type) == ARRAY_TYPE)
2096	{
2097	  int num;
2098	  num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
2099				   TREE_TYPE (type), subclasses, bit_offset);
2100	  if (!num)
2101	    return 0;
2102
2103	  /* The partial classes are now full classes.  */
2104	  if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
2105	    subclasses[0] = X86_64_SSE_CLASS;
2106	  if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
2107	    subclasses[0] = X86_64_INTEGER_CLASS;
2108
2109	  for (i = 0; i < words; i++)
2110	    classes[i] = subclasses[i % num];
2111	}
2112      /* Unions are similar to RECORD_TYPE but offset is always 0.  */
2113      else if (TREE_CODE (type) == UNION_TYPE
2114	       || TREE_CODE (type) == QUAL_UNION_TYPE)
2115	{
2116	  /* For classes first merge in the field of the subclasses.  */
2117	  if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
2118	    {
2119	      tree bases = TYPE_BINFO_BASETYPES (type);
2120	      int n_bases = TREE_VEC_LENGTH (bases);
2121	      int i;
2122
2123	      for (i = 0; i < n_bases; ++i)
2124		{
2125		   tree binfo = TREE_VEC_ELT (bases, i);
2126		   int num;
2127		   int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
2128		   tree type = BINFO_TYPE (binfo);
2129
2130		   num = classify_argument (TYPE_MODE (type),
2131					    type, subclasses,
2132					    (offset + (bit_offset % 64)) % 256);
2133		   if (!num)
2134		     return 0;
2135		   for (i = 0; i < num; i++)
2136		     {
2137		       int pos = (offset + (bit_offset % 64)) / 8 / 8;
2138		       classes[i + pos] =
2139			 merge_classes (subclasses[i], classes[i + pos]);
2140		     }
2141		}
2142	    }
2143	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2144	    {
2145	      if (TREE_CODE (field) == FIELD_DECL)
2146		{
2147		  int num;
2148		  num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
2149					   TREE_TYPE (field), subclasses,
2150					   bit_offset);
2151		  if (!num)
2152		    return 0;
2153		  for (i = 0; i < num; i++)
2154		    classes[i] = merge_classes (subclasses[i], classes[i]);
2155		}
2156	    }
2157	}
2158      else if (TREE_CODE (type) == SET_TYPE)
2159	{
2160	  if (bytes <= 4)
2161	    {
2162	      classes[0] = X86_64_INTEGERSI_CLASS;
2163	      return 1;
2164	    }
2165	  else if (bytes <= 8)
2166	    {
2167	      classes[0] = X86_64_INTEGER_CLASS;
2168	      return 1;
2169	    }
2170	  else if (bytes <= 12)
2171	    {
2172	      classes[0] = X86_64_INTEGER_CLASS;
2173	      classes[1] = X86_64_INTEGERSI_CLASS;
2174	      return 2;
2175	    }
2176	  else
2177	    {
2178	      classes[0] = X86_64_INTEGER_CLASS;
2179	      classes[1] = X86_64_INTEGER_CLASS;
2180	      return 2;
2181	    }
2182	}
2183      else
2184	abort ();
2185
2186      /* Final merger cleanup.  */
2187      for (i = 0; i < words; i++)
2188	{
2189	  /* If one class is MEMORY, everything should be passed in
2190	     memory.  */
2191	  if (classes[i] == X86_64_MEMORY_CLASS)
2192	    return 0;
2193
2194	  /* The X86_64_SSEUP_CLASS should be always preceded by
2195	     X86_64_SSE_CLASS.  */
2196	  if (classes[i] == X86_64_SSEUP_CLASS
2197	      && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
2198	    classes[i] = X86_64_SSE_CLASS;
2199
2200	  /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
2201	  if (classes[i] == X86_64_X87UP_CLASS
2202	      && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
2203	    classes[i] = X86_64_SSE_CLASS;
2204	}
2205      return words;
2206    }
2207
2208  /* Compute alignment needed.  We align all types to natural boundaries with
2209     exception of XFmode that is aligned to 64bits.  */
2210  if (mode != VOIDmode && mode != BLKmode)
2211    {
2212      int mode_alignment = GET_MODE_BITSIZE (mode);
2213
2214      if (mode == XFmode)
2215	mode_alignment = 128;
2216      else if (mode == XCmode)
2217	mode_alignment = 256;
2218      if (COMPLEX_MODE_P (mode))
2219	mode_alignment /= 2;
2220      /* Misaligned fields are always returned in memory.  */
2221      if (bit_offset % mode_alignment)
2222	return 0;
2223    }
2224
2225  /* Classification of atomic types.  */
2226  switch (mode)
2227    {
2228    case DImode:
2229    case SImode:
2230    case HImode:
2231    case QImode:
2232    case CSImode:
2233    case CHImode:
2234    case CQImode:
2235      if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
2236	classes[0] = X86_64_INTEGERSI_CLASS;
2237      else
2238	classes[0] = X86_64_INTEGER_CLASS;
2239      return 1;
2240    case CDImode:
2241    case TImode:
2242      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2243      return 2;
2244    case CTImode:
2245      classes[0] = classes[1] = X86_64_INTEGER_CLASS;
2246      classes[2] = classes[3] = X86_64_INTEGER_CLASS;
2247      return 4;
2248    case SFmode:
2249      if (!(bit_offset % 64))
2250	classes[0] = X86_64_SSESF_CLASS;
2251      else
2252	classes[0] = X86_64_SSE_CLASS;
2253      return 1;
2254    case DFmode:
2255      classes[0] = X86_64_SSEDF_CLASS;
2256      return 1;
2257    case XFmode:
2258      classes[0] = X86_64_X87_CLASS;
2259      classes[1] = X86_64_X87UP_CLASS;
2260      return 2;
2261    case TFmode:
2262    case TCmode:
2263      return 0;
2264    case XCmode:
2265      classes[0] = X86_64_X87_CLASS;
2266      classes[1] = X86_64_X87UP_CLASS;
2267      classes[2] = X86_64_X87_CLASS;
2268      classes[3] = X86_64_X87UP_CLASS;
2269      return 4;
2270    case DCmode:
2271      classes[0] = X86_64_SSEDF_CLASS;
2272      classes[1] = X86_64_SSEDF_CLASS;
2273      return 2;
2274    case SCmode:
2275      classes[0] = X86_64_SSE_CLASS;
2276      return 1;
2277    case V4SFmode:
2278    case V4SImode:
2279    case V16QImode:
2280    case V8HImode:
2281    case V2DFmode:
2282    case V2DImode:
2283      classes[0] = X86_64_SSE_CLASS;
2284      classes[1] = X86_64_SSEUP_CLASS;
2285      return 2;
2286    case V2SFmode:
2287    case V2SImode:
2288    case V4HImode:
2289    case V8QImode:
2290      return 0;
2291    case BLKmode:
2292    case VOIDmode:
2293      return 0;
2294    default:
2295      abort ();
2296    }
2297}
2298
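/* Some illustrative classifications, assuming the usual x86-64 type sizes
   (informational only; classify_argument returning 0 always means "pass in
   memory"):

       struct { int a; int b; }         8 bytes  ->  { INTEGER }         one GPR
       struct { long a; double b; }    16 bytes  ->  { INTEGER, SSEDF }  GPR + XMM
       struct { float a; float b; }     8 bytes  ->  { SSE }             one XMM
       struct { long a, b, c; }        24 bytes  ->  memory
       long double                     16 bytes  ->  { X87, X87UP }

   A long double is returned in st(0) but, as examine_argument below shows,
   the X87 classes force it into memory when used as an argument.  */
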
2299/* Examine the argument and set the number of registers required in each
2300   class.  Return 0 iff the parameter should be passed in memory.  */
2301static int
2302examine_argument (enum machine_mode mode, tree type, int in_return,
2303		  int *int_nregs, int *sse_nregs)
2304{
2305  enum x86_64_reg_class class[MAX_CLASSES];
2306  int n = classify_argument (mode, type, class, 0);
2307
2308  *int_nregs = 0;
2309  *sse_nregs = 0;
2310  if (!n)
2311    return 0;
2312  for (n--; n >= 0; n--)
2313    switch (class[n])
2314      {
2315      case X86_64_INTEGER_CLASS:
2316      case X86_64_INTEGERSI_CLASS:
2317	(*int_nregs)++;
2318	break;
2319      case X86_64_SSE_CLASS:
2320      case X86_64_SSESF_CLASS:
2321      case X86_64_SSEDF_CLASS:
2322	(*sse_nregs)++;
2323	break;
2324      case X86_64_NO_CLASS:
2325      case X86_64_SSEUP_CLASS:
2326	break;
2327      case X86_64_X87_CLASS:
2328      case X86_64_X87UP_CLASS:
2329	if (!in_return)
2330	  return 0;
2331	break;
2332      case X86_64_MEMORY_CLASS:
2333	abort ();
2334      }
2335  return 1;
2336}
2337/* Construct container for the argument used by GCC interface.  See
2338   FUNCTION_ARG for the detailed description.  */
2339static rtx
2340construct_container (enum machine_mode mode, tree type, int in_return,
2341		     int nintregs, int nsseregs, const int * intreg,
2342		     int sse_regno)
2343{
2344  enum machine_mode tmpmode;
2345  int bytes =
2346    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2347  enum x86_64_reg_class class[MAX_CLASSES];
2348  int n;
2349  int i;
2350  int nexps = 0;
2351  int needed_sseregs, needed_intregs;
2352  rtx exp[MAX_CLASSES];
2353  rtx ret;
2354
2355  n = classify_argument (mode, type, class, 0);
2356  if (TARGET_DEBUG_ARG)
2357    {
2358      if (!n)
2359	fprintf (stderr, "Memory class\n");
2360      else
2361	{
2362	  fprintf (stderr, "Classes:");
2363	  for (i = 0; i < n; i++)
2364	    {
2365	      fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
2366	    }
2367	   fprintf (stderr, "\n");
2368	}
2369    }
2370  if (!n)
2371    return NULL;
2372  if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2373    return NULL;
2374  if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2375    return NULL;
2376
2377  /* First construct simple cases.  Avoid SCmode, since we want to use
2378     single register to pass this type.  */
2379  if (n == 1 && mode != SCmode)
2380    switch (class[0])
2381      {
2382      case X86_64_INTEGER_CLASS:
2383      case X86_64_INTEGERSI_CLASS:
2384	return gen_rtx_REG (mode, intreg[0]);
2385      case X86_64_SSE_CLASS:
2386      case X86_64_SSESF_CLASS:
2387      case X86_64_SSEDF_CLASS:
2388	return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2389      case X86_64_X87_CLASS:
2390	return gen_rtx_REG (mode, FIRST_STACK_REG);
2391      case X86_64_NO_CLASS:
2392	/* Zero sized array, struct or class.  */
2393	return NULL;
2394      default:
2395	abort ();
2396      }
2397  if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
2398      && mode != BLKmode)
2399    return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2400  if (n == 2
2401      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2402    return gen_rtx_REG (XFmode, FIRST_STACK_REG);
2403  if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2404      && class[1] == X86_64_INTEGER_CLASS
2405      && (mode == CDImode || mode == TImode || mode == TFmode)
2406      && intreg[0] + 1 == intreg[1])
2407    return gen_rtx_REG (mode, intreg[0]);
2408  if (n == 4
2409      && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2410      && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
2411      && mode != BLKmode)
2412    return gen_rtx_REG (XCmode, FIRST_STACK_REG);
2413
2414  /* Otherwise figure out the entries of the PARALLEL.  */
2415  for (i = 0; i < n; i++)
2416    {
2417      switch (class[i])
2418        {
2419	  case X86_64_NO_CLASS:
2420	    break;
2421	  case X86_64_INTEGER_CLASS:
2422	  case X86_64_INTEGERSI_CLASS:
2423	    /* Merge TImodes on aligned occasions here too.  */
2424	    if (i * 8 + 8 > bytes)
2425	      tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2426	    else if (class[i] == X86_64_INTEGERSI_CLASS)
2427	      tmpmode = SImode;
2428	    else
2429	      tmpmode = DImode;
2430	    /* We've requested 24 bytes we don't have mode for.  Use DImode.  */
2431	    if (tmpmode == BLKmode)
2432	      tmpmode = DImode;
2433	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2434					       gen_rtx_REG (tmpmode, *intreg),
2435					       GEN_INT (i*8));
2436	    intreg++;
2437	    break;
2438	  case X86_64_SSESF_CLASS:
2439	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2440					       gen_rtx_REG (SFmode,
2441							    SSE_REGNO (sse_regno)),
2442					       GEN_INT (i*8));
2443	    sse_regno++;
2444	    break;
2445	  case X86_64_SSEDF_CLASS:
2446	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2447					       gen_rtx_REG (DFmode,
2448							    SSE_REGNO (sse_regno)),
2449					       GEN_INT (i*8));
2450	    sse_regno++;
2451	    break;
2452	  case X86_64_SSE_CLASS:
2453	    if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
2454	      tmpmode = TImode;
2455	    else
2456	      tmpmode = DImode;
2457	    exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2458					       gen_rtx_REG (tmpmode,
2459							    SSE_REGNO (sse_regno)),
2460					       GEN_INT (i*8));
2461	    if (tmpmode == TImode)
2462	      i++;
2463	    sse_regno++;
2464	    break;
2465	  default:
2466	    abort ();
2467	}
2468    }
2469  ret =  gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2470  for (i = 0; i < nexps; i++)
2471    XVECEXP (ret, 0, i) = exp [i];
2472  return ret;
2473}
2474
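/* Schematically (a sketch only, not an exact RTL dump): for an argument of
   type struct { long a; double b; } passed first, construct_container builds
   roughly

       (parallel [(expr_list (reg:DI rdi)  (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in a general register and the second in
   an SSE register, with the byte offsets recorded in the EXPR_LISTs.  */
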
2475/* Update the data in CUM to advance over an argument
2476   of mode MODE and data type TYPE.
2477   (TYPE is null for libcalls where that information may not be available.)  */
2478
2479void
2480function_arg_advance (CUMULATIVE_ARGS *cum,	/* current arg information */
2481		      enum machine_mode mode,	/* current arg mode */
2482		      tree type,	/* type of the argument or 0 if lib support */
2483		      int named)	/* whether or not the argument was named */
2484{
2485  int bytes =
2486    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2487  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2488
2489  if (TARGET_DEBUG_ARG)
2490    fprintf (stderr,
2491	     "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
2492	     words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
2493  if (TARGET_64BIT)
2494    {
2495      int int_nregs, sse_nregs;
2496      if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2497	cum->words += words;
2498      else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
2499	{
2500	  cum->nregs -= int_nregs;
2501	  cum->sse_nregs -= sse_nregs;
2502	  cum->regno += int_nregs;
2503	  cum->sse_regno += sse_nregs;
2504	}
2505      else
2506	cum->words += words;
2507    }
2508  else
2509    {
2510      if (TARGET_SSE && SSE_REG_MODE_P (mode)
2511	  && (!type || !AGGREGATE_TYPE_P (type)))
2512	{
2513	  cum->sse_words += words;
2514	  cum->sse_nregs -= 1;
2515	  cum->sse_regno += 1;
2516	  if (cum->sse_nregs <= 0)
2517	    {
2518	      cum->sse_nregs = 0;
2519	      cum->sse_regno = 0;
2520	    }
2521	}
2522      else if (TARGET_MMX && MMX_REG_MODE_P (mode)
2523	       && (!type || !AGGREGATE_TYPE_P (type)))
2524	{
2525	  cum->mmx_words += words;
2526	  cum->mmx_nregs -= 1;
2527	  cum->mmx_regno += 1;
2528	  if (cum->mmx_nregs <= 0)
2529	    {
2530	      cum->mmx_nregs = 0;
2531	      cum->mmx_regno = 0;
2532	    }
2533	}
2534      else
2535	{
2536	  cum->words += words;
2537	  cum->nregs -= words;
2538	  cum->regno += words;
2539
2540	  if (cum->nregs <= 0)
2541	    {
2542	      cum->nregs = 0;
2543	      cum->regno = 0;
2544	    }
2545	}
2546    }
2547  return;
2548}
2549
2550/* A subroutine of function_arg.  We want to pass a parameter whose nominal
2551   type is MODE in REGNO.  We try to minimize ABI variation, so MODE may not
2552   actually be valid for REGNO with the current ISA.  In this case, ALT_MODE
2553   is used instead.  It must be the same size as MODE, and must be known to
2554   be valid for REGNO.  Finally, ORIG_MODE is the original mode of the
2555   parameter, as seen by the type system.  This may be different from MODE
2556   when we're mucking with things minimizing ABI variations.
2557
2558   Returns a REG or a PARALLEL as appropriate.  */
2559
2560static rtx
2561gen_reg_or_parallel (enum machine_mode mode, enum machine_mode alt_mode,
2562		     enum machine_mode orig_mode, unsigned int regno)
2563{
2564  rtx tmp;
2565
2566  if (HARD_REGNO_MODE_OK (regno, mode))
2567    tmp = gen_rtx_REG (mode, regno);
2568  else
2569    {
2570      tmp = gen_rtx_REG (alt_mode, regno);
2571      tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
2572      tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
2573    }
2574
2575  return tmp;
2576}
2577
2578/* Define where to put the arguments to a function.
2579   Value is zero to push the argument on the stack,
2580   or a hard register in which to store the argument.
2581
2582   MODE is the argument's machine mode.
2583   TYPE is the data type of the argument (as a tree).
2584    This is null for libcalls where that information may
2585    not be available.
2586   CUM is a variable of type CUMULATIVE_ARGS which gives info about
2587    the preceding args and about the function being called.
2588   NAMED is nonzero if this argument is a named parameter
2589    (otherwise it is an extra parameter matching an ellipsis).  */
2590
2591rtx
2592function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
2593	      tree type, int named)
2594{
2595  enum machine_mode mode = orig_mode;
2596  rtx ret = NULL_RTX;
2597  int bytes =
2598    (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
2599  int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2600  static bool warnedsse, warnedmmx;
2601
2602  /* Handle a hidden AL argument containing number of registers for varargs
2603     x86-64 functions.  For i386 ABI just return constm1_rtx to avoid
2604     any AL settings.  */
2605  if (mode == VOIDmode)
2606    {
2607      if (TARGET_64BIT)
2608	return GEN_INT (cum->maybe_vaarg
2609			? (cum->sse_nregs < 0
2610			   ? SSE_REGPARM_MAX
2611			   : cum->sse_regno)
2612			: -1);
2613      else
2614	return constm1_rtx;
2615    }
2616  if (TARGET_64BIT)
2617    ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2618			       &x86_64_int_parameter_registers [cum->regno],
2619			       cum->sse_regno);
2620  else
2621    switch (mode)
2622      {
2623	/* For now, pass fp/complex values on the stack.  */
2624      default:
2625	break;
2626
2627      case BLKmode:
2628	if (bytes < 0)
2629	  break;
2630	/* FALLTHRU */
2631      case DImode:
2632      case SImode:
2633      case HImode:
2634      case QImode:
2635	if (words <= cum->nregs)
2636	  {
2637	    int regno = cum->regno;
2638
2639	    /* Fastcall allocates the first two DWORD (SImode) or
2640	       smaller arguments to ECX and EDX.  */
2641	    if (cum->fastcall)
2642	      {
2643	        if (mode == BLKmode || mode == DImode)
2644	          break;
2645
2646	        /* ECX not EAX is the first allocated register.  */
2647	        if (regno == 0)
2648		  regno = 2;
2649	      }
2650	    ret = gen_rtx_REG (mode, regno);
2651	  }
2652	break;
2653      case TImode:
2654      case V16QImode:
2655      case V8HImode:
2656      case V4SImode:
2657      case V2DImode:
2658      case V4SFmode:
2659      case V2DFmode:
2660	if (!type || !AGGREGATE_TYPE_P (type))
2661	  {
2662	    if (!TARGET_SSE && !warnedsse && cum->warn_sse)
2663	      {
2664		warnedsse = true;
2665		warning ("SSE vector argument without SSE enabled "
2666			 "changes the ABI");
2667	      }
2668	    if (cum->sse_nregs)
2669	      ret = gen_reg_or_parallel (mode, TImode, orig_mode,
2670					 cum->sse_regno + FIRST_SSE_REG);
2671	  }
2672	break;
2673      case V8QImode:
2674      case V4HImode:
2675      case V2SImode:
2676      case V2SFmode:
2677	if (!type || !AGGREGATE_TYPE_P (type))
2678	  {
2679	    if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
2680	      {
2681		warnedmmx = true;
2682		warning ("MMX vector argument without MMX enabled "
2683			 "changes the ABI");
2684	      }
2685	    if (cum->mmx_nregs)
2686	      ret = gen_reg_or_parallel (mode, DImode, orig_mode,
2687					 cum->mmx_regno + FIRST_MMX_REG);
2688	  }
2689	break;
2690      }
2691
2692  if (TARGET_DEBUG_ARG)
2693    {
2694      fprintf (stderr,
2695	       "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
2696	       words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2697
2698      if (ret)
2699	print_simple_rtl (stderr, ret);
2700      else
2701	fprintf (stderr, ", stack");
2702
2703      fprintf (stderr, " )\n");
2704    }
2705
2706  return ret;
2707}
2708
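/* Example of the hidden AL argument (informational only): for a varargs call
   such as printf ("%f\n", x) on x86-64, the VOIDmode case above makes the
   caller load the number of SSE registers actually used into %al, roughly

       movl    $1, %eax
       call    printf

   When the callee cannot be varargs, or always on IA-32, -1 is returned and
   no AL setup is emitted.  */
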
2709/* A C expression that indicates when an argument must be passed by
2710   reference.  If nonzero for an argument, a copy of that argument is
2711   made in memory and a pointer to the argument is passed instead of
2712   the argument itself.  The pointer is passed in whatever way is
2713   appropriate for passing a pointer to that type.  */
2714
2715int
2716function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2717				enum machine_mode mode ATTRIBUTE_UNUSED,
2718				tree type, int named ATTRIBUTE_UNUSED)
2719{
2720  if (!TARGET_64BIT)
2721    return 0;
2722
2723  if (type && int_size_in_bytes (type) == -1)
2724    {
2725      if (TARGET_DEBUG_ARG)
2726	fprintf (stderr, "function_arg_pass_by_reference\n");
2727      return 1;
2728    }
2729
2730  return 0;
2731}
2732
2733/* Return true when TYPE should be 128-bit aligned for the 32-bit argument
2734   passing ABI.  */
2735static bool
2736contains_128bit_aligned_vector_p (tree type)
2737{
2738  enum machine_mode mode = TYPE_MODE (type);
2739  if (SSE_REG_MODE_P (mode)
2740      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
2741    return true;
2742  if (TYPE_ALIGN (type) < 128)
2743    return false;
2744
2745  if (AGGREGATE_TYPE_P (type))
2746    {
2747      /* Walk the aggregates recursively.  */
2748      if (TREE_CODE (type) == RECORD_TYPE
2749	  || TREE_CODE (type) == UNION_TYPE
2750	  || TREE_CODE (type) == QUAL_UNION_TYPE)
2751	{
2752	  tree field;
2753
2754	  if (TYPE_BINFO (type) != NULL
2755	      && TYPE_BINFO_BASETYPES (type) != NULL)
2756	    {
2757	      tree bases = TYPE_BINFO_BASETYPES (type);
2758	      int n_bases = TREE_VEC_LENGTH (bases);
2759	      int i;
2760
2761	      for (i = 0; i < n_bases; ++i)
2762		{
2763		  tree binfo = TREE_VEC_ELT (bases, i);
2764		  tree type = BINFO_TYPE (binfo);
2765
2766		  if (contains_128bit_aligned_vector_p (type))
2767		    return true;
2768		}
2769	    }
2770	  /* And now check the fields of the structure.  */
2771	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
2772	    {
2773	      if (TREE_CODE (field) == FIELD_DECL
2774		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
2775		return true;
2776	    }
2777	}
2778      /* Just for use if some languages pass arrays by value.  */
2779      else if (TREE_CODE (type) == ARRAY_TYPE)
2780	{
2781	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
2782	    return true;
2783	}
2784      else
2785	abort ();
2786    }
2787  return false;
2788}
2789
2790/* Gives the alignment boundary, in bits, of an argument with the
2791   specified mode and type.  */
2792
2793int
2794ix86_function_arg_boundary (enum machine_mode mode, tree type)
2795{
2796  int align;
2797  if (type)
2798    align = TYPE_ALIGN (type);
2799  else
2800    align = GET_MODE_ALIGNMENT (mode);
2801  if (align < PARM_BOUNDARY)
2802    align = PARM_BOUNDARY;
2803  if (!TARGET_64BIT)
2804    {
2805      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
2806	 make an exception for SSE modes since these require 128bit
2807	 alignment.
2808
2809	 The handling here differs from field_alignment.  ICC aligns MMX
2810	 arguments to 4 byte boundaries, while structure fields are aligned
2811	 to 8 byte boundaries.  */
2812      if (!type)
2813	{
2814	  if (!SSE_REG_MODE_P (mode))
2815	    align = PARM_BOUNDARY;
2816	}
2817      else
2818	{
2819	  if (!contains_128bit_aligned_vector_p (type))
2820	    align = PARM_BOUNDARY;
2821	}
2822    }
2823  if (align > 128)
2824    align = 128;
2825  return align;
2826}
2827
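/* For example (informational only): on IA-32 a `double' argument is pushed
   with only PARM_BOUNDARY (32-bit) alignment, while an `__m128' argument, or
   a structure containing one, is aligned to 128 bits.  On x86-64 every
   argument gets its natural alignment, capped at 128 bits.  */
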
2828/* Return true if N is a possible register number of function value.  */
2829bool
2830ix86_function_value_regno_p (int regno)
2831{
2832  if (!TARGET_64BIT)
2833    {
2834      return ((regno) == 0
2835	      || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2836	      || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2837    }
2838  return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2839	  || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2840	  || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2841}
2842
2843/* Define how to find the value returned by a function.
2844   VALTYPE is the data type of the value (as a tree).
2845   If the precise function being called is known, FUNC is its FUNCTION_DECL;
2846   otherwise, FUNC is 0.  */
2847rtx
2848ix86_function_value (tree valtype)
2849{
2850  if (TARGET_64BIT)
2851    {
2852      rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2853				     REGPARM_MAX, SSE_REGPARM_MAX,
2854				     x86_64_int_return_registers, 0);
2855      /* For zero sized structures, construct_container returns NULL, but we
2856         need to keep the rest of the compiler happy by returning a meaningful value.  */
2857      if (!ret)
2858	ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2859      return ret;
2860    }
2861  else
2862    return gen_rtx_REG (TYPE_MODE (valtype),
2863			ix86_value_regno (TYPE_MODE (valtype)));
2864}
2865
2866/* Return nonzero iff TYPE is returned in memory.  */
2867int
2868ix86_return_in_memory (tree type)
2869{
2870  int needed_intregs, needed_sseregs, size;
2871  enum machine_mode mode = TYPE_MODE (type);
2872
2873  if (TARGET_64BIT)
2874    return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
2875
2876  if (mode == BLKmode)
2877    return 1;
2878
2879  size = int_size_in_bytes (type);
2880
2881  if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
2882    return 0;
2883
2884  if (VECTOR_MODE_P (mode) || mode == TImode)
2885    {
2886      /* User-created vectors small enough to fit in EAX.  */
2887      if (size < 8)
2888	return 0;
2889
2890      /* MMX/3dNow values are returned on the stack, since we've
2891	 got to EMMS/FEMMS before returning.  */
2892      if (size == 8)
2893	return 1;
2894
2895      /* SSE values are returned in XMM0.  */
2896      /* ??? Except when it doesn't exist?  We have a choice of
2897	 either (1) being abi incompatible with a -march switch,
2898	 or (2) generating an error here.  Given no good solution,
2899	 I think the safest thing is one warning.  The user won't
2900	 be able to use -Werror, but....  */
2901      if (size == 16)
2902	{
2903	  static bool warned;
2904
2905	  if (TARGET_SSE)
2906	    return 0;
2907
2908	  if (!warned)
2909	    {
2910	      warned = true;
2911	      warning ("SSE vector return without SSE enabled "
2912		       "changes the ABI");
2913	    }
2914	  return 1;
2915	}
2916    }
2917
2918  if (mode == XFmode)
2919    return 0;
2920
2921  if (size > 12)
2922    return 1;
2923  return 0;
2924}
2925
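/* A few IA-32 examples (informational only): a 16-byte struct such as
   struct { double re, im; } is returned in memory (size > 12); `long double'
   (XFmode) is returned in %st(0); an `__m128' is returned in %xmm0 when SSE
   is enabled; an `__m64' is always returned in memory so that the return
   sequence never needs EMMS.  On x86-64 the decision simply follows the
   register classification above.  */
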
2926/* Define how to find the value returned by a library function
2927   assuming the value has mode MODE.  */
2928rtx
2929ix86_libcall_value (enum machine_mode mode)
2930{
2931  if (TARGET_64BIT)
2932    {
2933      switch (mode)
2934	{
2935	case SFmode:
2936	case SCmode:
2937	case DFmode:
2938	case DCmode:
2939	  return gen_rtx_REG (mode, FIRST_SSE_REG);
2940	case XFmode:
2941	case XCmode:
2942	  return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2943	case TFmode:
2944	case TCmode:
2945	  return NULL;
2946	default:
2947	  return gen_rtx_REG (mode, 0);
2948	}
2949    }
2950  else
2951    return gen_rtx_REG (mode, ix86_value_regno (mode));
2952}
2953
2954/* Given a mode, return the register to use for a return value.  */
2955
2956static int
2957ix86_value_regno (enum machine_mode mode)
2958{
2959  /* Floating point return values in %st(0).  */
2960  if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2961    return FIRST_FLOAT_REG;
2962  /* 16-byte vector modes in %xmm0.  See ix86_return_in_memory for where
2963     we prevent this case when sse is not available.  */
2964  if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
2965    return FIRST_SSE_REG;
2966  /* Everything else in %eax.  */
2967  return 0;
2968}
2969
2970/* Create the va_list data type.  */
2971
2972static tree
2973ix86_build_builtin_va_list (void)
2974{
2975  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
2976
2977  /* For i386 we use plain pointer to argument area.  */
2978  if (!TARGET_64BIT)
2979    return build_pointer_type (char_type_node);
2980
2981  record = (*lang_hooks.types.make_type) (RECORD_TYPE);
2982  type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2983
2984  f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
2985		      unsigned_type_node);
2986  f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
2987		      unsigned_type_node);
2988  f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2989		      ptr_type_node);
2990  f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2991		      ptr_type_node);
2992
2993  DECL_FIELD_CONTEXT (f_gpr) = record;
2994  DECL_FIELD_CONTEXT (f_fpr) = record;
2995  DECL_FIELD_CONTEXT (f_ovf) = record;
2996  DECL_FIELD_CONTEXT (f_sav) = record;
2997
2998  TREE_CHAIN (record) = type_decl;
2999  TYPE_NAME (record) = type_decl;
3000  TYPE_FIELDS (record) = f_gpr;
3001  TREE_CHAIN (f_gpr) = f_fpr;
3002  TREE_CHAIN (f_fpr) = f_ovf;
3003  TREE_CHAIN (f_ovf) = f_sav;
3004
3005  layout_type (record);
3006
3007  /* The correct type is an array type of one element.  */
3008  return build_array_type (record, build_index_type (size_zero_node));
3009}
3010
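/* The resulting x86-64 va_list is equivalent to the familiar

       typedef struct __va_list_tag
       {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   (shown for illustration; the compiler uses the tree nodes built above).
   gp_offset and fp_offset index into the register save area, while
   overflow_arg_area walks the stack-passed arguments.  */
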
3011/* Perform any actions needed for a function that is receiving a
3012   variable number of arguments.
3013
3014   CUM is as above.
3015
3016   MODE and TYPE are the mode and type of the current parameter.
3017
3018   PRETEND_SIZE is a variable that should be set to the amount of stack
3019   that must be pushed by the prolog to pretend that our caller pushed
3020   it.
3021
3022   Normally, this macro will push all remaining incoming registers on the
3023   stack and set PRETEND_SIZE to the length of the registers pushed.  */
3024
3025void
3026ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3027			     tree type, int *pretend_size ATTRIBUTE_UNUSED,
3028			     int no_rtl)
3029{
3030  CUMULATIVE_ARGS next_cum;
3031  rtx save_area = NULL_RTX, mem;
3032  rtx label;
3033  rtx label_ref;
3034  rtx tmp_reg;
3035  rtx nsse_reg;
3036  int set;
3037  tree fntype;
3038  int stdarg_p;
3039  int i;
3040
3041  if (!TARGET_64BIT)
3042    return;
3043
3044  /* Indicate to allocate space on the stack for varargs save area.  */
3045  ix86_save_varrargs_registers = 1;
3046
3047  cfun->stack_alignment_needed = 128;
3048
3049  fntype = TREE_TYPE (current_function_decl);
3050  stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
3051	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
3052		  != void_type_node));
3053
3054  /* For varargs, we do not want to skip the dummy va_dcl argument.
3055     For stdargs, we do want to skip the last named argument.  */
3056  next_cum = *cum;
3057  if (stdarg_p)
3058    function_arg_advance (&next_cum, mode, type, 1);
3059
3060  if (!no_rtl)
3061    save_area = frame_pointer_rtx;
3062
3063  set = get_varargs_alias_set ();
3064
3065  for (i = next_cum.regno; i < ix86_regparm; i++)
3066    {
3067      mem = gen_rtx_MEM (Pmode,
3068			 plus_constant (save_area, i * UNITS_PER_WORD));
3069      set_mem_alias_set (mem, set);
3070      emit_move_insn (mem, gen_rtx_REG (Pmode,
3071					x86_64_int_parameter_registers[i]));
3072    }
3073
3074  if (next_cum.sse_nregs)
3075    {
3076      /* Now emit code to save SSE registers.  The AX parameter contains the
3077	 number of SSE parameter registers used to call this function.  We use
3078	 the sse_prologue_save insn template, which produces a computed jump
3079	 across the SSE saves.  We need some preparation work to get this working.  */
3080
3081      label = gen_label_rtx ();
3082      label_ref = gen_rtx_LABEL_REF (Pmode, label);
3083
3084      /* Compute the address to jump to:
3085         label - 4*eax + nnamed_sse_arguments*4  (each save insn is 4 bytes).  */
3086      tmp_reg = gen_reg_rtx (Pmode);
3087      nsse_reg = gen_reg_rtx (Pmode);
3088      emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
3089      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3090			      gen_rtx_MULT (Pmode, nsse_reg,
3091					    GEN_INT (4))));
3092      if (next_cum.sse_regno)
3093	emit_move_insn
3094	  (nsse_reg,
3095	   gen_rtx_CONST (DImode,
3096			  gen_rtx_PLUS (DImode,
3097					label_ref,
3098					GEN_INT (next_cum.sse_regno * 4))));
3099      else
3100	emit_move_insn (nsse_reg, label_ref);
3101      emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
3102
3103      /* Compute address of the memory block we save into.  We always use a
3104	 pointer pointing 127 bytes after the first byte to store; this keeps
3105	 each save instruction's encoding limited to 4 bytes.  */
3106      tmp_reg = gen_reg_rtx (Pmode);
3107      emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
3108			      plus_constant (save_area,
3109					     8 * REGPARM_MAX + 127)));
3110      mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
3111      set_mem_alias_set (mem, set);
3112      set_mem_align (mem, BITS_PER_WORD);
3113
3114      /* And finally do the dirty job!  */
3115      emit_insn (gen_sse_prologue_save (mem, nsse_reg,
3116					GEN_INT (next_cum.sse_regno), label));
3117    }
3118
3119}
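
/* For illustration, assuming the x86-64 defaults REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8, the register save area filled in above is laid
   out as:

	offset   0 ..  47	the 6 integer argument registers, 8 bytes each
	offset  48 .. 175	the 8 SSE argument registers, 16 bytes each

   The va_list's reg_save_area (set up in ix86_va_start below) points at
   offset 0 of this block.  */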
3120
3121/* Implement va_start.  */
3122
3123void
3124ix86_va_start (tree valist, rtx nextarg)
3125{
3126  HOST_WIDE_INT words, n_gpr, n_fpr;
3127  tree f_gpr, f_fpr, f_ovf, f_sav;
3128  tree gpr, fpr, ovf, sav, t;
3129
3130  /* Only 64bit target needs something special.  */
3131  if (!TARGET_64BIT)
3132    {
3133      std_expand_builtin_va_start (valist, nextarg);
3134      return;
3135    }
3136
3137  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3138  f_fpr = TREE_CHAIN (f_gpr);
3139  f_ovf = TREE_CHAIN (f_fpr);
3140  f_sav = TREE_CHAIN (f_ovf);
3141
3142  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3143  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3144  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3145  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3146  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3147
3148  /* Count number of gp and fp argument registers used.  */
3149  words = current_function_args_info.words;
3150  n_gpr = current_function_args_info.regno;
3151  n_fpr = current_function_args_info.sse_regno;
3152
3153  if (TARGET_DEBUG_ARG)
3154    fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
3155	     (int) words, (int) n_gpr, (int) n_fpr);
3156
3157  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
3158	     build_int_2 (n_gpr * 8, 0));
3159  TREE_SIDE_EFFECTS (t) = 1;
3160  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3161
3162  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
3163	     build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
3164  TREE_SIDE_EFFECTS (t) = 1;
3165  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3166
3167  /* Find the overflow area.  */
3168  t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
3169  if (words != 0)
3170    t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
3171	       build_int_2 (words * UNITS_PER_WORD, 0));
3172  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3173  TREE_SIDE_EFFECTS (t) = 1;
3174  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3175
3176  /* Find the register save area.
3177     The prologue of the function saves it right above the stack frame.  */
3178  t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
3179  t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
3180  TREE_SIDE_EFFECTS (t) = 1;
3181  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3182}
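
/* For illustration, the expansion above amounts roughly to the following,
   where AP is the va_list being initialized, N_GPR/N_FPR are the integer
   and SSE registers consumed by the named arguments and WORDS is the number
   of stack words they used:

	AP->gp_offset         = N_GPR * 8;
	AP->fp_offset         = 8 * REGPARM_MAX + N_FPR * 16;
	AP->overflow_arg_area = incoming args + WORDS * UNITS_PER_WORD;
	AP->reg_save_area     = the save area set up by the prologue;  */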
3183
3184/* Implement va_arg.  */
3185rtx
3186ix86_va_arg (tree valist, tree type)
3187{
3188  static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
3189  tree f_gpr, f_fpr, f_ovf, f_sav;
3190  tree gpr, fpr, ovf, sav, t;
3191  int size, rsize;
3192  rtx lab_false, lab_over = NULL_RTX;
3193  rtx addr_rtx, r;
3194  rtx container;
3195  int indirect_p = 0;
3196
3197  /* Only 64bit target needs something special.  */
3198  if (!TARGET_64BIT)
3199    {
3200      return std_expand_builtin_va_arg (valist, type);
3201    }
3202
3203  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3204  f_fpr = TREE_CHAIN (f_gpr);
3205  f_ovf = TREE_CHAIN (f_fpr);
3206  f_sav = TREE_CHAIN (f_ovf);
3207
3208  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3209  gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
3210  fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
3211  ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
3212  sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
3213
3214  size = int_size_in_bytes (type);
3215  if (size == -1)
3216    {
3217      /* Passed by reference.  */
3218      indirect_p = 1;
3219      type = build_pointer_type (type);
3220      size = int_size_in_bytes (type);
3221    }
3222  rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3223
3224  container = construct_container (TYPE_MODE (type), type, 0,
3225				   REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
3226  /* Pull the value out of the saved registers, if the argument was
3227     passed in registers; otherwise it is taken from the overflow
3228     area handled below.  */
3229
3230  addr_rtx = gen_reg_rtx (Pmode);
3231
3232  if (container)
3233    {
3234      rtx int_addr_rtx, sse_addr_rtx;
3235      int needed_intregs, needed_sseregs;
3236      int need_temp;
3237
3238      lab_over = gen_label_rtx ();
3239      lab_false = gen_label_rtx ();
3240
3241      examine_argument (TYPE_MODE (type), type, 0,
3242		        &needed_intregs, &needed_sseregs);
3243
3244
3245      need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
3246		   || TYPE_ALIGN (type) > 128);
3247
3248      /* In case we are passing a structure, verify that it is a consecutive
3249         block in the register save area.  If not, we need to do moves.  */
3250      if (!need_temp && !REG_P (container))
3251	{
3252	  /* Verify that all registers are strictly consecutive.  */
3253	  if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
3254	    {
3255	      int i;
3256
3257	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3258		{
3259		  rtx slot = XVECEXP (container, 0, i);
3260		  if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
3261		      || INTVAL (XEXP (slot, 1)) != i * 16)
3262		    need_temp = 1;
3263		}
3264	    }
3265	  else
3266	    {
3267	      int i;
3268
3269	      for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
3270		{
3271		  rtx slot = XVECEXP (container, 0, i);
3272		  if (REGNO (XEXP (slot, 0)) != (unsigned int) i
3273		      || INTVAL (XEXP (slot, 1)) != i * 8)
3274		    need_temp = 1;
3275		}
3276	    }
3277	}
3278      if (!need_temp)
3279	{
3280	  int_addr_rtx = addr_rtx;
3281	  sse_addr_rtx = addr_rtx;
3282	}
3283      else
3284	{
3285	  int_addr_rtx = gen_reg_rtx (Pmode);
3286	  sse_addr_rtx = gen_reg_rtx (Pmode);
3287	}
3288      /* First ensure that we fit completely in registers.  */
3289      if (needed_intregs)
3290	{
3291	  emit_cmp_and_jump_insns (expand_expr
3292				   (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
3293				   GEN_INT ((REGPARM_MAX - needed_intregs +
3294					     1) * 8), GE, const1_rtx, SImode,
3295				   1, lab_false);
3296	}
3297      if (needed_sseregs)
3298	{
3299	  emit_cmp_and_jump_insns (expand_expr
3300				   (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
3301				   GEN_INT ((SSE_REGPARM_MAX -
3302					     needed_sseregs + 1) * 16 +
3303					    REGPARM_MAX * 8), GE, const1_rtx,
3304				   SImode, 1, lab_false);
3305	}
3306
3307      /* Compute index to start of area used for integer regs.  */
3308      if (needed_intregs)
3309	{
3310	  t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
3311	  r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
3312	  if (r != int_addr_rtx)
3313	    emit_move_insn (int_addr_rtx, r);
3314	}
3315      if (needed_sseregs)
3316	{
3317	  t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
3318	  r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
3319	  if (r != sse_addr_rtx)
3320	    emit_move_insn (sse_addr_rtx, r);
3321	}
3322      if (need_temp)
3323	{
3324	  int i;
3325	  rtx mem;
3326	  rtx x;
3327
3328	  /* Never use the memory itself, as it has the alias set.  */
3329	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
3330	  mem = gen_rtx_MEM (BLKmode, x);
3331	  force_operand (x, addr_rtx);
3332	  set_mem_alias_set (mem, get_varargs_alias_set ());
3333	  set_mem_align (mem, BITS_PER_UNIT);
3334
3335	  for (i = 0; i < XVECLEN (container, 0); i++)
3336	    {
3337	      rtx slot = XVECEXP (container, 0, i);
3338	      rtx reg = XEXP (slot, 0);
3339	      enum machine_mode mode = GET_MODE (reg);
3340	      rtx src_addr;
3341	      rtx src_mem;
3342	      int src_offset;
3343	      rtx dest_mem;
3344
3345	      if (SSE_REGNO_P (REGNO (reg)))
3346		{
3347		  src_addr = sse_addr_rtx;
3348		  src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
3349		}
3350	      else
3351		{
3352		  src_addr = int_addr_rtx;
3353		  src_offset = REGNO (reg) * 8;
3354		}
3355	      src_mem = gen_rtx_MEM (mode, src_addr);
3356	      set_mem_alias_set (src_mem, get_varargs_alias_set ());
3357	      src_mem = adjust_address (src_mem, mode, src_offset);
3358	      dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
3359	      emit_move_insn (dest_mem, src_mem);
3360	    }
3361	}
3362
3363      if (needed_intregs)
3364	{
3365	  t =
3366	    build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
3367		   build_int_2 (needed_intregs * 8, 0));
3368	  t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
3369	  TREE_SIDE_EFFECTS (t) = 1;
3370	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3371	}
3372      if (needed_sseregs)
3373	{
3374	  t =
3375	    build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
3376		   build_int_2 (needed_sseregs * 16, 0));
3377	  t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
3378	  TREE_SIDE_EFFECTS (t) = 1;
3379	  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3380	}
3381
3382      emit_jump_insn (gen_jump (lab_over));
3383      emit_barrier ();
3384      emit_label (lab_false);
3385    }
3386
3387  /* ... otherwise out of the overflow area.  */
3388
3389  /* Care for on-stack alignment if needed.  */
3390  if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
3391    t = ovf;
3392  else
3393    {
3394      HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
3395      t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
3396      t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
3397    }
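  /* For example, for an argument requiring 16-byte alignment this computes
     ovf = (ovf + 15) & -16, the usual round-up-to-a-boundary idiom.  */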
3398  t = save_expr (t);
3399
3400  r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
3401  if (r != addr_rtx)
3402    emit_move_insn (addr_rtx, r);
3403
3404  t =
3405    build (PLUS_EXPR, TREE_TYPE (t), t,
3406	   build_int_2 (rsize * UNITS_PER_WORD, 0));
3407  t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
3408  TREE_SIDE_EFFECTS (t) = 1;
3409  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3410
3411  if (container)
3412    emit_label (lab_over);
3413
3414  if (indirect_p)
3415    {
3416      r = gen_rtx_MEM (Pmode, addr_rtx);
3417      set_mem_alias_set (r, get_varargs_alias_set ());
3418      emit_move_insn (addr_rtx, r);
3419    }
3420
3421  return addr_rtx;
3422}
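
/* For illustration, ignoring the temporary-copy case, the code generated
   above behaves roughly like (cf. the va_arg algorithm in the x86-64 psABI):

	if (container
	    && gp_offset + 8 * needed_intregs <= 8 * REGPARM_MAX
	    && fp_offset + 16 * needed_sseregs
	       <= 8 * REGPARM_MAX + 16 * SSE_REGPARM_MAX)
	  {
	    addr = reg_save_area + gp_offset (or fp_offset);
	    gp_offset += 8 * needed_intregs;
	    fp_offset += 16 * needed_sseregs;
	  }
	else
	  {
	    addr = overflow_arg_area rounded up to the argument's alignment;
	    overflow_arg_area = addr + rsize * UNITS_PER_WORD;
	  }
*/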
3423
3424/* Return nonzero if OP is either an i387 or SSE fp register.  */
3425int
3426any_fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3427{
3428  return ANY_FP_REG_P (op);
3429}
3430
3431/* Return nonzero if OP is an i387 fp register.  */
3432int
3433fp_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3434{
3435  return FP_REG_P (op);
3436}
3437
3438/* Return nonzero if OP is a non-fp register_operand.  */
3439int
3440register_and_not_any_fp_reg_operand (rtx op, enum machine_mode mode)
3441{
3442  return register_operand (op, mode) && !ANY_FP_REG_P (op);
3443}
3444
3445/* Return nonzero if OP is a register operand other than an
3446   i387 fp register.  */
3447int
3448register_and_not_fp_reg_operand (rtx op, enum machine_mode mode)
3449{
3450  return register_operand (op, mode) && !FP_REG_P (op);
3451}
3452
3453/* Return nonzero if OP is a general operand representable on x86_64.  */
3454
3455int
3456x86_64_general_operand (rtx op, enum machine_mode mode)
3457{
3458  if (!TARGET_64BIT)
3459    return general_operand (op, mode);
3460  if (nonimmediate_operand (op, mode))
3461    return 1;
3462  return x86_64_sign_extended_value (op);
3463}
3464
3465/* Return nonzero if OP is a general operand representable on x86_64
3466   as either a sign-extended or zero-extended constant.  */
3467
3468int
3469x86_64_szext_general_operand (rtx op, enum machine_mode mode)
3470{
3471  if (!TARGET_64BIT)
3472    return general_operand (op, mode);
3473  if (nonimmediate_operand (op, mode))
3474    return 1;
3475  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3476}
3477
3478/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
3479
3480int
3481x86_64_nonmemory_operand (rtx op, enum machine_mode mode)
3482{
3483  if (!TARGET_64BIT)
3484    return nonmemory_operand (op, mode);
3485  if (register_operand (op, mode))
3486    return 1;
3487  return x86_64_sign_extended_value (op);
3488}
3489
3490/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
3491
3492int
3493x86_64_movabs_operand (rtx op, enum machine_mode mode)
3494{
3495  if (!TARGET_64BIT || !flag_pic)
3496    return nonmemory_operand (op, mode);
3497  if (register_operand (op, mode) || x86_64_sign_extended_value (op))
3498    return 1;
3499  if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
3500    return 1;
3501  return 0;
3502}
3503
3504/* Return nonzero if OPNUM's MEM should be matched
3505   in movabs* patterns.  */
3506
3507int
3508ix86_check_movabs (rtx insn, int opnum)
3509{
3510  rtx set, mem;
3511
3512  set = PATTERN (insn);
3513  if (GET_CODE (set) == PARALLEL)
3514    set = XVECEXP (set, 0, 0);
3515  if (GET_CODE (set) != SET)
3516    abort ();
3517  mem = XEXP (set, opnum);
3518  while (GET_CODE (mem) == SUBREG)
3519    mem = SUBREG_REG (mem);
3520  if (GET_CODE (mem) != MEM)
3521    abort ();
3522  return (volatile_ok || !MEM_VOLATILE_P (mem));
3523}
3524
3525/* Like x86_64_nonmemory_operand, but also accept zero extended constants.  */
3526
3527int
3528x86_64_szext_nonmemory_operand (rtx op, enum machine_mode mode)
3529{
3530  if (!TARGET_64BIT)
3531    return nonmemory_operand (op, mode);
3532  if (register_operand (op, mode))
3533    return 1;
3534  return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
3535}
3536
3537/* Return nonzero if OP is an immediate operand representable on x86_64.  */
3538
3539int
3540x86_64_immediate_operand (rtx op, enum machine_mode mode)
3541{
3542  if (!TARGET_64BIT)
3543    return immediate_operand (op, mode);
3544  return x86_64_sign_extended_value (op);
3545}
3546
3547/* Return nonzero if OP is a zero extended immediate operand on x86_64.  */
3548
3549int
3550x86_64_zext_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3551{
3552  return x86_64_zero_extended_value (op);
3553}
3554
3555/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
3556   for shift & compare patterns, as shifting by 0 does not change flags),
3557   else return zero.  */
3558
3559int
3560const_int_1_31_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3561{
3562  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
3563}
3564
3565/* Returns 1 if OP is either a symbol reference or a sum of a symbol
3566   reference and a constant.  */
3567
3568int
3569symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3570{
3571  switch (GET_CODE (op))
3572    {
3573    case SYMBOL_REF:
3574    case LABEL_REF:
3575      return 1;
3576
3577    case CONST:
3578      op = XEXP (op, 0);
3579      if (GET_CODE (op) == SYMBOL_REF
3580	  || GET_CODE (op) == LABEL_REF
3581	  || (GET_CODE (op) == UNSPEC
3582	      && (XINT (op, 1) == UNSPEC_GOT
3583		  || XINT (op, 1) == UNSPEC_GOTOFF
3584		  || XINT (op, 1) == UNSPEC_GOTPCREL)))
3585	return 1;
3586      if (GET_CODE (op) != PLUS
3587	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3588	return 0;
3589
3590      op = XEXP (op, 0);
3591      if (GET_CODE (op) == SYMBOL_REF
3592	  || GET_CODE (op) == LABEL_REF)
3593	return 1;
3594      /* Only @GOTOFF gets offsets.  */
3595      if (GET_CODE (op) != UNSPEC
3596	  || XINT (op, 1) != UNSPEC_GOTOFF)
3597	return 0;
3598
3599      op = XVECEXP (op, 0, 0);
3600      if (GET_CODE (op) == SYMBOL_REF
3601	  || GET_CODE (op) == LABEL_REF)
3602	return 1;
3603      return 0;
3604
3605    default:
3606      return 0;
3607    }
3608}
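
/* Examples of operands accepted above (forms produced for PIC and
   non-PIC symbol references):

	(symbol_ref "foo")
	(label_ref ...)
	(const (plus (symbol_ref "foo") (const_int 8)))
	(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
	(const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) (const_int 8)))  */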
3609
3610/* Return true if the operand contains a @GOT or @GOTOFF reference.  */
3611
3612int
3613pic_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3614{
3615  if (GET_CODE (op) != CONST)
3616    return 0;
3617  op = XEXP (op, 0);
3618  if (TARGET_64BIT)
3619    {
3620      if (GET_CODE (op) == UNSPEC
3621	  && XINT (op, 1) == UNSPEC_GOTPCREL)
3622	return 1;
3623      if (GET_CODE (op) == PLUS
3624	  && GET_CODE (XEXP (op, 0)) == UNSPEC
3625	  && XINT (XEXP (op, 0), 1) == UNSPEC_GOTPCREL)
3626	return 1;
3627    }
3628  else
3629    {
3630      if (GET_CODE (op) == UNSPEC)
3631	return 1;
3632      if (GET_CODE (op) != PLUS
3633	  || GET_CODE (XEXP (op, 1)) != CONST_INT)
3634	return 0;
3635      op = XEXP (op, 0);
3636      if (GET_CODE (op) == UNSPEC)
3637	return 1;
3638    }
3639  return 0;
3640}
3641
3642/* Return true if OP is a symbolic operand that resolves locally.  */
3643
3644static int
3645local_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3646{
3647  if (GET_CODE (op) == CONST
3648      && GET_CODE (XEXP (op, 0)) == PLUS
3649      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3650    op = XEXP (XEXP (op, 0), 0);
3651
3652  if (GET_CODE (op) == LABEL_REF)
3653    return 1;
3654
3655  if (GET_CODE (op) != SYMBOL_REF)
3656    return 0;
3657
3658  if (SYMBOL_REF_LOCAL_P (op))
3659    return 1;
3660
3661  /* There is, however, a not insubstantial body of code in the rest of
3662     the compiler that assumes it can just stick the results of
3663     ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done.  */
3664  /* ??? This is a hack.  Should update the body of the compiler to
3665     always create a DECL and invoke targetm.encode_section_info.  */
3666  if (strncmp (XSTR (op, 0), internal_label_prefix,
3667	       internal_label_prefix_len) == 0)
3668    return 1;
3669
3670  return 0;
3671}
3672
3673/* Test for various thread-local symbols.  */
3674
3675int
3676tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3677{
3678  if (GET_CODE (op) != SYMBOL_REF)
3679    return 0;
3680  return SYMBOL_REF_TLS_MODEL (op);
3681}
3682
3683static inline int
3684tls_symbolic_operand_1 (rtx op, enum tls_model kind)
3685{
3686  if (GET_CODE (op) != SYMBOL_REF)
3687    return 0;
3688  return SYMBOL_REF_TLS_MODEL (op) == kind;
3689}
3690
3691int
3692global_dynamic_symbolic_operand (rtx op,
3693				 enum machine_mode mode ATTRIBUTE_UNUSED)
3694{
3695  return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3696}
3697
3698int
3699local_dynamic_symbolic_operand (rtx op,
3700				enum machine_mode mode ATTRIBUTE_UNUSED)
3701{
3702  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3703}
3704
3705int
3706initial_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3707{
3708  return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3709}
3710
3711int
3712local_exec_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3713{
3714  return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3715}
3716
3717/* Test for a valid operand for a call instruction.  Don't allow the
3718   arg pointer register or virtual regs since they may decay into
3719   reg + const, which the patterns can't handle.  */
3720
3721int
3722call_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3723{
3724  /* Disallow indirect through a virtual register.  This leads to
3725     compiler aborts when trying to eliminate them.  */
3726  if (GET_CODE (op) == REG
3727      && (op == arg_pointer_rtx
3728	  || op == frame_pointer_rtx
3729	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3730	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3731    return 0;
3732
3733  /* Disallow `call 1234'.  Due to varying assembler lameness this
3734     gets either rejected or translated to `call .+1234'.  */
3735  if (GET_CODE (op) == CONST_INT)
3736    return 0;
3737
3738  /* Explicitly allow SYMBOL_REF even if pic.  */
3739  if (GET_CODE (op) == SYMBOL_REF)
3740    return 1;
3741
3742  /* Otherwise we can allow any general_operand in the address.  */
3743  return general_operand (op, Pmode);
3744}
3745
3746/* Test for a valid operand for a call instruction.  Don't allow the
3747   arg pointer register or virtual regs since they may decay into
3748   reg + const, which the patterns can't handle.  */
3749
3750int
3751sibcall_insn_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3752{
3753  /* Disallow indirect through a virtual register.  This leads to
3754     compiler aborts when trying to eliminate them.  */
3755  if (GET_CODE (op) == REG
3756      && (op == arg_pointer_rtx
3757	  || op == frame_pointer_rtx
3758	  || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3759	      && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3760    return 0;
3761
3762  /* Explicitly allow SYMBOL_REF even if pic.  */
3763  if (GET_CODE (op) == SYMBOL_REF)
3764    return 1;
3765
3766  /* Otherwise we can only allow register operands.  */
3767  return register_operand (op, Pmode);
3768}
3769
3770int
3771constant_call_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3772{
3773  if (GET_CODE (op) == CONST
3774      && GET_CODE (XEXP (op, 0)) == PLUS
3775      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3776    op = XEXP (XEXP (op, 0), 0);
3777  return GET_CODE (op) == SYMBOL_REF;
3778}
3779
3780/* Match exactly zero and one.  */
3781
3782int
3783const0_operand (rtx op, enum machine_mode mode)
3784{
3785  return op == CONST0_RTX (mode);
3786}
3787
3788int
3789const1_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3790{
3791  return op == const1_rtx;
3792}
3793
3794/* Match 2, 4, or 8.  Used for leal multiplicands.  */
3795
3796int
3797const248_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3798{
3799  return (GET_CODE (op) == CONST_INT
3800	  && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3801}
3802
3803int
3804const_0_to_3_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3805{
3806  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 4);
3807}
3808
3809int
3810const_0_to_7_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3811{
3812  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 8);
3813}
3814
3815int
3816const_0_to_15_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3817{
3818  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 16);
3819}
3820
3821int
3822const_0_to_255_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3823{
3824  return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 256);
3825}
3826
3827
3828/* True if this is a constant appropriate for an increment or decrement.  */
3829
3830int
3831incdec_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3832{
3833  /* On Pentium4, the inc and dec operations cause an extra dependency on the
3834     flags register, since the carry flag is not set.  */
3835  if (TARGET_PENTIUM4 && !optimize_size)
3836    return 0;
3837  return op == const1_rtx || op == constm1_rtx;
3838}
3839
3840/* Return nonzero if OP is acceptable as operand of DImode shift
3841   expander.  */
3842
3843int
3844shiftdi_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3845{
3846  if (TARGET_64BIT)
3847    return nonimmediate_operand (op, mode);
3848  else
3849    return register_operand (op, mode);
3850}
3851
3852/* Return false if this is the stack pointer, or any other fake
3853   register eliminable to the stack pointer.  Otherwise, this is
3854   a register operand.
3855
3856   This is used to prevent esp from being used as an index reg,
3857   which would only happen in pathological cases.  */
3858
3859int
3860reg_no_sp_operand (rtx op, enum machine_mode mode)
3861{
3862  rtx t = op;
3863  if (GET_CODE (t) == SUBREG)
3864    t = SUBREG_REG (t);
3865  if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
3866    return 0;
3867
3868  return register_operand (op, mode);
3869}
3870
3871int
3872mmx_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3873{
3874  return MMX_REG_P (op);
3875}
3876
3877/* Return false if this is any eliminable register.  Otherwise
3878   general_operand.  */
3879
3880int
3881general_no_elim_operand (rtx op, enum machine_mode mode)
3882{
3883  rtx t = op;
3884  if (GET_CODE (t) == SUBREG)
3885    t = SUBREG_REG (t);
3886  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3887      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3888      || t == virtual_stack_dynamic_rtx)
3889    return 0;
3890  if (REG_P (t)
3891      && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3892      && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3893    return 0;
3894
3895  return general_operand (op, mode);
3896}
3897
3898/* Return false if this is any eliminable register.  Otherwise
3899   register_operand or const_int.  */
3900
3901int
3902nonmemory_no_elim_operand (rtx op, enum machine_mode mode)
3903{
3904  rtx t = op;
3905  if (GET_CODE (t) == SUBREG)
3906    t = SUBREG_REG (t);
3907  if (t == arg_pointer_rtx || t == frame_pointer_rtx
3908      || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3909      || t == virtual_stack_dynamic_rtx)
3910    return 0;
3911
3912  return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3913}
3914
3915/* Return false if this is any eliminable register or stack register,
3916   otherwise work like register_operand.  */
3917
3918int
3919index_register_operand (rtx op, enum machine_mode mode)
3920{
3921  rtx t = op;
3922  if (GET_CODE (t) == SUBREG)
3923    t = SUBREG_REG (t);
3924  if (!REG_P (t))
3925    return 0;
3926  if (t == arg_pointer_rtx
3927      || t == frame_pointer_rtx
3928      || t == virtual_incoming_args_rtx
3929      || t == virtual_stack_vars_rtx
3930      || t == virtual_stack_dynamic_rtx
3931      || REGNO (t) == STACK_POINTER_REGNUM)
3932    return 0;
3933
3934  return general_operand (op, mode);
3935}
3936
3937/* Return true if op is a Q_REGS class register.  */
3938
3939int
3940q_regs_operand (rtx op, enum machine_mode mode)
3941{
3942  if (mode != VOIDmode && GET_MODE (op) != mode)
3943    return 0;
3944  if (GET_CODE (op) == SUBREG)
3945    op = SUBREG_REG (op);
3946  return ANY_QI_REG_P (op);
3947}
3948
3949/* Return true if op is the flags register.  */
3950
3951int
3952flags_reg_operand (rtx op, enum machine_mode mode)
3953{
3954  if (mode != VOIDmode && GET_MODE (op) != mode)
3955    return 0;
3956  return REG_P (op) && REGNO (op) == FLAGS_REG && GET_MODE (op) != VOIDmode;
3957}
3958
3959/* Return true if op is a NON_Q_REGS class register.  */
3960
3961int
3962non_q_regs_operand (rtx op, enum machine_mode mode)
3963{
3964  if (mode != VOIDmode && GET_MODE (op) != mode)
3965    return 0;
3966  if (GET_CODE (op) == SUBREG)
3967    op = SUBREG_REG (op);
3968  return NON_QI_REG_P (op);
3969}
3970
3971int
3972zero_extended_scalar_load_operand (rtx op,
3973				   enum machine_mode mode ATTRIBUTE_UNUSED)
3974{
3975  unsigned n_elts;
3976  if (GET_CODE (op) != MEM)
3977    return 0;
3978  op = maybe_get_pool_constant (op);
3979  if (!op)
3980    return 0;
3981  if (GET_CODE (op) != CONST_VECTOR)
3982    return 0;
3983  n_elts =
3984    (GET_MODE_SIZE (GET_MODE (op)) /
3985     GET_MODE_SIZE (GET_MODE_INNER (GET_MODE (op))));
3986  for (n_elts--; n_elts > 0; n_elts--)
3987    {
3988      rtx elt = CONST_VECTOR_ELT (op, n_elts);
3989      if (elt != CONST0_RTX (GET_MODE_INNER (GET_MODE (op))))
3990	return 0;
3991    }
3992  return 1;
3993}
3994
3995/* Return 1 when OP is an operand acceptable for a standard SSE move.  */
3996int
3997vector_move_operand (rtx op, enum machine_mode mode)
3998{
3999  if (nonimmediate_operand (op, mode))
4000    return 1;
4001  if (GET_MODE (op) != mode && mode != VOIDmode)
4002    return 0;
4003  return (op == CONST0_RTX (GET_MODE (op)));
4004}
4005
4006/* Return true if op is a valid address, and does not contain
4007   a segment override.  */
4008
4009int
4010no_seg_address_operand (rtx op, enum machine_mode mode)
4011{
4012  struct ix86_address parts;
4013
4014  if (! address_operand (op, mode))
4015    return 0;
4016
4017  if (! ix86_decompose_address (op, &parts))
4018    abort ();
4019
4020  return parts.seg == SEG_DEFAULT;
4021}
4022
4023/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
4024   insns.  */
4025int
4026sse_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4027{
4028  enum rtx_code code = GET_CODE (op);
4029  switch (code)
4030    {
4031    /* Operations supported directly.  */
4032    case EQ:
4033    case LT:
4034    case LE:
4035    case UNORDERED:
4036    case NE:
4037    case UNGE:
4038    case UNGT:
4039    case ORDERED:
4040      return 1;
4041    /* These are equivalent to ones above in non-IEEE comparisons.  */
4042    case UNEQ:
4043    case UNLT:
4044    case UNLE:
4045    case LTGT:
4046    case GE:
4047    case GT:
4048      return !TARGET_IEEE_FP;
4049    default:
4050      return 0;
4051    }
4052}
4053/* Return 1 if OP is a valid comparison operator in valid mode.  */
4054int
4055ix86_comparison_operator (rtx op, enum machine_mode mode)
4056{
4057  enum machine_mode inmode;
4058  enum rtx_code code = GET_CODE (op);
4059  if (mode != VOIDmode && GET_MODE (op) != mode)
4060    return 0;
4061  if (GET_RTX_CLASS (code) != '<')
4062    return 0;
4063  inmode = GET_MODE (XEXP (op, 0));
4064
4065  if (inmode == CCFPmode || inmode == CCFPUmode)
4066    {
4067      enum rtx_code second_code, bypass_code;
4068      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4069      return (bypass_code == NIL && second_code == NIL);
4070    }
4071  switch (code)
4072    {
4073    case EQ: case NE:
4074      return 1;
4075    case LT: case GE:
4076      if (inmode == CCmode || inmode == CCGCmode
4077	  || inmode == CCGOCmode || inmode == CCNOmode)
4078	return 1;
4079      return 0;
4080    case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
4081      if (inmode == CCmode)
4082	return 1;
4083      return 0;
4084    case GT: case LE:
4085      if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
4086	return 1;
4087      return 0;
4088    default:
4089      return 0;
4090    }
4091}
4092
4093/* Return 1 if OP is a valid comparison operator that tests whether
4094   the carry flag is set.  */
4095int
4096ix86_carry_flag_operator (rtx op, enum machine_mode mode)
4097{
4098  enum machine_mode inmode;
4099  enum rtx_code code = GET_CODE (op);
4100
4101  if (mode != VOIDmode && GET_MODE (op) != mode)
4102    return 0;
4103  if (GET_RTX_CLASS (code) != '<')
4104    return 0;
4105  inmode = GET_MODE (XEXP (op, 0));
4106  if (GET_CODE (XEXP (op, 0)) != REG
4107      || REGNO (XEXP (op, 0)) != 17
4108      || XEXP (op, 1) != const0_rtx)
4109    return 0;
4110
4111  if (inmode == CCFPmode || inmode == CCFPUmode)
4112    {
4113      enum rtx_code second_code, bypass_code;
4114
4115      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4116      if (bypass_code != NIL || second_code != NIL)
4117	return 0;
4118      code = ix86_fp_compare_code_to_integer (code);
4119    }
4120  else if (inmode != CCmode)
4121    return 0;
4122  return code == LTU;
4123}
4124
4125/* Return 1 if OP is a comparison operator that can be issued by fcmov.  */
4126
4127int
4128fcmov_comparison_operator (rtx op, enum machine_mode mode)
4129{
4130  enum machine_mode inmode;
4131  enum rtx_code code = GET_CODE (op);
4132
4133  if (mode != VOIDmode && GET_MODE (op) != mode)
4134    return 0;
4135  if (GET_RTX_CLASS (code) != '<')
4136    return 0;
4137  inmode = GET_MODE (XEXP (op, 0));
4138  if (inmode == CCFPmode || inmode == CCFPUmode)
4139    {
4140      enum rtx_code second_code, bypass_code;
4141
4142      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
4143      if (bypass_code != NIL || second_code != NIL)
4144	return 0;
4145      code = ix86_fp_compare_code_to_integer (code);
4146    }
4147  /* The i387 supports just a limited set of condition codes.  */
4148  switch (code)
4149    {
4150    case LTU: case GTU: case LEU: case GEU:
4151      if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
4152	return 1;
4153      return 0;
4154    case ORDERED: case UNORDERED:
4155    case EQ: case NE:
4156      return 1;
4157    default:
4158      return 0;
4159    }
4160}
4161
4162/* Return 1 if OP is a binary operator that can be promoted to wider mode.  */
4163
4164int
4165promotable_binary_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4166{
4167  switch (GET_CODE (op))
4168    {
4169    case MULT:
4170      /* Modern CPUs have the same latency for HImode and SImode multiply,
4171         but the 386 and 486 do HImode multiply faster.  */
4172      return ix86_tune > PROCESSOR_I486;
4173    case PLUS:
4174    case AND:
4175    case IOR:
4176    case XOR:
4177    case ASHIFT:
4178      return 1;
4179    default:
4180      return 0;
4181    }
4182}
4183
4184/* Nearly general operand, but accept any const_double, since we wish
4185   to be able to drop them into memory rather than have them get pulled
4186   into registers.  */
4187
4188int
4189cmp_fp_expander_operand (rtx op, enum machine_mode mode)
4190{
4191  if (mode != VOIDmode && mode != GET_MODE (op))
4192    return 0;
4193  if (GET_CODE (op) == CONST_DOUBLE)
4194    return 1;
4195  return general_operand (op, mode);
4196}
4197
4198/* Match an SImode or HImode register for a zero_extract.  */
4199
4200int
4201ext_register_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4202{
4203  int regno;
4204  if ((!TARGET_64BIT || GET_MODE (op) != DImode)
4205      && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
4206    return 0;
4207
4208  if (!register_operand (op, VOIDmode))
4209    return 0;
4210
4211  /* Be careful to accept only registers having upper parts.  */
4212  regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
4213  return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
4214}
4215
4216/* Return 1 if this is a valid binary floating-point operation.
4217   OP is the expression matched, and MODE is its mode.  */
4218
4219int
4220binary_fp_operator (rtx op, enum machine_mode mode)
4221{
4222  if (mode != VOIDmode && mode != GET_MODE (op))
4223    return 0;
4224
4225  switch (GET_CODE (op))
4226    {
4227    case PLUS:
4228    case MINUS:
4229    case MULT:
4230    case DIV:
4231      return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
4232
4233    default:
4234      return 0;
4235    }
4236}
4237
4238int
4239mult_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4240{
4241  return GET_CODE (op) == MULT;
4242}
4243
4244int
4245div_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4246{
4247  return GET_CODE (op) == DIV;
4248}
4249
4250int
4251arith_or_logical_operator (rtx op, enum machine_mode mode)
4252{
4253  return ((mode == VOIDmode || GET_MODE (op) == mode)
4254          && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
4255              || GET_RTX_CLASS (GET_CODE (op)) == '2'));
4256}
4257
4258/* Returns 1 if OP is a memory operand with a displacement.  */
4259
4260int
4261memory_displacement_operand (rtx op, enum machine_mode mode)
4262{
4263  struct ix86_address parts;
4264
4265  if (! memory_operand (op, mode))
4266    return 0;
4267
4268  if (! ix86_decompose_address (XEXP (op, 0), &parts))
4269    abort ();
4270
4271  return parts.disp != NULL_RTX;
4272}
4273
4274/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
4275   re-recognize the operand to avoid a copy_to_mode_reg that will fail.
4276
4277   ??? It seems likely that this will only work because cmpsi is an
4278   expander, and no actual insns use this.  */
4279
4280int
4281cmpsi_operand (rtx op, enum machine_mode mode)
4282{
4283  if (nonimmediate_operand (op, mode))
4284    return 1;
4285
4286  if (GET_CODE (op) == AND
4287      && GET_MODE (op) == SImode
4288      && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
4289      && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
4290      && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
4291      && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
4292      && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
4293      && GET_CODE (XEXP (op, 1)) == CONST_INT)
4294    return 1;
4295
4296  return 0;
4297}
4298
4299/* Returns 1 if OP is a memory operand that cannot be represented by the
4300   modRM array.  */
4301
4302int
4303long_memory_operand (rtx op, enum machine_mode mode)
4304{
4305  if (! memory_operand (op, mode))
4306    return 0;
4307
4308  return memory_address_length (op) != 0;
4309}
4310
4311/* Return nonzero if the rtx is known aligned.  */
4312
4313int
4314aligned_operand (rtx op, enum machine_mode mode)
4315{
4316  struct ix86_address parts;
4317
4318  if (!general_operand (op, mode))
4319    return 0;
4320
4321  /* Registers and immediate operands are always "aligned".  */
4322  if (GET_CODE (op) != MEM)
4323    return 1;
4324
4325  /* Don't even try to do any aligned optimizations with volatiles.  */
4326  if (MEM_VOLATILE_P (op))
4327    return 0;
4328
4329  op = XEXP (op, 0);
4330
4331  /* Pushes and pops are only valid on the stack pointer.  */
4332  if (GET_CODE (op) == PRE_DEC
4333      || GET_CODE (op) == POST_INC)
4334    return 1;
4335
4336  /* Decode the address.  */
4337  if (! ix86_decompose_address (op, &parts))
4338    abort ();
4339
4340  /* Look for some component that isn't known to be aligned.  */
4341  if (parts.index)
4342    {
4343      if (parts.scale < 4
4344	  && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
4345	return 0;
4346    }
4347  if (parts.base)
4348    {
4349      if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
4350	return 0;
4351    }
4352  if (parts.disp)
4353    {
4354      if (GET_CODE (parts.disp) != CONST_INT
4355	  || (INTVAL (parts.disp) & 3) != 0)
4356	return 0;
4357    }
4358
4359  /* Didn't find one -- this must be an aligned address.  */
4360  return 1;
4361}
4362
4363int
4364compare_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
4365{
4366  return GET_CODE (op) == COMPARE;
4367}
4368
4369/* Initialize the table of extra 80387 mathematical constants.  */
4370
4371static void
4372init_ext_80387_constants (void)
4373{
4374  static const char * cst[5] =
4375  {
4376    "0.3010299956639811952256464283594894482",  /* 0: fldlg2  */
4377    "0.6931471805599453094286904741849753009",  /* 1: fldln2  */
4378    "1.4426950408889634073876517827983434472",  /* 2: fldl2e  */
4379    "3.3219280948873623478083405569094566090",  /* 3: fldl2t  */
4380    "3.1415926535897932385128089594061862044",  /* 4: fldpi   */
4381  };
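  /* For reference, these are the values loaded by the x87 fldlg2, fldln2,
     fldl2e, fldl2t and fldpi instructions: log10(2), ln(2), log2(e),
     log2(10) and pi, respectively.  */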
4382  int i;
4383
4384  for (i = 0; i < 5; i++)
4385    {
4386      real_from_string (&ext_80387_constants_table[i], cst[i]);
4387      /* Ensure each constant is rounded to XFmode precision.  */
4388      real_convert (&ext_80387_constants_table[i],
4389		    XFmode, &ext_80387_constants_table[i]);
4390    }
4391
4392  ext_80387_constants_init = 1;
4393}
4394
4395/* Return true if the constant is something that can be loaded with
4396   a special instruction.  */
4397
4398int
4399standard_80387_constant_p (rtx x)
4400{
4401  if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4402    return -1;
4403
4404  if (x == CONST0_RTX (GET_MODE (x)))
4405    return 1;
4406  if (x == CONST1_RTX (GET_MODE (x)))
4407    return 2;
4408
4409  /* For XFmode constants, try to find a special 80387 instruction on
4410     those CPUs that benefit from them.  */
4411  if (GET_MODE (x) == XFmode
4412      && x86_ext_80387_constants & TUNEMASK)
4413    {
4414      REAL_VALUE_TYPE r;
4415      int i;
4416
4417      if (! ext_80387_constants_init)
4418	init_ext_80387_constants ();
4419
4420      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4421      for (i = 0; i < 5; i++)
4422        if (real_identical (&r, &ext_80387_constants_table[i]))
4423	  return i + 3;
4424    }
4425
4426  return 0;
4427}
4428
4429/* Return the opcode of the special instruction to be used to load
4430   the constant X.  */
4431
4432const char *
4433standard_80387_constant_opcode (rtx x)
4434{
4435  switch (standard_80387_constant_p (x))
4436    {
4437    case 1:
4438      return "fldz";
4439    case 2:
4440      return "fld1";
4441    case 3:
4442      return "fldlg2";
4443    case 4:
4444      return "fldln2";
4445    case 5:
4446      return "fldl2e";
4447    case 6:
4448      return "fldl2t";
4449    case 7:
4450      return "fldpi";
4451    }
4452  abort ();
4453}
4454
4455/* Return the CONST_DOUBLE representing the 80387 constant that is
4456   loaded by the specified special instruction.  The argument IDX
4457   matches the return value from standard_80387_constant_p.  */
4458
4459rtx
4460standard_80387_constant_rtx (int idx)
4461{
4462  int i;
4463
4464  if (! ext_80387_constants_init)
4465    init_ext_80387_constants ();
4466
4467  switch (idx)
4468    {
4469    case 3:
4470    case 4:
4471    case 5:
4472    case 6:
4473    case 7:
4474      i = idx - 3;
4475      break;
4476
4477    default:
4478      abort ();
4479    }
4480
4481  return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4482				       XFmode);
4483}
4484
4485/* Return 1 if X is an FP constant we can load into an SSE register
4486   without using memory.  */
4487int
4488standard_sse_constant_p (rtx x)
4489{
4490  if (x == const0_rtx)
4491    return 1;
4492  return (x == CONST0_RTX (GET_MODE (x)));
4493}
4494
4495/* Returns 1 if OP contains a symbol reference.  */
4496
4497int
4498symbolic_reference_mentioned_p (rtx op)
4499{
4500  const char *fmt;
4501  int i;
4502
4503  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4504    return 1;
4505
4506  fmt = GET_RTX_FORMAT (GET_CODE (op));
4507  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4508    {
4509      if (fmt[i] == 'E')
4510	{
4511	  int j;
4512
4513	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4514	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4515	      return 1;
4516	}
4517
4518      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4519	return 1;
4520    }
4521
4522  return 0;
4523}
4524
4525/* Return 1 if it is appropriate to emit `ret' instructions in the
4526   body of a function.  Do this only if the epilogue is simple, needing a
4527   couple of insns.  Prior to reloading, we can't tell how many registers
4528   must be saved, so return 0 then.  Return 0 if there is no frame
4529   marker to de-allocate.
4530
4531   If NON_SAVING_SETJMP is defined and true, then it is not possible
4532   for the epilogue to be simple, so return 0.  This is a special case
4533   since NON_SAVING_SETJMP will not cause regs_ever_live to change
4534   until final, but jump_optimize may need to know sooner if a
4535   `return' is OK.  */
4536
4537int
4538ix86_can_use_return_insn_p (void)
4539{
4540  struct ix86_frame frame;
4541
4542#ifdef NON_SAVING_SETJMP
4543  if (NON_SAVING_SETJMP && current_function_calls_setjmp)
4544    return 0;
4545#endif
4546
4547  if (! reload_completed || frame_pointer_needed)
4548    return 0;
4549
4550  /* Don't allow more than 32k of args to pop, since that's all we can do
4551     with one instruction.  */
4552  if (current_function_pops_args
4553      && current_function_args_size >= 32768)
4554    return 0;
4555
4556  ix86_compute_frame_layout (&frame);
4557  return frame.to_allocate == 0 && frame.nregs == 0;
4558}
4559
4560/* Return 1 if VALUE can be stored in the sign extended immediate field.  */
4561int
4562x86_64_sign_extended_value (rtx value)
4563{
4564  switch (GET_CODE (value))
4565    {
4566      /* CONST_DOUBLEs never match, since HOST_BITS_PER_WIDE_INT is known
4567         to be at least 32 and thus all acceptable constants are
4568	 represented as CONST_INT.  */
4569      case CONST_INT:
4570	if (HOST_BITS_PER_WIDE_INT == 32)
4571	  return 1;
4572	else
4573	  {
4574	    HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
4575	    return trunc_int_for_mode (val, SImode) == val;
4576	  }
4577	break;
4578
4579      /* For certain code models, the symbolic references are known to fit;
4580	 in the CM_SMALL_PIC model we know it fits if it is local to the
4581	 shared library.  Don't count TLS SYMBOL_REFs here, since they should
4582	 fit only if inside an UNSPEC handled below.  */
4583      case SYMBOL_REF:
4584	/* TLS symbols are not constant.  */
4585	if (tls_symbolic_operand (value, Pmode))
4586	  return false;
4587	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL);
4588
4589      /* For certain code models, the code is near as well.  */
4590      case LABEL_REF:
4591	return (ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM
4592		|| ix86_cmodel == CM_KERNEL);
4593
4594      /* We also may accept the offsetted memory references in certain special
4595         cases.  */
4596      case CONST:
4597	if (GET_CODE (XEXP (value, 0)) == UNSPEC)
4598	  switch (XINT (XEXP (value, 0), 1))
4599	    {
4600	    case UNSPEC_GOTPCREL:
4601	    case UNSPEC_DTPOFF:
4602	    case UNSPEC_GOTNTPOFF:
4603	    case UNSPEC_NTPOFF:
4604	      return 1;
4605	    default:
4606	      break;
4607	    }
4608	if (GET_CODE (XEXP (value, 0)) == PLUS)
4609	  {
4610	    rtx op1 = XEXP (XEXP (value, 0), 0);
4611	    rtx op2 = XEXP (XEXP (value, 0), 1);
4612	    HOST_WIDE_INT offset;
4613
4614	    if (ix86_cmodel == CM_LARGE)
4615	      return 0;
4616	    if (GET_CODE (op2) != CONST_INT)
4617	      return 0;
4618	    offset = trunc_int_for_mode (INTVAL (op2), DImode);
4619	    switch (GET_CODE (op1))
4620	      {
4621		case SYMBOL_REF:
4622		  /* For CM_SMALL assume that the last object is 16MB before
4623		     the end of the 31-bit boundary.  We may also accept pretty
4624		     large negative constants knowing that all objects are
4625		     in the positive half of the address space.  */
4626		  if (ix86_cmodel == CM_SMALL
4627		      && offset < 16*1024*1024
4628		      && trunc_int_for_mode (offset, SImode) == offset)
4629		    return 1;
4630		  /* For CM_KERNEL we know that all objects reside in the
4631		     negative half of the 32-bit address space.  We may not
4632		     accept negative offsets, since they may be just off,
4633		     and we may accept pretty large positive ones.  */
4634		  if (ix86_cmodel == CM_KERNEL
4635		      && offset > 0
4636		      && trunc_int_for_mode (offset, SImode) == offset)
4637		    return 1;
4638		  break;
4639		case LABEL_REF:
4640		  /* These conditions are similar to SYMBOL_REF ones, just the
4641		     constraints for code models differ.  */
4642		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4643		      && offset < 16*1024*1024
4644		      && trunc_int_for_mode (offset, SImode) == offset)
4645		    return 1;
4646		  if (ix86_cmodel == CM_KERNEL
4647		      && offset > 0
4648		      && trunc_int_for_mode (offset, SImode) == offset)
4649		    return 1;
4650		  break;
4651		case UNSPEC:
4652		  switch (XINT (op1, 1))
4653		    {
4654		    case UNSPEC_DTPOFF:
4655		    case UNSPEC_NTPOFF:
4656		      if (offset > 0
4657			  && trunc_int_for_mode (offset, SImode) == offset)
4658			return 1;
4659		    }
4660		  break;
4661		default:
4662		  return 0;
4663	      }
4664	  }
4665	return 0;
4666      default:
4667	return 0;
4668    }
4669}
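
/* For example, as 32-bit sign-extended immediates the constants 0x7fffffff
   and -1 are accepted above, while 0x80000000 is not; the latter is instead
   handled by x86_64_zero_extended_value below.  */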
4670
4671/* Return 1 if VALUE can be stored in the zero extended immediate field.  */
4672int
4673x86_64_zero_extended_value (rtx value)
4674{
4675  switch (GET_CODE (value))
4676    {
4677      case CONST_DOUBLE:
4678	if (HOST_BITS_PER_WIDE_INT == 32)
4679	  return  (GET_MODE (value) == VOIDmode
4680		   && !CONST_DOUBLE_HIGH (value));
4681	else
4682	  return 0;
4683      case CONST_INT:
4684	if (HOST_BITS_PER_WIDE_INT == 32)
4685	  return INTVAL (value) >= 0;
4686	else
4687	  return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
4688	break;
4689
4690      /* For certain code models, the symbolic references are known to fit.  */
4691      case SYMBOL_REF:
4692	/* TLS symbols are not constant.  */
4693	if (tls_symbolic_operand (value, Pmode))
4694	  return false;
4695	return ix86_cmodel == CM_SMALL;
4696
4697      /* For certain code models, the code is near as well.  */
4698      case LABEL_REF:
4699	return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
4700
4701      /* We also may accept the offsetted memory references in certain special
4702         cases.  */
4703      case CONST:
4704	if (GET_CODE (XEXP (value, 0)) == PLUS)
4705	  {
4706	    rtx op1 = XEXP (XEXP (value, 0), 0);
4707	    rtx op2 = XEXP (XEXP (value, 0), 1);
4708
4709	    if (ix86_cmodel == CM_LARGE)
4710	      return 0;
4711	    switch (GET_CODE (op1))
4712	      {
4713		case SYMBOL_REF:
4714		    return 0;
4715		  /* For small code model we may accept pretty large positive
4716		     offsets, since one bit is available for free.  Negative
4717		     offsets are limited by the size of NULL pointer area
4718		     specified by the ABI.  */
4719		  if (ix86_cmodel == CM_SMALL
4720		      && GET_CODE (op2) == CONST_INT
4721		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4722		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4723			  == INTVAL (op2)))
4724		    return 1;
4725	          /* ??? For the kernel, we may accept adjustment of
4726		     -0x10000000, since we know that it will just convert
4727		     negative address space to positive, but perhaps this
4728		     is not worthwhile.  */
4729		  break;
4730		case LABEL_REF:
4731		  /* These conditions are similar to SYMBOL_REF ones, just the
4732		     constraints for code models differ.  */
4733		  if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
4734		      && GET_CODE (op2) == CONST_INT
4735		      && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
4736		      && (trunc_int_for_mode (INTVAL (op2), SImode)
4737			  == INTVAL (op2)))
4738		    return 1;
4739		  break;
4740		default:
4741		  return 0;
4742	      }
4743	  }
4744	return 0;
4745      default:
4746	return 0;
4747    }
4748}
4749
4750/* Value should be nonzero if functions must have frame pointers.
4751   Zero means the frame pointer need not be set up (and parms may
4752   be accessed via the stack pointer) in functions that seem suitable.  */
4753
4754int
4755ix86_frame_pointer_required (void)
4756{
4757  /* If we accessed previous frames, then the generated code expects
4758     to be able to access the saved ebp value in our frame.  */
4759  if (cfun->machine->accesses_prev_frame)
4760    return 1;
4761
4762  /* Several x86 OSes need a frame pointer for other reasons,
4763     usually pertaining to setjmp.  */
4764  if (SUBTARGET_FRAME_POINTER_REQUIRED)
4765    return 1;
4766
4767  /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
4768     the frame pointer by default.  Turn it back on now if we've not
4769     got a leaf function.  */
4770  if (TARGET_OMIT_LEAF_FRAME_POINTER
4771      && (!current_function_is_leaf))
4772    return 1;
4773
4774  if (current_function_profile)
4775    return 1;
4776
4777  return 0;
4778}
4779
4780/* Record that the current function accesses previous call frames.  */
4781
4782void
4783ix86_setup_frame_addresses (void)
4784{
4785  cfun->machine->accesses_prev_frame = 1;
4786}
4787
4788#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4789# define USE_HIDDEN_LINKONCE 1
4790#else
4791# define USE_HIDDEN_LINKONCE 0
4792#endif
4793
4794static int pic_labels_used;
4795
4796/* Fills in the label name that should be used for a pc thunk for
4797   the given register.  */
4798
4799static void
4800get_pc_thunk_name (char name[32], unsigned int regno)
4801{
4802  if (USE_HIDDEN_LINKONCE)
4803    sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4804  else
4805    ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4806}
4807
4808
4809/* This function generates, for each PIC register used, a thunk that loads
4810   that register with the caller's return address and then returns.  */
4811
4812void
4813ix86_file_end (void)
4814{
4815  rtx xops[2];
4816  int regno;
4817
4818  for (regno = 0; regno < 8; ++regno)
4819    {
4820      char name[32];
4821
4822      if (! ((pic_labels_used >> regno) & 1))
4823	continue;
4824
4825      get_pc_thunk_name (name, regno);
4826
4827      if (USE_HIDDEN_LINKONCE)
4828	{
4829	  tree decl;
4830
4831	  decl = build_decl (FUNCTION_DECL, get_identifier (name),
4832			     error_mark_node);
4833	  TREE_PUBLIC (decl) = 1;
4834	  TREE_STATIC (decl) = 1;
4835	  DECL_ONE_ONLY (decl) = 1;
4836
4837	  (*targetm.asm_out.unique_section) (decl, 0);
4838	  named_section (decl, NULL, 0);
4839
4840	  (*targetm.asm_out.globalize_label) (asm_out_file, name);
4841	  fputs ("\t.hidden\t", asm_out_file);
4842	  assemble_name (asm_out_file, name);
4843	  fputc ('\n', asm_out_file);
4844	  ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
4845	}
4846      else
4847	{
4848	  text_section ();
4849	  ASM_OUTPUT_LABEL (asm_out_file, name);
4850	}
4851
4852      xops[0] = gen_rtx_REG (SImode, regno);
4853      xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4854      output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4855      output_asm_insn ("ret", xops);
4856    }
4857
4858  if (NEED_INDICATE_EXEC_STACK)
4859    file_end_indicate_exec_stack ();
4860}
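
/* For illustration, each thunk emitted above consists of just

	mov	(%esp), %reg
	ret

   i.e. it copies the return address pushed by the call (the caller's pc)
   into the requested register.  */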
4861
4862/* Emit code for the SET_GOT patterns.  */
4863
4864const char *
4865output_set_got (rtx dest)
4866{
4867  rtx xops[3];
4868
4869  xops[0] = dest;
4870  xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
4871
4872  if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
4873    {
4874      xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4875
4876      if (!flag_pic)
4877	output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4878      else
4879	output_asm_insn ("call\t%a2", xops);
4880
4881#if TARGET_MACHO
4882      /* Output the "canonical" label name ("Lxx$pb") here too.  This
4883         is what will be referred to by the Mach-O PIC subsystem.  */
4884      ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4885#endif
4886      (*targetm.asm_out.internal_label) (asm_out_file, "L",
4887				 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4888
4889      if (flag_pic)
4890	output_asm_insn ("pop{l}\t%0", xops);
4891    }
4892  else
4893    {
4894      char name[32];
4895      get_pc_thunk_name (name, REGNO (dest));
4896      pic_labels_used |= 1 << REGNO (dest);
4897
4898      xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4899      xops[2] = gen_rtx_MEM (QImode, xops[2]);
4900      output_asm_insn ("call\t%X2", xops);
4901    }
4902
4903  if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4904    output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
4905  else if (!TARGET_MACHO)
4906    output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
4907
4908  return "";
4909}
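
/* For illustration, when the thunk path is used (deep branch prediction
   and -fpic) and %ebx is the PIC register, the sequence emitted above is
   roughly:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   otherwise a call to a local label followed by a pop is emitted instead.  */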
4910
4911/* Generate an "push" pattern for input ARG.  */
4912
4913static rtx
4914gen_push (rtx arg)
4915{
4916  return gen_rtx_SET (VOIDmode,
4917		      gen_rtx_MEM (Pmode,
4918				   gen_rtx_PRE_DEC (Pmode,
4919						    stack_pointer_rtx)),
4920		      arg);
4921}
4922
4923/* Return >= 0 if there is an unused call-clobbered register available
4924   for the entire function.  */
4925
4926static unsigned int
4927ix86_select_alt_pic_regnum (void)
4928{
4929  if (current_function_is_leaf && !current_function_profile)
4930    {
4931      int i;
4932      for (i = 2; i >= 0; --i)
4933        if (!regs_ever_live[i])
4934	  return i;
4935    }
4936
4937  return INVALID_REGNUM;
4938}
4939
4940/* Return 1 if we need to save REGNO.  */
4941static int
4942ix86_save_reg (unsigned int regno, int maybe_eh_return)
4943{
4944  if (pic_offset_table_rtx
4945      && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4946      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4947	  || current_function_profile
4948	  || current_function_calls_eh_return
4949	  || current_function_uses_const_pool))
4950    {
4951      if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4952	return 0;
4953      return 1;
4954    }
4955
4956  if (current_function_calls_eh_return && maybe_eh_return)
4957    {
4958      unsigned i;
4959      for (i = 0; ; i++)
4960	{
4961	  unsigned test = EH_RETURN_DATA_REGNO (i);
4962	  if (test == INVALID_REGNUM)
4963	    break;
4964	  if (test == regno)
4965	    return 1;
4966	}
4967    }
4968
4969  return (regs_ever_live[regno]
4970	  && !call_used_regs[regno]
4971	  && !fixed_regs[regno]
4972	  && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4973}
4974
4975/* Return number of registers to be saved on the stack.  */
4976
4977static int
4978ix86_nsaved_regs (void)
4979{
4980  int nregs = 0;
4981  int regno;
4982
4983  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
4984    if (ix86_save_reg (regno, true))
4985      nregs++;
4986  return nregs;
4987}
4988
4989/* Return the offset between two registers, one to be eliminated, and the other
4990   its replacement, at the start of a routine.  */
4991
4992HOST_WIDE_INT
4993ix86_initial_elimination_offset (int from, int to)
4994{
4995  struct ix86_frame frame;
4996  ix86_compute_frame_layout (&frame);
4997
4998  if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4999    return frame.hard_frame_pointer_offset;
5000  else if (from == FRAME_POINTER_REGNUM
5001	   && to == HARD_FRAME_POINTER_REGNUM)
5002    return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5003  else
5004    {
5005      if (to != STACK_POINTER_REGNUM)
5006	abort ();
5007      else if (from == ARG_POINTER_REGNUM)
5008	return frame.stack_pointer_offset;
5009      else if (from != FRAME_POINTER_REGNUM)
5010	abort ();
5011      else
5012	return frame.stack_pointer_offset - frame.frame_pointer_offset;
5013    }
5014}
5015
5016	/* Fill the ix86_frame structure describing the frame of the currently compiled function.  */
5017
5018static void
5019ix86_compute_frame_layout (struct ix86_frame *frame)
5020{
5021  HOST_WIDE_INT total_size;
5022  int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5023  HOST_WIDE_INT offset;
5024  int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5025  HOST_WIDE_INT size = get_frame_size ();
5026
5027  frame->nregs = ix86_nsaved_regs ();
5028  total_size = size;
5029
5030	  /* During reload iterations the number of registers saved can change.
5031	     Recompute the value as needed.  Do not recompute when the number of
5032	     registers did not change, as reload makes multiple calls to this function
5033	     and does not expect the decision to change within a single iteration.  */
5034  if (!optimize_size
5035      && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5036    {
5037      int count = frame->nregs;
5038
5039      cfun->machine->use_fast_prologue_epilogue_nregs = count;
5040      /* The fast prologue uses move instead of push to save registers.  This
5041         is significantly longer, but also executes faster as modern hardware
5042         can execute the moves in parallel, but can't do that for push/pop.
5043
5044		 Be careful about choosing which prologue to emit:  when the function
5045		 takes many instructions to execute, or when it is known to lie outside
5046		 a hot spot (this is known with profile feedback only), we may as well
5047		 use the slow version.  Weight the size of the function by the number of
5048		 registers to save, since it is cheap to use one or two push instructions
5049		 but very slow to use many of them.  */
5050      if (count)
5051	count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5052      if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5053	  || (flag_branch_probabilities
5054	      && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5055        cfun->machine->use_fast_prologue_epilogue = false;
5056      else
5057        cfun->machine->use_fast_prologue_epilogue
5058	   = !expensive_function_p (count);
5059    }
5060  if (TARGET_PROLOGUE_USING_MOVE
5061      && cfun->machine->use_fast_prologue_epilogue)
5062    frame->save_regs_using_mov = true;
5063  else
5064    frame->save_regs_using_mov = false;
5065
5066
5067  /* Skip return address and saved base pointer.  */
5068  offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5069
5070  frame->hard_frame_pointer_offset = offset;
5071
5072  /* Do some sanity checking of stack_alignment_needed and
5073	     preferred_alignment, since the i386 port is the only one using these
5074	     features, and they may break easily.  */
5075
5076  if (size && !stack_alignment_needed)
5077    abort ();
5078  if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
5079    abort ();
5080  if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5081    abort ();
5082  if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
5083    abort ();
5084
5085  if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5086    stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5087
5088  /* Register save area */
5089  offset += frame->nregs * UNITS_PER_WORD;
5090
5091  /* Va-arg area */
5092  if (ix86_save_varrargs_registers)
5093    {
5094      offset += X86_64_VARARGS_SIZE;
5095      frame->va_arg_size = X86_64_VARARGS_SIZE;
5096    }
5097  else
5098    frame->va_arg_size = 0;
5099
5100	  /* Align the start of the frame for the local variables.  */
5101  frame->padding1 = ((offset + stack_alignment_needed - 1)
5102		     & -stack_alignment_needed) - offset;
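  /* For example (illustrative values only): with offset == 20 and
     stack_alignment_needed == 16, the expression above rounds 20 up to 32,
     giving padding1 == 12.  */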
5103
5104  offset += frame->padding1;
5105
5106  /* Frame pointer points here.  */
5107  frame->frame_pointer_offset = offset;
5108
5109  offset += size;
5110
5111	  /* Add the outgoing arguments area.  It can be skipped if we eliminated
5112	     all the function calls as dead code.
5113	     Skipping is however impossible when the function calls alloca: the alloca
5114	     expander assumes that the last current_function_outgoing_args_size bytes
5115	     of the stack frame are unused.  */
5116  if (ACCUMULATE_OUTGOING_ARGS
5117      && (!current_function_is_leaf || current_function_calls_alloca))
5118    {
5119      offset += current_function_outgoing_args_size;
5120      frame->outgoing_arguments_size = current_function_outgoing_args_size;
5121    }
5122  else
5123    frame->outgoing_arguments_size = 0;
5124
5125  /* Align stack boundary.  Only needed if we're calling another function
5126     or using alloca.  */
5127  if (!current_function_is_leaf || current_function_calls_alloca)
5128    frame->padding2 = ((offset + preferred_alignment - 1)
5129		       & -preferred_alignment) - offset;
5130  else
5131    frame->padding2 = 0;
5132
5133  offset += frame->padding2;
5134
5135  /* We've reached end of stack frame.  */
5136  frame->stack_pointer_offset = offset;
5137
5138  /* Size prologue needs to allocate.  */
5139  frame->to_allocate =
5140    (size + frame->padding1 + frame->padding2
5141     + frame->outgoing_arguments_size + frame->va_arg_size);
5142
5143  if ((!frame->to_allocate && frame->nregs <= 1)
5144      || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5145    frame->save_regs_using_mov = false;
5146
5147  if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5148      && current_function_is_leaf)
5149    {
5150      frame->red_zone_size = frame->to_allocate;
5151      if (frame->save_regs_using_mov)
5152	frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5153      if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5154	frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5155    }
5156  else
5157    frame->red_zone_size = 0;
5158  frame->to_allocate -= frame->red_zone_size;
5159  frame->stack_pointer_offset -= frame->red_zone_size;
5160#if 0
5161  fprintf (stderr, "nregs: %i\n", frame->nregs);
5162  fprintf (stderr, "size: %i\n", size);
5163  fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5164  fprintf (stderr, "padding1: %i\n", frame->padding1);
5165  fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5166  fprintf (stderr, "padding2: %i\n", frame->padding2);
5167  fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5168  fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5169  fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5170  fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5171	   frame->hard_frame_pointer_offset);
5172  fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5173#endif
5174}
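/* To summarize the layout computed above, a sketch of the common 32-bit
   case with a frame pointer (higher addresses first; purely descriptive,
   not a normative ABI statement):

	return address
	saved %ebp			<- hard_frame_pointer_offset
	saved registers			(frame->nregs words, unless moved later)
	va-arg save area		(64-bit varargs functions only)
	padding1			(aligns the local variables)
	local variables			<- frame_pointer_offset
	outgoing arguments		(ACCUMULATE_OUTGOING_ARGS only)
	padding2			(preferred stack boundary)
					<- stack_pointer_offset

   On x86-64 leaf functions with TARGET_RED_ZONE, the red zone carved out
   at the end reduces frame->to_allocate and stack_pointer_offset
   accordingly.  */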
5175
5176/* Emit code to save registers in the prologue.  */
5177
5178static void
5179ix86_emit_save_regs (void)
5180{
5181  int regno;
5182  rtx insn;
5183
5184  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5185    if (ix86_save_reg (regno, true))
5186      {
5187	insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5188	RTX_FRAME_RELATED_P (insn) = 1;
5189      }
5190}
5191
5192/* Emit code to save registers using MOV insns.  First register
5193	   is saved at POINTER + OFFSET.  */
5194static void
5195ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5196{
5197  int regno;
5198  rtx insn;
5199
5200  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5201    if (ix86_save_reg (regno, true))
5202      {
5203	insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5204					       Pmode, offset),
5205			       gen_rtx_REG (Pmode, regno));
5206	RTX_FRAME_RELATED_P (insn) = 1;
5207	offset += UNITS_PER_WORD;
5208      }
5209}
5210
5211	/* Expand a prologue or epilogue stack adjustment.
5212	   The pattern exists to put a dependency on all ebp-based memory accesses.
5213	   STYLE should be negative if the instructions should be marked as frame
5214	   related, zero if the %r11 register is live and cannot be freely used, and
5215	   positive otherwise.  */
5216
5217static void
5218pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5219{
5220  rtx insn;
5221
5222  if (! TARGET_64BIT)
5223    insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5224  else if (x86_64_immediate_operand (offset, DImode))
5225    insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5226  else
5227    {
5228      rtx r11;
5229      /* r11 is used by indirect sibcall return as well, set before the
5230	 epilogue and used after the epilogue.  ATM indirect sibcall
5231	 shouldn't be used together with huge frame sizes in one
5232	 function because of the frame_size check in sibcall.c.  */
5233      if (style == 0)
5234	abort ();
5235      r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5236      insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5237      if (style < 0)
5238	RTX_FRAME_RELATED_P (insn) = 1;
5239      insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5240							       offset));
5241    }
5242  if (style < 0)
5243    RTX_FRAME_RELATED_P (insn) = 1;
5244}
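/* For example, the prologue below calls this as
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                              GEN_INT (-allocate), -1)
   to subtract the frame size and mark the adjustment as frame related,
   while the epilogue paths simply pass their STYLE argument through.  */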
5245
5246/* Expand the prologue into a bunch of separate insns.  */
5247
5248void
5249ix86_expand_prologue (void)
5250{
5251  rtx insn;
5252  bool pic_reg_used;
5253  struct ix86_frame frame;
5254  HOST_WIDE_INT allocate;
5255
5256  ix86_compute_frame_layout (&frame);
5257
5258  /* Note: AT&T enter does NOT have reversed args.  Enter is probably
5259     slower on all targets.  Also sdb doesn't like it.  */
5260
5261  if (frame_pointer_needed)
5262    {
5263      insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5264      RTX_FRAME_RELATED_P (insn) = 1;
5265
5266      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5267      RTX_FRAME_RELATED_P (insn) = 1;
5268    }
5269
5270  allocate = frame.to_allocate;
5271
5272  if (!frame.save_regs_using_mov)
5273    ix86_emit_save_regs ();
5274  else
5275    allocate += frame.nregs * UNITS_PER_WORD;
5276
5277	  /* When using the red zone we may start register saving before allocating
5278	     the stack frame, saving one cycle of the prologue.  */
5279  if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5280    ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5281				   : stack_pointer_rtx,
5282				   -frame.nregs * UNITS_PER_WORD);
5283
5284  if (allocate == 0)
5285    ;
5286  else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5287    pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5288			       GEN_INT (-allocate), -1);
5289  else
5290    {
5291      /* Only valid for Win32.  */
5292      rtx eax = gen_rtx_REG (SImode, 0);
5293      bool eax_live = ix86_eax_live_at_start_p ();
5294
5295      if (TARGET_64BIT)
5296        abort ();
5297
5298      if (eax_live)
5299	{
5300	  emit_insn (gen_push (eax));
5301	  allocate -= 4;
5302	}
5303
5304      insn = emit_move_insn (eax, GEN_INT (allocate));
5305      RTX_FRAME_RELATED_P (insn) = 1;
5306
5307      insn = emit_insn (gen_allocate_stack_worker (eax));
5308      RTX_FRAME_RELATED_P (insn) = 1;
5309
5310      if (eax_live)
5311	{
5312	  rtx t = plus_constant (stack_pointer_rtx, allocate);
5313	  emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5314	}
5315    }
5316
5317  if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5318    {
5319      if (!frame_pointer_needed || !frame.to_allocate)
5320        ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5321      else
5322        ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5323				       -frame.nregs * UNITS_PER_WORD);
5324    }
5325
5326  pic_reg_used = false;
5327  if (pic_offset_table_rtx
5328      && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5329	  || current_function_profile))
5330    {
5331      unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5332
5333      if (alt_pic_reg_used != INVALID_REGNUM)
5334	REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5335
5336      pic_reg_used = true;
5337    }
5338
5339  if (pic_reg_used)
5340    {
5341      insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5342
5343      /* Even with accurate pre-reload life analysis, we can wind up
5344	 deleting all references to the pic register after reload.
5345		 Consider the case where cross-jumping unifies two sides of a branch
5346		 controlled by a comparison against the only read from a global.
5347		 In that case, allow the set_got to be deleted, though we're
5348		 too late to do anything about the ebx save in the prologue.  */
5349      REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5350    }
5351
5352	  /* Prevent function calls from being scheduled before the call to mcount.
5353     In the pic_reg_used case, make sure that the got load isn't deleted.  */
5354  if (current_function_profile)
5355    emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5356}
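/* As an illustrative sketch (the exact output depends on the tuning flags
   tested above), a typical 32-bit frame-pointer prologue emitted here
   corresponds to:

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%esi			(push-based register saving)
	pushl	%edi
	subl	$N, %esp		(N == frame.to_allocate)

   With move-based saving the pushes become movl stores issued after (or,
   with the red zone, before) the stack adjustment, and the set_got
   sequence is appended when the PIC register is needed.  */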
5357
5358/* Emit code to restore saved registers using MOV insns.  First register
5359   is restored from POINTER + OFFSET.  */
5360static void
5361ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5362				  int maybe_eh_return)
5363{
5364  int regno;
5365  rtx base_address = gen_rtx_MEM (Pmode, pointer);
5366
5367  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5368    if (ix86_save_reg (regno, maybe_eh_return))
5369      {
5370		/* Ensure that adjust_address won't be forced to produce a pointer
5371		   outside the range allowed by the x86-64 instruction set.  */
5372	if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5373	  {
5374	    rtx r11;
5375
5376	    r11 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
5377	    emit_move_insn (r11, GEN_INT (offset));
5378	    emit_insn (gen_adddi3 (r11, r11, pointer));
5379	    base_address = gen_rtx_MEM (Pmode, r11);
5380	    offset = 0;
5381	  }
5382	emit_move_insn (gen_rtx_REG (Pmode, regno),
5383			adjust_address (base_address, Pmode, offset));
5384	offset += UNITS_PER_WORD;
5385      }
5386}
5387
5388/* Restore function stack, frame, and registers.  */
5389
5390void
5391ix86_expand_epilogue (int style)
5392{
5393  int regno;
5394  int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5395  struct ix86_frame frame;
5396  HOST_WIDE_INT offset;
5397
5398  ix86_compute_frame_layout (&frame);
5399
5400  /* Calculate start of saved registers relative to ebp.  Special care
5401     must be taken for the normal return case of a function using
5402     eh_return: the eax and edx registers are marked as saved, but not
5403     restored along this path.  */
5404  offset = frame.nregs;
5405  if (current_function_calls_eh_return && style != 2)
5406    offset -= 2;
5407  offset *= -UNITS_PER_WORD;
5408
5409  /* If we're only restoring one register and sp is not valid then
5410	     use a move instruction to restore the register, since it's
5411	     less work than reloading sp and popping the register.
5412	
5413	     The default code results in a stack adjustment using an add/lea
5414	     instruction, while this code results in a LEAVE instruction (or a
5415	     discrete equivalent), so it is profitable in some other cases as well,
5416	     especially when there are no registers to restore.  We also use this
5417	     code when TARGET_USE_LEAVE is set and there is exactly one register to
5418	     pop.  This heuristic may need some tuning in the future.  */
5419  if ((!sp_valid && frame.nregs <= 1)
5420      || (TARGET_EPILOGUE_USING_MOVE
5421	  && cfun->machine->use_fast_prologue_epilogue
5422	  && (frame.nregs > 1 || frame.to_allocate))
5423      || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5424      || (frame_pointer_needed && TARGET_USE_LEAVE
5425	  && cfun->machine->use_fast_prologue_epilogue
5426	  && frame.nregs == 1)
5427      || current_function_calls_eh_return)
5428    {
5429      /* Restore registers.  We can use ebp or esp to address the memory
5430	 locations.  If both are available, default to ebp, since offsets
5431		 are known to be small.  The only exception is when esp points directly
5432		 to the end of the block of saved registers, where we may simplify the
5433		 addressing mode.  */
5434
5435      if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5436	ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5437					  frame.to_allocate, style == 2);
5438      else
5439	ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5440					  offset, style == 2);
5441
5442      /* eh_return epilogues need %ecx added to the stack pointer.  */
5443      if (style == 2)
5444	{
5445	  rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5446
5447	  if (frame_pointer_needed)
5448	    {
5449	      tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5450	      tmp = plus_constant (tmp, UNITS_PER_WORD);
5451	      emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5452
5453	      tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5454	      emit_move_insn (hard_frame_pointer_rtx, tmp);
5455
5456	      pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5457					 const0_rtx, style);
5458	    }
5459	  else
5460	    {
5461	      tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5462	      tmp = plus_constant (tmp, (frame.to_allocate
5463                                         + frame.nregs * UNITS_PER_WORD));
5464	      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5465	    }
5466	}
5467      else if (!frame_pointer_needed)
5468	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5469				   GEN_INT (frame.to_allocate
5470					    + frame.nregs * UNITS_PER_WORD),
5471				   style);
5472      /* If not an i386, mov & pop is faster than "leave".  */
5473      else if (TARGET_USE_LEAVE || optimize_size
5474	       || !cfun->machine->use_fast_prologue_epilogue)
5475	emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5476      else
5477	{
5478	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5479				     hard_frame_pointer_rtx,
5480				     const0_rtx, style);
5481	  if (TARGET_64BIT)
5482	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5483	  else
5484	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5485	}
5486    }
5487  else
5488    {
5489      /* First step is to deallocate the stack frame so that we can
5490	 pop the registers.  */
5491      if (!sp_valid)
5492	{
5493	  if (!frame_pointer_needed)
5494	    abort ();
5495	  pro_epilogue_adjust_stack (stack_pointer_rtx,
5496				     hard_frame_pointer_rtx,
5497				     GEN_INT (offset), style);
5498	}
5499      else if (frame.to_allocate)
5500	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5501				   GEN_INT (frame.to_allocate), style);
5502
5503      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5504	if (ix86_save_reg (regno, false))
5505	  {
5506	    if (TARGET_64BIT)
5507	      emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5508	    else
5509	      emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5510	  }
5511      if (frame_pointer_needed)
5512	{
5513	  /* Leave results in shorter dependency chains on CPUs that are
5514	     able to grok it fast.  */
5515	  if (TARGET_USE_LEAVE)
5516	    emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5517	  else if (TARGET_64BIT)
5518	    emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5519	  else
5520	    emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5521	}
5522    }
5523
5524  /* Sibcall epilogues don't want a return instruction.  */
5525  if (style == 0)
5526    return;
5527
5528  if (current_function_pops_args && current_function_args_size)
5529    {
5530      rtx popc = GEN_INT (current_function_pops_args);
5531
5532	      /* i386 can only pop 64K bytes.  If asked to pop more, pop the
5533		 return address, do an explicit add, and jump indirectly to the
5534		 caller.  */
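      /* A sketch of the resulting sequence (illustrative only):

		popl	%ecx		(the return address)
		addl	$current_function_pops_args, %esp
		jmp	*%ecx  */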
5535
5536      if (current_function_pops_args >= 65536)
5537	{
5538	  rtx ecx = gen_rtx_REG (SImode, 2);
5539
5540	  /* There is no "pascal" calling convention in 64bit ABI.  */
5541	  if (TARGET_64BIT)
5542	    abort ();
5543
5544	  emit_insn (gen_popsi1 (ecx));
5545	  emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
5546	  emit_jump_insn (gen_return_indirect_internal (ecx));
5547	}
5548      else
5549	emit_jump_insn (gen_return_pop_internal (popc));
5550    }
5551  else
5552    emit_jump_insn (gen_return_internal ());
5553}
5554
5555	/* Reset the PIC register number, which the prologue may have changed to an alternate register.  */
5556
5557static void
5558ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
5559			       HOST_WIDE_INT size ATTRIBUTE_UNUSED)
5560{
5561  if (pic_offset_table_rtx)
5562    REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
5563}
5564
5565/* Extract the parts of an RTL expression that is a valid memory address
5566   for an instruction.  Return 0 if the structure of the address is
5567   grossly off.  Return -1 if the address contains ASHIFT, so it is not
5568	   strictly valid, but is still used for computing the length of an lea instruction.  */
5569
5570static int
5571ix86_decompose_address (rtx addr, struct ix86_address *out)
5572{
5573  rtx base = NULL_RTX;
5574  rtx index = NULL_RTX;
5575  rtx disp = NULL_RTX;
5576  HOST_WIDE_INT scale = 1;
5577  rtx scale_rtx = NULL_RTX;
5578  int retval = 1;
5579  enum ix86_address_seg seg = SEG_DEFAULT;
5580
5581  if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
5582    base = addr;
5583  else if (GET_CODE (addr) == PLUS)
5584    {
5585      rtx addends[4], op;
5586      int n = 0, i;
5587
5588      op = addr;
5589      do
5590	{
5591	  if (n >= 4)
5592	    return 0;
5593	  addends[n++] = XEXP (op, 1);
5594	  op = XEXP (op, 0);
5595	}
5596      while (GET_CODE (op) == PLUS);
5597      if (n >= 4)
5598	return 0;
5599      addends[n] = op;
5600
5601      for (i = n; i >= 0; --i)
5602	{
5603	  op = addends[i];
5604	  switch (GET_CODE (op))
5605	    {
5606	    case MULT:
5607	      if (index)
5608		return 0;
5609	      index = XEXP (op, 0);
5610	      scale_rtx = XEXP (op, 1);
5611	      break;
5612
5613	    case UNSPEC:
5614	      if (XINT (op, 1) == UNSPEC_TP
5615	          && TARGET_TLS_DIRECT_SEG_REFS
5616	          && seg == SEG_DEFAULT)
5617		seg = TARGET_64BIT ? SEG_FS : SEG_GS;
5618	      else
5619		return 0;
5620	      break;
5621
5622	    case REG:
5623	    case SUBREG:
5624	      if (!base)
5625		base = op;
5626	      else if (!index)
5627		index = op;
5628	      else
5629		return 0;
5630	      break;
5631
5632	    case CONST:
5633	    case CONST_INT:
5634	    case SYMBOL_REF:
5635	    case LABEL_REF:
5636	      if (disp)
5637		return 0;
5638	      disp = op;
5639	      break;
5640
5641	    default:
5642	      return 0;
5643	    }
5644	}
5645    }
5646  else if (GET_CODE (addr) == MULT)
5647    {
5648      index = XEXP (addr, 0);		/* index*scale */
5649      scale_rtx = XEXP (addr, 1);
5650    }
5651  else if (GET_CODE (addr) == ASHIFT)
5652    {
5653      rtx tmp;
5654
5655      /* We're called for lea too, which implements ashift on occasion.  */
5656      index = XEXP (addr, 0);
5657      tmp = XEXP (addr, 1);
5658      if (GET_CODE (tmp) != CONST_INT)
5659	return 0;
5660      scale = INTVAL (tmp);
5661      if ((unsigned HOST_WIDE_INT) scale > 3)
5662	return 0;
5663      scale = 1 << scale;
5664      retval = -1;
5665    }
5666  else
5667    disp = addr;			/* displacement */
5668
5669  /* Extract the integral value of scale.  */
5670  if (scale_rtx)
5671    {
5672      if (GET_CODE (scale_rtx) != CONST_INT)
5673	return 0;
5674      scale = INTVAL (scale_rtx);
5675    }
5676
5677	  /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
5678  if (base && index && scale == 1
5679      && (index == arg_pointer_rtx
5680	  || index == frame_pointer_rtx
5681	  || (REG_P (index) && REGNO (index) == STACK_POINTER_REGNUM)))
5682    {
5683      rtx tmp = base;
5684      base = index;
5685      index = tmp;
5686    }
5687
5688  /* Special case: %ebp cannot be encoded as a base without a displacement.  */
5689  if ((base == hard_frame_pointer_rtx
5690       || base == frame_pointer_rtx
5691       || base == arg_pointer_rtx) && !disp)
5692    disp = const0_rtx;
5693
5694	  /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
5695     Avoid this by transforming to [%esi+0].  */
5696  if (ix86_tune == PROCESSOR_K6 && !optimize_size
5697      && base && !index && !disp
5698      && REG_P (base)
5699      && REGNO_REG_CLASS (REGNO (base)) == SIREG)
5700    disp = const0_rtx;
5701
5702  /* Special case: encode reg+reg instead of reg*2.  */
5703  if (!base && index && scale && scale == 2)
5704    base = index, scale = 1;
5705
5706  /* Special case: scaling cannot be encoded without base or displacement.  */
5707  if (!base && !disp && index && scale != 1)
5708    disp = const0_rtx;
5709
5710  out->base = base;
5711  out->index = index;
5712  out->disp = disp;
5713  out->scale = scale;
5714  out->seg = seg;
5715
5716  return retval;
5717}
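/* As an illustration (hypothetical operands), the canonical address

     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12,
   i.e. the operand 12(%ebx,%eax,4), and the function returns 1.  */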
5718
5719	/* Return the cost of the memory address X.
5720	   For i386, it is better to use a complex address than to let gcc copy
5721	   the address into a reg and make a new pseudo.  But not if the address
5722	   requires two regs - that would mean more pseudos with longer
5723	   lifetimes.  */
5724static int
5725ix86_address_cost (rtx x)
5726{
5727  struct ix86_address parts;
5728  int cost = 1;
5729
5730  if (!ix86_decompose_address (x, &parts))
5731    abort ();
5732
5733  /* More complex memory references are better.  */
5734  if (parts.disp && parts.disp != const0_rtx)
5735    cost--;
5736  if (parts.seg != SEG_DEFAULT)
5737    cost--;
5738
5739  /* Attempt to minimize number of registers in the address.  */
5740  if ((parts.base
5741       && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
5742      || (parts.index
5743	  && (!REG_P (parts.index)
5744	      || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
5745    cost++;
5746
5747  if (parts.base
5748      && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
5749      && parts.index
5750      && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
5751      && parts.base != parts.index)
5752    cost++;
5753
5754	  /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
5755	     since its predecode logic can't detect the length of such instructions
5756	     and decoding degenerates to vector decoding.  Increase the cost of such
5757	     addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
5758	     to split such addresses or even to refuse such addresses at all.
5759	
5760	     The following addressing modes are affected:
5761	      [base+scale*index]
5762	      [scale*index+disp]
5763	      [base+index]
5764	
5765	     The first and last cases may be avoidable by explicitly coding the zero
5766	     into the memory address, but I don't have an AMD-K6 machine handy to
5767	     check this theory.  */
5768
5769  if (TARGET_K6
5770      && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
5771	  || (parts.disp && !parts.base && parts.index && parts.scale != 1)
5772	  || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
5773    cost += 10;
5774
5775  return cost;
5776}
5777
5778/* If X is a machine specific address (i.e. a symbol or label being
5779   referenced as a displacement from the GOT implemented using an
5780   UNSPEC), then return the base term.  Otherwise return X.  */
5781
5782rtx
5783ix86_find_base_term (rtx x)
5784{
5785  rtx term;
5786
5787  if (TARGET_64BIT)
5788    {
5789      if (GET_CODE (x) != CONST)
5790	return x;
5791      term = XEXP (x, 0);
5792      if (GET_CODE (term) == PLUS
5793	  && (GET_CODE (XEXP (term, 1)) == CONST_INT
5794	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
5795	term = XEXP (term, 0);
5796      if (GET_CODE (term) != UNSPEC
5797	  || XINT (term, 1) != UNSPEC_GOTPCREL)
5798	return x;
5799
5800      term = XVECEXP (term, 0, 0);
5801
5802      if (GET_CODE (term) != SYMBOL_REF
5803	  && GET_CODE (term) != LABEL_REF)
5804	return x;
5805
5806      return term;
5807    }
5808
5809  term = ix86_delegitimize_address (x);
5810
5811  if (GET_CODE (term) != SYMBOL_REF
5812      && GET_CODE (term) != LABEL_REF)
5813    return x;
5814
5815  return term;
5816}
5817
5818/* Determine if a given RTX is a valid constant.  We already know this
5819   satisfies CONSTANT_P.  */
5820
5821bool
5822legitimate_constant_p (rtx x)
5823{
5824  switch (GET_CODE (x))
5825    {
5826    case CONST:
5827      x = XEXP (x, 0);
5828
5829      if (GET_CODE (x) == PLUS)
5830	{
5831	  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
5832	    return false;
5833	  x = XEXP (x, 0);
5834	}
5835
5836      /* Only some unspecs are valid as "constants".  */
5837      if (GET_CODE (x) == UNSPEC)
5838	switch (XINT (x, 1))
5839	  {
5840	  case UNSPEC_TPOFF:
5841	  case UNSPEC_NTPOFF:
5842	    return local_exec_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5843	  case UNSPEC_DTPOFF:
5844	    return local_dynamic_symbolic_operand (XVECEXP (x, 0, 0), Pmode);
5845	  default:
5846	    return false;
5847	  }
5848
5849      /* We must have drilled down to a symbol.  */
5850      if (!symbolic_operand (x, Pmode))
5851	return false;
5852      /* FALLTHRU */
5853
5854    case SYMBOL_REF:
5855      /* TLS symbols are never valid.  */
5856      if (tls_symbolic_operand (x, Pmode))
5857	return false;
5858      break;
5859
5860    default:
5861      break;
5862    }
5863
5864  /* Otherwise we handle everything else in the move patterns.  */
5865  return true;
5866}
5867
5868/* Determine if it's legal to put X into the constant pool.  This
5869   is not possible for the address of thread-local symbols, which
5870   is checked above.  */
5871
5872static bool
5873ix86_cannot_force_const_mem (rtx x)
5874{
5875  return !legitimate_constant_p (x);
5876}
5877
5878/* Determine if a given RTX is a valid constant address.  */
5879
5880bool
5881constant_address_p (rtx x)
5882{
5883  return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
5884}
5885
5886/* Nonzero if the constant value X is a legitimate general operand
5887   when generating PIC code.  It is given that flag_pic is on and
5888   that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
5889
5890bool
5891legitimate_pic_operand_p (rtx x)
5892{
5893  rtx inner;
5894
5895  switch (GET_CODE (x))
5896    {
5897    case CONST:
5898      inner = XEXP (x, 0);
5899
5900      /* Only some unspecs are valid as "constants".  */
5901      if (GET_CODE (inner) == UNSPEC)
5902	switch (XINT (inner, 1))
5903	  {
5904	  case UNSPEC_TPOFF:
5905	    return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5906	  default:
5907	    return false;
5908	  }
5909      /* FALLTHRU */
5910
5911    case SYMBOL_REF:
5912    case LABEL_REF:
5913      return legitimate_pic_address_disp_p (x);
5914
5915    default:
5916      return true;
5917    }
5918}
5919
5920/* Determine if a given CONST RTX is a valid memory displacement
5921   in PIC mode.  */
5922
5923int
5924legitimate_pic_address_disp_p (rtx disp)
5925{
5926  bool saw_plus;
5927
5928  /* In 64bit mode we can allow direct addresses of symbols and labels
5929     when they are not dynamic symbols.  */
5930  if (TARGET_64BIT)
5931    {
5932      /* TLS references should always be enclosed in UNSPEC.  */
5933      if (tls_symbolic_operand (disp, GET_MODE (disp)))
5934	return 0;
5935      if (GET_CODE (disp) == SYMBOL_REF
5936	  && ix86_cmodel == CM_SMALL_PIC
5937	  && SYMBOL_REF_LOCAL_P (disp))
5938	return 1;
5939      if (GET_CODE (disp) == LABEL_REF)
5940	return 1;
5941      if (GET_CODE (disp) == CONST
5942	  && GET_CODE (XEXP (disp, 0)) == PLUS)
5943	{
5944	  rtx op0 = XEXP (XEXP (disp, 0), 0);
5945	  rtx op1 = XEXP (XEXP (disp, 0), 1);
5946
5947	  /* TLS references should always be enclosed in UNSPEC.  */
5948	  if (tls_symbolic_operand (op0, GET_MODE (op0)))
5949	    return 0;
5950	  if (((GET_CODE (op0) == SYMBOL_REF
5951		&& ix86_cmodel == CM_SMALL_PIC
5952		&& SYMBOL_REF_LOCAL_P (op0))
5953	       || GET_CODE (op0) == LABEL_REF)
5954	      && GET_CODE (op1) == CONST_INT
5955	      && INTVAL (op1) < 16*1024*1024
5956	      && INTVAL (op1) >= -16*1024*1024)
5957	    return 1;
5958	}
5959    }
5960  if (GET_CODE (disp) != CONST)
5961    return 0;
5962  disp = XEXP (disp, 0);
5963
5964  if (TARGET_64BIT)
5965    {
5966	      /* It is unsafe to allow PLUS expressions; this would limit the allowed
5967	         reach of GOT references.  We should not need these anyway.  */
5968      if (GET_CODE (disp) != UNSPEC
5969	  || XINT (disp, 1) != UNSPEC_GOTPCREL)
5970	return 0;
5971
5972      if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5973	  && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5974	return 0;
5975      return 1;
5976    }
5977
5978  saw_plus = false;
5979  if (GET_CODE (disp) == PLUS)
5980    {
5981      if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5982	return 0;
5983      disp = XEXP (disp, 0);
5984      saw_plus = true;
5985    }
5986
5987  /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O.  */
5988  if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5989    {
5990      if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5991          || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5992        if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5993          {
5994            const char *sym_name = XSTR (XEXP (disp, 1), 0);
5995            if (! strcmp (sym_name, "<pic base>"))
5996              return 1;
5997          }
5998    }
5999
6000  if (GET_CODE (disp) != UNSPEC)
6001    return 0;
6002
6003  switch (XINT (disp, 1))
6004    {
6005    case UNSPEC_GOT:
6006      if (saw_plus)
6007	return false;
6008      return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6009    case UNSPEC_GOTOFF:
6010      if (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6011	  || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6012        return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6013      return false;
6014    case UNSPEC_GOTTPOFF:
6015    case UNSPEC_GOTNTPOFF:
6016    case UNSPEC_INDNTPOFF:
6017      if (saw_plus)
6018	return false;
6019      return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6020    case UNSPEC_NTPOFF:
6021      return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6022    case UNSPEC_DTPOFF:
6023      return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6024    }
6025
6026  return 0;
6027}
6028
6029/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6030   memory address for an instruction.  The MODE argument is the machine mode
6031   for the MEM expression that wants to use this address.
6032
6033	   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
6034   convert common non-canonical forms to canonical form so that they will
6035   be recognized.  */
6036
6037int
6038legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6039{
6040  struct ix86_address parts;
6041  rtx base, index, disp;
6042  HOST_WIDE_INT scale;
6043  const char *reason = NULL;
6044  rtx reason_rtx = NULL_RTX;
6045
6046  if (TARGET_DEBUG_ADDR)
6047    {
6048      fprintf (stderr,
6049	       "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6050	       GET_MODE_NAME (mode), strict);
6051      debug_rtx (addr);
6052    }
6053
6054  if (ix86_decompose_address (addr, &parts) <= 0)
6055    {
6056      reason = "decomposition failed";
6057      goto report_error;
6058    }
6059
6060  base = parts.base;
6061  index = parts.index;
6062  disp = parts.disp;
6063  scale = parts.scale;
6064
6065  /* Validate base register.
6066
6067	     Don't allow SUBREGs here; they can lead to spill failures when the base
6068     is one word out of a two word structure, which is represented internally
6069     as a DImode int.  */
6070
6071  if (base)
6072    {
6073      reason_rtx = base;
6074
6075      if (GET_CODE (base) != REG)
6076	{
6077	  reason = "base is not a register";
6078	  goto report_error;
6079	}
6080
6081      if (GET_MODE (base) != Pmode)
6082	{
6083	  reason = "base is not in Pmode";
6084	  goto report_error;
6085	}
6086
6087      if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
6088	  || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
6089	{
6090	  reason = "base is not valid";
6091	  goto report_error;
6092	}
6093    }
6094
6095  /* Validate index register.
6096
6097	     Don't allow SUBREGs here; they can lead to spill failures when the index
6098     is one word out of a two word structure, which is represented internally
6099     as a DImode int.  */
6100
6101  if (index)
6102    {
6103      reason_rtx = index;
6104
6105      if (GET_CODE (index) != REG)
6106	{
6107	  reason = "index is not a register";
6108	  goto report_error;
6109	}
6110
6111      if (GET_MODE (index) != Pmode)
6112	{
6113	  reason = "index is not in Pmode";
6114	  goto report_error;
6115	}
6116
6117      if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (index))
6118	  || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (index)))
6119	{
6120	  reason = "index is not valid";
6121	  goto report_error;
6122	}
6123    }
6124
6125  /* Validate scale factor.  */
6126  if (scale != 1)
6127    {
6128      reason_rtx = GEN_INT (scale);
6129      if (!index)
6130	{
6131	  reason = "scale without index";
6132	  goto report_error;
6133	}
6134
6135      if (scale != 2 && scale != 4 && scale != 8)
6136	{
6137	  reason = "scale is not a valid multiplier";
6138	  goto report_error;
6139	}
6140    }
6141
6142  /* Validate displacement.  */
6143  if (disp)
6144    {
6145      reason_rtx = disp;
6146
6147      if (GET_CODE (disp) == CONST
6148	  && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6149	switch (XINT (XEXP (disp, 0), 1))
6150	  {
6151	  case UNSPEC_GOT:
6152	  case UNSPEC_GOTOFF:
6153	  case UNSPEC_GOTPCREL:
6154	    if (!flag_pic)
6155	      abort ();
6156	    goto is_legitimate_pic;
6157
6158	  case UNSPEC_GOTTPOFF:
6159	  case UNSPEC_GOTNTPOFF:
6160	  case UNSPEC_INDNTPOFF:
6161	  case UNSPEC_NTPOFF:
6162	  case UNSPEC_DTPOFF:
6163	    break;
6164
6165	  default:
6166	    reason = "invalid address unspec";
6167	    goto report_error;
6168	  }
6169
6170      else if (flag_pic && (SYMBOLIC_CONST (disp)
6171#if TARGET_MACHO
6172			    && !machopic_operand_p (disp)
6173#endif
6174			    ))
6175	{
6176	is_legitimate_pic:
6177	  if (TARGET_64BIT && (index || base))
6178	    {
6179	      /* foo@dtpoff(%rX) is ok.  */
6180	      if (GET_CODE (disp) != CONST
6181		  || GET_CODE (XEXP (disp, 0)) != PLUS
6182		  || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6183		  || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6184		  || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6185		      && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6186		{
6187		  reason = "non-constant pic memory reference";
6188		  goto report_error;
6189		}
6190	    }
6191	  else if (! legitimate_pic_address_disp_p (disp))
6192	    {
6193	      reason = "displacement is an invalid pic construct";
6194	      goto report_error;
6195	    }
6196
6197          /* This code used to verify that a symbolic pic displacement
6198	     includes the pic_offset_table_rtx register.
6199
6200	     While this is a good idea, unfortunately these constructs may
6201	     be created by the "adds using lea" optimization for incorrect
6202	     code like:
6203
6204	     int a;
6205	     int foo(int i)
6206	       {
6207	         return *(&a+i);
6208	       }
6209
6210	     This code is nonsensical, but results in addressing the
6211	     GOT table with a pic_offset_table_rtx base.  We can't
6212	     just refuse it easily, since it gets matched by the
6213	     "addsi3" pattern, which later gets split to lea when the
6214	     output register differs from the input.  While this could
6215	     be handled by a separate addsi pattern for this case that
6216	     never results in lea, disabling this test seems to be the
6217	     easier and correct fix for the crash.  */
6218	}
6219      else if (GET_CODE (disp) != LABEL_REF
6220	       && GET_CODE (disp) != CONST_INT
6221	       && (GET_CODE (disp) != CONST
6222		   || !legitimate_constant_p (disp))
6223	       && (GET_CODE (disp) != SYMBOL_REF
6224		   || !legitimate_constant_p (disp)))
6225	{
6226	  reason = "displacement is not constant";
6227	  goto report_error;
6228	}
6229      else if (TARGET_64BIT && !x86_64_sign_extended_value (disp))
6230	{
6231	  reason = "displacement is out of range";
6232	  goto report_error;
6233	}
6234    }
6235
6236  /* Everything looks valid.  */
6237  if (TARGET_DEBUG_ADDR)
6238    fprintf (stderr, "Success.\n");
6239  return TRUE;
6240
6241 report_error:
6242  if (TARGET_DEBUG_ADDR)
6243    {
6244      fprintf (stderr, "Error: %s\n", reason);
6245      debug_rtx (reason_rtx);
6246    }
6247  return FALSE;
6248}
6249
6250	/* Return a unique alias set for the GOT.  */
6251
6252static HOST_WIDE_INT
6253ix86_GOT_alias_set (void)
6254{
6255  static HOST_WIDE_INT set = -1;
6256  if (set == -1)
6257    set = new_alias_set ();
6258  return set;
6259}
6260
6261/* Return a legitimate reference for ORIG (an address) using the
6262   register REG.  If REG is 0, a new pseudo is generated.
6263
6264   There are two types of references that must be handled:
6265
6266   1. Global data references must load the address from the GOT, via
6267      the PIC reg.  An insn is emitted to do this load, and the reg is
6268      returned.
6269
6270   2. Static data references, constant pool addresses, and code labels
6271      compute the address as an offset from the GOT, whose base is in
6272      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
6273      differentiate them from global data objects.  The returned
6274      address is the PIC reg + an unspec constant.
6275
6276   GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6277   reg also appears in the address.  */
6278
6279rtx
6280legitimize_pic_address (rtx orig, rtx reg)
6281{
6282  rtx addr = orig;
6283  rtx new = orig;
6284  rtx base;
6285
6286#if TARGET_MACHO
6287  if (reg == 0)
6288    reg = gen_reg_rtx (Pmode);
6289  /* Use the generic Mach-O PIC machinery.  */
6290  return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6291#endif
6292
6293  if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6294    new = addr;
6295  else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6296    {
6297      /* This symbol may be referenced via a displacement from the PIC
6298	 base address (@GOTOFF).  */
6299
6300      if (reload_in_progress)
6301	regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6302      if (GET_CODE (addr) == CONST)
6303	addr = XEXP (addr, 0);
6304      if (GET_CODE (addr) == PLUS)
6305	  {
6306            new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6307	    new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6308	  }
6309	else
6310          new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6311      new = gen_rtx_CONST (Pmode, new);
6312      new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6313
6314      if (reg != 0)
6315	{
6316	  emit_move_insn (reg, new);
6317	  new = reg;
6318	}
6319    }
6320  else if (GET_CODE (addr) == SYMBOL_REF)
6321    {
6322      if (TARGET_64BIT)
6323	{
6324	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6325	  new = gen_rtx_CONST (Pmode, new);
6326	  new = gen_rtx_MEM (Pmode, new);
6327	  RTX_UNCHANGING_P (new) = 1;
6328	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6329
6330	  if (reg == 0)
6331	    reg = gen_reg_rtx (Pmode);
6332	  /* Use gen_movsi directly, otherwise the address is loaded
6333	     into a register for CSE.  We don't want to CSE these addresses;
6334	     instead we CSE addresses from the GOT table, so skip this.  */
6335	  emit_insn (gen_movsi (reg, new));
6336	  new = reg;
6337	}
6338      else
6339	{
6340	  /* This symbol must be referenced via a load from the
6341	     Global Offset Table (@GOT).  */
6342
6343	  if (reload_in_progress)
6344	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6345	  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6346	  new = gen_rtx_CONST (Pmode, new);
6347	  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6348	  new = gen_rtx_MEM (Pmode, new);
6349	  RTX_UNCHANGING_P (new) = 1;
6350	  set_mem_alias_set (new, ix86_GOT_alias_set ());
6351
6352	  if (reg == 0)
6353	    reg = gen_reg_rtx (Pmode);
6354	  emit_move_insn (reg, new);
6355	  new = reg;
6356	}
6357    }
6358  else
6359    {
6360      if (GET_CODE (addr) == CONST)
6361	{
6362	  addr = XEXP (addr, 0);
6363
6364	  /* We must match stuff we generate before.  Assume the only
6365	     unspecs that can get here are ours.  Not that we could do
6366	     anything with them anyway....  */
6367	  if (GET_CODE (addr) == UNSPEC
6368	      || (GET_CODE (addr) == PLUS
6369		  && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6370	    return orig;
6371	  if (GET_CODE (addr) != PLUS)
6372	    abort ();
6373	}
6374      if (GET_CODE (addr) == PLUS)
6375	{
6376	  rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6377
6378	  /* Check first to see if this is a constant offset from a @GOTOFF
6379	     symbol reference.  */
6380	  if (local_symbolic_operand (op0, Pmode)
6381	      && GET_CODE (op1) == CONST_INT)
6382	    {
6383	      if (!TARGET_64BIT)
6384		{
6385		  if (reload_in_progress)
6386		    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6387		  new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6388					UNSPEC_GOTOFF);
6389		  new = gen_rtx_PLUS (Pmode, new, op1);
6390		  new = gen_rtx_CONST (Pmode, new);
6391		  new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6392
6393		  if (reg != 0)
6394		    {
6395		      emit_move_insn (reg, new);
6396		      new = reg;
6397		    }
6398		}
6399	      else
6400		{
6401		  if (INTVAL (op1) < -16*1024*1024
6402		      || INTVAL (op1) >= 16*1024*1024)
6403		    new = gen_rtx_PLUS (Pmode, op0, force_reg (Pmode, op1));
6404		}
6405	    }
6406	  else
6407	    {
6408	      base = legitimize_pic_address (XEXP (addr, 0), reg);
6409	      new  = legitimize_pic_address (XEXP (addr, 1),
6410					     base == reg ? NULL_RTX : reg);
6411
6412	      if (GET_CODE (new) == CONST_INT)
6413		new = plus_constant (base, INTVAL (new));
6414	      else
6415		{
6416		  if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
6417		    {
6418		      base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
6419		      new = XEXP (new, 1);
6420		    }
6421		  new = gen_rtx_PLUS (Pmode, base, new);
6422		}
6423	    }
6424	}
6425    }
6426  return new;
6427}
6428
6429/* Load the thread pointer.  If TO_REG is true, force it into a register.  */
6430
6431static rtx
6432get_thread_pointer (int to_reg)
6433{
6434  rtx tp, reg, insn;
6435
6436  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
6437  if (!to_reg)
6438    return tp;
6439
6440  reg = gen_reg_rtx (Pmode);
6441  insn = gen_rtx_SET (VOIDmode, reg, tp);
6442  insn = emit_insn (insn);
6443
6444  return reg;
6445}
6446
6447/* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
6448   false if we expect this to be used for a memory address and true if
6449   we expect to load the address into a register.  */
6450
6451static rtx
6452legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
6453{
6454  rtx dest, base, off, pic;
6455  int type;
6456
6457  switch (model)
6458    {
6459    case TLS_MODEL_GLOBAL_DYNAMIC:
6460      dest = gen_reg_rtx (Pmode);
6461      if (TARGET_64BIT)
6462	{
6463	  rtx rax = gen_rtx_REG (Pmode, 0), insns;
6464
6465	  start_sequence ();
6466	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
6467	  insns = get_insns ();
6468	  end_sequence ();
6469
6470	  emit_libcall_block (insns, dest, rax, x);
6471	}
6472      else
6473	emit_insn (gen_tls_global_dynamic_32 (dest, x));
6474      break;
6475
6476    case TLS_MODEL_LOCAL_DYNAMIC:
6477      base = gen_reg_rtx (Pmode);
6478      if (TARGET_64BIT)
6479	{
6480	  rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
6481
6482	  start_sequence ();
6483	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
6484	  insns = get_insns ();
6485	  end_sequence ();
6486
6487	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
6488	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
6489	  emit_libcall_block (insns, base, rax, note);
6490	}
6491      else
6492	emit_insn (gen_tls_local_dynamic_base_32 (base));
6493
6494      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
6495      off = gen_rtx_CONST (Pmode, off);
6496
6497      return gen_rtx_PLUS (Pmode, base, off);
6498
6499    case TLS_MODEL_INITIAL_EXEC:
6500      if (TARGET_64BIT)
6501	{
6502	  pic = NULL;
6503	  type = UNSPEC_GOTNTPOFF;
6504	}
6505      else if (flag_pic)
6506	{
6507	  if (reload_in_progress)
6508	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6509	  pic = pic_offset_table_rtx;
6510	  type = TARGET_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
6511	}
6512      else if (!TARGET_GNU_TLS)
6513	{
6514	  pic = gen_reg_rtx (Pmode);
6515	  emit_insn (gen_set_got (pic));
6516	  type = UNSPEC_GOTTPOFF;
6517	}
6518      else
6519	{
6520	  pic = NULL;
6521	  type = UNSPEC_INDNTPOFF;
6522	}
6523
6524      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
6525      off = gen_rtx_CONST (Pmode, off);
6526      if (pic)
6527	off = gen_rtx_PLUS (Pmode, pic, off);
6528      off = gen_rtx_MEM (Pmode, off);
6529      RTX_UNCHANGING_P (off) = 1;
6530      set_mem_alias_set (off, ix86_GOT_alias_set ());
6531
6532      if (TARGET_64BIT || TARGET_GNU_TLS)
6533	{
6534          base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6535	  off = force_reg (Pmode, off);
6536	  return gen_rtx_PLUS (Pmode, base, off);
6537	}
6538      else
6539	{
6540	  base = get_thread_pointer (true);
6541	  dest = gen_reg_rtx (Pmode);
6542	  emit_insn (gen_subsi3 (dest, base, off));
6543	}
6544      break;
6545
6546    case TLS_MODEL_LOCAL_EXEC:
6547      off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
6548			    (TARGET_64BIT || TARGET_GNU_TLS)
6549			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
6550      off = gen_rtx_CONST (Pmode, off);
6551
6552      if (TARGET_64BIT || TARGET_GNU_TLS)
6553	{
6554	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
6555	  return gen_rtx_PLUS (Pmode, base, off);
6556	}
6557      else
6558	{
6559	  base = get_thread_pointer (true);
6560	  dest = gen_reg_rtx (Pmode);
6561	  emit_insn (gen_subsi3 (dest, base, off));
6562	}
6563      break;
6564
6565    default:
6566      abort ();
6567    }
6568
6569  return dest;
6570}
6571
6572/* Try machine-dependent ways of modifying an illegitimate address
6573   to be legitimate.  If we find one, return the new, valid address.
6574   This macro is used in only one place: `memory_address' in explow.c.
6575
6576   OLDX is the address as it was before break_out_memory_refs was called.
6577   In some cases it is useful to look at this to decide what needs to be done.
6578
6579   MODE and WIN are passed so that this macro can use
6580   GO_IF_LEGITIMATE_ADDRESS.
6581
6582   It is always safe for this macro to do nothing.  It exists to recognize
6583   opportunities to optimize the output.
6584
6585   For the 80386, we handle X+REG by loading X into a register R and
6586   using R+REG.  R will go in a general reg and indexing will be used.
6587   However, if REG is a broken-out memory address or multiplication,
6588   nothing needs to be done because REG can certainly go in a general reg.
6589
6590   When -fpic is used, special handling is needed for symbolic references.
6591   See comments by legitimize_pic_address in i386.c for details.  */
6592
6593rtx
6594legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
6595{
6596  int changed = 0;
6597  unsigned log;
6598
6599  if (TARGET_DEBUG_ADDR)
6600    {
6601      fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
6602	       GET_MODE_NAME (mode));
6603      debug_rtx (x);
6604    }
6605
6606  log = tls_symbolic_operand (x, mode);
6607  if (log)
6608    return legitimize_tls_address (x, log, false);
6609
6610  if (flag_pic && SYMBOLIC_CONST (x))
6611    return legitimize_pic_address (x, 0);
6612
6613  /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
6614	  /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6615      && GET_CODE (XEXP (x, 1)) == CONST_INT
6616      && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
6617    {
6618      changed = 1;
6619      x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
6620			GEN_INT (1 << log));
6621    }
6622
6623  if (GET_CODE (x) == PLUS)
6624    {
6625      /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
6626
6627      if (GET_CODE (XEXP (x, 0)) == ASHIFT
6628	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
6629	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
6630	{
6631	  changed = 1;
6632	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
6633				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
6634				      GEN_INT (1 << log));
6635	}
6636
6637      if (GET_CODE (XEXP (x, 1)) == ASHIFT
6638	  && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
6639	  && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
6640	{
6641	  changed = 1;
6642	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
6643				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
6644				      GEN_INT (1 << log));
6645	}
6646
6647      /* Put multiply first if it isn't already.  */
6648      if (GET_CODE (XEXP (x, 1)) == MULT)
6649	{
6650	  rtx tmp = XEXP (x, 0);
6651	  XEXP (x, 0) = XEXP (x, 1);
6652	  XEXP (x, 1) = tmp;
6653	  changed = 1;
6654	}
6655
6656      /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
6657	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
6658	 created by virtual register instantiation, register elimination, and
6659	 similar optimizations.  */
6660      if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
6661	{
6662	  changed = 1;
6663	  x = gen_rtx_PLUS (Pmode,
6664			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
6665					  XEXP (XEXP (x, 1), 0)),
6666			    XEXP (XEXP (x, 1), 1));
6667	}
6668
6669      /* Canonicalize
6670	 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
6671	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
6672      else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
6673	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
6674	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
6675	       && CONSTANT_P (XEXP (x, 1)))
6676	{
6677	  rtx constant;
6678	  rtx other = NULL_RTX;
6679
6680	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6681	    {
6682	      constant = XEXP (x, 1);
6683	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
6684	    }
6685	  else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
6686	    {
6687	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
6688	      other = XEXP (x, 1);
6689	    }
6690	  else
6691	    constant = 0;
6692
6693	  if (constant)
6694	    {
6695	      changed = 1;
6696	      x = gen_rtx_PLUS (Pmode,
6697				gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
6698					      XEXP (XEXP (XEXP (x, 0), 1), 0)),
6699				plus_constant (other, INTVAL (constant)));
6700	    }
6701	}
6702
6703      if (changed && legitimate_address_p (mode, x, FALSE))
6704	return x;
6705
6706      if (GET_CODE (XEXP (x, 0)) == MULT)
6707	{
6708	  changed = 1;
6709	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
6710	}
6711
6712      if (GET_CODE (XEXP (x, 1)) == MULT)
6713	{
6714	  changed = 1;
6715	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
6716	}
6717
6718      if (changed
6719	  && GET_CODE (XEXP (x, 1)) == REG
6720	  && GET_CODE (XEXP (x, 0)) == REG)
6721	return x;
6722
6723      if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
6724	{
6725	  changed = 1;
6726	  x = legitimize_pic_address (x, 0);
6727	}
6728
6729      if (changed && legitimate_address_p (mode, x, FALSE))
6730	return x;
6731
6732      if (GET_CODE (XEXP (x, 0)) == REG)
6733	{
6734	  rtx temp = gen_reg_rtx (Pmode);
6735	  rtx val  = force_operand (XEXP (x, 1), temp);
6736	  if (val != temp)
6737	    emit_move_insn (temp, val);
6738
6739	  XEXP (x, 1) = temp;
6740	  return x;
6741	}
6742
6743      else if (GET_CODE (XEXP (x, 1)) == REG)
6744	{
6745	  rtx temp = gen_reg_rtx (Pmode);
6746	  rtx val  = force_operand (XEXP (x, 0), temp);
6747	  if (val != temp)
6748	    emit_move_insn (temp, val);
6749
6750	  XEXP (x, 0) = temp;
6751	  return x;
6752	}
6753    }
6754
6755  return x;
6756}
6757
6758/* Print an integer constant expression in assembler syntax.  Addition
6759   and subtraction are the only arithmetic that may appear in these
6760   expressions.  FILE is the stdio stream to write to, X is the rtx, and
6761   CODE is the operand print code from the output string.  */
6762
6763static void
6764output_pic_addr_const (FILE *file, rtx x, int code)
6765{
6766  char buf[256];
6767
6768  switch (GET_CODE (x))
6769    {
6770    case PC:
6771      if (flag_pic)
6772	putc ('.', file);
6773      else
6774	abort ();
6775      break;
6776
6777    case SYMBOL_REF:
6778      assemble_name (file, XSTR (x, 0));
6779      if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
6780	fputs ("@PLT", file);
6781      break;
6782
6783    case LABEL_REF:
6784      x = XEXP (x, 0);
6785      /* FALLTHRU */
6786    case CODE_LABEL:
6787      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
6788      assemble_name (asm_out_file, buf);
6789      break;
6790
6791    case CONST_INT:
6792      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6793      break;
6794
6795    case CONST:
6796      /* This used to output parentheses around the expression,
6797	 but that does not work on the 386 (either ATT or BSD assembler).  */
6798      output_pic_addr_const (file, XEXP (x, 0), code);
6799      break;
6800
6801    case CONST_DOUBLE:
6802      if (GET_MODE (x) == VOIDmode)
6803	{
6804	  /* We can print in decimal if the high part is zero and the value is non-negative.  */
6805	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
6806	    fprintf (file, "0x%lx%08lx",
6807		     (unsigned long) CONST_DOUBLE_HIGH (x),
6808		     (unsigned long) CONST_DOUBLE_LOW (x));
6809	  else
6810	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
6811	}
6812      else
6813	/* We can't handle floating point constants;
6814	   PRINT_OPERAND must handle them.  */
6815	output_operand_lossage ("floating constant misused");
6816      break;
6817
6818    case PLUS:
6819      /* Some assemblers need integer constants to appear first.  */
6820      if (GET_CODE (XEXP (x, 0)) == CONST_INT)
6821	{
6822	  output_pic_addr_const (file, XEXP (x, 0), code);
6823	  putc ('+', file);
6824	  output_pic_addr_const (file, XEXP (x, 1), code);
6825	}
6826      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6827	{
6828	  output_pic_addr_const (file, XEXP (x, 1), code);
6829	  putc ('+', file);
6830	  output_pic_addr_const (file, XEXP (x, 0), code);
6831	}
6832      else
6833	abort ();
6834      break;
6835
6836    case MINUS:
6837      if (!TARGET_MACHO)
6838	putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
6839      output_pic_addr_const (file, XEXP (x, 0), code);
6840      putc ('-', file);
6841      output_pic_addr_const (file, XEXP (x, 1), code);
6842      if (!TARGET_MACHO)
6843	putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6844      break;
6845
6846     case UNSPEC:
6847       if (XVECLEN (x, 0) != 1)
6848	 abort ();
6849       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6850       switch (XINT (x, 1))
6851	{
6852	case UNSPEC_GOT:
6853	  fputs ("@GOT", file);
6854	  break;
6855	case UNSPEC_GOTOFF:
6856	  fputs ("@GOTOFF", file);
6857	  break;
6858	case UNSPEC_GOTPCREL:
6859	  fputs ("@GOTPCREL(%rip)", file);
6860	  break;
6861	case UNSPEC_GOTTPOFF:
6862	  /* FIXME: This might be @TPOFF in Sun ld too.  */
6863	  fputs ("@GOTTPOFF", file);
6864	  break;
6865	case UNSPEC_TPOFF:
6866	  fputs ("@TPOFF", file);
6867	  break;
6868	case UNSPEC_NTPOFF:
6869	  if (TARGET_64BIT)
6870	    fputs ("@TPOFF", file);
6871	  else
6872	    fputs ("@NTPOFF", file);
6873	  break;
6874	case UNSPEC_DTPOFF:
6875	  fputs ("@DTPOFF", file);
6876	  break;
6877	case UNSPEC_GOTNTPOFF:
6878	  if (TARGET_64BIT)
6879	    fputs ("@GOTTPOFF(%rip)", file);
6880	  else
6881	    fputs ("@GOTNTPOFF", file);
6882	  break;
6883	case UNSPEC_INDNTPOFF:
6884	  fputs ("@INDNTPOFF", file);
6885	  break;
6886	default:
6887	  output_operand_lossage ("invalid UNSPEC as operand");
6888	  break;
6889	}
6890       break;
6891
6892    default:
6893      output_operand_lossage ("invalid expression as operand");
6894    }
6895}
6896
6897/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6898   We need to handle our special PIC relocations.  */
6899
6900void
6901i386_dwarf_output_addr_const (FILE *file, rtx x)
6902{
6903#ifdef ASM_QUAD
6904  fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6905#else
6906  if (TARGET_64BIT)
6907    abort ();
6908  fprintf (file, "%s", ASM_LONG);
6909#endif
6910  if (flag_pic)
6911    output_pic_addr_const (file, x, '\0');
6912  else
6913    output_addr_const (file, x);
6914  fputc ('\n', file);
6915}
6916
6917/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6918   We need to emit DTP-relative relocations.  */
6919
6920void
6921i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
6922{
6923  fputs (ASM_LONG, file);
6924  output_addr_const (file, x);
6925  fputs ("@DTPOFF", file);
6926  switch (size)
6927    {
6928    case 4:
6929      break;
6930    case 8:
6931      fputs (", 0", file);
6932      break;
6933    default:
6934      abort ();
6935   }
6936}
6937
6938/* In the name of slightly smaller debug output, and to cater to
6939   general assembler losage, recognize PIC+GOTOFF and turn it back
6940   general assembler lossage, recognize PIC+GOTOFF and turn it back
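/* A typical 32-bit PIC operand looks like
   (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))),
   possibly with an extra index term or constant offset; the code below
   strips the wrapper and returns the symbol_ref together with any
   remaining index or offset.  */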
6941
6942static rtx
6943ix86_delegitimize_address (rtx orig_x)
6944{
6945  rtx x = orig_x, y;
6946
6947  if (GET_CODE (x) == MEM)
6948    x = XEXP (x, 0);
6949
6950  if (TARGET_64BIT)
6951    {
6952      if (GET_CODE (x) != CONST
6953	  || GET_CODE (XEXP (x, 0)) != UNSPEC
6954	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6955	  || GET_CODE (orig_x) != MEM)
6956	return orig_x;
6957      return XVECEXP (XEXP (x, 0), 0, 0);
6958    }
6959
6960  if (GET_CODE (x) != PLUS
6961      || GET_CODE (XEXP (x, 1)) != CONST)
6962    return orig_x;
6963
6964  if (GET_CODE (XEXP (x, 0)) == REG
6965      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6966    /* %ebx + GOT/GOTOFF */
6967    y = NULL;
6968  else if (GET_CODE (XEXP (x, 0)) == PLUS)
6969    {
6970      /* %ebx + %reg * scale + GOT/GOTOFF */
6971      y = XEXP (x, 0);
6972      if (GET_CODE (XEXP (y, 0)) == REG
6973	  && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6974	y = XEXP (y, 1);
6975      else if (GET_CODE (XEXP (y, 1)) == REG
6976	       && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6977	y = XEXP (y, 0);
6978      else
6979	return orig_x;
6980      if (GET_CODE (y) != REG
6981	  && GET_CODE (y) != MULT
6982	  && GET_CODE (y) != ASHIFT)
6983	return orig_x;
6984    }
6985  else
6986    return orig_x;
6987
6988  x = XEXP (XEXP (x, 1), 0);
6989  if (GET_CODE (x) == UNSPEC
6990      && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6991	  || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6992    {
6993      if (y)
6994	return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6995      return XVECEXP (x, 0, 0);
6996    }
6997
6998  if (GET_CODE (x) == PLUS
6999      && GET_CODE (XEXP (x, 0)) == UNSPEC
7000      && GET_CODE (XEXP (x, 1)) == CONST_INT
7001      && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7002	  || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
7003	      && GET_CODE (orig_x) != MEM)))
7004    {
7005      x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
7006      if (y)
7007	return gen_rtx_PLUS (Pmode, y, x);
7008      return x;
7009    }
7010
7011  return orig_x;
7012}
7013
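/* Print to FILE the condition suffix ("e", "ne", "g", "b", ...) used by
   setcc, cmov and jcc for comparison CODE evaluated in flags mode MODE.
   REVERSE inverts the condition; FP selects the spellings that avoid
   assembler problems with fcmov (see the GTU and GEU cases below).  */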
7014static void
7015put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7016		    int fp, FILE *file)
7017{
7018  const char *suffix;
7019
7020  if (mode == CCFPmode || mode == CCFPUmode)
7021    {
7022      enum rtx_code second_code, bypass_code;
7023      ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7024      if (bypass_code != NIL || second_code != NIL)
7025	abort ();
7026      code = ix86_fp_compare_code_to_integer (code);
7027      mode = CCmode;
7028    }
7029  if (reverse)
7030    code = reverse_condition (code);
7031
7032  switch (code)
7033    {
7034    case EQ:
7035      suffix = "e";
7036      break;
7037    case NE:
7038      suffix = "ne";
7039      break;
7040    case GT:
7041      if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
7042	abort ();
7043      suffix = "g";
7044      break;
7045    case GTU:
7046      /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7047	 Those same assemblers have the same but opposite lossage on cmov.  */
7048      if (mode != CCmode)
7049	abort ();
7050      suffix = fp ? "nbe" : "a";
7051      break;
7052    case LT:
7053      if (mode == CCNOmode || mode == CCGOCmode)
7054	suffix = "s";
7055      else if (mode == CCmode || mode == CCGCmode)
7056	suffix = "l";
7057      else
7058	abort ();
7059      break;
7060    case LTU:
7061      if (mode != CCmode)
7062	abort ();
7063      suffix = "b";
7064      break;
7065    case GE:
7066      if (mode == CCNOmode || mode == CCGOCmode)
7067	suffix = "ns";
7068      else if (mode == CCmode || mode == CCGCmode)
7069	suffix = "ge";
7070      else
7071	abort ();
7072      break;
7073    case GEU:
7074      /* ??? As above.  */
7075      if (mode != CCmode)
7076	abort ();
7077      suffix = fp ? "nb" : "ae";
7078      break;
7079    case LE:
7080      if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
7081	abort ();
7082      suffix = "le";
7083      break;
7084    case LEU:
7085      if (mode != CCmode)
7086	abort ();
7087      suffix = "be";
7088      break;
7089    case UNORDERED:
7090      suffix = fp ? "u" : "p";
7091      break;
7092    case ORDERED:
7093      suffix = fp ? "nu" : "np";
7094      break;
7095    default:
7096      abort ();
7097    }
7098  fputs (suffix, file);
7099}
7100
7101/* Print the name of register X to FILE based on its machine mode and number.
7102   If CODE is 'w', pretend the mode is HImode.
7103   If CODE is 'b', pretend the mode is QImode.
7104   If CODE is 'k', pretend the mode is SImode.
7105   If CODE is 'q', pretend the mode is DImode.
7106   If CODE is 'h', pretend the reg is the `high' byte register.
7107   If CODE is 'y', print "st(0)" instead of "st" when the reg is the FP stack top.  */
7108
7109void
7110print_reg (rtx x, int code, FILE *file)
7111{
7112  if (REGNO (x) == ARG_POINTER_REGNUM
7113      || REGNO (x) == FRAME_POINTER_REGNUM
7114      || REGNO (x) == FLAGS_REG
7115      || REGNO (x) == FPSR_REG)
7116    abort ();
7117
7118  if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7119    putc ('%', file);
7120
7121  if (code == 'w' || MMX_REG_P (x))
7122    code = 2;
7123  else if (code == 'b')
7124    code = 1;
7125  else if (code == 'k')
7126    code = 4;
7127  else if (code == 'q')
7128    code = 8;
7129  else if (code == 'y')
7130    code = 3;
7131  else if (code == 'h')
7132    code = 0;
7133  else
7134    code = GET_MODE_SIZE (GET_MODE (x));
7135
7136  /* Irritatingly, the AMD extended registers use a different naming
7137     convention from the normal registers.  */
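  /* For example, extended register 8 is printed below as %r8b, %r8w, %r8d
     or %r8 depending on the operand size, while the legacy registers keep
     their %al/%ax/%eax style names.  */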
7138  if (REX_INT_REG_P (x))
7139    {
7140      if (!TARGET_64BIT)
7141	abort ();
7142      switch (code)
7143	{
7144	  case 0:
7145	    error ("extended registers have no high halves");
7146	    break;
7147	  case 1:
7148	    fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7149	    break;
7150	  case 2:
7151	    fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7152	    break;
7153	  case 4:
7154	    fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7155	    break;
7156	  case 8:
7157	    fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7158	    break;
7159	  default:
7160	    error ("unsupported operand size for extended register");
7161	    break;
7162	}
7163      return;
7164    }
7165  switch (code)
7166    {
7167    case 3:
7168      if (STACK_TOP_P (x))
7169	{
7170	  fputs ("st(0)", file);
7171	  break;
7172	}
7173      /* FALLTHRU */
7174    case 8:
7175    case 4:
7176    case 12:
7177      if (! ANY_FP_REG_P (x))
7178	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7179      /* FALLTHRU */
7180    case 16:
7181    case 2:
7182    normal:
7183      fputs (hi_reg_name[REGNO (x)], file);
7184      break;
7185    case 1:
7186      if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7187	goto normal;
7188      fputs (qi_reg_name[REGNO (x)], file);
7189      break;
7190    case 0:
7191      if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7192	goto normal;
7193      fputs (qi_high_reg_name[REGNO (x)], file);
7194      break;
7195    default:
7196      abort ();
7197    }
7198}
7199
7200/* Locate some local-dynamic symbol still in use by this function
7201   so that we can print its name in some tls_local_dynamic_base
7202   pattern.  */
7203
7204static const char *
7205get_some_local_dynamic_name (void)
7206{
7207  rtx insn;
7208
7209  if (cfun->machine->some_ld_name)
7210    return cfun->machine->some_ld_name;
7211
7212  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7213    if (INSN_P (insn)
7214	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7215      return cfun->machine->some_ld_name;
7216
7217  abort ();
7218}
7219
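/* for_each_rtx callback used by get_some_local_dynamic_name above: record
   the name of the first local-dynamic TLS SYMBOL_REF encountered and stop
   the walk by returning nonzero.  */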
7220static int
7221get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7222{
7223  rtx x = *px;
7224
7225  if (GET_CODE (x) == SYMBOL_REF
7226      && local_dynamic_symbolic_operand (x, Pmode))
7227    {
7228      cfun->machine->some_ld_name = XSTR (x, 0);
7229      return 1;
7230    }
7231
7232  return 0;
7233}
7234
7235/* Meaning of CODE:
7236   L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7237   C -- print opcode suffix for set/cmov insn.
7238   c -- like C, but print reversed condition
7239   F,f -- likewise, but for floating-point.
7240   O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7241        otherwise nothing
7242   R -- print the prefix for register names.
7243   z -- print the opcode suffix for the size of the current operand.
7244   * -- print a star (in certain assembler syntax)
7245   A -- print an absolute memory reference.
7246   w -- print the operand as if it's a "word" (HImode) even if it isn't.
7247   s -- print a shift double count, followed by the assembler's argument
7248	delimiter.
7249   b -- print the QImode name of the register for the indicated operand.
7250	%b0 would print %al if operands[0] is reg 0.
7251   w --  likewise, print the HImode name of the register.
7252   k --  likewise, print the SImode name of the register.
7253   q --  likewise, print the DImode name of the register.
7254   h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7255   y -- print "st(0)" instead of "st" as a register.
7256   D -- print condition for SSE cmp instruction.
7257   P -- if PIC, print an @PLT suffix.
7258   X -- don't print any sort of PIC '@' suffix for a symbol.
7259   & -- print some in-use local-dynamic symbol name.
7260 */
7261
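/* For example, if operands[0] is register 0, "%k0" prints %eax, "%w0"
   prints %ax, "%b0" prints %al and "%h0" prints %ah (AT&T syntax).  */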
7262void
7263print_operand (FILE *file, rtx x, int code)
7264{
7265  if (code)
7266    {
7267      switch (code)
7268	{
7269	case '*':
7270	  if (ASSEMBLER_DIALECT == ASM_ATT)
7271	    putc ('*', file);
7272	  return;
7273
7274	case '&':
7275	  assemble_name (file, get_some_local_dynamic_name ());
7276	  return;
7277
7278	case 'A':
7279	  if (ASSEMBLER_DIALECT == ASM_ATT)
7280	    putc ('*', file);
7281	  else if (ASSEMBLER_DIALECT == ASM_INTEL)
7282	    {
7283	      /* Intel syntax. For absolute addresses, registers should not
7284		 be surrounded by brackets.  */
7285	      if (GET_CODE (x) != REG)
7286		{
7287		  putc ('[', file);
7288		  PRINT_OPERAND (file, x, 0);
7289		  putc (']', file);
7290		  return;
7291		}
7292	    }
7293	  else
7294	    abort ();
7295
7296	  PRINT_OPERAND (file, x, 0);
7297	  return;
7298
7299
7300	case 'L':
7301	  if (ASSEMBLER_DIALECT == ASM_ATT)
7302	    putc ('l', file);
7303	  return;
7304
7305	case 'W':
7306	  if (ASSEMBLER_DIALECT == ASM_ATT)
7307	    putc ('w', file);
7308	  return;
7309
7310	case 'B':
7311	  if (ASSEMBLER_DIALECT == ASM_ATT)
7312	    putc ('b', file);
7313	  return;
7314
7315	case 'Q':
7316	  if (ASSEMBLER_DIALECT == ASM_ATT)
7317	    putc ('l', file);
7318	  return;
7319
7320	case 'S':
7321	  if (ASSEMBLER_DIALECT == ASM_ATT)
7322	    putc ('s', file);
7323	  return;
7324
7325	case 'T':
7326	  if (ASSEMBLER_DIALECT == ASM_ATT)
7327	    putc ('t', file);
7328	  return;
7329
7330	case 'z':
7331	  /* 387 opcodes don't get size suffixes if the operands are
7332	     registers.  */
7333	  if (STACK_REG_P (x))
7334	    return;
7335
7336	  /* Likewise if using Intel opcodes.  */
7337	  if (ASSEMBLER_DIALECT == ASM_INTEL)
7338	    return;
7339
7340	  /* Derive the opcode suffix from the size of the operand.  */
7341	  switch (GET_MODE_SIZE (GET_MODE (x)))
7342	    {
7343	    case 2:
7344#ifdef HAVE_GAS_FILDS_FISTS
7345	      putc ('s', file);
7346#endif
7347	      return;
7348
7349	    case 4:
7350	      if (GET_MODE (x) == SFmode)
7351		{
7352		  putc ('s', file);
7353		  return;
7354		}
7355	      else
7356		putc ('l', file);
7357	      return;
7358
7359	    case 12:
7360	    case 16:
7361	      putc ('t', file);
7362	      return;
7363
7364	    case 8:
7365	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
7366		{
7367#ifdef GAS_MNEMONICS
7368		  putc ('q', file);
7369#else
7370		  putc ('l', file);
7371		  putc ('l', file);
7372#endif
7373		}
7374	      else
7375	        putc ('l', file);
7376	      return;
7377
7378	    default:
7379	      abort ();
7380	    }
7381
7382	case 'b':
7383	case 'w':
7384	case 'k':
7385	case 'q':
7386	case 'h':
7387	case 'y':
7388	case 'X':
7389	case 'P':
7390	  break;
7391
7392	case 's':
7393	  if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
7394	    {
7395	      PRINT_OPERAND (file, x, 0);
7396	      putc (',', file);
7397	    }
7398	  return;
7399
7400	case 'D':
7401	  /* A little bit of brain damage here.  The SSE compare instructions
7402	     use completely different names for the comparisons than the
7403	     fp conditional moves do.  */
7404	  switch (GET_CODE (x))
7405	    {
7406	    case EQ:
7407	    case UNEQ:
7408	      fputs ("eq", file);
7409	      break;
7410	    case LT:
7411	    case UNLT:
7412	      fputs ("lt", file);
7413	      break;
7414	    case LE:
7415	    case UNLE:
7416	      fputs ("le", file);
7417	      break;
7418	    case UNORDERED:
7419	      fputs ("unord", file);
7420	      break;
7421	    case NE:
7422	    case LTGT:
7423	      fputs ("neq", file);
7424	      break;
7425	    case UNGE:
7426	    case GE:
7427	      fputs ("nlt", file);
7428	      break;
7429	    case UNGT:
7430	    case GT:
7431	      fputs ("nle", file);
7432	      break;
7433	    case ORDERED:
7434	      fputs ("ord", file);
7435	      break;
7436	    default:
7437	      abort ();
7438	      break;
7439	    }
7440	  return;
7441	case 'O':
7442#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7443	  if (ASSEMBLER_DIALECT == ASM_ATT)
7444	    {
7445	      switch (GET_MODE (x))
7446		{
7447		case HImode: putc ('w', file); break;
7448		case SImode:
7449		case SFmode: putc ('l', file); break;
7450		case DImode:
7451		case DFmode: putc ('q', file); break;
7452		default: abort ();
7453		}
7454	      putc ('.', file);
7455	    }
7456#endif
7457	  return;
7458	case 'C':
7459	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
7460	  return;
7461	case 'F':
7462#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7463	  if (ASSEMBLER_DIALECT == ASM_ATT)
7464	    putc ('.', file);
7465#endif
7466	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
7467	  return;
7468
7469	  /* Like above, but reverse condition */
7470	case 'c':
7471	  /* Check to see if argument to %c is really a constant
7472	     and not a condition code which needs to be reversed.  */
7473	  if (GET_RTX_CLASS (GET_CODE (x)) != '<')
7474	  {
7475	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
7476	    return;
7477	  }
7478	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
7479	  return;
7480	case 'f':
7481#ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
7482	  if (ASSEMBLER_DIALECT == ASM_ATT)
7483	    putc ('.', file);
7484#endif
7485	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
7486	  return;
7487	case '+':
7488	  {
7489	    rtx x;
7490
7491	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
7492	      return;
7493
7494	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
7495	    if (x)
7496	      {
7497		int pred_val = INTVAL (XEXP (x, 0));
7498
7499		if (pred_val < REG_BR_PROB_BASE * 45 / 100
7500		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
7501		  {
7502		    int taken = pred_val > REG_BR_PROB_BASE / 2;
7503		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
7504
7505		    /* Emit hints only when the default branch prediction
7506		       heuristics would fail.  */
7507		    if (taken != cputaken)
7508		      {
7509			/* We use the 3e (DS) prefix for taken branches and
7510			   the 2e (CS) prefix for not-taken branches.  */
7511			if (taken)
7512			  fputs ("ds ; ", file);
7513			else
7514			  fputs ("cs ; ", file);
7515		      }
7516		  }
7517	      }
7518	    return;
7519	  }
7520	default:
7521	    output_operand_lossage ("invalid operand code `%c'", code);
7522	}
7523    }
7524
7525  if (GET_CODE (x) == REG)
7526    print_reg (x, code, file);
7527
7528  else if (GET_CODE (x) == MEM)
7529    {
7530      /* No `byte ptr' prefix for call instructions.  */
7531      if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
7532	{
7533	  const char * size;
7534	  switch (GET_MODE_SIZE (GET_MODE (x)))
7535	    {
7536	    case 1: size = "BYTE"; break;
7537	    case 2: size = "WORD"; break;
7538	    case 4: size = "DWORD"; break;
7539	    case 8: size = "QWORD"; break;
7540	    case 12: size = "XWORD"; break;
7541	    case 16: size = "XMMWORD"; break;
7542	    default:
7543	      abort ();
7544	    }
7545
7546	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
7547	  if (code == 'b')
7548	    size = "BYTE";
7549	  else if (code == 'w')
7550	    size = "WORD";
7551	  else if (code == 'k')
7552	    size = "DWORD";
7553
7554	  fputs (size, file);
7555	  fputs (" PTR ", file);
7556	}
7557
7558      x = XEXP (x, 0);
7559      /* Avoid (%rip) for call operands.  */
7560      if (CONSTANT_ADDRESS_P (x) && code == 'P'
7561	       && GET_CODE (x) != CONST_INT)
7562	output_addr_const (file, x);
7563      else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
7564	output_operand_lossage ("invalid constraints for operand");
7565      else
7566	output_address (x);
7567    }
7568
7569  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
7570    {
7571      REAL_VALUE_TYPE r;
7572      long l;
7573
7574      REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7575      REAL_VALUE_TO_TARGET_SINGLE (r, l);
7576
7577      if (ASSEMBLER_DIALECT == ASM_ATT)
7578	putc ('$', file);
7579      fprintf (file, "0x%08lx", l);
7580    }
7581
7582  /* These float cases don't actually occur as immediate operands.  */
7583  else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
7584    {
7585      char dstr[30];
7586
7587      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7588      fprintf (file, "%s", dstr);
7589    }
7590
7591  else if (GET_CODE (x) == CONST_DOUBLE
7592	   && GET_MODE (x) == XFmode)
7593    {
7594      char dstr[30];
7595
7596      real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
7597      fprintf (file, "%s", dstr);
7598    }
7599
7600  else
7601    {
7602      if (code != 'P')
7603	{
7604	  if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
7605	    {
7606	      if (ASSEMBLER_DIALECT == ASM_ATT)
7607		putc ('$', file);
7608	    }
7609	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
7610		   || GET_CODE (x) == LABEL_REF)
7611	    {
7612	      if (ASSEMBLER_DIALECT == ASM_ATT)
7613		putc ('$', file);
7614	      else
7615		fputs ("OFFSET FLAT:", file);
7616	    }
7617	}
7618      if (GET_CODE (x) == CONST_INT)
7619	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7620      else if (flag_pic)
7621	output_pic_addr_const (file, x, code);
7622      else
7623	output_addr_const (file, x);
7624    }
7625}
7626
7627/* Print a memory operand whose address is ADDR.  */
7628
7629void
7630print_operand_address (FILE *file, rtx addr)
7631{
7632  struct ix86_address parts;
7633  rtx base, index, disp;
7634  int scale;
7635
7636  if (! ix86_decompose_address (addr, &parts))
7637    abort ();
7638
7639  base = parts.base;
7640  index = parts.index;
7641  disp = parts.disp;
7642  scale = parts.scale;
7643
7644  switch (parts.seg)
7645    {
7646    case SEG_DEFAULT:
7647      break;
7648    case SEG_FS:
7649    case SEG_GS:
7650      if (USER_LABEL_PREFIX[0] == 0)
7651	putc ('%', file);
7652      fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
7653      break;
7654    default:
7655      abort ();
7656    }
7657
7658  if (!base && !index)
7659    {
7660      /* A displacement-only address requires special attention.  */
7661
7662      if (GET_CODE (disp) == CONST_INT)
7663	{
7664	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
7665	    {
7666	      if (USER_LABEL_PREFIX[0] == 0)
7667		putc ('%', file);
7668	      fputs ("ds:", file);
7669	    }
7670	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
7671	}
7672      else if (flag_pic)
7673	output_pic_addr_const (file, disp, 0);
7674      else
7675	output_addr_const (file, disp);
7676
7677      /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
7678      if (TARGET_64BIT
7679	  && ((GET_CODE (disp) == SYMBOL_REF
7680	       && ! tls_symbolic_operand (disp, GET_MODE (disp)))
7681	      || GET_CODE (disp) == LABEL_REF
7682	      || (GET_CODE (disp) == CONST
7683		  && GET_CODE (XEXP (disp, 0)) == PLUS
7684		  && (GET_CODE (XEXP (XEXP (disp, 0), 0)) == SYMBOL_REF
7685		      || GET_CODE (XEXP (XEXP (disp, 0), 0)) == LABEL_REF)
7686		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)))
7687	fputs ("(%rip)", file);
7688    }
7689  else
7690    {
7691      if (ASSEMBLER_DIALECT == ASM_ATT)
7692	{
7693	  if (disp)
7694	    {
7695	      if (flag_pic)
7696		output_pic_addr_const (file, disp, 0);
7697	      else if (GET_CODE (disp) == LABEL_REF)
7698		output_asm_label (disp);
7699	      else
7700		output_addr_const (file, disp);
7701	    }
7702
7703	  putc ('(', file);
7704	  if (base)
7705	    print_reg (base, 0, file);
7706	  if (index)
7707	    {
7708	      putc (',', file);
7709	      print_reg (index, 0, file);
7710	      if (scale != 1)
7711		fprintf (file, ",%d", scale);
7712	    }
7713	  putc (')', file);
7714	}
7715      else
7716	{
7717	  rtx offset = NULL_RTX;
7718
7719	  if (disp)
7720	    {
7721	      /* Pull out the offset of a symbol; print any symbol itself.  */
7722	      if (GET_CODE (disp) == CONST
7723		  && GET_CODE (XEXP (disp, 0)) == PLUS
7724		  && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
7725		{
7726		  offset = XEXP (XEXP (disp, 0), 1);
7727		  disp = gen_rtx_CONST (VOIDmode,
7728					XEXP (XEXP (disp, 0), 0));
7729		}
7730
7731	      if (flag_pic)
7732		output_pic_addr_const (file, disp, 0);
7733	      else if (GET_CODE (disp) == LABEL_REF)
7734		output_asm_label (disp);
7735	      else if (GET_CODE (disp) == CONST_INT)
7736		offset = disp;
7737	      else
7738		output_addr_const (file, disp);
7739	    }
7740
7741	  putc ('[', file);
7742	  if (base)
7743	    {
7744	      print_reg (base, 0, file);
7745	      if (offset)
7746		{
7747		  if (INTVAL (offset) >= 0)
7748		    putc ('+', file);
7749		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7750		}
7751	    }
7752	  else if (offset)
7753	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
7754	  else
7755	    putc ('0', file);
7756
7757	  if (index)
7758	    {
7759	      putc ('+', file);
7760	      print_reg (index, 0, file);
7761	      if (scale != 1)
7762		fprintf (file, "*%d", scale);
7763	    }
7764	  putc (']', file);
7765	}
7766    }
7767}
7768
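/* Print the UNSPEC operand wrappers (the TLS relocations) that the generic
   output_addr_const cannot handle; return false for anything else.  This is
   presumably reached through the OUTPUT_ADDR_CONST_EXTRA macro in i386.h.  */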
7769bool
7770output_addr_const_extra (FILE *file, rtx x)
7771{
7772  rtx op;
7773
7774  if (GET_CODE (x) != UNSPEC)
7775    return false;
7776
7777  op = XVECEXP (x, 0, 0);
7778  switch (XINT (x, 1))
7779    {
7780    case UNSPEC_GOTTPOFF:
7781      output_addr_const (file, op);
7782      /* FIXME: This might be @TPOFF in Sun ld.  */
7783      fputs ("@GOTTPOFF", file);
7784      break;
7785    case UNSPEC_TPOFF:
7786      output_addr_const (file, op);
7787      fputs ("@TPOFF", file);
7788      break;
7789    case UNSPEC_NTPOFF:
7790      output_addr_const (file, op);
7791      if (TARGET_64BIT)
7792	fputs ("@TPOFF", file);
7793      else
7794	fputs ("@NTPOFF", file);
7795      break;
7796    case UNSPEC_DTPOFF:
7797      output_addr_const (file, op);
7798      fputs ("@DTPOFF", file);
7799      break;
7800    case UNSPEC_GOTNTPOFF:
7801      output_addr_const (file, op);
7802      if (TARGET_64BIT)
7803	fputs ("@GOTTPOFF(%rip)", file);
7804      else
7805	fputs ("@GOTNTPOFF", file);
7806      break;
7807    case UNSPEC_INDNTPOFF:
7808      output_addr_const (file, op);
7809      fputs ("@INDNTPOFF", file);
7810      break;
7811
7812    default:
7813      return false;
7814    }
7815
7816  return true;
7817}
7818
7819/* Split one or more DImode RTL references into pairs of SImode
7820   references.  The RTL can be REG, offsettable MEM, integer constant, or
7821   CONST_DOUBLE.  "operands" is a pointer to an array of DImode RTL to
7822   split and "num" is its length.  lo_half and hi_half are output arrays
7823   that parallel "operands".  */
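/* For example, splitting the single operand (mem:DI (reg:SI %ecx)) yields
   lo_half[0] = (mem:SI (reg:SI %ecx)) and
   hi_half[0] = (mem:SI (plus:SI (reg:SI %ecx) (const_int 4)));
   the target is little-endian, so the low word comes first.  */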
7824
7825void
7826split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7827{
7828  while (num--)
7829    {
7830      rtx op = operands[num];
7831
7832      /* simplify_subreg refuses to split volatile memory addresses,
7833         but we still have to handle them.  */
7834      if (GET_CODE (op) == MEM)
7835	{
7836	  lo_half[num] = adjust_address (op, SImode, 0);
7837	  hi_half[num] = adjust_address (op, SImode, 4);
7838	}
7839      else
7840	{
7841	  lo_half[num] = simplify_gen_subreg (SImode, op,
7842					      GET_MODE (op) == VOIDmode
7843					      ? DImode : GET_MODE (op), 0);
7844	  hi_half[num] = simplify_gen_subreg (SImode, op,
7845					      GET_MODE (op) == VOIDmode
7846					      ? DImode : GET_MODE (op), 4);
7847	}
7848    }
7849}
7850/* Split one or more TImode RTL references into pairs of DImode
7851   references.  The RTL can be REG, offsettable MEM, integer constant, or
7852   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
7853   split and "num" is its length.  lo_half and hi_half are output arrays
7854   that parallel "operands".  */
7855
7856void
7857split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
7858{
7859  while (num--)
7860    {
7861      rtx op = operands[num];
7862
7863      /* simplify_subreg refuses to split volatile memory addresses, but we
7864         still have to handle them.  */
7865      if (GET_CODE (op) == MEM)
7866	{
7867	  lo_half[num] = adjust_address (op, DImode, 0);
7868	  hi_half[num] = adjust_address (op, DImode, 8);
7869	}
7870      else
7871	{
7872	  lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7873	  hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7874	}
7875    }
7876}
7877
7878/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7879   MINUS, MULT or DIV.  OPERANDS are the insn operands, where operands[3]
7880   is the expression of the binary operation.  The output may either be
7881   emitted here, or returned to the caller, like all output_* functions.
7882
7883   There is no guarantee that the operands are the same mode, as they
7884   might be within FLOAT or FLOAT_EXTEND expressions.  */
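/* For example, adding %st(1) into %st(0) when neither input register dies
   is emitted through the "fadd" prefix and the "\t{%y2, %0|%0, %y2}"
   template chosen below, i.e. "fadd %st(1), %st" in AT&T syntax.  */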
7885
7886#ifndef SYSV386_COMPAT
7887/* Set to 1 for compatibility with brain-damaged assemblers.  No-one
7888   wants to fix the assemblers because that causes incompatibility
7889   with gcc.  No-one wants to fix gcc because that causes
7890   incompatibility with assemblers...  You can use the option of
7891   -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way.  */
7892#define SYSV386_COMPAT 1
7893#endif
7894
7895const char *
7896output_387_binary_op (rtx insn, rtx *operands)
7897{
7898  static char buf[30];
7899  const char *p;
7900  const char *ssep;
7901  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
7902
7903#ifdef ENABLE_CHECKING
7904  /* Even if we do not want to check the inputs, this documents the input
7905     constraints, which helps in understanding the following code.  */
7906  if (STACK_REG_P (operands[0])
7907      && ((REG_P (operands[1])
7908	   && REGNO (operands[0]) == REGNO (operands[1])
7909	   && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7910	  || (REG_P (operands[2])
7911	      && REGNO (operands[0]) == REGNO (operands[2])
7912	      && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7913      && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7914    ; /* ok */
7915  else if (!is_sse)
7916    abort ();
7917#endif
7918
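/* For SIZE 8 the 32-bit "sym@DTPOFF" value is followed by ", 0" so that the
   single ASM_LONG directive still emits an 8-byte datum with a zero upper
   half.  */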
7919  switch (GET_CODE (operands[3]))
7920    {
7921    case PLUS:
7922      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7923	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7924	p = "fiadd";
7925      else
7926	p = "fadd";
7927      ssep = "add";
7928      break;
7929
7930    case MINUS:
7931      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7932	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7933	p = "fisub";
7934      else
7935	p = "fsub";
7936      ssep = "sub";
7937      break;
7938
7939    case MULT:
7940      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7941	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7942	p = "fimul";
7943      else
7944	p = "fmul";
7945      ssep = "mul";
7946      break;
7947
7948    case DIV:
7949      if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7950	  || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7951	p = "fidiv";
7952      else
7953	p = "fdiv";
7954      ssep = "div";
7955      break;
7956
7957    default:
7958      abort ();
7959    }
7960
7961  if (is_sse)
7962   {
7963      strcpy (buf, ssep);
7964      if (GET_MODE (operands[0]) == SFmode)
7965	strcat (buf, "ss\t{%2, %0|%0, %2}");
7966      else
7967	strcat (buf, "sd\t{%2, %0|%0, %2}");
7968      return buf;
7969   }
7970  strcpy (buf, p);
7971
7972  switch (GET_CODE (operands[3]))
7973    {
7974    case MULT:
7975    case PLUS:
7976      if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7977	{
7978	  rtx temp = operands[2];
7979	  operands[2] = operands[1];
7980	  operands[1] = temp;
7981	}
7982
7983      /* We now know that operands[0] == operands[1].  */
7984
7985      if (GET_CODE (operands[2]) == MEM)
7986	{
7987	  p = "%z2\t%2";
7988	  break;
7989	}
7990
7991      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
7992	{
7993	  if (STACK_TOP_P (operands[0]))
7994	    /* How is it that we are storing to a dead operand[2]?
7995	       Well, presumably operands[1] is dead too.  We can't
7996	       store the result to st(0) as st(0) gets popped on this
7997	       instruction.  Instead store to operands[2] (which I
7998	       think has to be st(1)).  st(1) will be popped later.
7999	       gcc <= 2.8.1 didn't have this check and generated
8000	       assembly code that the Unixware assembler rejected.  */
8001	    p = "p\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8002	  else
8003	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8004	  break;
8005	}
8006
8007      if (STACK_TOP_P (operands[0]))
8008	p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8009      else
8010	p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8011      break;
8012
8013    case MINUS:
8014    case DIV:
8015      if (GET_CODE (operands[1]) == MEM)
8016	{
8017	  p = "r%z1\t%1";
8018	  break;
8019	}
8020
8021      if (GET_CODE (operands[2]) == MEM)
8022	{
8023	  p = "%z2\t%2";
8024	  break;
8025	}
8026
8027      if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8028	{
8029#if SYSV386_COMPAT
8030	  /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8031	     derived assemblers, confusingly reverse the direction of
8032	     the operation for fsub{r} and fdiv{r} when the
8033	     destination register is not st(0).  The Intel assembler
8034	     doesn't have this brain damage.  Read !SYSV386_COMPAT to
8035	     figure out what the hardware really does.  */
8036	  if (STACK_TOP_P (operands[0]))
8037	    p = "{p\t%0, %2|rp\t%2, %0}";
8038	  else
8039	    p = "{rp\t%2, %0|p\t%0, %2}";
8040#else
8041	  if (STACK_TOP_P (operands[0]))
8042	    /* As above for fmul/fadd, we can't store to st(0).  */
8043	    p = "rp\t{%0, %2|%2, %0}";	/* st(1) = st(0) op st(1); pop */
8044	  else
8045	    p = "p\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0); pop */
8046#endif
8047	  break;
8048	}
8049
8050      if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8051	{
8052#if SYSV386_COMPAT
8053	  if (STACK_TOP_P (operands[0]))
8054	    p = "{rp\t%0, %1|p\t%1, %0}";
8055	  else
8056	    p = "{p\t%1, %0|rp\t%0, %1}";
8057#else
8058	  if (STACK_TOP_P (operands[0]))
8059	    p = "p\t{%0, %1|%1, %0}";	/* st(1) = st(1) op st(0); pop */
8060	  else
8061	    p = "rp\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2); pop */
8062#endif
8063	  break;
8064	}
8065
8066      if (STACK_TOP_P (operands[0]))
8067	{
8068	  if (STACK_TOP_P (operands[1]))
8069	    p = "\t{%y2, %0|%0, %y2}";	/* st(0) = st(0) op st(r2) */
8070	  else
8071	    p = "r\t{%y1, %0|%0, %y1}";	/* st(0) = st(r1) op st(0) */
8072	  break;
8073	}
8074      else if (STACK_TOP_P (operands[1]))
8075	{
8076#if SYSV386_COMPAT
8077	  p = "{\t%1, %0|r\t%0, %1}";
8078#else
8079	  p = "r\t{%1, %0|%0, %1}";	/* st(r2) = st(0) op st(r2) */
8080#endif
8081	}
8082      else
8083	{
8084#if SYSV386_COMPAT
8085	  p = "{r\t%2, %0|\t%0, %2}";
8086#else
8087	  p = "\t{%2, %0|%0, %2}";	/* st(r1) = st(r1) op st(0) */
8088#endif
8089	}
8090      break;
8091
8092    default:
8093      abort ();
8094    }
8095
8096  strcat (buf, p);
8097  return buf;
8098}
8099
8100/* Output code to initialize control word copies used by
8101   trunc?f?i patterns.  NORMAL is set to the current control word, while
8102   ROUND_DOWN is set to a control word that rounds toward zero (truncation).  */
8103void
8104emit_i387_cw_initialization (rtx normal, rtx round_down)
8105{
8106  rtx reg = gen_reg_rtx (HImode);
8107
8108  emit_insn (gen_x86_fnstcw_1 (normal));
8109  emit_move_insn (reg, normal);
8110  if (!TARGET_PARTIAL_REG_STALL && !optimize_size
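  /* Both alternatives below set the rounding-control field (bits 10-11 of
     the x87 control word) to 11b, i.e. round toward zero, which is what the
     truncating conversions require.  */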
8111      && !TARGET_64BIT)
8112    emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8113  else
8114    emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
8115  emit_move_insn (round_down, reg);
8116}
8117
8118/* Output code for INSN to convert a float to a signed int.  OPERANDS
8119   are the insn operands.  The output may be [HSD]Imode and the input
8120   operand may be [SDX]Fmode.  */
8121
8122const char *
8123output_fix_trunc (rtx insn, rtx *operands)
8124{
8125  int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8126  int dimode_p = GET_MODE (operands[0]) == DImode;
8127
8128  /* Jump through a hoop or two for DImode, since the hardware has no
8129     non-popping instruction.  We used to do this a different way, but
8130     that was somewhat fragile and broke with post-reload splitters.  */
8131  if (dimode_p && !stack_top_dies)
8132    output_asm_insn ("fld\t%y1", operands);
8133
8134  if (!STACK_TOP_P (operands[1]))
8135    abort ();
8136
8137  if (GET_CODE (operands[0]) != MEM)
8138    abort ();
8139
8140  output_asm_insn ("fldcw\t%3", operands);
8141  if (stack_top_dies || dimode_p)
8142    output_asm_insn ("fistp%z0\t%0", operands);
8143  else
8144    output_asm_insn ("fist%z0\t%0", operands);
8145  output_asm_insn ("fldcw\t%2", operands);
8146
8147  return "";
8148}
8149
8150/* Output code for INSN to compare OPERANDS.  EFLAGS_P is 1 when fcomi
8151   should be used and 2 when fnstsw should be used.  UNORDERED_P is true
8152   when fucom should be used.  */
8153
8154const char *
8155output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
8156{
8157  int stack_top_dies;
8158  rtx cmp_op0 = operands[0];
8159  rtx cmp_op1 = operands[1];
8160  int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
8161
8162  if (eflags_p == 2)
8163    {
8164      cmp_op0 = cmp_op1;
8165      cmp_op1 = operands[2];
8166    }
8167  if (is_sse)
8168    {
8169      if (GET_MODE (operands[0]) == SFmode)
8170	if (unordered_p)
8171	  return "ucomiss\t{%1, %0|%0, %1}";
8172	else
8173	  return "comiss\t{%1, %0|%0, %1}";
8174      else
8175	if (unordered_p)
8176	  return "ucomisd\t{%1, %0|%0, %1}";
8177	else
8178	  return "comisd\t{%1, %0|%0, %1}";
8179    }
8180
8181  if (! STACK_TOP_P (cmp_op0))
8182    abort ();
8183
8184  stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8185
8186  if (STACK_REG_P (cmp_op1)
8187      && stack_top_dies
8188      && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
8189      && REGNO (cmp_op1) != FIRST_STACK_REG)
8190    {
8191      /* If both the top of the 387 stack and the other operand (also a
8192	 stack register) die, then this must be an `fcompp' float compare.  */
8194
8195      if (eflags_p == 1)
8196	{
8197	  /* There is no double popping fcomi variant.  Fortunately,
8198	     eflags is immune from the fstp's cc clobbering.  */
8199	  if (unordered_p)
8200	    output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
8201	  else
8202	    output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
8203	  return "fstp\t%y0";
8204	}
8205      else
8206	{
8207	  if (eflags_p == 2)
8208	    {
8209	      if (unordered_p)
8210		return "fucompp\n\tfnstsw\t%0";
8211	      else
8212		return "fcompp\n\tfnstsw\t%0";
8213	    }
8214	  else
8215	    {
8216	      if (unordered_p)
8217		return "fucompp";
8218	      else
8219		return "fcompp";
8220	    }
8221	}
8222    }
8223  else
8224    {
8225      /* The table index is (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies.  */
8226
8227      static const char * const alt[24] =
8228      {
8229	"fcom%z1\t%y1",
8230	"fcomp%z1\t%y1",
8231	"fucom%z1\t%y1",
8232	"fucomp%z1\t%y1",
8233
8234	"ficom%z1\t%y1",
8235	"ficomp%z1\t%y1",
8236	NULL,
8237	NULL,
8238
8239	"fcomi\t{%y1, %0|%0, %y1}",
8240	"fcomip\t{%y1, %0|%0, %y1}",
8241	"fucomi\t{%y1, %0|%0, %y1}",
8242	"fucomip\t{%y1, %0|%0, %y1}",
8243
8244	NULL,
8245	NULL,
8246	NULL,
8247	NULL,
8248
8249	"fcom%z2\t%y2\n\tfnstsw\t%0",
8250	"fcomp%z2\t%y2\n\tfnstsw\t%0",
8251	"fucom%z2\t%y2\n\tfnstsw\t%0",
8252	"fucomp%z2\t%y2\n\tfnstsw\t%0",
8253
8254	"ficom%z2\t%y2\n\tfnstsw\t%0",
8255	"ficomp%z2\t%y2\n\tfnstsw\t%0",
8256	NULL,
8257	NULL
8258      };
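      /* For example, eflags_p == 1, an ordered compare against a live FP
	 register and a dying %st(0) give mask == 9 and select
	 "fcomip\t{%y1, %0|%0, %y1}".  */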
8259
8260      int mask;
8261      const char *ret;
8262
8263      mask  = eflags_p << 3;
8264      mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
8265      mask |= unordered_p << 1;
8266      mask |= stack_top_dies;
8267
8268      if (mask >= 24)
8269	abort ();
8270      ret = alt[mask];
8271      if (ret == NULL)
8272	abort ();
8273
8274      return ret;
8275    }
8276}
8277
8278void
8279ix86_output_addr_vec_elt (FILE *file, int value)
8280{
8281  const char *directive = ASM_LONG;
8282
8283  if (TARGET_64BIT)
8284    {
8285#ifdef ASM_QUAD
8286      directive = ASM_QUAD;
8287#else
8288      abort ();
8289#endif
8290    }
8291
8292  fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
8293}
8294
8295void
8296ix86_output_addr_diff_elt (FILE *file, int value, int rel)
8297{
8298  if (TARGET_64BIT)
8299    fprintf (file, "%s%s%d-%s%d\n",
8300	     ASM_LONG, LPREFIX, value, LPREFIX, rel);
8301  else if (HAVE_AS_GOTOFF_IN_DATA)
8302    fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
8303#if TARGET_MACHO
8304  else if (TARGET_MACHO)
8305    {
8306      fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
8307      machopic_output_function_base_name (file);
8308      fprintf(file, "\n");
8309    }
8310#endif
8311  else
8312    asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
8313		 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
8314}
8315
8316/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
8317   for the target.  */
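/* The xor form must also clobber the flags register (see the PARALLEL built
   below), while "mov $0" leaves the flags alone; TARGET_USE_MOV0 and
   optimize_size decide which form is used.  */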
8318
8319void
8320ix86_expand_clear (rtx dest)
8321{
8322  rtx tmp;
8323
8324  /* We play register width games, which are only valid after reload.  */
8325  if (!reload_completed)
8326    abort ();
8327
8328  /* Avoid HImode and its attendant prefix byte.  */
8329  if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
8330    dest = gen_rtx_REG (SImode, REGNO (dest));
8331
8332  tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
8333
8334  /* This predicate should match that for movsi_xor and movdi_xor_rex64.  */
8335  if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
8336    {
8337      rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
8338      tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
8339    }
8340
8341  emit_insn (tmp);
8342}
8343
8344/* X is an unchanging MEM.  If it is a constant pool reference, return
8345   the constant pool rtx, else NULL.  */
8346
8347static rtx
8348maybe_get_pool_constant (rtx x)
8349{
8350  x = ix86_delegitimize_address (XEXP (x, 0));
8351
8352  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8353    return get_pool_constant (x);
8354
8355  return NULL_RTX;
8356}
8357
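/* Expand a move of mode MODE from operands[1] to operands[0], legitimizing
   TLS and PIC symbol references and forcing awkward constants into memory
   or registers before emitting the final SET.  */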
8358void
8359ix86_expand_move (enum machine_mode mode, rtx operands[])
8360{
8361  int strict = (reload_in_progress || reload_completed);
8362  rtx op0, op1;
8363  enum tls_model model;
8364
8365  op0 = operands[0];
8366  op1 = operands[1];
8367
8368  model = tls_symbolic_operand (op1, Pmode);
8369  if (model)
8370    {
8371      op1 = legitimize_tls_address (op1, model, true);
8372      op1 = force_operand (op1, op0);
8373      if (op1 == op0)
8374	return;
8375    }
8376
8377  if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
8378    {
8379#if TARGET_MACHO
8380      if (MACHOPIC_PURE)
8381	{
8382	  rtx temp = ((reload_in_progress
8383		       || ((op0 && GET_CODE (op0) == REG)
8384			   && mode == Pmode))
8385		      ? op0 : gen_reg_rtx (Pmode));
8386	  op1 = machopic_indirect_data_reference (op1, temp);
8387	  op1 = machopic_legitimize_pic_address (op1, mode,
8388						 temp == op1 ? 0 : temp);
8389	}
8390      else if (MACHOPIC_INDIRECT)
8391	op1 = machopic_indirect_data_reference (op1, 0);
8392      if (op0 == op1)
8393	return;
8394#else
8395      if (GET_CODE (op0) == MEM)
8396	op1 = force_reg (Pmode, op1);
8397      else
8398	{
8399	  rtx temp = op0;
8400	  if (GET_CODE (temp) != REG)
8401	    temp = gen_reg_rtx (Pmode);
8402	  temp = legitimize_pic_address (op1, temp);
8403	  if (temp == op0)
8404	    return;
8405	  op1 = temp;
8406	}
8407#endif /* TARGET_MACHO */
8408    }
8409  else
8410    {
8411      if (GET_CODE (op0) == MEM
8412	  && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
8413	      || !push_operand (op0, mode))
8414	  && GET_CODE (op1) == MEM)
8415	op1 = force_reg (mode, op1);
8416
8417      if (push_operand (op0, mode)
8418	  && ! general_no_elim_operand (op1, mode))
8419	op1 = copy_to_mode_reg (mode, op1);
8420
8421      /* Force large constants in 64-bit compilation into a register
8422	 so that they get CSEd.  */
8423      if (TARGET_64BIT && mode == DImode
8424	  && immediate_operand (op1, mode)
8425	  && !x86_64_zero_extended_value (op1)
8426	  && !register_operand (op0, mode)
8427	  && optimize && !reload_completed && !reload_in_progress)
8428	op1 = copy_to_mode_reg (mode, op1);
8429
8430      if (FLOAT_MODE_P (mode))
8431	{
8432	  /* If we are loading a floating point constant to a register,
8433	     force the value to memory now, since we'll get better code
8434	     out of the back end.  */
8435
8436	  if (strict)
8437	    ;
8438	  else if (GET_CODE (op1) == CONST_DOUBLE)
8439	    {
8440	      op1 = validize_mem (force_const_mem (mode, op1));
8441	      if (!register_operand (op0, mode))
8442		{
8443		  rtx temp = gen_reg_rtx (mode);
8444		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
8445		  emit_move_insn (op0, temp);
8446		  return;
8447		}
8448	    }
8449	}
8450    }
8451
8452  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
8453}
8454
8455void
8456ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
8457{
8458  /* Force constants other than zero into memory.  We do not know how
8459     the instructions used to build constants modify the upper 64 bits
8460     of the register; once we have that information we may be able
8461     to handle some of them more efficiently.  */
8462  if ((reload_in_progress | reload_completed) == 0
8463      && register_operand (operands[0], mode)
8464      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
8465    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
8466
8467  /* If neither operand is a register, force operand 1 into a register.  */
8468  if (!no_new_pseudos
8469      && !register_operand (operands[0], mode)
8470      && !register_operand (operands[1], mode))
8471    {
8472      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
8473      emit_move_insn (operands[0], temp);
8474      return;
8475    }
8476
8477  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8478}
8479
8480/* Attempt to expand a binary operator.  Make the expansion closer to the
8481   actual machine than just general_operand, which would allow 3 separate
8482   memory references (one output, two input) in a single insn.  */
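/* For example, expanding (plus:SI (mem:SI A) (mem:SI B)) forces at least one
   of the memory inputs into a register, and the emitted insn is wrapped in a
   PARALLEL with a (clobber (reg:CC FLAGS_REG)) because the integer
   arithmetic instructions clobber the flags.  */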
8483
8484void
8485ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
8486			     rtx operands[])
8487{
8488  int matching_memory;
8489  rtx src1, src2, dst, op, clob;
8490
8491  dst = operands[0];
8492  src1 = operands[1];
8493  src2 = operands[2];
8494
8495  /* Recognize <var1> = <value> <op> <var1> for commutative operators */
8496  if (GET_RTX_CLASS (code) == 'c'
8497      && (rtx_equal_p (dst, src2)
8498	  || immediate_operand (src1, mode)))
8499    {
8500      rtx temp = src1;
8501      src1 = src2;
8502      src2 = temp;
8503    }
8504
8505  /* If the destination is memory, and we do not have matching source
8506     operands, do things in registers.  */
8507  matching_memory = 0;
8508  if (GET_CODE (dst) == MEM)
8509    {
8510      if (rtx_equal_p (dst, src1))
8511	matching_memory = 1;
8512      else if (GET_RTX_CLASS (code) == 'c'
8513	       && rtx_equal_p (dst, src2))
8514	matching_memory = 2;
8515      else
8516	dst = gen_reg_rtx (mode);
8517    }
8518
8519  /* Both source operands cannot be in memory.  */
8520  if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
8521    {
8522      if (matching_memory != 2)
8523	src2 = force_reg (mode, src2);
8524      else
8525	src1 = force_reg (mode, src1);
8526    }
8527
8528  /* If the operation is not commutable, source 1 cannot be a constant
8529     or non-matching memory.  */
8530  if ((CONSTANT_P (src1)
8531       || (!matching_memory && GET_CODE (src1) == MEM))
8532      && GET_RTX_CLASS (code) != 'c')
8533    src1 = force_reg (mode, src1);
8534
8535  /* If optimizing, copy to regs to improve CSE */
8536  if (optimize && ! no_new_pseudos)
8537    {
8538      if (GET_CODE (dst) == MEM)
8539	dst = gen_reg_rtx (mode);
8540      if (GET_CODE (src1) == MEM)
8541	src1 = force_reg (mode, src1);
8542      if (GET_CODE (src2) == MEM)
8543	src2 = force_reg (mode, src2);
8544    }
8545
8546  /* Emit the instruction.  */
8547
8548  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
8549  if (reload_in_progress)
8550    {
8551      /* Reload doesn't know about the flags register, and doesn't know that
8552         it doesn't want to clobber it.  We can only do this with PLUS.  */
8553      if (code != PLUS)
8554	abort ();
8555      emit_insn (op);
8556    }
8557  else
8558    {
8559      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8560      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8561    }
8562
8563  /* Fix up the destination if needed.  */
8564  if (dst != operands[0])
8565    emit_move_insn (operands[0], dst);
8566}
8567
8568/* Return TRUE or FALSE depending on whether the binary operator meets the
8569   appropriate constraints.  */
8570
8571int
8572ix86_binary_operator_ok (enum rtx_code code,
8573			 enum machine_mode mode ATTRIBUTE_UNUSED,
8574			 rtx operands[3])
8575{
8576  /* Both source operands cannot be in memory.  */
8577  if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
8578    return 0;
8579  /* If the operation is not commutable, source 1 cannot be a constant.  */
8580  if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
8581    return 0;
8582  /* If the destination is memory, we must have a matching source operand.  */
8583  if (GET_CODE (operands[0]) == MEM
8584      && ! (rtx_equal_p (operands[0], operands[1])
8585	    || (GET_RTX_CLASS (code) == 'c'
8586		&& rtx_equal_p (operands[0], operands[2]))))
8587    return 0;
8588  /* If the operation is not commutable and the source 1 is memory, we must
8589     have a matching destination.  */
8590  if (GET_CODE (operands[1]) == MEM
8591      && GET_RTX_CLASS (code) != 'c'
8592      && ! rtx_equal_p (operands[0], operands[1]))
8593    return 0;
8594  return 1;
8595}
8596
8597/* Attempt to expand a unary operator.  Make the expansion closer to the
8598   actual machine than just general_operand, which would allow 2 separate
8599   memory references (one output, one input) in a single insn.  */
8600
8601void
8602ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
8603			    rtx operands[])
8604{
8605  int matching_memory;
8606  rtx src, dst, op, clob;
8607
8608  dst = operands[0];
8609  src = operands[1];
8610
8611  /* If the destination is memory, and we do not have matching source
8612     operands, do things in registers.  */
8613  matching_memory = 0;
8614  if (GET_CODE (dst) == MEM)
8615    {
8616      if (rtx_equal_p (dst, src))
8617	matching_memory = 1;
8618      else
8619	dst = gen_reg_rtx (mode);
8620    }
8621
8622  /* When source operand is memory, destination must match.  */
8623  if (!matching_memory && GET_CODE (src) == MEM)
8624    src = force_reg (mode, src);
8625
8626  /* If optimizing, copy to regs to improve CSE */
8627  if (optimize && ! no_new_pseudos)
8628    {
8629      if (GET_CODE (dst) == MEM)
8630	dst = gen_reg_rtx (mode);
8631      if (GET_CODE (src) == MEM)
8632	src = force_reg (mode, src);
8633    }
8634
8635  /* Emit the instruction.  */
8636
8637  op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
8638  if (reload_in_progress || code == NOT)
8639    {
8640      /* Reload doesn't know about the flags register, and doesn't know that
8641         it doesn't want to clobber it.  */
8642      if (code != NOT)
8643        abort ();
8644      emit_insn (op);
8645    }
8646  else
8647    {
8648      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
8649      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
8650    }
8651
8652  /* Fix up the destination if needed.  */
8653  if (dst != operands[0])
8654    emit_move_insn (operands[0], dst);
8655}
8656
8657/* Return TRUE or FALSE depending on whether the unary operator meets the
8658   appropriate constraints.  */
8659
8660int
8661ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
8662			enum machine_mode mode ATTRIBUTE_UNUSED,
8663			rtx operands[2] ATTRIBUTE_UNUSED)
8664{
8665  /* If one of operands is memory, source and destination must match.  */
8666  if ((GET_CODE (operands[0]) == MEM
8667       || GET_CODE (operands[1]) == MEM)
8668      && ! rtx_equal_p (operands[0], operands[1]))
8669    return FALSE;
8670  return TRUE;
8671}
8672
8673/* Return TRUE or FALSE depending on whether the first SET in INSN
8674   has source and destination with matching CC modes, and that the
8675   CC mode is at least as constrained as REQ_MODE.  */
8676
8677int
8678ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
8679{
8680  rtx set;
8681  enum machine_mode set_mode;
8682
8683  set = PATTERN (insn);
8684  if (GET_CODE (set) == PARALLEL)
8685    set = XVECEXP (set, 0, 0);
8686  if (GET_CODE (set) != SET)
8687    abort ();
8688  if (GET_CODE (SET_SRC (set)) != COMPARE)
8689    abort ();
8690
8691  set_mode = GET_MODE (SET_DEST (set));
8692  switch (set_mode)
8693    {
8694    case CCNOmode:
8695      if (req_mode != CCNOmode
8696	  && (req_mode != CCmode
8697	      || XEXP (SET_SRC (set), 1) != const0_rtx))
8698	return 0;
8699      break;
8700    case CCmode:
8701      if (req_mode == CCGCmode)
8702	return 0;
8703      /* FALLTHRU */
8704    case CCGCmode:
8705      if (req_mode == CCGOCmode || req_mode == CCNOmode)
8706	return 0;
8707      /* FALLTHRU */
8708    case CCGOCmode:
8709      if (req_mode == CCZmode)
8710	return 0;
8711      /* FALLTHRU */
8712    case CCZmode:
8713      break;
8714
8715    default:
8716      abort ();
8717    }
8718
8719  return (GET_MODE (SET_SRC (set)) == set_mode);
8720}
8721
8722/* Generate insn patterns to do an integer compare of OPERANDS.  */
8723
8724static rtx
8725ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
8726{
8727  enum machine_mode cmpmode;
8728  rtx tmp, flags;
8729
8730  cmpmode = SELECT_CC_MODE (code, op0, op1);
8731  flags = gen_rtx_REG (cmpmode, FLAGS_REG);
8732
8733  /* This is very simple, but making the interface the same as in the
8734     FP case makes the rest of the code easier.  */
8735  tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
8736  emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
8737
8738  /* Return the test that should be put into the flags user, i.e.
8739     the bcc, scc, or cmov instruction.  */
8740  return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
8741}
8742
8743/* Figure out whether to use ordered or unordered fp comparisons.
8744   Return the appropriate mode to use.  */
8745
8746enum machine_mode
8747ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
8748{
8749  /* ??? In order to make all comparisons reversible, we do all comparisons
8750     non-trapping when compiling for IEEE.  Once gcc is able to distinguish
8751     all forms of trapping and nontrapping comparisons, we can make inequality
8752     comparisons trapping again, since it results in better code when using
8753     FCOM based compares.  */
8754  return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
8755}
8756
8757enum machine_mode
8758ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
8759{
8760  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
8761    return ix86_fp_compare_mode (code);
8762  switch (code)
8763    {
8764      /* Only zero flag is needed.  */
8765    case EQ:			/* ZF=0 */
8766    case NE:			/* ZF!=0 */
8767      return CCZmode;
8768      /* Codes needing carry flag.  */
8769    case GEU:			/* CF=0 */
8770    case GTU:			/* CF=0 & ZF=0 */
8771    case LTU:			/* CF=1 */
8772    case LEU:			/* CF=1 | ZF=1 */
8773      return CCmode;
8774      /* Codes possibly doable only with sign flag when
8775         comparing against zero.  */
8776    case GE:			/* SF=OF   or   SF=0 */
8777    case LT:			/* SF<>OF  or   SF=1 */
8778      if (op1 == const0_rtx)
8779	return CCGOCmode;
8780      else
8781	/* For other cases Carry flag is not required.  */
8782	return CCGCmode;
8783      /* Codes doable only with the sign flag when comparing
8784         against zero, but we lack a jump instruction for them,
8785         so we must use relational tests against the overflow
8786         flag, which thus needs to be zero.  */
8787    case GT:			/* ZF=0 & SF=OF */
8788    case LE:			/* ZF=1 | SF<>OF */
8789      if (op1 == const0_rtx)
8790	return CCNOmode;
8791      else
8792	return CCGCmode;
8793      /* The strcmp pattern does (use flags), and combine may ask us for the
8794	 proper mode.  */
8795    case USE:
8796      return CCmode;
8797    default:
8798      abort ();
8799    }
8800}
8801
8802/* Return the fixed registers used for condition codes.  */
8803
8804static bool
8805ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
8806{
8807  *p1 = FLAGS_REG;
8808  *p2 = FPSR_REG;
8809  return true;
8810}
8811
8812/* If two condition code modes are compatible, return a condition code
8813   mode which is compatible with both.  Otherwise, return
8814   VOIDmode.  */
8815
8816static enum machine_mode
8817ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
8818{
8819  if (m1 == m2)
8820    return m1;
8821
8822  if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
8823    return VOIDmode;
8824
8825  if ((m1 == CCGCmode && m2 == CCGOCmode)
8826      || (m1 == CCGOCmode && m2 == CCGCmode))
8827    return CCGCmode;
8828
8829  switch (m1)
8830    {
8831    default:
8832      abort ();
8833
8834    case CCmode:
8835    case CCGCmode:
8836    case CCGOCmode:
8837    case CCNOmode:
8838    case CCZmode:
8839      switch (m2)
8840	{
8841	default:
8842	  return VOIDmode;
8843
8844	case CCmode:
8845	case CCGCmode:
8846	case CCGOCmode:
8847	case CCNOmode:
8848	case CCZmode:
8849	  return CCmode;
8850	}
8851
8852    case CCFPmode:
8853    case CCFPUmode:
8854      /* These are only compatible with themselves, which we already
8855	 checked above.  */
8856      return VOIDmode;
8857    }
8858}
8859
8860/* Return true if we should use an FCOMI instruction for this fp comparison.  */
8861
8862int
8863ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
8864{
8865  enum rtx_code swapped_code = swap_condition (code);
8866  return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8867	  || (ix86_fp_comparison_cost (swapped_code)
8868	      == ix86_fp_comparison_fcomi_cost (swapped_code)));
8869}
8870
8871/* Swap, force into registers, or otherwise massage the two operands
8872   to a fp comparison.  The operands are updated in place; the new
8873   comparison code is returned.  */
8874
8875static enum rtx_code
8876ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
8877{
8878  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8879  rtx op0 = *pop0, op1 = *pop1;
8880  enum machine_mode op_mode = GET_MODE (op0);
8881  int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
8882
8883  /* All of the unordered compare instructions only work on registers.
8884     The same is true of the XFmode compare instructions.  The same is
8885     true of the fcomi compare instructions.  */
8886
8887  if (!is_sse
8888      && (fpcmp_mode == CCFPUmode
8889	  || op_mode == XFmode
8890	  || ix86_use_fcomi_compare (code)))
8891    {
8892      op0 = force_reg (op_mode, op0);
8893      op1 = force_reg (op_mode, op1);
8894    }
8895  else
8896    {
8897      /* %%% We only allow op1 in memory; op0 must be st(0).  So swap
8898	 things around if they appear profitable, otherwise force op0
8899	 into a register.  */
8900
8901      if (standard_80387_constant_p (op0) == 0
8902	  || (GET_CODE (op0) == MEM
8903	      && ! (standard_80387_constant_p (op1) == 0
8904		    || GET_CODE (op1) == MEM)))
8905	{
8906	  rtx tmp;
8907	  tmp = op0, op0 = op1, op1 = tmp;
8908	  code = swap_condition (code);
8909	}
8910
8911      if (GET_CODE (op0) != REG)
8912	op0 = force_reg (op_mode, op0);
8913
8914      if (CONSTANT_P (op1))
8915	{
8916	  if (standard_80387_constant_p (op1))
8917	    op1 = force_reg (op_mode, op1);
8918	  else
8919	    op1 = validize_mem (force_const_mem (op_mode, op1));
8920	}
8921    }
8922
8923  /* Try to rearrange the comparison to make it cheaper.  */
8924  if (ix86_fp_comparison_cost (code)
8925      > ix86_fp_comparison_cost (swap_condition (code))
8926      && (GET_CODE (op1) == REG || !no_new_pseudos))
8927    {
8928      rtx tmp;
8929      tmp = op0, op0 = op1, op1 = tmp;
8930      code = swap_condition (code);
8931      if (GET_CODE (op0) != REG)
8932	op0 = force_reg (op_mode, op0);
8933    }
8934
8935  *pop0 = op0;
8936  *pop1 = op1;
8937  return code;
8938}
8939
8940/* Convert the comparison codes we use to represent an FP comparison into
8941   the integer code that will result in the proper branch.  Return UNKNOWN
8942   if no such code is available.  */
8943static enum rtx_code
8944ix86_fp_compare_code_to_integer (enum rtx_code code)
8945{
8946  switch (code)
8947    {
8948    case GT:
8949      return GTU;
8950    case GE:
8951      return GEU;
8952    case ORDERED:
8953    case UNORDERED:
8954      return code;
8955      break;
8956    case UNEQ:
8957      return EQ;
8958      break;
8959    case UNLT:
8960      return LTU;
8961      break;
8962    case UNLE:
8963      return LEU;
8964      break;
8965    case LTGT:
8966      return NE;
8967      break;
8968    default:
8969      return UNKNOWN;
8970    }
8971}
8972
8973/* Split comparison code CODE into comparisons we can do using branch
8974   instructions.  BYPASS_CODE is the comparison code for a branch that will
8975   branch around FIRST_CODE and SECOND_CODE.  If one of the branches is not
8976   required, its value is set to NIL.
8977   We never require more than two branches.  */
8978static void
8979ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
8980			  enum rtx_code *first_code,
8981			  enum rtx_code *second_code)
8982{
8983  *first_code = code;
8984  *bypass_code = NIL;
8985  *second_code = NIL;
8986
8987  /* The fcomi comparison sets flags as follows:
8988
8989     cmp    ZF PF CF
8990     >      0  0  0
8991     <      0  0  1
8992     =      1  0  0
8993     un     1  1  1 */
8994
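  /* A sketch of the resulting branch sequences (assuming the flag layout
     above): an IEEE "<" splits into first_code UNLT with bypass_code
     UNORDERED, i.e.

	jp	over		skip when unordered
	jb	target		CF set
     over:

     while an IEEE "!=" splits into first_code LTGT plus second_code
     UNORDERED, i.e. "jne target; jp target".  */
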
8995  switch (code)
8996    {
8997    case GT:			/* GTU - CF=0 & ZF=0 */
8998    case GE:			/* GEU - CF=0 */
8999    case ORDERED:		/* PF=0 */
9000    case UNORDERED:		/* PF=1 */
9001    case UNEQ:			/* EQ - ZF=1 */
9002    case UNLT:			/* LTU - CF=1 */
9003    case UNLE:			/* LEU - CF=1 | ZF=1 */
9004    case LTGT:			/* EQ - ZF=0 */
9005      break;
9006    case LT:			/* LTU - CF=1 - fails on unordered */
9007      *first_code = UNLT;
9008      *bypass_code = UNORDERED;
9009      break;
9010    case LE:			/* LEU - CF=1 | ZF=1 - fails on unordered */
9011      *first_code = UNLE;
9012      *bypass_code = UNORDERED;
9013      break;
9014    case EQ:			/* EQ - ZF=1 - fails on unordered */
9015      *first_code = UNEQ;
9016      *bypass_code = UNORDERED;
9017      break;
9018    case NE:			/* NE - ZF=0 - fails on unordered */
9019      *first_code = LTGT;
9020      *second_code = UNORDERED;
9021      break;
9022    case UNGE:			/* GEU - CF=0 - fails on unordered */
9023      *first_code = GE;
9024      *second_code = UNORDERED;
9025      break;
9026    case UNGT:			/* GTU - CF=0 & ZF=0 - fails on unordered */
9027      *first_code = GT;
9028      *second_code = UNORDERED;
9029      break;
9030    default:
9031      abort ();
9032    }
9033  if (!TARGET_IEEE_FP)
9034    {
9035      *second_code = NIL;
9036      *bypass_code = NIL;
9037    }
9038}
9039
9040/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
9041   All of the following functions use the number of instructions as the cost metric.
9042   In the future this should be tweaked to compute bytes for optimize_size and
9043   take into account the performance of various instructions on various CPUs.  */
9044static int
9045ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
9046{
9047  if (!TARGET_IEEE_FP)
9048    return 4;
9049  /* The cost of code output by ix86_expand_fp_compare.  */
9050  switch (code)
9051    {
9052    case UNLE:
9053    case UNLT:
9054    case LTGT:
9055    case GT:
9056    case GE:
9057    case UNORDERED:
9058    case ORDERED:
9059    case UNEQ:
9060      return 4;
9061      break;
9062    case LT:
9063    case NE:
9064    case EQ:
9065    case UNGE:
9066      return 5;
9067      break;
9068    case LE:
9069    case UNGT:
9070      return 6;
9071      break;
9072    default:
9073      abort ();
9074    }
9075}
9076
9077/* Return cost of comparison done using fcomi operation.
9078   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9079static int
9080ix86_fp_comparison_fcomi_cost (enum rtx_code code)
9081{
9082  enum rtx_code bypass_code, first_code, second_code;
9083  /* Return an arbitrarily high cost when the instruction is not supported -
9084     this prevents gcc from using it.  */
9085  if (!TARGET_CMOVE)
9086    return 1024;
9087  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9088  return (bypass_code != NIL || second_code != NIL) + 2;
9089}
9090
9091/* Return cost of comparison done using sahf operation.
9092   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9093static int
9094ix86_fp_comparison_sahf_cost (enum rtx_code code)
9095{
9096  enum rtx_code bypass_code, first_code, second_code;
9097  /* Return an arbitrarily high cost when the instruction is not preferred -
9098     this keeps gcc from using it.  */
9099  if (!TARGET_USE_SAHF && !optimize_size)
9100    return 1024;
9101  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9102  return (bypass_code != NIL || second_code != NIL) + 3;
9103}
9104
9105/* Compute cost of the comparison done using any method.
9106   See ix86_fp_comparison_arithmetics_cost for the metrics.  */
9107static int
9108ix86_fp_comparison_cost (enum rtx_code code)
9109{
9110  int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
9111  int min;
9112
9113  fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
9114  sahf_cost = ix86_fp_comparison_sahf_cost (code);
9115
9116  min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
9117  if (min > sahf_cost)
9118    min = sahf_cost;
9119  if (min > fcomi_cost)
9120    min = fcomi_cost;
9121  return min;
9122}
9123
9124/* Generate insn patterns to do a floating point compare of OPERANDS.  */
9125
9126static rtx
9127ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
9128			rtx *second_test, rtx *bypass_test)
9129{
9130  enum machine_mode fpcmp_mode, intcmp_mode;
9131  rtx tmp, tmp2;
9132  int cost = ix86_fp_comparison_cost (code);
9133  enum rtx_code bypass_code, first_code, second_code;
9134
9135  fpcmp_mode = ix86_fp_compare_mode (code);
9136  code = ix86_prepare_fp_compare_args (code, &op0, &op1);
9137
9138  if (second_test)
9139    *second_test = NULL_RTX;
9140  if (bypass_test)
9141    *bypass_test = NULL_RTX;
9142
9143  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9144
9145  /* Do fcomi/sahf based test when profitable.  */
9146  if ((bypass_code == NIL || bypass_test)
9147      && (second_code == NIL || second_test)
9148      && ix86_fp_comparison_arithmetics_cost (code) > cost)
9149    {
9150      if (TARGET_CMOVE)
9151	{
9152	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9153	  tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
9154			     tmp);
9155	  emit_insn (tmp);
9156	}
9157      else
9158	{
9159	  tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9160	  tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9161	  if (!scratch)
9162	    scratch = gen_reg_rtx (HImode);
9163	  emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9164	  emit_insn (gen_x86_sahf_1 (scratch));
9165	}
9166
9167      /* The FP codes work out to act like unsigned.  */
9168      intcmp_mode = fpcmp_mode;
9169      code = first_code;
9170      if (bypass_code != NIL)
9171	*bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
9172				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9173				       const0_rtx);
9174      if (second_code != NIL)
9175	*second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
9176				       gen_rtx_REG (intcmp_mode, FLAGS_REG),
9177				       const0_rtx);
9178    }
9179  else
9180    {
9181      /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first.  */
9182      tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
9183      tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
9184      if (!scratch)
9185	scratch = gen_reg_rtx (HImode);
9186      emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
9187
9188      /* In the unordered case, we have to check C2 for NaN's, which
9189	 doesn't happen to work out to anything nice combination-wise.
9190	 So do some bit twiddling on the value we've got in AH to come
9191	 up with an appropriate set of condition codes.  */
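      /* After fnstsw, the high byte of SCRATCH holds the x87 condition
	 bits: C0 = 0x01 (set for "less than"), C2 = 0x04 (set when
	 unordered) and C3 = 0x40 (set for "equal"), so 0x45 masks all
	 three.  The tests below twiddle these bits directly.  */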
9192
9193      intcmp_mode = CCNOmode;
9194      switch (code)
9195	{
9196	case GT:
9197	case UNGT:
9198	  if (code == GT || !TARGET_IEEE_FP)
9199	    {
9200	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9201	      code = EQ;
9202	    }
9203	  else
9204	    {
9205	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9206	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9207	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
9208	      intcmp_mode = CCmode;
9209	      code = GEU;
9210	    }
9211	  break;
9212	case LT:
9213	case UNLT:
9214	  if (code == LT && TARGET_IEEE_FP)
9215	    {
9216	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9217	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
9218	      intcmp_mode = CCmode;
9219	      code = EQ;
9220	    }
9221	  else
9222	    {
9223	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
9224	      code = NE;
9225	    }
9226	  break;
9227	case GE:
9228	case UNGE:
9229	  if (code == GE || !TARGET_IEEE_FP)
9230	    {
9231	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
9232	      code = EQ;
9233	    }
9234	  else
9235	    {
9236	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9237	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9238					     GEN_INT (0x01)));
9239	      code = NE;
9240	    }
9241	  break;
9242	case LE:
9243	case UNLE:
9244	  if (code == LE && TARGET_IEEE_FP)
9245	    {
9246	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9247	      emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
9248	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9249	      intcmp_mode = CCmode;
9250	      code = LTU;
9251	    }
9252	  else
9253	    {
9254	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
9255	      code = NE;
9256	    }
9257	  break;
9258	case EQ:
9259	case UNEQ:
9260	  if (code == EQ && TARGET_IEEE_FP)
9261	    {
9262	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9263	      emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
9264	      intcmp_mode = CCmode;
9265	      code = EQ;
9266	    }
9267	  else
9268	    {
9269	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9270	      code = NE;
9271	      break;
9272	    }
9273	  break;
9274	case NE:
9275	case LTGT:
9276	  if (code == NE && TARGET_IEEE_FP)
9277	    {
9278	      emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9279	      emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
9280					     GEN_INT (0x40)));
9281	      code = NE;
9282	    }
9283	  else
9284	    {
9285	      emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
9286	      code = EQ;
9287	    }
9288	  break;
9289
9290	case UNORDERED:
9291	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9292	  code = NE;
9293	  break;
9294	case ORDERED:
9295	  emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
9296	  code = EQ;
9297	  break;
9298
9299	default:
9300	  abort ();
9301	}
9302    }
9303
9304  /* Return the test that should be put into the flags user, i.e.
9305     the bcc, scc, or cmov instruction.  */
9306  return gen_rtx_fmt_ee (code, VOIDmode,
9307			 gen_rtx_REG (intcmp_mode, FLAGS_REG),
9308			 const0_rtx);
9309}
9310
9311rtx
9312ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
9313{
9314  rtx op0, op1, ret;
9315  op0 = ix86_compare_op0;
9316  op1 = ix86_compare_op1;
9317
9318  if (second_test)
9319    *second_test = NULL_RTX;
9320  if (bypass_test)
9321    *bypass_test = NULL_RTX;
9322
9323  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
9324    ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9325				  second_test, bypass_test);
9326  else
9327    ret = ix86_expand_int_compare (code, op0, op1);
9328
9329  return ret;
9330}
9331
9332/* Return true if the CODE will result in nontrivial jump sequence.  */
9333bool
9334ix86_fp_jump_nontrivial_p (enum rtx_code code)
9335{
9336  enum rtx_code bypass_code, first_code, second_code;
9337  if (!TARGET_CMOVE)
9338    return true;
9339  ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9340  return bypass_code != NIL || second_code != NIL;
9341}
9342
9343void
9344ix86_expand_branch (enum rtx_code code, rtx label)
9345{
9346  rtx tmp;
9347
9348  switch (GET_MODE (ix86_compare_op0))
9349    {
9350    case QImode:
9351    case HImode:
9352    case SImode:
9353      simple:
9354      tmp = ix86_expand_compare (code, NULL, NULL);
9355      tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9356				  gen_rtx_LABEL_REF (VOIDmode, label),
9357				  pc_rtx);
9358      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
9359      return;
9360
9361    case SFmode:
9362    case DFmode:
9363    case XFmode:
9364      {
9365	rtvec vec;
9366	int use_fcomi;
9367	enum rtx_code bypass_code, first_code, second_code;
9368
9369	code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
9370					     &ix86_compare_op1);
9371
9372	ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
9373
9374	/* Check whether we will use the natural sequence with one jump.  If
9375	   so, we can expand the jump early.  Otherwise delay expansion by
9376	   creating a compound insn so as not to confuse the optimizers.  */
9377	if (bypass_code == NIL && second_code == NIL
9378	    && TARGET_CMOVE)
9379	  {
9380	    ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
9381				  gen_rtx_LABEL_REF (VOIDmode, label),
9382				  pc_rtx, NULL_RTX);
9383	  }
9384	else
9385	  {
9386	    tmp = gen_rtx_fmt_ee (code, VOIDmode,
9387				  ix86_compare_op0, ix86_compare_op1);
9388	    tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
9389					gen_rtx_LABEL_REF (VOIDmode, label),
9390					pc_rtx);
9391	    tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
9392
9393	    use_fcomi = ix86_use_fcomi_compare (code);
9394	    vec = rtvec_alloc (3 + !use_fcomi);
9395	    RTVEC_ELT (vec, 0) = tmp;
9396	    RTVEC_ELT (vec, 1)
9397	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
9398	    RTVEC_ELT (vec, 2)
9399	      = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
9400	    if (! use_fcomi)
9401	      RTVEC_ELT (vec, 3)
9402		= gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
9403
9404	    emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
9405	  }
9406	return;
9407      }
9408
9409    case DImode:
9410      if (TARGET_64BIT)
9411	goto simple;
9412      /* Expand DImode branch into multiple compare+branch.  */
9413      {
9414	rtx lo[2], hi[2], label2;
9415	enum rtx_code code1, code2, code3;
9416
9417	if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
9418	  {
9419	    tmp = ix86_compare_op0;
9420	    ix86_compare_op0 = ix86_compare_op1;
9421	    ix86_compare_op1 = tmp;
9422	    code = swap_condition (code);
9423	  }
9424	split_di (&ix86_compare_op0, 1, lo+0, hi+0);
9425	split_di (&ix86_compare_op1, 1, lo+1, hi+1);
9426
9427	/* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
9428	   avoid two branches.  This costs one extra insn, so disable when
9429	   optimizing for size.  */
9430
9431	if ((code == EQ || code == NE)
9432	    && (!optimize_size
9433	        || hi[1] == const0_rtx || lo[1] == const0_rtx))
9434	  {
9435	    rtx xor0, xor1;
9436
9437	    xor1 = hi[0];
9438	    if (hi[1] != const0_rtx)
9439	      xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
9440				   NULL_RTX, 0, OPTAB_WIDEN);
9441
9442	    xor0 = lo[0];
9443	    if (lo[1] != const0_rtx)
9444	      xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
9445				   NULL_RTX, 0, OPTAB_WIDEN);
9446
9447	    tmp = expand_binop (SImode, ior_optab, xor1, xor0,
9448				NULL_RTX, 0, OPTAB_WIDEN);
9449
9450	    ix86_compare_op0 = tmp;
9451	    ix86_compare_op1 = const0_rtx;
9452	    ix86_expand_branch (code, label);
9453	    return;
9454	  }
9455
9456	/* Otherwise, if we are doing less-than or greater-or-equal-than,
9457	   op1 is a constant and the low word is zero, then we can just
9458	   examine the high word.  */
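	/* For instance, "a < 0x300000000LL" has a zero low word in the
	   constant, so it reduces to "hi(a) < 3"; the low words cannot
	   change the outcome of LT/LTU/GE/GEU in that case.  */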
9459
9460	if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
9461	  switch (code)
9462	    {
9463	    case LT: case LTU: case GE: case GEU:
9464	      ix86_compare_op0 = hi[0];
9465	      ix86_compare_op1 = hi[1];
9466	      ix86_expand_branch (code, label);
9467	      return;
9468	    default:
9469	      break;
9470	    }
9471
9472	/* Otherwise, we need two or three jumps.  */
9473
9474	label2 = gen_label_rtx ();
9475
9476	code1 = code;
9477	code2 = swap_condition (code);
9478	code3 = unsigned_condition (code);
9479
9480	switch (code)
9481	  {
9482	  case LT: case GT: case LTU: case GTU:
9483	    break;
9484
9485	  case LE:   code1 = LT;  code2 = GT;  break;
9486	  case GE:   code1 = GT;  code2 = LT;  break;
9487	  case LEU:  code1 = LTU; code2 = GTU; break;
9488	  case GEU:  code1 = GTU; code2 = LTU; break;
9489
9490	  case EQ:   code1 = NIL; code2 = NE;  break;
9491	  case NE:   code2 = NIL; break;
9492
9493	  default:
9494	    abort ();
9495	  }
9496
9497	/*
9498	 * a < b =>
9499	 *    if (hi(a) < hi(b)) goto true;
9500	 *    if (hi(a) > hi(b)) goto false;
9501	 *    if (lo(a) < lo(b)) goto true;
9502	 *  false:
9503	 */
9504
9505	ix86_compare_op0 = hi[0];
9506	ix86_compare_op1 = hi[1];
9507
9508	if (code1 != NIL)
9509	  ix86_expand_branch (code1, label);
9510	if (code2 != NIL)
9511	  ix86_expand_branch (code2, label2);
9512
9513	ix86_compare_op0 = lo[0];
9514	ix86_compare_op1 = lo[1];
9515	ix86_expand_branch (code3, label);
9516
9517	if (code2 != NIL)
9518	  emit_label (label2);
9519	return;
9520      }
9521
9522    default:
9523      abort ();
9524    }
9525}
9526
9527/* Split branch based on floating point condition.  */
9528void
9529ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
9530		      rtx target1, rtx target2, rtx tmp)
9531{
9532  rtx second, bypass;
9533  rtx label = NULL_RTX;
9534  rtx condition;
9535  int bypass_probability = -1, second_probability = -1, probability = -1;
9536  rtx i;
9537
9538  if (target2 != pc_rtx)
9539    {
9540      rtx tmp = target2;
9541      code = reverse_condition_maybe_unordered (code);
9542      target2 = target1;
9543      target1 = tmp;
9544    }
9545
9546  condition = ix86_expand_fp_compare (code, op1, op2,
9547				      tmp, &second, &bypass);
9548
9549  if (split_branch_probability >= 0)
9550    {
9551      /* Distribute the probabilities across the jumps.
9552	 Assume that BYPASS and SECOND always test
9553	 for UNORDERED.  */
9554      probability = split_branch_probability;
9555
9556      /* A value of 1 is low enough that there is no need for the probability
9557	 to be updated.  Later we may run some experiments and see
9558	 if unordered values are more frequent in practice.  */
9559      if (bypass)
9560	bypass_probability = 1;
9561      if (second)
9562	second_probability = 1;
9563    }
9564  if (bypass != NULL_RTX)
9565    {
9566      label = gen_label_rtx ();
9567      i = emit_jump_insn (gen_rtx_SET
9568			  (VOIDmode, pc_rtx,
9569			   gen_rtx_IF_THEN_ELSE (VOIDmode,
9570						 bypass,
9571						 gen_rtx_LABEL_REF (VOIDmode,
9572								    label),
9573						 pc_rtx)));
9574      if (bypass_probability >= 0)
9575	REG_NOTES (i)
9576	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9577			       GEN_INT (bypass_probability),
9578			       REG_NOTES (i));
9579    }
9580  i = emit_jump_insn (gen_rtx_SET
9581		      (VOIDmode, pc_rtx,
9582		       gen_rtx_IF_THEN_ELSE (VOIDmode,
9583					     condition, target1, target2)));
9584  if (probability >= 0)
9585    REG_NOTES (i)
9586      = gen_rtx_EXPR_LIST (REG_BR_PROB,
9587			   GEN_INT (probability),
9588			   REG_NOTES (i));
9589  if (second != NULL_RTX)
9590    {
9591      i = emit_jump_insn (gen_rtx_SET
9592			  (VOIDmode, pc_rtx,
9593			   gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
9594						 target2)));
9595      if (second_probability >= 0)
9596	REG_NOTES (i)
9597	  = gen_rtx_EXPR_LIST (REG_BR_PROB,
9598			       GEN_INT (second_probability),
9599			       REG_NOTES (i));
9600    }
9601  if (label != NULL_RTX)
9602    emit_label (label);
9603}
9604
9605int
9606ix86_expand_setcc (enum rtx_code code, rtx dest)
9607{
9608  rtx ret, tmp, tmpreg, equiv;
9609  rtx second_test, bypass_test;
9610
9611  if (GET_MODE (ix86_compare_op0) == DImode
9612      && !TARGET_64BIT)
9613    return 0; /* FAIL */
9614
9615  if (GET_MODE (dest) != QImode)
9616    abort ();
9617
9618  ret = ix86_expand_compare (code, &second_test, &bypass_test);
9619  PUT_MODE (ret, QImode);
9620
9621  tmp = dest;
9622  tmpreg = dest;
9623
9624  emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
9625  if (bypass_test || second_test)
9626    {
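      /* Combine the two flag tests into DEST: AND with the reversed
	 bypass test (e.g. IEEE "<" becomes "below AND ordered") or OR
	 with the second test (e.g. IEEE "!=" becomes "ltgt OR
	 unordered").  */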
9627      rtx test = second_test;
9628      int bypass = 0;
9629      rtx tmp2 = gen_reg_rtx (QImode);
9630      if (bypass_test)
9631	{
9632	  if (second_test)
9633	    abort ();
9634	  test = bypass_test;
9635	  bypass = 1;
9636	  PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
9637	}
9638      PUT_MODE (test, QImode);
9639      emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
9640
9641      if (bypass)
9642	emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
9643      else
9644	emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
9645    }
9646
9647  /* Attach a REG_EQUAL note describing the comparison result.  */
9648  equiv = simplify_gen_relational (code, QImode,
9649				   GET_MODE (ix86_compare_op0),
9650				   ix86_compare_op0, ix86_compare_op1);
9651  set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
9652
9653  return 1; /* DONE */
9654}
9655
9656/* Expand a comparison setting or clearing the carry flag.  Return true when
9657   successful and set *POP to the comparison operation.  */
9658static bool
9659ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
9660{
9661  enum machine_mode mode =
9662    GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
9663
9664  /* Do not handle DImode compares that go through the special path.  Also we can't
9665     deal with FP compares yet.  This is possible to add.  */
9666  if ((mode == DImode && !TARGET_64BIT))
9667    return false;
9668  if (FLOAT_MODE_P (mode))
9669    {
9670      rtx second_test = NULL, bypass_test = NULL;
9671      rtx compare_op, compare_seq;
9672
9673      /* Shortcut: the following common codes never translate into carry flag compares.  */
9674      if (code == EQ || code == NE || code == UNEQ || code == LTGT
9675	  || code == ORDERED || code == UNORDERED)
9676	return false;
9677
9678      /* These comparisons require zero flag; swap operands so they won't.  */
9679      if ((code == GT || code == UNLE || code == LE || code == UNGT)
9680	  && !TARGET_IEEE_FP)
9681	{
9682	  rtx tmp = op0;
9683	  op0 = op1;
9684	  op1 = tmp;
9685	  code = swap_condition (code);
9686	}
9687
9688      /* Try to expand the comparison and verify that we end up with a carry
9689	 flag based comparison.  This fails to be true only when we decide to
9690	 expand the comparison using arithmetic, which is not a common scenario.  */
9691      start_sequence ();
9692      compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
9693					   &second_test, &bypass_test);
9694      compare_seq = get_insns ();
9695      end_sequence ();
9696
9697      if (second_test || bypass_test)
9698	return false;
9699      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9700	  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9701        code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
9702      else
9703	code = GET_CODE (compare_op);
9704      if (code != LTU && code != GEU)
9705	return false;
9706      emit_insn (compare_seq);
9707      *pop = compare_op;
9708      return true;
9709    }
9710  if (!INTEGRAL_MODE_P (mode))
9711    return false;
9712  switch (code)
9713    {
9714    case LTU:
9715    case GEU:
9716      break;
9717
9718    /* Convert a==0 into (unsigned)a<1.  */
9719    case EQ:
9720    case NE:
9721      if (op1 != const0_rtx)
9722	return false;
9723      op1 = const1_rtx;
9724      code = (code == EQ ? LTU : GEU);
9725      break;
9726
9727    /* Convert a>b into b<a or a>=b-1.  */
9728    case GTU:
9729    case LEU:
9730      if (GET_CODE (op1) == CONST_INT)
9731	{
9732	  op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
9733	  /* Bail out on overflow.  We could still swap the operands, but that
9734	     would force loading the constant into a register.  */
9735	  if (op1 == const0_rtx
9736	      || !x86_64_immediate_operand (op1, GET_MODE (op1)))
9737	    return false;
9738	  code = (code == GTU ? GEU : LTU);
9739	}
9740      else
9741	{
9742	  rtx tmp = op1;
9743	  op1 = op0;
9744	  op0 = tmp;
9745	  code = (code == GTU ? LTU : GEU);
9746	}
9747      break;
9748
9749    /* Convert a>=0 into (unsigned)a<0x80000000.  */
9750    case LT:
9751    case GE:
9752      if (mode == DImode || op1 != const0_rtx)
9753	return false;
9754      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9755      code = (code == LT ? GEU : LTU);
9756      break;
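    /* Convert a<=-1 into (unsigned)a>=0x80000000 and a>-1 into
       (unsigned)a<0x80000000, i.e. test the sign bit.  */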
9757    case LE:
9758    case GT:
9759      if (mode == DImode || op1 != constm1_rtx)
9760	return false;
9761      op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
9762      code = (code == LE ? GEU : LTU);
9763      break;
9764
9765    default:
9766      return false;
9767    }
9768  /* Swapping operands may cause constant to appear as first operand.  */
9769  if (!nonimmediate_operand (op0, VOIDmode))
9770    {
9771      if (no_new_pseudos)
9772	return false;
9773      op0 = force_reg (mode, op0);
9774    }
9775  ix86_compare_op0 = op0;
9776  ix86_compare_op1 = op1;
9777  *pop = ix86_expand_compare (code, NULL, NULL);
9778  if (GET_CODE (*pop) != LTU && GET_CODE (*pop) != GEU)
9779    abort ();
9780  return true;
9781}
9782
9783int
9784ix86_expand_int_movcc (rtx operands[])
9785{
9786  enum rtx_code code = GET_CODE (operands[1]), compare_code;
9787  rtx compare_seq, compare_op;
9788  rtx second_test, bypass_test;
9789  enum machine_mode mode = GET_MODE (operands[0]);
9790  bool sign_bit_compare_p = false;;
9791
9792  start_sequence ();
9793  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9794  compare_seq = get_insns ();
9795  end_sequence ();
9796
9797  compare_code = GET_CODE (compare_op);
9798
9799  if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
9800      || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
9801    sign_bit_compare_p = true;
9802
9803  /* Don't attempt mode expansion here -- if we had to expand 5 or 6
9804     HImode insns, we'd be swallowed in word prefix ops.  */
9805
9806  if ((mode != HImode || TARGET_FAST_PREFIX)
9807      && (mode != DImode || TARGET_64BIT)
9808      && GET_CODE (operands[2]) == CONST_INT
9809      && GET_CODE (operands[3]) == CONST_INT)
9810    {
9811      rtx out = operands[0];
9812      HOST_WIDE_INT ct = INTVAL (operands[2]);
9813      HOST_WIDE_INT cf = INTVAL (operands[3]);
9814      HOST_WIDE_INT diff;
9815
9816      diff = ct - cf;
9817      /* Sign bit compares are better done using shifts than using sbb.  */
9819      if (sign_bit_compare_p
9820	  || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
9821					     ix86_compare_op1, &compare_op))
9822	{
9823	  /* Detect overlap between destination and compare sources.  */
9824	  rtx tmp = out;
9825
9826          if (!sign_bit_compare_p)
9827	    {
9828	      bool fpcmp = false;
9829
9830	      compare_code = GET_CODE (compare_op);
9831
9832	      if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
9833		  || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
9834		{
9835		  fpcmp = true;
9836		  compare_code = ix86_fp_compare_code_to_integer (compare_code);
9837		}
9838
9839	      /* To simplify the rest of the code, restrict to the GEU case.  */
9840	      if (compare_code == LTU)
9841		{
9842		  HOST_WIDE_INT tmp = ct;
9843		  ct = cf;
9844		  cf = tmp;
9845		  compare_code = reverse_condition (compare_code);
9846		  code = reverse_condition (code);
9847		}
9848	      else
9849		{
9850		  if (fpcmp)
9851		    PUT_CODE (compare_op,
9852			      reverse_condition_maybe_unordered
9853			        (GET_CODE (compare_op)));
9854		  else
9855		    PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
9856		}
9857	      diff = ct - cf;
9858
9859	      if (reg_overlap_mentioned_p (out, ix86_compare_op0)
9860		  || reg_overlap_mentioned_p (out, ix86_compare_op1))
9861		tmp = gen_reg_rtx (mode);
9862
9863	      if (mode == DImode)
9864		emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
9865	      else
9866		emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
9867	    }
9868	  else
9869	    {
9870	      if (code == GT || code == GE)
9871		code = reverse_condition (code);
9872	      else
9873		{
9874		  HOST_WIDE_INT tmp = ct;
9875		  ct = cf;
9876		  cf = tmp;
9877		  diff = ct - cf;
9878		}
9879	      tmp = emit_store_flag (tmp, code, ix86_compare_op0,
9880				     ix86_compare_op1, VOIDmode, 0, -1);
9881	    }
9882
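	  /* Either way TMP is now a 0/-1 mask for the (possibly adjusted)
	     condition: "sbb reg,reg" materializes -CF, and the sign-bit
	     path asked emit_store_flag for a -1/0 result.  The constant
	     arithmetic below maps that mask onto CT and CF.  */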
9883	  if (diff == 1)
9884	    {
9885	      /*
9886	       * cmpl op0,op1
9887	       * sbbl dest,dest
9888	       * [addl dest, ct]
9889	       *
9890	       * Size 5 - 8.
9891	       */
9892	      if (ct)
9893		tmp = expand_simple_binop (mode, PLUS,
9894					   tmp, GEN_INT (ct),
9895					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9896	    }
9897	  else if (cf == -1)
9898	    {
9899	      /*
9900	       * cmpl op0,op1
9901	       * sbbl dest,dest
9902	       * orl $ct, dest
9903	       *
9904	       * Size 8.
9905	       */
9906	      tmp = expand_simple_binop (mode, IOR,
9907					 tmp, GEN_INT (ct),
9908					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9909	    }
9910	  else if (diff == -1 && ct)
9911	    {
9912	      /*
9913	       * cmpl op0,op1
9914	       * sbbl dest,dest
9915	       * notl dest
9916	       * [addl dest, cf]
9917	       *
9918	       * Size 8 - 11.
9919	       */
9920	      tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9921	      if (cf)
9922		tmp = expand_simple_binop (mode, PLUS,
9923					   copy_rtx (tmp), GEN_INT (cf),
9924					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9925	    }
9926	  else
9927	    {
9928	      /*
9929	       * cmpl op0,op1
9930	       * sbbl dest,dest
9931	       * [notl dest]
9932	       * andl cf - ct, dest
9933	       * [addl dest, ct]
9934	       *
9935	       * Size 8 - 11.
9936	       */
9937
9938	      if (cf == 0)
9939		{
9940		  cf = ct;
9941		  ct = 0;
9942		  tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
9943		}
9944
9945	      tmp = expand_simple_binop (mode, AND,
9946					 copy_rtx (tmp),
9947					 gen_int_mode (cf - ct, mode),
9948					 copy_rtx (tmp), 1, OPTAB_DIRECT);
9949	      if (ct)
9950		tmp = expand_simple_binop (mode, PLUS,
9951					   copy_rtx (tmp), GEN_INT (ct),
9952					   copy_rtx (tmp), 1, OPTAB_DIRECT);
9953	    }
9954
9955	  if (!rtx_equal_p (tmp, out))
9956	    emit_move_insn (copy_rtx (out), copy_rtx (tmp));
9957
9958	  return 1; /* DONE */
9959	}
9960
9961      if (diff < 0)
9962	{
9963	  HOST_WIDE_INT tmp;
9964	  tmp = ct, ct = cf, cf = tmp;
9965	  diff = -diff;
9966	  if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9967	    {
9968	      /* We may be reversing an unordered compare to a normal compare, which
9969		 is not valid in general (we may convert a non-trapping condition
9970		 to a trapping one); however, on i386 we currently emit all
9971		 comparisons unordered.  */
9972	      compare_code = reverse_condition_maybe_unordered (compare_code);
9973	      code = reverse_condition_maybe_unordered (code);
9974	    }
9975	  else
9976	    {
9977	      compare_code = reverse_condition (compare_code);
9978	      code = reverse_condition (code);
9979	    }
9980	}
9981
9982      compare_code = NIL;
9983      if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9984	  && GET_CODE (ix86_compare_op1) == CONST_INT)
9985	{
9986	  if (ix86_compare_op1 == const0_rtx
9987	      && (code == LT || code == GE))
9988	    compare_code = code;
9989	  else if (ix86_compare_op1 == constm1_rtx)
9990	    {
9991	      if (code == LE)
9992		compare_code = LT;
9993	      else if (code == GT)
9994		compare_code = GE;
9995	    }
9996	}
9997
9998      /* Optimize dest = (op0 < 0) ? -1 : cf.  */
9999      if (compare_code != NIL
10000	  && GET_MODE (ix86_compare_op0) == GET_MODE (out)
10001	  && (cf == -1 || ct == -1))
10002	{
10003	  /* If lea code below could be used, only optimize
10004	     if it results in a 2 insn sequence.  */
10005
10006	  if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
10007		 || diff == 3 || diff == 5 || diff == 9)
10008	      || (compare_code == LT && ct == -1)
10009	      || (compare_code == GE && cf == -1))
10010	    {
10011	      /*
10012	       * notl op1	(if necessary)
10013	       * sarl $31, op1
10014	       * orl cf, op1
10015	       */
10016	      if (ct != -1)
10017		{
10018		  cf = ct;
10019		  ct = -1;
10020		  code = reverse_condition (code);
10021		}
10022
10023	      out = emit_store_flag (out, code, ix86_compare_op0,
10024				     ix86_compare_op1, VOIDmode, 0, -1);
10025
10026	      out = expand_simple_binop (mode, IOR,
10027					 out, GEN_INT (cf),
10028					 out, 1, OPTAB_DIRECT);
10029	      if (out != operands[0])
10030		emit_move_insn (operands[0], out);
10031
10032	      return 1; /* DONE */
10033	    }
10034	}
10035
10036
10037      if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
10038	   || diff == 3 || diff == 5 || diff == 9)
10039	  && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
10040	  && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
10041	{
10042	  /*
10043	   * xorl dest,dest
10044	   * cmpl op1,op2
10045	   * setcc dest
10046	   * lea cf(dest*(ct-cf)),dest
10047	   *
10048	   * Size 14.
10049	   *
10050	   * This also catches the degenerate setcc-only case.
10051	   */
10052
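	  /* For instance, diff == 5 turns the 0/1 setcc result into cf/ct
	     with a single "leal cf(%reg,%reg,4), %reg".  */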
10053	  rtx tmp;
10054	  int nops;
10055
10056	  out = emit_store_flag (out, code, ix86_compare_op0,
10057				 ix86_compare_op1, VOIDmode, 0, 1);
10058
10059	  nops = 0;
10060	  /* On x86_64 the lea instruction operates on Pmode, so we need
10061	     to get the arithmetic done in the proper mode to match.  */
10062	  if (diff == 1)
10063	    tmp = copy_rtx (out);
10064	  else
10065	    {
10066	      rtx out1;
10067	      out1 = copy_rtx (out);
10068	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
10069	      nops++;
10070	      if (diff & 1)
10071		{
10072		  tmp = gen_rtx_PLUS (mode, tmp, out1);
10073		  nops++;
10074		}
10075	    }
10076	  if (cf != 0)
10077	    {
10078	      tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
10079	      nops++;
10080	    }
10081	  if (!rtx_equal_p (tmp, out))
10082	    {
10083	      if (nops == 1)
10084		out = force_operand (tmp, copy_rtx (out));
10085	      else
10086		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
10087	    }
10088	  if (!rtx_equal_p (out, operands[0]))
10089	    emit_move_insn (operands[0], copy_rtx (out));
10090
10091	  return 1; /* DONE */
10092	}
10093
10094      /*
10095       * General case:			Jumpful:
10096       *   xorl dest,dest		cmpl op1, op2
10097       *   cmpl op1, op2		movl ct, dest
10098       *   setcc dest			jcc 1f
10099       *   decl dest			movl cf, dest
10100       *   andl (cf-ct),dest		1:
10101       *   addl ct,dest
10102       *
10103       * Size 20.			Size 14.
10104       *
10105       * This is reasonably steep, but branch mispredict costs are
10106       * high on modern cpus, so consider failing only if optimizing
10107       * for space.
10108       */
10109
10110      if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10111	  && BRANCH_COST >= 2)
10112	{
10113	  if (cf == 0)
10114	    {
10115	      cf = ct;
10116	      ct = 0;
10117	      if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
10118		/* We may be reversing an unordered compare to a normal compare,
10119		   which is not valid in general (we may convert a non-trapping
10120		   condition to a trapping one); however, on i386 we currently
10121		   emit all comparisons unordered.  */
10122		code = reverse_condition_maybe_unordered (code);
10123	      else
10124		{
10125		  code = reverse_condition (code);
10126		  if (compare_code != NIL)
10127		    compare_code = reverse_condition (compare_code);
10128		}
10129	    }
10130
10131	  if (compare_code != NIL)
10132	    {
10133	      /* notl op1	(if needed)
10134		 sarl $31, op1
10135		 andl (cf-ct), op1
10136		 addl ct, op1
10137
10138		 For x < 0 (resp. x <= -1) there will be no notl,
10139		 so if possible swap the constants to get rid of the
10140		 complement.
10141		 True/false will be -1/0 while code below (store flag
10142		 followed by decrement) is 0/-1, so the constants need
10143		 to be exchanged once more.  */
10144
10145	      if (compare_code == GE || !cf)
10146		{
10147		  code = reverse_condition (code);
10148		  compare_code = LT;
10149		}
10150	      else
10151		{
10152		  HOST_WIDE_INT tmp = cf;
10153		  cf = ct;
10154		  ct = tmp;
10155		}
10156
10157	      out = emit_store_flag (out, code, ix86_compare_op0,
10158				     ix86_compare_op1, VOIDmode, 0, -1);
10159	    }
10160	  else
10161	    {
10162	      out = emit_store_flag (out, code, ix86_compare_op0,
10163				     ix86_compare_op1, VOIDmode, 0, 1);
10164
10165	      out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
10166					 copy_rtx (out), 1, OPTAB_DIRECT);
10167	    }
10168
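	  /* In both branches above OUT ends up as a 0/-1 mask (the
	     constants were exchanged where needed), so the AND with
	     (cf - ct) followed by ADD of ct selects between the two
	     constants without a branch.  */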
10169	  out = expand_simple_binop (mode, AND, copy_rtx (out),
10170				     gen_int_mode (cf - ct, mode),
10171				     copy_rtx (out), 1, OPTAB_DIRECT);
10172	  if (ct)
10173	    out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
10174				       copy_rtx (out), 1, OPTAB_DIRECT);
10175	  if (!rtx_equal_p (out, operands[0]))
10176	    emit_move_insn (operands[0], copy_rtx (out));
10177
10178	  return 1; /* DONE */
10179	}
10180    }
10181
10182  if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
10183    {
10184      /* Try a few things more with specific constants and a variable.  */
10185
10186      optab op;
10187      rtx var, orig_out, out, tmp;
10188
10189      if (BRANCH_COST <= 2)
10190	return 0; /* FAIL */
10191
10192      /* If one of the two operands is an interesting constant, load a
10193	 constant with the above and mask it in with a logical operation.  */
10194
10195      if (GET_CODE (operands[2]) == CONST_INT)
10196	{
10197	  var = operands[3];
10198	  if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
10199	    operands[3] = constm1_rtx, op = and_optab;
10200	  else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
10201	    operands[3] = const0_rtx, op = ior_optab;
10202	  else
10203	    return 0; /* FAIL */
10204	}
10205      else if (GET_CODE (operands[3]) == CONST_INT)
10206	{
10207	  var = operands[2];
10208	  if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
10209	    operands[2] = constm1_rtx, op = and_optab;
10210	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
10211	    operands[2] = const0_rtx, op = ior_optab;
10212	  else
10213	    return 0; /* FAIL */
10214	}
10215      else
10216        return 0; /* FAIL */
10217
10218      orig_out = operands[0];
10219      tmp = gen_reg_rtx (mode);
10220      operands[0] = tmp;
10221
10222      /* Recurse to get the constant loaded.  */
10223      if (ix86_expand_int_movcc (operands) == 0)
10224        return 0; /* FAIL */
10225
10226      /* Mask in the interesting variable.  */
10227      out = expand_binop (mode, op, var, tmp, orig_out, 0,
10228			  OPTAB_WIDEN);
10229      if (!rtx_equal_p (out, orig_out))
10230	emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
10231
10232      return 1; /* DONE */
10233    }
10234
10235  /*
10236   * For comparison with above,
10237   *
10238   * movl cf,dest
10239   * movl ct,tmp
10240   * cmpl op1,op2
10241   * cmovcc tmp,dest
10242   *
10243   * Size 15.
10244   */
10245
10246  if (! nonimmediate_operand (operands[2], mode))
10247    operands[2] = force_reg (mode, operands[2]);
10248  if (! nonimmediate_operand (operands[3], mode))
10249    operands[3] = force_reg (mode, operands[3]);
10250
10251  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10252    {
10253      rtx tmp = gen_reg_rtx (mode);
10254      emit_move_insn (tmp, operands[3]);
10255      operands[3] = tmp;
10256    }
10257  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10258    {
10259      rtx tmp = gen_reg_rtx (mode);
10260      emit_move_insn (tmp, operands[2]);
10261      operands[2] = tmp;
10262    }
10263
10264  if (! register_operand (operands[2], VOIDmode)
10265      && (mode == QImode
10266          || ! register_operand (operands[3], VOIDmode)))
10267    operands[2] = force_reg (mode, operands[2]);
10268
10269  if (mode == QImode
10270      && ! register_operand (operands[3], VOIDmode))
10271    operands[3] = force_reg (mode, operands[3]);
10272
10273  emit_insn (compare_seq);
10274  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10275			  gen_rtx_IF_THEN_ELSE (mode,
10276						compare_op, operands[2],
10277						operands[3])));
10278  if (bypass_test)
10279    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10280			    gen_rtx_IF_THEN_ELSE (mode,
10281				  bypass_test,
10282				  copy_rtx (operands[3]),
10283				  copy_rtx (operands[0]))));
10284  if (second_test)
10285    emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
10286			    gen_rtx_IF_THEN_ELSE (mode,
10287				  second_test,
10288				  copy_rtx (operands[2]),
10289				  copy_rtx (operands[0]))));
10290
10291  return 1; /* DONE */
10292}
10293
10294int
10295ix86_expand_fp_movcc (rtx operands[])
10296{
10297  enum rtx_code code;
10298  rtx tmp;
10299  rtx compare_op, second_test, bypass_test;
10300
10301  /* For SF/DFmode conditional moves based on comparisons
10302     in the same mode, we may want to use SSE min/max instructions.  */
10303  if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
10304       || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
10305      && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
10306      /* The SSE comparisons do not support the LTGT/UNEQ pair.  */
10307      && (!TARGET_IEEE_FP
10308	  || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
10309      /* We may be called from the post-reload splitter.  */
10310      && (!REG_P (operands[0])
10311	  || SSE_REG_P (operands[0])
10312	  || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
10313    {
10314      rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
10315      code = GET_CODE (operands[1]);
10316
10317      /* See if we have (cross) match between comparison operands and
10318         conditional move operands.  */
10319      if (rtx_equal_p (operands[2], op1))
10320	{
10321	  rtx tmp = op0;
10322	  op0 = op1;
10323	  op1 = tmp;
10324	  code = reverse_condition_maybe_unordered (code);
10325	}
10326      if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
10327	{
10328	  /* Check for min operation.  */
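	  /* I.e. "a < b ? a : b" (or its swapped UNLE form), which maps
	     directly onto minss/minsd.  */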
10329	  if (code == LT || code == UNLE)
10330	    {
10331	       if (code == UNLE)
10332		{
10333		  rtx tmp = op0;
10334		  op0 = op1;
10335		  op1 = tmp;
10336		}
10337	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10338	       if (memory_operand (op0, VOIDmode))
10339		 op0 = force_reg (GET_MODE (operands[0]), op0);
10340	       if (GET_MODE (operands[0]) == SFmode)
10341		 emit_insn (gen_minsf3 (operands[0], op0, op1));
10342	       else
10343		 emit_insn (gen_mindf3 (operands[0], op0, op1));
10344	       return 1;
10345	    }
10346	  /* Check for max operation.  */
10347	  if (code == GT || code == UNGE)
10348	    {
10349	       if (code == UNGE)
10350		{
10351		  rtx tmp = op0;
10352		  op0 = op1;
10353		  op1 = tmp;
10354		}
10355	       operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
10356	       if (memory_operand (op0, VOIDmode))
10357		 op0 = force_reg (GET_MODE (operands[0]), op0);
10358	       if (GET_MODE (operands[0]) == SFmode)
10359		 emit_insn (gen_maxsf3 (operands[0], op0, op1));
10360	       else
10361		 emit_insn (gen_maxdf3 (operands[0], op0, op1));
10362	       return 1;
10363	    }
10364	}
10365      /* Massage the condition into an sse_comparison_operator.  If we are
10366	 in non-IEEE mode, try to canonicalize the destination operand
10367	 to be first in the comparison - this helps reload avoid extra
10368	 moves.  */
10369      if (!sse_comparison_operator (operands[1], VOIDmode)
10370	  || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
10371	{
10372	  rtx tmp = ix86_compare_op0;
10373	  ix86_compare_op0 = ix86_compare_op1;
10374	  ix86_compare_op1 = tmp;
10375	  operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
10376					VOIDmode, ix86_compare_op0,
10377					ix86_compare_op1);
10378	}
10379      /* Similarly, try to arrange for the result to be the first operand of the
10380	 conditional move.  We also don't support the NE comparison on SSE, so try
10381	 to avoid it.  */
10382      if ((rtx_equal_p (operands[0], operands[3])
10383	   && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
10384	  || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
10385	{
10386	  rtx tmp = operands[2];
10387	  operands[2] = operands[3];
10388	  operands[3] = tmp;
10389	  operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
10390					  (GET_CODE (operands[1])),
10391					VOIDmode, ix86_compare_op0,
10392					ix86_compare_op1);
10393	}
10394      if (GET_MODE (operands[0]) == SFmode)
10395	emit_insn (gen_sse_movsfcc (operands[0], operands[1],
10396				    operands[2], operands[3],
10397				    ix86_compare_op0, ix86_compare_op1));
10398      else
10399	emit_insn (gen_sse_movdfcc (operands[0], operands[1],
10400				    operands[2], operands[3],
10401				    ix86_compare_op0, ix86_compare_op1));
10402      return 1;
10403    }
10404
10405  /* The floating point conditional move instructions don't directly
10406     support conditions resulting from a signed integer comparison.  */
10407
10408  code = GET_CODE (operands[1]);
10409  compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
10410
10414  if (!fcmov_comparison_operator (compare_op, VOIDmode))
10415    {
10416      if (second_test != NULL || bypass_test != NULL)
10417	abort ();
10418      tmp = gen_reg_rtx (QImode);
10419      ix86_expand_setcc (code, tmp);
10420      code = NE;
10421      ix86_compare_op0 = tmp;
10422      ix86_compare_op1 = const0_rtx;
10423      compare_op = ix86_expand_compare (code,  &second_test, &bypass_test);
10424    }
10425  if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
10426    {
10427      tmp = gen_reg_rtx (GET_MODE (operands[0]));
10428      emit_move_insn (tmp, operands[3]);
10429      operands[3] = tmp;
10430    }
10431  if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
10432    {
10433      tmp = gen_reg_rtx (GET_MODE (operands[0]));
10434      emit_move_insn (tmp, operands[2]);
10435      operands[2] = tmp;
10436    }
10437
10438  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10439			  gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10440				compare_op,
10441				operands[2],
10442				operands[3])));
10443  if (bypass_test)
10444    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10445			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10446				  bypass_test,
10447				  operands[3],
10448				  operands[0])));
10449  if (second_test)
10450    emit_insn (gen_rtx_SET (VOIDmode, operands[0],
10451			    gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
10452				  second_test,
10453				  operands[2],
10454				  operands[0])));
10455
10456  return 1;
10457}
10458
10459/* Expand conditional increment or decrement using adc/sbb instructions.
10460   The default case using setcc followed by the conditional move can be
10461   done by generic code.  */
10462int
10463ix86_expand_int_addcc (rtx operands[])
10464{
10465  enum rtx_code code = GET_CODE (operands[1]);
10466  rtx compare_op;
10467  rtx val = const0_rtx;
10468  bool fpcmp = false;
10469  enum machine_mode mode = GET_MODE (operands[0]);
10470
10471  if (operands[3] != const1_rtx
10472      && operands[3] != constm1_rtx)
10473    return 0;
10474  if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
10475				       ix86_compare_op1, &compare_op))
10476     return 0;
10477  code = GET_CODE (compare_op);
10478
10479  if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
10480      || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
10481    {
10482      fpcmp = true;
10483      code = ix86_fp_compare_code_to_integer (code);
10484    }
10485
10486  if (code != LTU)
10487    {
10488      val = constm1_rtx;
10489      if (fpcmp)
10490	PUT_CODE (compare_op,
10491		  reverse_condition_maybe_unordered
10492		    (GET_CODE (compare_op)));
10493      else
10494	PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
10495    }
10496  PUT_MODE (compare_op, mode);
10497
10498  /* Construct either adc or sbb insn.  */
10499  if ((code == LTU) == (operands[3] == constm1_rtx))
10500    {
10501      switch (GET_MODE (operands[0]))
10502	{
10503	  case QImode:
10504            emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
10505	    break;
10506	  case HImode:
10507            emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
10508	    break;
10509	  case SImode:
10510            emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
10511	    break;
10512	  case DImode:
10513            emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10514	    break;
10515	  default:
10516	    abort ();
10517	}
10518    }
10519  else
10520    {
10521      switch (GET_MODE (operands[0]))
10522	{
10523	  case QImode:
10524            emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
10525	    break;
10526	  case HImode:
10527            emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
10528	    break;
10529	  case SImode:
10530            emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
10531	    break;
10532	  case DImode:
10533            emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
10534	    break;
10535	  default:
10536	    abort ();
10537	}
10538    }
10539  return 1; /* DONE */
10540}
10541
10542
10543/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
10544   works for floating point parameters and non-offsettable memories.
10545   For pushes, it returns just stack offsets; the values will be saved
10546   in the right order.  At most three parts are generated.  */
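/* Some added examples of the split (not an exhaustive list): on a 32-bit
   target a DImode register pair becomes two SImode registers, a DFmode
   constant becomes two SImode immediates and XFmode yields three parts,
   while on a 64-bit target XFmode splits into a DImode low part and an
   SImode upper part.  */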
10547
10548static int
10549ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
10550{
10551  int size;
10552
10553  if (!TARGET_64BIT)
10554    size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
10555  else
10556    size = (GET_MODE_SIZE (mode) + 4) / 8;
10557
10558  if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
10559    abort ();
10560  if (size < 2 || size > 3)
10561    abort ();
10562
10563  /* Optimize constant pool reference to immediates.  This is used by fp
10564     moves, which force all constants to memory to allow combining.  */
10565  if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
10566    {
10567      rtx tmp = maybe_get_pool_constant (operand);
10568      if (tmp)
10569	operand = tmp;
10570    }
10571
10572  if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
10573    {
10574      /* The only non-offsettable memories we handle are pushes.  */
10575      if (! push_operand (operand, VOIDmode))
10576	abort ();
10577
10578      operand = copy_rtx (operand);
10579      PUT_MODE (operand, Pmode);
10580      parts[0] = parts[1] = parts[2] = operand;
10581    }
10582  else if (!TARGET_64BIT)
10583    {
10584      if (mode == DImode)
10585	split_di (&operand, 1, &parts[0], &parts[1]);
10586      else
10587	{
10588	  if (REG_P (operand))
10589	    {
10590	      if (!reload_completed)
10591		abort ();
10592	      parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
10593	      parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
10594	      if (size == 3)
10595		parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
10596	    }
10597	  else if (offsettable_memref_p (operand))
10598	    {
10599	      operand = adjust_address (operand, SImode, 0);
10600	      parts[0] = operand;
10601	      parts[1] = adjust_address (operand, SImode, 4);
10602	      if (size == 3)
10603		parts[2] = adjust_address (operand, SImode, 8);
10604	    }
10605	  else if (GET_CODE (operand) == CONST_DOUBLE)
10606	    {
10607	      REAL_VALUE_TYPE r;
10608	      long l[4];
10609
10610	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10611	      switch (mode)
10612		{
10613		case XFmode:
10614		  REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
10615		  parts[2] = gen_int_mode (l[2], SImode);
10616		  break;
10617		case DFmode:
10618		  REAL_VALUE_TO_TARGET_DOUBLE (r, l);
10619		  break;
10620		default:
10621		  abort ();
10622		}
10623	      parts[1] = gen_int_mode (l[1], SImode);
10624	      parts[0] = gen_int_mode (l[0], SImode);
10625	    }
10626	  else
10627	    abort ();
10628	}
10629    }
10630  else
10631    {
10632      if (mode == TImode)
10633	split_ti (&operand, 1, &parts[0], &parts[1]);
10634      if (mode == XFmode || mode == TFmode)
10635	{
10636	  enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
10637	  if (REG_P (operand))
10638	    {
10639	      if (!reload_completed)
10640		abort ();
10641	      parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
10642	      parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
10643	    }
10644	  else if (offsettable_memref_p (operand))
10645	    {
10646	      operand = adjust_address (operand, DImode, 0);
10647	      parts[0] = operand;
10648	      parts[1] = adjust_address (operand, upper_mode, 8);
10649	    }
10650	  else if (GET_CODE (operand) == CONST_DOUBLE)
10651	    {
10652	      REAL_VALUE_TYPE r;
10653	      long l[4];
10654
10655	      REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
10656	      real_to_target (l, &r, mode);
10657
10658	      /* Do not use shift by 32 to avoid warning on 32bit systems.  */
10659	      if (HOST_BITS_PER_WIDE_INT >= 64)
10660	        parts[0]
10661		  = gen_int_mode
10662		      ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
10663		       + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
10664		       DImode);
10665	      else
10666	        parts[0] = immed_double_const (l[0], l[1], DImode);
10667
10668	      if (upper_mode == SImode)
10669	        parts[1] = gen_int_mode (l[2], SImode);
10670	      else if (HOST_BITS_PER_WIDE_INT >= 64)
10671	        parts[1]
10672		  = gen_int_mode
10673		      ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
10674		       + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
10675		       DImode);
10676	      else
10677	        parts[1] = immed_double_const (l[2], l[3], DImode);
10678	    }
10679	  else
10680	    abort ();
10681	}
10682    }
10683
10684  return size;
10685}
10686
10687/* Emit insns to perform a move or push of DI, DF, and XF values.
10688   Operands 2-4 and 5-7 are filled in with the destination and source
10689   pieces, in the correct order, before the moves are emitted.  */
10691
10692void
10693ix86_split_long_move (rtx operands[])
10694{
10695  rtx part[2][3];
10696  int nparts;
10697  int push = 0;
10698  int collisions = 0;
10699  enum machine_mode mode = GET_MODE (operands[0]);
10700
10701  /* The DFmode expanders may ask us to move a double.  For a 64-bit
10702     target this is a single move.  By handling it here we simplify the
10703     i386.md splitters.  */
10704  if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
10705    {
10706      /* Optimize constant pool reference to immediates.  This is used by
10707	 fp moves, which force all constants to memory to allow combining.  */
10708
10709      if (GET_CODE (operands[1]) == MEM
10710	  && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
10711	  && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
10712	operands[1] = get_pool_constant (XEXP (operands[1], 0));
10713      if (push_operand (operands[0], VOIDmode))
10714	{
10715	  operands[0] = copy_rtx (operands[0]);
10716	  PUT_MODE (operands[0], Pmode);
10717	}
10718      else
10719        operands[0] = gen_lowpart (DImode, operands[0]);
10720      operands[1] = gen_lowpart (DImode, operands[1]);
10721      emit_move_insn (operands[0], operands[1]);
10722      return;
10723    }
10724
10725  /* The only non-offsettable memory we handle is push.  */
10726  if (push_operand (operands[0], VOIDmode))
10727    push = 1;
10728  else if (GET_CODE (operands[0]) == MEM
10729	   && ! offsettable_memref_p (operands[0]))
10730    abort ();
10731
10732  nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
10733  ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
10734
10735  /* When emitting a push, take care of source operands on the stack.  */
10736  if (push && GET_CODE (operands[1]) == MEM
10737      && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
10738    {
10739      if (nparts == 3)
10740	part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
10741				     XEXP (part[1][2], 0));
10742      part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
10743				   XEXP (part[1][1], 0));
10744    }
10745
10746  /* We need to do the copy in the right order in case an address register
10747     of the source overlaps the destination.  */
10748  if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
10749    {
10750      if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
10751	collisions++;
10752      if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10753	collisions++;
10754      if (nparts == 3
10755	  && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
10756	collisions++;
10757
10758      /* Collision in the middle part can be handled by reordering.  */
10759      if (collisions == 1 && nparts == 3
10760	  && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
10761	{
10762	  rtx tmp;
10763	  tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
10764	  tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
10765	}
10766
10767      /* If there are more collisions, we can't handle it by reordering.
10768	 Do an lea to the last part and use only one colliding move.  */
10769      else if (collisions > 1)
10770	{
10771	  rtx base;
10772
10773	  collisions = 1;
10774
10775	  base = part[0][nparts - 1];
10776
10777	  /* Handle the case when the last part isn't valid for lea.
10778	     Happens in 64-bit mode storing the 12-byte XFmode.  */
10779	  if (GET_MODE (base) != Pmode)
10780	    base = gen_rtx_REG (Pmode, REGNO (base));
10781
10782	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
10783	  part[1][0] = replace_equiv_address (part[1][0], base);
10784	  part[1][1] = replace_equiv_address (part[1][1],
10785				      plus_constant (base, UNITS_PER_WORD));
10786	  if (nparts == 3)
10787	    part[1][2] = replace_equiv_address (part[1][2],
10788				      plus_constant (base, 8));
10789	}
10790    }
10791
10792  if (push)
10793    {
10794      if (!TARGET_64BIT)
10795	{
10796	  if (nparts == 3)
10797	    {
10798	      if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
10799                emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
10800	      emit_move_insn (part[0][2], part[1][2]);
10801	    }
10802	}
10803      else
10804	{
10805	  /* In 64-bit mode there is no 32-bit push.  If the operand is a
10806	     register, that is fine - we simply use the wider counterpart.  We
10807	     also retype memory operands - these come from an attempt to avoid
10808	     the REX prefix when moving the second half of a TFmode value.  */
10809	  if (GET_MODE (part[1][1]) == SImode)
10810	    {
10811	      if (GET_CODE (part[1][1]) == MEM)
10812		part[1][1] = adjust_address (part[1][1], DImode, 0);
10813	      else if (REG_P (part[1][1]))
10814		part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
10815	      else
10816		abort ();
10817	      if (GET_MODE (part[1][0]) == SImode)
10818		part[1][0] = part[1][1];
10819	    }
10820	}
10821      emit_move_insn (part[0][1], part[1][1]);
10822      emit_move_insn (part[0][0], part[1][0]);
10823      return;
10824    }
10825
10826  /* Choose the correct order so as not to overwrite the source before it is copied.  */
10827  if ((REG_P (part[0][0])
10828       && REG_P (part[1][1])
10829       && (REGNO (part[0][0]) == REGNO (part[1][1])
10830	   || (nparts == 3
10831	       && REGNO (part[0][0]) == REGNO (part[1][2]))))
10832      || (collisions > 0
10833	  && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
10834    {
10835      if (nparts == 3)
10836	{
10837	  operands[2] = part[0][2];
10838	  operands[3] = part[0][1];
10839	  operands[4] = part[0][0];
10840	  operands[5] = part[1][2];
10841	  operands[6] = part[1][1];
10842	  operands[7] = part[1][0];
10843	}
10844      else
10845	{
10846	  operands[2] = part[0][1];
10847	  operands[3] = part[0][0];
10848	  operands[5] = part[1][1];
10849	  operands[6] = part[1][0];
10850	}
10851    }
10852  else
10853    {
10854      if (nparts == 3)
10855	{
10856	  operands[2] = part[0][0];
10857	  operands[3] = part[0][1];
10858	  operands[4] = part[0][2];
10859	  operands[5] = part[1][0];
10860	  operands[6] = part[1][1];
10861	  operands[7] = part[1][2];
10862	}
10863      else
10864	{
10865	  operands[2] = part[0][0];
10866	  operands[3] = part[0][1];
10867	  operands[5] = part[1][0];
10868	  operands[6] = part[1][1];
10869	}
10870    }
10871  emit_move_insn (operands[2], operands[5]);
10872  emit_move_insn (operands[3], operands[6]);
10873  if (nparts == 3)
10874    emit_move_insn (operands[4], operands[7]);
10875
10876  return;
10877}
10878
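/* Added summary of the three shift splitters below: on 32-bit targets a
   DImode shift is split into SImode operations.  A constant count of 32 or
   more is handled by moving one word into the other and shifting the
   remainder; smaller constant counts use an shld/shl (or shrd/shr,
   shrd/sar) pair; variable counts additionally get a conditional fixup for
   the count >= 32 case.  */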
10879void
10880ix86_split_ashldi (rtx *operands, rtx scratch)
10881{
10882  rtx low[2], high[2];
10883  int count;
10884
10885  if (GET_CODE (operands[2]) == CONST_INT)
10886    {
10887      split_di (operands, 2, low, high);
10888      count = INTVAL (operands[2]) & 63;
10889
10890      if (count >= 32)
10891	{
10892	  emit_move_insn (high[0], low[1]);
10893	  emit_move_insn (low[0], const0_rtx);
10894
10895	  if (count > 32)
10896	    emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
10897	}
10898      else
10899	{
10900	  if (!rtx_equal_p (operands[0], operands[1]))
10901	    emit_move_insn (operands[0], operands[1]);
10902	  emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
10903	  emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
10904	}
10905    }
10906  else
10907    {
10908      if (!rtx_equal_p (operands[0], operands[1]))
10909	emit_move_insn (operands[0], operands[1]);
10910
10911      split_di (operands, 1, low, high);
10912
10913      emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
10914      emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
10915
10916      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10917	{
10918	  if (! no_new_pseudos)
10919	    scratch = force_reg (SImode, const0_rtx);
10920	  else
10921	    emit_move_insn (scratch, const0_rtx);
10922
10923	  emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
10924					  scratch));
10925	}
10926      else
10927	emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
10928    }
10929}
10930
10931void
10932ix86_split_ashrdi (rtx *operands, rtx scratch)
10933{
10934  rtx low[2], high[2];
10935  int count;
10936
10937  if (GET_CODE (operands[2]) == CONST_INT)
10938    {
10939      split_di (operands, 2, low, high);
10940      count = INTVAL (operands[2]) & 63;
10941
10942      if (count >= 32)
10943	{
10944	  emit_move_insn (low[0], high[1]);
10945
10946	  if (! reload_completed)
10947	    emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
10948	  else
10949	    {
10950	      emit_move_insn (high[0], low[0]);
10951	      emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
10952	    }
10953
10954	  if (count > 32)
10955	    emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
10956	}
10957      else
10958	{
10959	  if (!rtx_equal_p (operands[0], operands[1]))
10960	    emit_move_insn (operands[0], operands[1]);
10961	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
10962	  emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
10963	}
10964    }
10965  else
10966    {
10967      if (!rtx_equal_p (operands[0], operands[1]))
10968	emit_move_insn (operands[0], operands[1]);
10969
10970      split_di (operands, 1, low, high);
10971
10972      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
10973      emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
10974
10975      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
10976	{
10977	  if (! no_new_pseudos)
10978	    scratch = gen_reg_rtx (SImode);
10979	  emit_move_insn (scratch, high[0]);
10980	  emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
10981	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
10982					  scratch));
10983	}
10984      else
10985	emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
10986    }
10987}
10988
10989void
10990ix86_split_lshrdi (rtx *operands, rtx scratch)
10991{
10992  rtx low[2], high[2];
10993  int count;
10994
10995  if (GET_CODE (operands[2]) == CONST_INT)
10996    {
10997      split_di (operands, 2, low, high);
10998      count = INTVAL (operands[2]) & 63;
10999
11000      if (count >= 32)
11001	{
11002	  emit_move_insn (low[0], high[1]);
11003	  emit_move_insn (high[0], const0_rtx);
11004
11005	  if (count > 32)
11006	    emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
11007	}
11008      else
11009	{
11010	  if (!rtx_equal_p (operands[0], operands[1]))
11011	    emit_move_insn (operands[0], operands[1]);
11012	  emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
11013	  emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
11014	}
11015    }
11016  else
11017    {
11018      if (!rtx_equal_p (operands[0], operands[1]))
11019	emit_move_insn (operands[0], operands[1]);
11020
11021      split_di (operands, 1, low, high);
11022
11023      emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
11024      emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
11025
11026      /* Heh.  By reversing the arguments, we can reuse this pattern.  */
11027      if (TARGET_CMOVE && (! no_new_pseudos || scratch))
11028	{
11029	  if (! no_new_pseudos)
11030	    scratch = force_reg (SImode, const0_rtx);
11031	  else
11032	    emit_move_insn (scratch, const0_rtx);
11033
11034	  emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
11035					  scratch));
11036	}
11037      else
11038	emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
11039    }
11040}
11041
11042/* Helper function for the string operations below.  Test VARIABLE for
11043   alignment to VALUE bytes; if it is aligned, jump to the returned label.  */
11044static rtx
11045ix86_expand_aligntest (rtx variable, int value)
11046{
11047  rtx label = gen_label_rtx ();
11048  rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
11049  if (GET_MODE (variable) == DImode)
11050    emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
11051  else
11052    emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
11053  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
11054			   1, label);
11055  return label;
11056}
11057
11058/* Decrement COUNTREG by VALUE.  */
11059static void
11060ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
11061{
11062  if (GET_MODE (countreg) == DImode)
11063    emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
11064  else
11065    emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
11066}
11067
11068/* Zero extend possibly SImode EXP to Pmode register.  */
11069rtx
11070ix86_zero_extend_to_Pmode (rtx exp)
11071{
11072  rtx r;
11073  if (GET_MODE (exp) == VOIDmode)
11074    return force_reg (Pmode, exp);
11075  if (GET_MODE (exp) == Pmode)
11076    return copy_to_mode_reg (Pmode, exp);
11077  r = gen_reg_rtx (Pmode);
11078  emit_insn (gen_zero_extendsidi2 (r, exp));
11079  return r;
11080}
11081
11082/* Expand string move (memcpy) operation.  Use i386 string operations when
11083   profitable.  expand_clrstr contains similar code.  */
11084int
11085ix86_expand_movstr (rtx dst, rtx src, rtx count_exp, rtx align_exp)
11086{
11087  rtx srcreg, destreg, countreg, srcexp, destexp;
11088  enum machine_mode counter_mode;
11089  HOST_WIDE_INT align = 0;
11090  unsigned HOST_WIDE_INT count = 0;
11091
11092  if (GET_CODE (align_exp) == CONST_INT)
11093    align = INTVAL (align_exp);
11094
11095  /* Can't use any of this if the user has appropriated esi or edi.  */
11096  if (global_regs[4] || global_regs[5])
11097    return 0;
11098
11099  /* This simple hack avoids all inlining code and simplifies code below.  */
11100  if (!TARGET_ALIGN_STRINGOPS)
11101    align = 64;
11102
11103  if (GET_CODE (count_exp) == CONST_INT)
11104    {
11105      count = INTVAL (count_exp);
11106      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11107	return 0;
11108    }
11109
11110  /* Figure out the proper mode for the counter.  For 32-bit it is always
11111     SImode; for 64-bit use SImode when possible, otherwise DImode.
11112     Set count to the number of bytes copied when known at compile time.  */
11113  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11114      || x86_64_zero_extended_value (count_exp))
11115    counter_mode = SImode;
11116  else
11117    counter_mode = DImode;
11118
11119  if (counter_mode != SImode && counter_mode != DImode)
11120    abort ();
11121
11122  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11123  if (destreg != XEXP (dst, 0))
11124    dst = replace_equiv_address_nv (dst, destreg);
11125  srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
11126  if (srcreg != XEXP (src, 0))
11127    src = replace_equiv_address_nv (src, srcreg);
11128
11129  /* When optimizing for size, emit a simple rep ; movsb instruction for
11130     counts not divisible by 4.  */
11131
11132  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11133    {
11134      emit_insn (gen_cld ());
11135      countreg = ix86_zero_extend_to_Pmode (count_exp);
11136      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11137      srcexp = gen_rtx_PLUS (Pmode, srcreg, countreg);
11138      emit_insn (gen_rep_mov (destreg, dst, srcreg, src, countreg,
11139			      destexp, srcexp));
11140    }
11141
11142  /* For constant aligned (or small unaligned) copies use rep movsl
11143     followed by code copying the rest.  For PentiumPro ensure 8 byte
11144     alignment to allow rep movsl acceleration.  */
11145
11146  else if (count != 0
11147	   && (align >= 8
11148	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11149	       || optimize_size || count < (unsigned int) 64))
11150    {
11151      unsigned HOST_WIDE_INT offset = 0;
11152      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11153      rtx srcmem, dstmem;
11154
11155      emit_insn (gen_cld ());
11156      if (count & ~(size - 1))
11157	{
11158	  countreg = copy_to_mode_reg (counter_mode,
11159				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11160						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11161	  countreg = ix86_zero_extend_to_Pmode (countreg);
11162
11163	  destexp = gen_rtx_ASHIFT (Pmode, countreg,
11164				    GEN_INT (size == 4 ? 2 : 3));
11165	  srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11166	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11167
11168	  emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11169				  countreg, destexp, srcexp));
11170	  offset = count & ~(size - 1);
11171	}
11172      if (size == 8 && (count & 0x04))
11173	{
11174	  srcmem = adjust_automodify_address_nv (src, SImode, srcreg,
11175						 offset);
11176	  dstmem = adjust_automodify_address_nv (dst, SImode, destreg,
11177						 offset);
11178	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11179	  offset += 4;
11180	}
11181      if (count & 0x02)
11182	{
11183	  srcmem = adjust_automodify_address_nv (src, HImode, srcreg,
11184						 offset);
11185	  dstmem = adjust_automodify_address_nv (dst, HImode, destreg,
11186						 offset);
11187	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11188	  offset += 2;
11189	}
11190      if (count & 0x01)
11191	{
11192	  srcmem = adjust_automodify_address_nv (src, QImode, srcreg,
11193						 offset);
11194	  dstmem = adjust_automodify_address_nv (dst, QImode, destreg,
11195						 offset);
11196	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11197	}
11198    }
11199  /* The generic code based on the glibc implementation:
11200     - align destination to 4 bytes (8 byte alignment is used for PentiumPro
11201     allowing accelerated copying there)
11202     - copy the data using rep movsl
11203     - copy the rest.  */
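  /* A rough sketch of what the generic path emits for the 32-bit,
     unknown-count case (added, illustrative only):
	 <byte/word copies until the destination is 4-byte aligned>
	 shrl  $2, %ecx          # count in 32-bit words
	 rep movsl               # bulk copy
	 <copy the remaining 0-3 bytes>  */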
11204  else
11205    {
11206      rtx countreg2;
11207      rtx label = NULL;
11208      rtx srcmem, dstmem;
11209      int desired_alignment = (TARGET_PENTIUMPRO
11210			       && (count == 0 || count >= (unsigned int) 260)
11211			       ? 8 : UNITS_PER_WORD);
11212      /* Get rid of MEM_OFFSETs, they won't be accurate.  */
11213      dst = change_address (dst, BLKmode, destreg);
11214      src = change_address (src, BLKmode, srcreg);
11215
11216      /* In case we don't know anything about the alignment, default to
11217         the library version, since it is usually equally fast and results in
11218         shorter code.
11219
11220	 Also emit a library call when we know that the count is large and call
11221	 overhead will not be important.  */
11222      if (!TARGET_INLINE_ALL_STRINGOPS
11223	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11224	return 0;
11225
11226      if (TARGET_SINGLE_STRINGOP)
11227	emit_insn (gen_cld ());
11228
11229      countreg2 = gen_reg_rtx (Pmode);
11230      countreg = copy_to_mode_reg (counter_mode, count_exp);
11231
11232      /* We don't use loops to align destination and to copy parts smaller
11233         than 4 bytes, because gcc is able to optimize such code better (in
11234         the case the destination or the count really is aligned, gcc is often
11235         able to predict the branches) and also it is friendlier to the
11236         hardware branch prediction.
11237
11238         Using loops is beneficial for generic case, because we can
11239         handle small counts using the loops.  Many CPUs (such as Athlon)
11240         have large REP prefix setup costs.
11241
11242         This is quite costly.  Maybe we can revisit this decision later or
11243         add some customizability to this code.  */
11244
11245      if (count == 0 && align < desired_alignment)
11246	{
11247	  label = gen_label_rtx ();
11248	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11249				   LEU, 0, counter_mode, 1, label);
11250	}
11251      if (align <= 1)
11252	{
11253	  rtx label = ix86_expand_aligntest (destreg, 1);
11254	  srcmem = change_address (src, QImode, srcreg);
11255	  dstmem = change_address (dst, QImode, destreg);
11256	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11257	  ix86_adjust_counter (countreg, 1);
11258	  emit_label (label);
11259	  LABEL_NUSES (label) = 1;
11260	}
11261      if (align <= 2)
11262	{
11263	  rtx label = ix86_expand_aligntest (destreg, 2);
11264	  srcmem = change_address (src, HImode, srcreg);
11265	  dstmem = change_address (dst, HImode, destreg);
11266	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11267	  ix86_adjust_counter (countreg, 2);
11268	  emit_label (label);
11269	  LABEL_NUSES (label) = 1;
11270	}
11271      if (align <= 4 && desired_alignment > 4)
11272	{
11273	  rtx label = ix86_expand_aligntest (destreg, 4);
11274	  srcmem = change_address (src, SImode, srcreg);
11275	  dstmem = change_address (dst, SImode, destreg);
11276	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11277	  ix86_adjust_counter (countreg, 4);
11278	  emit_label (label);
11279	  LABEL_NUSES (label) = 1;
11280	}
11281
11282      if (label && desired_alignment > 4 && !TARGET_64BIT)
11283	{
11284	  emit_label (label);
11285	  LABEL_NUSES (label) = 1;
11286	  label = NULL_RTX;
11287	}
11288      if (!TARGET_SINGLE_STRINGOP)
11289	emit_insn (gen_cld ());
11290      if (TARGET_64BIT)
11291	{
11292	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11293				  GEN_INT (3)));
11294	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11295	}
11296      else
11297	{
11298	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11299	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11300	}
11301      srcexp = gen_rtx_PLUS (Pmode, destexp, srcreg);
11302      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11303      emit_insn (gen_rep_mov (destreg, dst, srcreg, src,
11304			      countreg2, destexp, srcexp));
11305
11306      if (label)
11307	{
11308	  emit_label (label);
11309	  LABEL_NUSES (label) = 1;
11310	}
11311      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11312	{
11313	  srcmem = change_address (src, SImode, srcreg);
11314	  dstmem = change_address (dst, SImode, destreg);
11315	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11316	}
11317      if ((align <= 4 || count == 0) && TARGET_64BIT)
11318	{
11319	  rtx label = ix86_expand_aligntest (countreg, 4);
11320	  srcmem = change_address (src, SImode, srcreg);
11321	  dstmem = change_address (dst, SImode, destreg);
11322	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11323	  emit_label (label);
11324	  LABEL_NUSES (label) = 1;
11325	}
11326      if (align > 2 && count != 0 && (count & 2))
11327	{
11328	  srcmem = change_address (src, HImode, srcreg);
11329	  dstmem = change_address (dst, HImode, destreg);
11330	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11331	}
11332      if (align <= 2 || count == 0)
11333	{
11334	  rtx label = ix86_expand_aligntest (countreg, 2);
11335	  srcmem = change_address (src, HImode, srcreg);
11336	  dstmem = change_address (dst, HImode, destreg);
11337	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11338	  emit_label (label);
11339	  LABEL_NUSES (label) = 1;
11340	}
11341      if (align > 1 && count != 0 && (count & 1))
11342	{
11343	  srcmem = change_address (src, QImode, srcreg);
11344	  dstmem = change_address (dst, QImode, destreg);
11345	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11346	}
11347      if (align <= 1 || count == 0)
11348	{
11349	  rtx label = ix86_expand_aligntest (countreg, 1);
11350	  srcmem = change_address (src, QImode, srcreg);
11351	  dstmem = change_address (dst, QImode, destreg);
11352	  emit_insn (gen_strmov (destreg, dstmem, srcreg, srcmem));
11353	  emit_label (label);
11354	  LABEL_NUSES (label) = 1;
11355	}
11356    }
11357
11358  return 1;
11359}
11360
11361/* Expand string clear operation (bzero).  Use i386 string operations when
11362   profitable.  expand_movstr contains similar code.  */
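/* A rough sketch of the generic 32-bit case (added, illustrative only):
       xorl  %eax, %eax        # value to store
       <byte/word stores until the destination is 4-byte aligned>
       shrl  $2, %ecx          # count in 32-bit words
       rep stosl               # bulk clear
       <store the remaining 0-3 bytes>  */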
11363int
11364ix86_expand_clrstr (rtx dst, rtx count_exp, rtx align_exp)
11365{
11366  rtx destreg, zeroreg, countreg, destexp;
11367  enum machine_mode counter_mode;
11368  HOST_WIDE_INT align = 0;
11369  unsigned HOST_WIDE_INT count = 0;
11370
11371  if (GET_CODE (align_exp) == CONST_INT)
11372    align = INTVAL (align_exp);
11373
11374  /* Can't use any of this if the user has appropriated esi.  */
11375  if (global_regs[4])
11376    return 0;
11377
11378  /* This simple hack avoids all inlining code and simplifies code below.  */
11379  if (!TARGET_ALIGN_STRINGOPS)
11380    align = 32;
11381
11382  if (GET_CODE (count_exp) == CONST_INT)
11383    {
11384      count = INTVAL (count_exp);
11385      if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
11386	return 0;
11387    }
11388  /* Figure out the proper mode for the counter.  For 32-bit it is always
11389     SImode; for 64-bit use SImode when possible, otherwise DImode.
11390     Set count to the number of bytes cleared when known at compile time.  */
11391  if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
11392      || x86_64_zero_extended_value (count_exp))
11393    counter_mode = SImode;
11394  else
11395    counter_mode = DImode;
11396
11397  destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
11398  if (destreg != XEXP (dst, 0))
11399    dst = replace_equiv_address_nv (dst, destreg);
11400
11401  emit_insn (gen_cld ());
11402
11403  /* When optimizing for size, emit a simple rep ; stosb instruction for
11404     counts not divisible by 4.  */
11405
11406  if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
11407    {
11408      countreg = ix86_zero_extend_to_Pmode (count_exp);
11409      zeroreg = copy_to_mode_reg (QImode, const0_rtx);
11410      destexp = gen_rtx_PLUS (Pmode, destreg, countreg);
11411      emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11412    }
11413  else if (count != 0
11414	   && (align >= 8
11415	       || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
11416	       || optimize_size || count < (unsigned int) 64))
11417    {
11418      int size = TARGET_64BIT && !optimize_size ? 8 : 4;
11419      unsigned HOST_WIDE_INT offset = 0;
11420
11421      zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
11422      if (count & ~(size - 1))
11423	{
11424	  countreg = copy_to_mode_reg (counter_mode,
11425				       GEN_INT ((count >> (size == 4 ? 2 : 3))
11426						& (TARGET_64BIT ? -1 : 0x3fffffff)));
11427	  countreg = ix86_zero_extend_to_Pmode (countreg);
11428	  destexp = gen_rtx_ASHIFT (Pmode, countreg, GEN_INT (size == 4 ? 2 : 3));
11429	  destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11430	  emit_insn (gen_rep_stos (destreg, countreg, dst, zeroreg, destexp));
11431	  offset = count & ~(size - 1);
11432	}
11433      if (size == 8 && (count & 0x04))
11434	{
11435	  rtx mem = adjust_automodify_address_nv (dst, SImode, destreg,
11436						  offset);
11437	  emit_insn (gen_strset (destreg, mem,
11438				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11439	  offset += 4;
11440	}
11441      if (count & 0x02)
11442	{
11443	  rtx mem = adjust_automodify_address_nv (dst, HImode, destreg,
11444						  offset);
11445	  emit_insn (gen_strset (destreg, mem,
11446				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11447	  offset += 2;
11448	}
11449      if (count & 0x01)
11450	{
11451	  rtx mem = adjust_automodify_address_nv (dst, QImode, destreg,
11452						  offset);
11453	  emit_insn (gen_strset (destreg, mem,
11454				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11455	}
11456    }
11457  else
11458    {
11459      rtx countreg2;
11460      rtx label = NULL;
11461      /* Compute desired alignment of the string operation.  */
11462      int desired_alignment = (TARGET_PENTIUMPRO
11463			       && (count == 0 || count >= (unsigned int) 260)
11464			       ? 8 : UNITS_PER_WORD);
11465
11466      /* In case we don't know anything about the alignment, default to
11467         the library version, since it is usually equally fast and results in
11468         shorter code.
11469
11470	 Also emit a library call when we know that the count is large and call
11471	 overhead will not be important.  */
11472      if (!TARGET_INLINE_ALL_STRINGOPS
11473	  && (align < UNITS_PER_WORD || !TARGET_REP_MOVL_OPTIMAL))
11474	return 0;
11475
11476      if (TARGET_SINGLE_STRINGOP)
11477	emit_insn (gen_cld ());
11478
11479      countreg2 = gen_reg_rtx (Pmode);
11480      countreg = copy_to_mode_reg (counter_mode, count_exp);
11481      zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
11482      /* Get rid of MEM_OFFSET, it won't be accurate.  */
11483      dst = change_address (dst, BLKmode, destreg);
11484
11485      if (count == 0 && align < desired_alignment)
11486	{
11487	  label = gen_label_rtx ();
11488	  emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
11489				   LEU, 0, counter_mode, 1, label);
11490	}
11491      if (align <= 1)
11492	{
11493	  rtx label = ix86_expand_aligntest (destreg, 1);
11494	  emit_insn (gen_strset (destreg, dst,
11495				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11496	  ix86_adjust_counter (countreg, 1);
11497	  emit_label (label);
11498	  LABEL_NUSES (label) = 1;
11499	}
11500      if (align <= 2)
11501	{
11502	  rtx label = ix86_expand_aligntest (destreg, 2);
11503	  emit_insn (gen_strset (destreg, dst,
11504				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11505	  ix86_adjust_counter (countreg, 2);
11506	  emit_label (label);
11507	  LABEL_NUSES (label) = 1;
11508	}
11509      if (align <= 4 && desired_alignment > 4)
11510	{
11511	  rtx label = ix86_expand_aligntest (destreg, 4);
11512	  emit_insn (gen_strset (destreg, dst,
11513				 (TARGET_64BIT
11514				  ? gen_rtx_SUBREG (SImode, zeroreg, 0)
11515				  : zeroreg)));
11516	  ix86_adjust_counter (countreg, 4);
11517	  emit_label (label);
11518	  LABEL_NUSES (label) = 1;
11519	}
11520
11521      if (label && desired_alignment > 4 && !TARGET_64BIT)
11522	{
11523	  emit_label (label);
11524	  LABEL_NUSES (label) = 1;
11525	  label = NULL_RTX;
11526	}
11527
11528      if (!TARGET_SINGLE_STRINGOP)
11529	emit_insn (gen_cld ());
11530      if (TARGET_64BIT)
11531	{
11532	  emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
11533				  GEN_INT (3)));
11534	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, GEN_INT (3));
11535	}
11536      else
11537	{
11538	  emit_insn (gen_lshrsi3 (countreg2, countreg, const2_rtx));
11539	  destexp = gen_rtx_ASHIFT (Pmode, countreg2, const2_rtx);
11540	}
11541      destexp = gen_rtx_PLUS (Pmode, destexp, destreg);
11542      emit_insn (gen_rep_stos (destreg, countreg2, dst, zeroreg, destexp));
11543
11544      if (label)
11545	{
11546	  emit_label (label);
11547	  LABEL_NUSES (label) = 1;
11548	}
11549
11550      if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
11551	emit_insn (gen_strset (destreg, dst,
11552			       gen_rtx_SUBREG (SImode, zeroreg, 0)));
11553      if (TARGET_64BIT && (align <= 4 || count == 0))
11554	{
11555	  rtx label = ix86_expand_aligntest (countreg, 4);
11556	  emit_insn (gen_strset (destreg, dst,
11557				 gen_rtx_SUBREG (SImode, zeroreg, 0)));
11558	  emit_label (label);
11559	  LABEL_NUSES (label) = 1;
11560	}
11561      if (align > 2 && count != 0 && (count & 2))
11562	emit_insn (gen_strset (destreg, dst,
11563			       gen_rtx_SUBREG (HImode, zeroreg, 0)));
11564      if (align <= 2 || count == 0)
11565	{
11566	  rtx label = ix86_expand_aligntest (countreg, 2);
11567	  emit_insn (gen_strset (destreg, dst,
11568				 gen_rtx_SUBREG (HImode, zeroreg, 0)));
11569	  emit_label (label);
11570	  LABEL_NUSES (label) = 1;
11571	}
11572      if (align > 1 && count != 0 && (count & 1))
11573	emit_insn (gen_strset (destreg, dst,
11574			       gen_rtx_SUBREG (QImode, zeroreg, 0)));
11575      if (align <= 1 || count == 0)
11576	{
11577	  rtx label = ix86_expand_aligntest (countreg, 1);
11578	  emit_insn (gen_strset (destreg, dst,
11579				 gen_rtx_SUBREG (QImode, zeroreg, 0)));
11580	  emit_label (label);
11581	  LABEL_NUSES (label) = 1;
11582	}
11583    }
11584  return 1;
11585}
11586
11587/* Expand strlen.  */
11588int
11589ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
11590{
11591  rtx addr, scratch1, scratch2, scratch3, scratch4;
11592
11593  /* The generic case of the strlen expander is long.  Avoid its
11594     expansion unless TARGET_INLINE_ALL_STRINGOPS.  */
11595
11596  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11597      && !TARGET_INLINE_ALL_STRINGOPS
11598      && !optimize_size
11599      && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
11600    return 0;
11601
11602  addr = force_reg (Pmode, XEXP (src, 0));
11603  scratch1 = gen_reg_rtx (Pmode);
11604
11605  if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
11606      && !optimize_size)
11607    {
11608      /* Well it seems that some optimizer does not combine a call like
11609         foo(strlen(bar), strlen(bar));
11610         when the move and the subtraction are done here.  It does calculate
11611         the length just once when these instructions are done inside of
11612         output_strlen_unroll().  But I think since &bar[strlen(bar)] is
11613         often used and I use one fewer register for the lifetime of
11614         output_strlen_unroll() this is better.  */
11615
11616      emit_move_insn (out, addr);
11617
11618      ix86_expand_strlensi_unroll_1 (out, src, align);
11619
11620      /* strlensi_unroll_1 returns the address of the zero at the end of
11621         the string, like memchr(), so compute the length by subtracting
11622         the start address.  */
11623      if (TARGET_64BIT)
11624	emit_insn (gen_subdi3 (out, out, addr));
11625      else
11626	emit_insn (gen_subsi3 (out, out, addr));
11627    }
11628  else
11629    {
11630      rtx unspec;
11631      scratch2 = gen_reg_rtx (Pmode);
11632      scratch3 = gen_reg_rtx (Pmode);
11633      scratch4 = force_reg (Pmode, constm1_rtx);
11634
11635      emit_move_insn (scratch3, addr);
11636      eoschar = force_reg (QImode, eoschar);
11637
11638      emit_insn (gen_cld ());
11639      src = replace_equiv_address_nv (src, scratch3);
11640
11641      /* If .md starts supporting :P, this can be done in .md.  */
11642      unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
11643						 scratch4), UNSPEC_SCAS);
11644      emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
11645      if (TARGET_64BIT)
11646	{
11647	  emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
11648	  emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
11649	}
11650      else
11651	{
11652	  emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
11653	  emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
11654	}
11655    }
11656  return 1;
11657}
11658
11659/* Expand the appropriate insns for doing strlen if not just doing
11660   repnz; scasb
11661
11662   out = result, initialized with the start address
11663   align_rtx = alignment of the address.
11664   scratch = scratch register, initialized with the start address when
11665	not aligned, otherwise undefined
11666
11667   This is just the body. It needs the initializations mentioned above and
11668   some address computing at the end.  These things are done in i386.md.  */
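/* Added outline: the code below first checks up to three leading bytes one
   at a time until OUT is 4-byte aligned, then loops reading a word at a
   time and applying the zero-byte test described further down, and finally
   adjusts OUT so that it points at the terminating zero byte.  */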
11669
11670static void
11671ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
11672{
11673  int align;
11674  rtx tmp;
11675  rtx align_2_label = NULL_RTX;
11676  rtx align_3_label = NULL_RTX;
11677  rtx align_4_label = gen_label_rtx ();
11678  rtx end_0_label = gen_label_rtx ();
11679  rtx mem;
11680  rtx tmpreg = gen_reg_rtx (SImode);
11681  rtx scratch = gen_reg_rtx (SImode);
11682  rtx cmp;
11683
11684  align = 0;
11685  if (GET_CODE (align_rtx) == CONST_INT)
11686    align = INTVAL (align_rtx);
11687
11688  /* Loop to check 1..3 bytes for null to get an aligned pointer.  */
11689
11690  /* Is there a known alignment and is it less than 4?  */
11691  if (align < 4)
11692    {
11693      rtx scratch1 = gen_reg_rtx (Pmode);
11694      emit_move_insn (scratch1, out);
11695      /* Is there a known alignment and is it not 2? */
11696      if (align != 2)
11697	{
11698	  align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
11699	  align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
11700
11701	  /* Leave just the 3 lower bits.  */
11702	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
11703				    NULL_RTX, 0, OPTAB_WIDEN);
11704
11705	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11706				   Pmode, 1, align_4_label);
11707	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
11708				   Pmode, 1, align_2_label);
11709	  emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
11710				   Pmode, 1, align_3_label);
11711	}
11712      else
11713        {
11714	  /* Since the alignment is 2, we have to check 2 or 0 bytes;
11715	     check whether it is aligned to 4 bytes.  */
11716
11717	  align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
11718				    NULL_RTX, 0, OPTAB_WIDEN);
11719
11720	  emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
11721				   Pmode, 1, align_4_label);
11722        }
11723
11724      mem = change_address (src, QImode, out);
11725
11726      /* Now compare the bytes.  */
11727
11728      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
11729      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
11730			       QImode, 1, end_0_label);
11731
11732      /* Increment the address.  */
11733      if (TARGET_64BIT)
11734	emit_insn (gen_adddi3 (out, out, const1_rtx));
11735      else
11736	emit_insn (gen_addsi3 (out, out, const1_rtx));
11737
11738      /* Not needed with an alignment of 2 */
11739      if (align != 2)
11740	{
11741	  emit_label (align_2_label);
11742
11743	  emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11744				   end_0_label);
11745
11746	  if (TARGET_64BIT)
11747	    emit_insn (gen_adddi3 (out, out, const1_rtx));
11748	  else
11749	    emit_insn (gen_addsi3 (out, out, const1_rtx));
11750
11751	  emit_label (align_3_label);
11752	}
11753
11754      emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
11755			       end_0_label);
11756
11757      if (TARGET_64BIT)
11758	emit_insn (gen_adddi3 (out, out, const1_rtx));
11759      else
11760	emit_insn (gen_addsi3 (out, out, const1_rtx));
11761    }
11762
11763  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
11764     align this loop; that only makes the code larger and does not make it
11765     any faster.  */
11766  emit_label (align_4_label);
11767
11768  mem = change_address (src, SImode, out);
11769  emit_move_insn (scratch, mem);
11770  if (TARGET_64BIT)
11771    emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
11772  else
11773    emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
11774
11775  /* This formula yields a nonzero result iff one of the bytes is zero.
11776     This saves three branches inside the loop and many cycles.  */
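  /* Added worked example: (0x12005678 - 0x01010101) & ~0x12005678
     & 0x80808080 = 0x00800000, nonzero because one byte of the input is
     zero, whereas the same expression for 0x12345678 evaluates to 0.  */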
11777
11778  emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
11779  emit_insn (gen_one_cmplsi2 (scratch, scratch));
11780  emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
11781  emit_insn (gen_andsi3 (tmpreg, tmpreg,
11782			 gen_int_mode (0x80808080, SImode)));
11783  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
11784			   align_4_label);
11785
11786  if (TARGET_CMOVE)
11787    {
11788       rtx reg = gen_reg_rtx (SImode);
11789       rtx reg2 = gen_reg_rtx (Pmode);
11790       emit_move_insn (reg, tmpreg);
11791       emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
11792
11793       /* If zero is not in the first two bytes, move two bytes forward.  */
11794       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11795       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11796       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11797       emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
11798			       gen_rtx_IF_THEN_ELSE (SImode, tmp,
11799						     reg,
11800						     tmpreg)));
11801       /* Emit lea manually to avoid clobbering of flags.  */
11802       emit_insn (gen_rtx_SET (SImode, reg2,
11803			       gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
11804
11805       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11806       tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
11807       emit_insn (gen_rtx_SET (VOIDmode, out,
11808			       gen_rtx_IF_THEN_ELSE (Pmode, tmp,
11809						     reg2,
11810						     out)));
11811
11812    }
11813  else
11814    {
11815       rtx end_2_label = gen_label_rtx ();
11816       /* Is zero in the first two bytes? */
11817
11818       emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
11819       tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
11820       tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
11821       tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11822                            gen_rtx_LABEL_REF (VOIDmode, end_2_label),
11823                            pc_rtx);
11824       tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11825       JUMP_LABEL (tmp) = end_2_label;
11826
11827       /* Not in the first two.  Move two bytes forward.  */
11828       emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
11829       if (TARGET_64BIT)
11830	 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
11831       else
11832	 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
11833
11834       emit_label (end_2_label);
11835
11836    }
11837
11838  /* Avoid branch in fixing the byte.  */
11839  tmpreg = gen_lowpart (QImode, tmpreg);
11840  emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
11841  cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
11842  if (TARGET_64BIT)
11843    emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
11844  else
11845    emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
11846
11847  emit_label (end_0_label);
11848}
11849
11850void
11851ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
11852		  rtx callarg2 ATTRIBUTE_UNUSED,
11853		  rtx pop, int sibcall)
11854{
11855  rtx use = NULL, call;
11856
11857  if (pop == const0_rtx)
11858    pop = NULL;
11859  if (TARGET_64BIT && pop)
11860    abort ();
11861
11862#if TARGET_MACHO
11863  if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
11864    fnaddr = machopic_indirect_call_target (fnaddr);
11865#else
11866  /* Static functions and indirect calls don't need the pic register.  */
11867  if (! TARGET_64BIT && flag_pic
11868      && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
11869      && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
11870    use_reg (&use, pic_offset_table_rtx);
11871
11872  if (TARGET_64BIT && INTVAL (callarg2) >= 0)
11873    {
11874      rtx al = gen_rtx_REG (QImode, 0);
11875      emit_move_insn (al, callarg2);
11876      use_reg (&use, al);
11877    }
11878#endif /* TARGET_MACHO */
11879
11880  if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
11881    {
11882      fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11883      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11884    }
11885  if (sibcall && TARGET_64BIT
11886      && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
11887    {
11888      rtx addr;
11889      addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
11890      fnaddr = gen_rtx_REG (Pmode, FIRST_REX_INT_REG + 3 /* R11 */);
11891      emit_move_insn (fnaddr, addr);
11892      fnaddr = gen_rtx_MEM (QImode, fnaddr);
11893    }
11894
11895  call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
11896  if (retval)
11897    call = gen_rtx_SET (VOIDmode, retval, call);
11898  if (pop)
11899    {
11900      pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
11901      pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
11902      call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
11903    }
11904
11905  call = emit_call_insn (call);
11906  if (use)
11907    CALL_INSN_FUNCTION_USAGE (call) = use;
11908}
11909
11910
11911/* Clear stack slot assignments remembered from previous functions.
11912   This is called from INIT_EXPANDERS once before RTL is emitted for each
11913   function.  */
11914
11915static struct machine_function *
11916ix86_init_machine_status (void)
11917{
11918  struct machine_function *f;
11919
11920  f = ggc_alloc_cleared (sizeof (struct machine_function));
11921  f->use_fast_prologue_epilogue_nregs = -1;
11922
11923  return f;
11924}
11925
11926/* Return a MEM corresponding to a stack slot with mode MODE.
11927   Allocate a new slot if necessary.
11928
11929   The RTL for a function can have several slots available: N is
11930   which slot to use.  */
11931
11932rtx
11933assign_386_stack_local (enum machine_mode mode, int n)
11934{
11935  struct stack_local_entry *s;
11936
11937  if (n < 0 || n >= MAX_386_STACK_LOCALS)
11938    abort ();
11939
11940  for (s = ix86_stack_locals; s; s = s->next)
11941    if (s->mode == mode && s->n == n)
11942      return s->rtl;
11943
11944  s = (struct stack_local_entry *)
11945    ggc_alloc (sizeof (struct stack_local_entry));
11946  s->n = n;
11947  s->mode = mode;
11948  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
11949
11950  s->next = ix86_stack_locals;
11951  ix86_stack_locals = s;
11952  return s->rtl;
11953}
11954
11955/* Construct the SYMBOL_REF for the tls_get_addr function.  */
11956
11957static GTY(()) rtx ix86_tls_symbol;
11958rtx
11959ix86_tls_get_addr (void)
11960{
11961
11962  if (!ix86_tls_symbol)
11963    {
11964      ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
11965					    (TARGET_GNU_TLS && !TARGET_64BIT)
11966					    ? "___tls_get_addr"
11967					    : "__tls_get_addr");
11968    }
11969
11970  return ix86_tls_symbol;
11971}
11972
11973/* Calculate the length of the memory address in the instruction
11974   encoding.  Does not include the one-byte modrm, opcode, or prefix.  */
11975
11976static int
11977memory_address_length (rtx addr)
11978{
11979  struct ix86_address parts;
11980  rtx base, index, disp;
11981  int len;
11982
11983  if (GET_CODE (addr) == PRE_DEC
11984      || GET_CODE (addr) == POST_INC
11985      || GET_CODE (addr) == PRE_MODIFY
11986      || GET_CODE (addr) == POST_MODIFY)
11987    return 0;
11988
11989  if (! ix86_decompose_address (addr, &parts))
11990    abort ();
11991
11992  base = parts.base;
11993  index = parts.index;
11994  disp = parts.disp;
11995  len = 0;
11996
11997  /* Rule of thumb:
11998       - esp as the base always wants an index,
11999       - ebp as the base always wants a displacement.  */
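  /* Some added examples of the count returned here (extra bytes beyond the
     modrm byte itself): "(%ebx)" needs 0, "(%esp)" needs a SIB byte (1),
     "8(%ebp)" needs a disp8 (1), a bare symbol needs a disp32 (4), and
     "8(%ebx,%esi)" needs a SIB byte plus a disp8 (2).  */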
12000
12001  /* Register Indirect.  */
12002  if (base && !index && !disp)
12003    {
12004      /* esp (for its index) and ebp (for its displacement) need
12005	 the two-byte modrm form.  */
12006      if (addr == stack_pointer_rtx
12007	  || addr == arg_pointer_rtx
12008	  || addr == frame_pointer_rtx
12009	  || addr == hard_frame_pointer_rtx)
12010	len = 1;
12011    }
12012
12013  /* Direct Addressing.  */
12014  else if (disp && !base && !index)
12015    len = 4;
12016
12017  else
12018    {
12019      /* Find the length of the displacement constant.  */
12020      if (disp)
12021	{
12022	  if (GET_CODE (disp) == CONST_INT
12023	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
12024	      && base)
12025	    len = 1;
12026	  else
12027	    len = 4;
12028	}
12029      /* ebp always wants a displacement.  */
12030      else if (base == hard_frame_pointer_rtx)
12031        len = 1;
12032
12033      /* An index requires the two-byte modrm form....  */
12034      if (index
12035	  /* ...like esp, which always wants an index.  */
12036	  || base == stack_pointer_rtx
12037	  || base == arg_pointer_rtx
12038	  || base == frame_pointer_rtx)
12039	len += 1;
12040    }
12041
12042  return len;
12043}
12044
12045/* Compute default value for "length_immediate" attribute.  When SHORTFORM
12046   is set, expect that the insn has an 8-bit immediate alternative.  */
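/* For instance (an added note): "addl $4, %eax" can use the sign-extended
   8-bit immediate form and so contributes 1 here, while "addl $1000, %eax"
   needs a full 4-byte immediate.  */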
12047int
12048ix86_attr_length_immediate_default (rtx insn, int shortform)
12049{
12050  int len = 0;
12051  int i;
12052  extract_insn_cached (insn);
12053  for (i = recog_data.n_operands - 1; i >= 0; --i)
12054    if (CONSTANT_P (recog_data.operand[i]))
12055      {
12056	if (len)
12057	  abort ();
12058	if (shortform
12059	    && GET_CODE (recog_data.operand[i]) == CONST_INT
12060	    && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
12061	  len = 1;
12062	else
12063	  {
12064	    switch (get_attr_mode (insn))
12065	      {
12066		case MODE_QI:
12067		  len+=1;
12068		  break;
12069		case MODE_HI:
12070		  len+=2;
12071		  break;
12072		case MODE_SI:
12073		  len+=4;
12074		  break;
12075		/* Immediates for DImode instructions are encoded as 32bit sign extended values.  */
12076		case MODE_DI:
12077		  len+=4;
12078		  break;
12079		default:
12080		  fatal_insn ("unknown insn mode", insn);
12081	      }
12082	  }
12083      }
12084  return len;
12085}
12086/* Compute default value for "length_address" attribute.  */
12087int
12088ix86_attr_length_address_default (rtx insn)
12089{
12090  int i;
12091
12092  if (get_attr_type (insn) == TYPE_LEA)
12093    {
12094      rtx set = PATTERN (insn);
12095      if (GET_CODE (set) == SET)
12096	;
12097      else if (GET_CODE (set) == PARALLEL
12098	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
12099	set = XVECEXP (set, 0, 0);
12100      else
12101	{
12102#ifdef ENABLE_CHECKING
12103	  abort ();
12104#endif
12105	  return 0;
12106	}
12107
12108      return memory_address_length (SET_SRC (set));
12109    }
12110
12111  extract_insn_cached (insn);
12112  for (i = recog_data.n_operands - 1; i >= 0; --i)
12113    if (GET_CODE (recog_data.operand[i]) == MEM)
12114      {
12115	return memory_address_length (XEXP (recog_data.operand[i], 0));
12117      }
12118  return 0;
12119}
12120
12121/* Return the maximum number of instructions a cpu can issue.  */
12122
12123static int
12124ix86_issue_rate (void)
12125{
12126  switch (ix86_tune)
12127    {
12128    case PROCESSOR_PENTIUM:
12129    case PROCESSOR_K6:
12130      return 2;
12131
12132    case PROCESSOR_PENTIUMPRO:
12133    case PROCESSOR_PENTIUM4:
12134    case PROCESSOR_ATHLON:
12135    case PROCESSOR_K8:
12136      return 3;
12137
12138    default:
12139      return 1;
12140    }
12141}
12142
12143/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
12144   set by DEP_INSN and nothing else that DEP_INSN sets.  */
12145
12146static int
12147ix86_flags_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12148{
12149  rtx set, set2;
12150
12151  /* Simplify the test for uninteresting insns.  */
12152  if (insn_type != TYPE_SETCC
12153      && insn_type != TYPE_ICMOV
12154      && insn_type != TYPE_FCMOV
12155      && insn_type != TYPE_IBR)
12156    return 0;
12157
12158  if ((set = single_set (dep_insn)) != 0)
12159    {
12160      set = SET_DEST (set);
12161      set2 = NULL_RTX;
12162    }
12163  else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
12164	   && XVECLEN (PATTERN (dep_insn), 0) == 2
12165	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
12166	   && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
12167    {
12168      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
12169      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
12170    }
12171  else
12172    return 0;
12173
12174  if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
12175    return 0;
12176
12177  /* This test is true if the dependent insn reads the flags but
12178     not any other potentially set register.  */
12179  if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
12180    return 0;
12181
12182  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
12183    return 0;
12184
12185  return 1;
12186}
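
/* As an example of the case this catches: on the Pentium a compare
   followed by a conditional jump,

	cmpl	%eax, %ebx
	je	label

   pairs, because the jump reads only the flags the compare sets; the
   PROCESSOR_PENTIUM case of ix86_adjust_cost below uses this predicate
   to force the dependence cost of such a pair to zero.  */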
12187
12188/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
12189   address with operands set by DEP_INSN.  */
12190
12191static int
12192ix86_agi_dependant (rtx insn, rtx dep_insn, enum attr_type insn_type)
12193{
12194  rtx addr;
12195
12196  if (insn_type == TYPE_LEA
12197      && TARGET_PENTIUM)
12198    {
12199      addr = PATTERN (insn);
12200      if (GET_CODE (addr) == SET)
12201	;
12202      else if (GET_CODE (addr) == PARALLEL
12203	       && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
12204	addr = XVECEXP (addr, 0, 0);
12205      else
12206	abort ();
12207      addr = SET_SRC (addr);
12208    }
12209  else
12210    {
12211      int i;
12212      extract_insn_cached (insn);
12213      for (i = recog_data.n_operands - 1; i >= 0; --i)
12214	if (GET_CODE (recog_data.operand[i]) == MEM)
12215	  {
12216	    addr = XEXP (recog_data.operand[i], 0);
12217	    goto found;
12218	  }
12219      return 0;
12220    found:;
12221    }
12222
12223  return modified_in_p (addr, dep_insn);
12224}
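
/* As an example of the case this catches: on the Pentium the sequence

	movl	%eax, %ebx
	movl	(%ebx), %ecx

   suffers an Address Generation Interlock, because the load's address is
   produced by the immediately preceding instruction; ix86_adjust_cost
   below adds a cycle of latency when this predicate holds.  */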
12225
12226static int
12227ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
12228{
12229  enum attr_type insn_type, dep_insn_type;
12230  enum attr_memory memory, dep_memory;
12231  rtx set, set2;
12232  int dep_insn_code_number;
12233
12234  /* Anti and output dependencies have zero cost on all CPUs.  */
12235  if (REG_NOTE_KIND (link) != 0)
12236    return 0;
12237
12238  dep_insn_code_number = recog_memoized (dep_insn);
12239
12240  /* If we can't recognize the insns, we can't really do anything.  */
12241  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
12242    return cost;
12243
12244  insn_type = get_attr_type (insn);
12245  dep_insn_type = get_attr_type (dep_insn);
12246
12247  switch (ix86_tune)
12248    {
12249    case PROCESSOR_PENTIUM:
12250      /* Address Generation Interlock adds a cycle of latency.  */
12251      if (ix86_agi_dependant (insn, dep_insn, insn_type))
12252	cost += 1;
12253
12254      /* ??? Compares pair with jump/setcc.  */
12255      if (ix86_flags_dependant (insn, dep_insn, insn_type))
12256	cost = 0;
12257
12258      /* Floating point stores require value to be ready one cycle earlier.  */
12259      if (insn_type == TYPE_FMOV
12260	  && get_attr_memory (insn) == MEMORY_STORE
12261	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12262	cost += 1;
12263      break;
12264
12265    case PROCESSOR_PENTIUMPRO:
12266      memory = get_attr_memory (insn);
12267      dep_memory = get_attr_memory (dep_insn);
12268
12269      /* Since we can't represent delayed latencies of load+operation,
12270	 increase the cost here for non-imov insns.  */
12271      if (dep_insn_type != TYPE_IMOV
12272          && dep_insn_type != TYPE_FMOV
12273          && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
12274	cost += 1;
12275
12276      /* INT->FP conversion is expensive.  */
12277      if (get_attr_fp_int_src (dep_insn))
12278	cost += 5;
12279
12280      /* There is one cycle extra latency between an FP op and a store.  */
12281      if (insn_type == TYPE_FMOV
12282	  && (set = single_set (dep_insn)) != NULL_RTX
12283	  && (set2 = single_set (insn)) != NULL_RTX
12284	  && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
12285	  && GET_CODE (SET_DEST (set2)) == MEM)
12286	cost += 1;
12287
12288      /* Model the ability of the reorder buffer to hide the latency of a
12289	 load by executing it in parallel with the previous instruction when
12290	 the previous instruction is not needed to compute the address.  */
12291      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12292	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12293	{
12294	  /* Claim moves to take one cycle, as the core can issue one load
12295	     at a time and the next load can start a cycle later.  */
12296	  if (dep_insn_type == TYPE_IMOV
12297	      || dep_insn_type == TYPE_FMOV)
12298	    cost = 1;
12299	  else if (cost > 1)
12300	    cost--;
12301	}
12302      break;
12303
12304    case PROCESSOR_K6:
12305      memory = get_attr_memory (insn);
12306      dep_memory = get_attr_memory (dep_insn);
12307      /* The esp dependency is resolved before the instruction is really
12308         finished.  */
12309      if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
12310	  && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
12311	return 1;
12312
12313      /* Since we can't represent delayed latencies of load+operation,
12314	 increase the cost here for non-imov insns.  */
12315      if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
12316	cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
12317
12318      /* INT->FP conversion is expensive.  */
12319      if (get_attr_fp_int_src (dep_insn))
12320	cost += 5;
12321
12322      /* Model the ability of the reorder buffer to hide the latency of a
12323	 load by executing it in parallel with the previous instruction when
12324	 the previous instruction is not needed to compute the address.  */
12325      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12326	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12327	{
12328	  /* Claim moves to take one cycle, as the core can issue one load
12329	     at a time and the next load can start a cycle later.  */
12330	  if (dep_insn_type == TYPE_IMOV
12331	      || dep_insn_type == TYPE_FMOV)
12332	    cost = 1;
12333	  else if (cost > 2)
12334	    cost -= 2;
12335	  else
12336	    cost = 1;
12337	}
12338      break;
12339
12340    case PROCESSOR_ATHLON:
12341    case PROCESSOR_K8:
12342      memory = get_attr_memory (insn);
12343      dep_memory = get_attr_memory (dep_insn);
12344
12345      /* Model the ability of the reorder buffer to hide the latency of a
12346	 load by executing it in parallel with the previous instruction when
12347	 the previous instruction is not needed to compute the address.  */
12348      if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
12349	  && !ix86_agi_dependant (insn, dep_insn, insn_type))
12350	{
12351	  enum attr_unit unit = get_attr_unit (insn);
12352	  int loadcost = 3;
12353
12354	  /* Because of the difference between the lengths of the integer and
12355	     floating point unit pipeline preparation stages, the memory
12356	     operands for floating point are cheaper.
12357
12358	     ??? For Athlon the difference is most probably 2.  */
12359	  if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
12360	    loadcost = 3;
12361	  else
12362	    loadcost = TARGET_ATHLON ? 2 : 0;
12363
12364	  if (cost >= loadcost)
12365	    cost -= loadcost;
12366	  else
12367	    cost = 0;
12368	}
12369
12370    default:
12371      break;
12372    }
12373
12374  return cost;
12375}
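
/* A rough worked example of the Athlon/K8 adjustment above: if the DFA
   reports a dependence cost of 5 and INSN is an integer instruction whose
   memory address does not depend on DEP_INSN, three cycles of the load
   are assumed to be hidden behind DEP_INSN, so the cost returned is
   5 - 3 = 2.  */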
12376
12377static union
12378{
12379  struct ppro_sched_data
12380  {
12381    rtx decode[3];
12382    int issued_this_cycle;
12383  } ppro;
12384} ix86_sched_data;
12385
12386static enum attr_ppro_uops
12387ix86_safe_ppro_uops (rtx insn)
12388{
12389  if (recog_memoized (insn) >= 0)
12390    return get_attr_ppro_uops (insn);
12391  else
12392    return PPRO_UOPS_MANY;
12393}
12394
12395static void
12396ix86_dump_ppro_packet (FILE *dump)
12397{
12398  if (ix86_sched_data.ppro.decode[0])
12399    {
12400      fprintf (dump, "PPRO packet: %d",
12401	       INSN_UID (ix86_sched_data.ppro.decode[0]));
12402      if (ix86_sched_data.ppro.decode[1])
12403	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
12404      if (ix86_sched_data.ppro.decode[2])
12405	fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
12406      fputc ('\n', dump);
12407    }
12408}
12409
12410/* We're beginning a new block.  Initialize data structures as necessary.  */
12411
12412static void
12413ix86_sched_init (FILE *dump ATTRIBUTE_UNUSED,
12414		 int sched_verbose ATTRIBUTE_UNUSED,
12415		 int veclen ATTRIBUTE_UNUSED)
12416{
12417  memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
12418}
12419
12420/* Shift INSN to SLOT, and shift everything else down.  */
12421
12422static void
12423ix86_reorder_insn (rtx *insnp, rtx *slot)
12424{
12425  if (insnp != slot)
12426    {
12427      rtx insn = *insnp;
12428      do
12429	insnp[0] = insnp[1];
12430      while (++insnp != slot);
12431      *insnp = insn;
12432    }
12433}
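
/* For example, with ready[] = { w, x, y, z } and e_ready pointing at z,
   ix86_reorder_insn (&ready[1], e_ready) yields { w, y, z, x }: x is
   rotated into the last slot, which is the insn the scheduler will
   consider first.  */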
12434
12435static void
12436ix86_sched_reorder_ppro (rtx *ready, rtx *e_ready)
12437{
12438  rtx decode[3];
12439  enum attr_ppro_uops cur_uops;
12440  int issued_this_cycle;
12441  rtx *insnp;
12442  int i;
12443
12444  /* At this point .ppro.decode contains the state of the three
12445     decoders from last "cycle".  That is, those insns that were
12446     actually independent.  But here we're scheduling for the
12447     decoder, and we may find things that are decodable in the
12448     same cycle.  */
12449
12450  memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
12451  issued_this_cycle = 0;
12452
12453  insnp = e_ready;
12454  cur_uops = ix86_safe_ppro_uops (*insnp);
12455
12456  /* If the decoders are empty, and we have a complex insn at the
12457     head of the priority queue, let it issue without complaint.  */
12458  if (decode[0] == NULL)
12459    {
12460      if (cur_uops == PPRO_UOPS_MANY)
12461	{
12462	  decode[0] = *insnp;
12463	  goto ppro_done;
12464	}
12465
12466      /* Otherwise, search for a 2-4 uop insn to issue.  */
12467      while (cur_uops != PPRO_UOPS_FEW)
12468	{
12469	  if (insnp == ready)
12470	    break;
12471	  cur_uops = ix86_safe_ppro_uops (*--insnp);
12472	}
12473
12474      /* If so, move it to the head of the line.  */
12475      if (cur_uops == PPRO_UOPS_FEW)
12476	ix86_reorder_insn (insnp, e_ready);
12477
12478      /* Issue the head of the queue.  */
12479      issued_this_cycle = 1;
12480      decode[0] = *e_ready--;
12481    }
12482
12483  /* Look for simple insns to fill in the other two slots.  */
12484  for (i = 1; i < 3; ++i)
12485    if (decode[i] == NULL)
12486      {
12487	if (ready > e_ready)
12488	  goto ppro_done;
12489
12490	insnp = e_ready;
12491	cur_uops = ix86_safe_ppro_uops (*insnp);
12492	while (cur_uops != PPRO_UOPS_ONE)
12493	  {
12494	    if (insnp == ready)
12495	      break;
12496	    cur_uops = ix86_safe_ppro_uops (*--insnp);
12497	  }
12498
12499	/* Found one.  Move it to the head of the queue and issue it.  */
12500	if (cur_uops == PPRO_UOPS_ONE)
12501	  {
12502	    ix86_reorder_insn (insnp, e_ready);
12503	    decode[i] = *e_ready--;
12504	    issued_this_cycle++;
12505	    continue;
12506	  }
12507
12508	/* ??? Didn't find one.  Ideally, here we would do a lazy split
12509	   of 2-uop insns, issue one and queue the other.  */
12510      }
12511
12512 ppro_done:
12513  if (issued_this_cycle == 0)
12514    issued_this_cycle = 1;
12515  ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
12516}
12517
12518/* We are about to begin issuing insns for this clock cycle.
12519   Override the default sort algorithm to better slot instructions.  */
12520static int
12521ix86_sched_reorder (FILE *dump ATTRIBUTE_UNUSED,
12522		    int sched_verbose ATTRIBUTE_UNUSED, rtx *ready,
12523		    int *n_readyp, int clock_var ATTRIBUTE_UNUSED)
12524{
12525  int n_ready = *n_readyp;
12526  rtx *e_ready = ready + n_ready - 1;
12527
12528  /* Make sure to go ahead and initialize key items in
12529     ix86_sched_data if we are not going to bother trying to
12530     reorder the ready queue.  */
12531  if (n_ready < 2)
12532    {
12533      ix86_sched_data.ppro.issued_this_cycle = 1;
12534      goto out;
12535    }
12536
12537  switch (ix86_tune)
12538    {
12539    default:
12540      break;
12541
12542    case PROCESSOR_PENTIUMPRO:
12543      ix86_sched_reorder_ppro (ready, e_ready);
12544      break;
12545    }
12546
12547out:
12548  return ix86_issue_rate ();
12549}
12550
12551/* We are about to issue INSN.  Return the number of insns left on the
12552   ready queue that can be issued this cycle.  */
12553
12554static int
12555ix86_variable_issue (FILE *dump, int sched_verbose, rtx insn,
12556		     int can_issue_more)
12557{
12558  int i;
12559  switch (ix86_tune)
12560    {
12561    default:
12562      return can_issue_more - 1;
12563
12564    case PROCESSOR_PENTIUMPRO:
12565      {
12566	enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
12567
12568	if (uops == PPRO_UOPS_MANY)
12569	  {
12570	    if (sched_verbose)
12571	      ix86_dump_ppro_packet (dump);
12572	    ix86_sched_data.ppro.decode[0] = insn;
12573	    ix86_sched_data.ppro.decode[1] = NULL;
12574	    ix86_sched_data.ppro.decode[2] = NULL;
12575	    if (sched_verbose)
12576	      ix86_dump_ppro_packet (dump);
12577	    ix86_sched_data.ppro.decode[0] = NULL;
12578	  }
12579	else if (uops == PPRO_UOPS_FEW)
12580	  {
12581	    if (sched_verbose)
12582	      ix86_dump_ppro_packet (dump);
12583	    ix86_sched_data.ppro.decode[0] = insn;
12584	    ix86_sched_data.ppro.decode[1] = NULL;
12585	    ix86_sched_data.ppro.decode[2] = NULL;
12586	  }
12587	else
12588	  {
12589	    for (i = 0; i < 3; ++i)
12590	      if (ix86_sched_data.ppro.decode[i] == NULL)
12591		{
12592		  ix86_sched_data.ppro.decode[i] = insn;
12593		  break;
12594		}
12595	    if (i == 3)
12596	      abort ();
12597	    if (i == 2)
12598	      {
12599	        if (sched_verbose)
12600	          ix86_dump_ppro_packet (dump);
12601		ix86_sched_data.ppro.decode[0] = NULL;
12602		ix86_sched_data.ppro.decode[1] = NULL;
12603		ix86_sched_data.ppro.decode[2] = NULL;
12604	      }
12605	  }
12606      }
12607      return --ix86_sched_data.ppro.issued_this_cycle;
12608    }
12609}
12610
12611static int
12612ia32_use_dfa_pipeline_interface (void)
12613{
12614  if (TARGET_PENTIUM || TARGET_ATHLON_K8)
12615    return 1;
12616  return 0;
12617}
12618
12619/* How many alternative schedules to try.  This should be as wide as the
12620   scheduling freedom in the DFA, but no wider.  Making this value too
12621   large results in extra work for the scheduler.  */
12622
12623static int
12624ia32_multipass_dfa_lookahead (void)
12625{
12626  if (ix86_tune == PROCESSOR_PENTIUM)
12627    return 2;
12628  else
12629   return 0;
12630}
12631
12632
12633/* Compute the alignment given to a constant that is being placed in memory.
12634   EXP is the constant and ALIGN is the alignment that the object would
12635   ordinarily have.
12636   The value of this function is used instead of that alignment to align
12637   the object.  */
12638
12639int
12640ix86_constant_alignment (tree exp, int align)
12641{
12642  if (TREE_CODE (exp) == REAL_CST)
12643    {
12644      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
12645	return 64;
12646      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
12647	return 128;
12648    }
12649  else if (!optimize_size && TREE_CODE (exp) == STRING_CST
12650	   && !TARGET_NO_ALIGN_LONG_STRINGS
12651	   && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
12652    return BITS_PER_WORD;
12653
12654  return align;
12655}
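
/* For example, a DFmode constant placed in the constant pool (say the
   3.14159... in a floating point expression) gets at least 64-bit
   alignment here, and, unless -Os is given or long-string alignment is
   disabled, a string constant with TREE_STRING_LENGTH of 31 or more is
   aligned to BITS_PER_WORD.  */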
12656
12657/* Compute the alignment for a static variable.
12658   TYPE is the data type, and ALIGN is the alignment that
12659   the object would ordinarily have.  The value of this function is used
12660   instead of that alignment to align the object.  */
12661
12662int
12663ix86_data_alignment (tree type, int align)
12664{
12665  if (AGGREGATE_TYPE_P (type)
12666       && TYPE_SIZE (type)
12667       && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12668       && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
12669	   || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
12670    return 256;
12671
12672  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12673     to a 16-byte boundary.  */
12674  if (TARGET_64BIT)
12675    {
12676      if (AGGREGATE_TYPE_P (type)
12677	   && TYPE_SIZE (type)
12678	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12679	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
12680	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12681	return 128;
12682    }
12683
12684  if (TREE_CODE (type) == ARRAY_TYPE)
12685    {
12686      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12687	return 64;
12688      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12689	return 128;
12690    }
12691  else if (TREE_CODE (type) == COMPLEX_TYPE)
12692    {
12693
12694      if (TYPE_MODE (type) == DCmode && align < 64)
12695	return 64;
12696      if (TYPE_MODE (type) == XCmode && align < 128)
12697	return 128;
12698    }
12699  else if ((TREE_CODE (type) == RECORD_TYPE
12700	    || TREE_CODE (type) == UNION_TYPE
12701	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12702	   && TYPE_FIELDS (type))
12703    {
12704      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12705	return 64;
12706      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12707	return 128;
12708    }
12709  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12710	   || TREE_CODE (type) == INTEGER_TYPE)
12711    {
12712      if (TYPE_MODE (type) == DFmode && align < 64)
12713	return 64;
12714      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12715	return 128;
12716    }
12717
12718  return align;
12719}
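
/* For example, a static object such as

     static char buffer[64];

   is 512 bits in size, so the first test above raises its alignment to
   256 bits (32 bytes) whenever the default alignment would be smaller.  */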
12720
12721/* Compute the alignment for a local variable.
12722   TYPE is the data type, and ALIGN is the alignment that
12723   the object would ordinarily have.  The value of this function is used
12724   instead of that alignment to align the object.  */
12725
12726int
12727ix86_local_alignment (tree type, int align)
12728{
12729  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
12730     to a 16-byte boundary.  */
12731  if (TARGET_64BIT)
12732    {
12733      if (AGGREGATE_TYPE_P (type)
12734	   && TYPE_SIZE (type)
12735	   && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
12736	   && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
12737	       || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
12738	return 128;
12739    }
12740  if (TREE_CODE (type) == ARRAY_TYPE)
12741    {
12742      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
12743	return 64;
12744      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
12745	return 128;
12746    }
12747  else if (TREE_CODE (type) == COMPLEX_TYPE)
12748    {
12749      if (TYPE_MODE (type) == DCmode && align < 64)
12750	return 64;
12751      if (TYPE_MODE (type) == XCmode && align < 128)
12752	return 128;
12753    }
12754  else if ((TREE_CODE (type) == RECORD_TYPE
12755	    || TREE_CODE (type) == UNION_TYPE
12756	    || TREE_CODE (type) == QUAL_UNION_TYPE)
12757	   && TYPE_FIELDS (type))
12758    {
12759      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
12760	return 64;
12761      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
12762	return 128;
12763    }
12764  else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
12765	   || TREE_CODE (type) == INTEGER_TYPE)
12766    {
12767
12768      if (TYPE_MODE (type) == DFmode && align < 64)
12769	return 64;
12770      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
12771	return 128;
12772    }
12773  return align;
12774}
12775
12776/* Emit RTL insns to initialize the variable parts of a trampoline.
12777   FNADDR is an RTX for the address of the function's pure code.
12778   CXT is an RTX for the static chain value for the function.  */
12779void
12780x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
12781{
12782  if (!TARGET_64BIT)
12783    {
12784      /* Compute offset from the end of the jmp to the target function.  */
12785      rtx disp = expand_binop (SImode, sub_optab, fnaddr,
12786			       plus_constant (tramp, 10),
12787			       NULL_RTX, 1, OPTAB_DIRECT);
12788      emit_move_insn (gen_rtx_MEM (QImode, tramp),
12789		      gen_int_mode (0xb9, QImode));
12790      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
12791      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
12792		      gen_int_mode (0xe9, QImode));
12793      emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
12794    }
12795  else
12796    {
12797      int offset = 0;
12798      /* Try to load the address using the shorter movl instead of movabs.
12799         We may want to support movq for kernel mode, but the kernel does not
12800         use trampolines at the moment.  */
12801      if (x86_64_zero_extended_value (fnaddr))
12802	{
12803	  fnaddr = copy_to_mode_reg (DImode, fnaddr);
12804	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12805			  gen_int_mode (0xbb41, HImode));
12806	  emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
12807			  gen_lowpart (SImode, fnaddr));
12808	  offset += 6;
12809	}
12810      else
12811	{
12812	  emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12813			  gen_int_mode (0xbb49, HImode));
12814	  emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12815			  fnaddr);
12816	  offset += 10;
12817	}
12818      /* Load static chain using movabs to r10.  */
12819      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12820		      gen_int_mode (0xba49, HImode));
12821      emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
12822		      cxt);
12823      offset += 10;
12824      /* Jump to r11.  */
12825      emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
12826		      gen_int_mode (0xff49, HImode));
12827      emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
12828		      gen_int_mode (0xe3, QImode));
12829      offset += 3;
12830      if (offset > TRAMPOLINE_SIZE)
12831	abort ();
12832    }
12833
12834#ifdef ENABLE_EXECUTE_STACK
12835  emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
12836		     LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
12837#endif
12838}
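
/* The 32-bit trampoline emitted above has the layout

     offset 0:  b9 <cxt>                  movl  $cxt, %ecx
     offset 5:  e9 <fnaddr - (tramp+10)>  jmp   fnaddr

   i.e. 10 bytes that load the static chain into %ecx and jump to the
   target with a pc-relative jump.  The 64-bit variant instead loads the
   target address into %r11 (with movl or movabs), the static chain into
   %r10, and finishes with an indirect jmp through %r11.  */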
12839
12840#define def_builtin(MASK, NAME, TYPE, CODE)			\
12841do {								\
12842  if ((MASK) & target_flags					\
12843      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
12844    builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
12845		      NULL, NULL_TREE);				\
12846} while (0)
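
/* An illustrative use of the macro, roughly how the builtins described by
   the tables below are registered later in ix86_init_mmx_sse_builtins:

     def_builtin (MASK_SSE, "__builtin_ia32_movmskps",
		  int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);

   The builtin is created only when the flags in MASK are enabled, and a
   builtin whose mask includes MASK_64BIT is skipped unless compiling for
   x86-64.  */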
12847
12848struct builtin_description
12849{
12850  const unsigned int mask;
12851  const enum insn_code icode;
12852  const char *const name;
12853  const enum ix86_builtins code;
12854  const enum rtx_code comparison;
12855  const unsigned int flag;
12856};
12857
12858static const struct builtin_description bdesc_comi[] =
12859{
12860  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
12861  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
12862  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
12863  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
12864  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
12865  { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
12866  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
12867  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
12868  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
12869  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
12870  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
12871  { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
12872  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
12873  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
12874  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
12875  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
12876  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
12877  { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
12878  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
12879  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
12880  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
12881  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
12882  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
12883  { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
12884};
12885
12886static const struct builtin_description bdesc_2arg[] =
12887{
12888  /* SSE */
12889  { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
12890  { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
12891  { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
12892  { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
12893  { MASK_SSE, CODE_FOR_vmaddv4sf3,  "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
12894  { MASK_SSE, CODE_FOR_vmsubv4sf3,  "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
12895  { MASK_SSE, CODE_FOR_vmmulv4sf3,  "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
12896  { MASK_SSE, CODE_FOR_vmdivv4sf3,  "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
12897
12898  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
12899  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
12900  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
12901  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
12902  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
12903  { MASK_SSE, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
12904  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
12905  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
12906  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
12907  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
12908  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
12909  { MASK_SSE, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
12910  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
12911  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
12912  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
12913  { MASK_SSE, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
12914  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
12915  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
12916  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
12917  { MASK_SSE, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
12918
12919  { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
12920  { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
12921  { MASK_SSE, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
12922  { MASK_SSE, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
12923
12924  { MASK_SSE, CODE_FOR_sse_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
12925  { MASK_SSE, CODE_FOR_sse_nandv4sf3,  "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
12926  { MASK_SSE, CODE_FOR_sse_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
12927  { MASK_SSE, CODE_FOR_sse_xorv4sf3,  "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
12928
12929  { MASK_SSE, CODE_FOR_sse_movss,  "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
12930  { MASK_SSE, CODE_FOR_sse_movhlps,  "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
12931  { MASK_SSE, CODE_FOR_sse_movlhps,  "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
12932  { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
12933  { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
12934
12935  /* MMX */
12936  { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
12937  { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
12938  { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
12939  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
12940  { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
12941  { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
12942  { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
12943  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
12944
12945  { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
12946  { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
12947  { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
12948  { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
12949  { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
12950  { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
12951  { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
12952  { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
12953
12954  { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
12955  { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
12956  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
12957
12958  { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
12959  { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
12960  { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
12961  { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
12962
12963  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
12964  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
12965
12966  { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
12967  { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
12968  { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
12969  { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
12970  { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
12971  { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
12972
12973  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
12974  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
12975  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
12976  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
12977
12978  { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
12979  { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
12980  { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
12981  { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
12982  { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
12983  { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
12984
12985  /* Special.  */
12986  { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
12987  { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
12988  { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
12989
12990  { MASK_SSE, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
12991  { MASK_SSE, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
12992  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
12993
12994  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
12995  { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
12996  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
12997  { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
12998  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
12999  { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
13000
13001  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
13002  { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
13003  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
13004  { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
13005  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
13006  { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
13007
13008  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
13009  { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
13010  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
13011  { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
13012
13013  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
13014  { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
13015
13016  /* SSE2 */
13017  { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
13018  { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
13019  { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
13020  { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
13021  { MASK_SSE2, CODE_FOR_vmaddv2df3,  "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
13022  { MASK_SSE2, CODE_FOR_vmsubv2df3,  "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
13023  { MASK_SSE2, CODE_FOR_vmmulv2df3,  "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
13024  { MASK_SSE2, CODE_FOR_vmdivv2df3,  "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
13025
13026  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
13027  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
13028  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
13029  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
13030  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
13031  { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
13032  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
13033  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
13034  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
13035  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
13036  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
13037  { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
13038  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
13039  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
13040  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
13041  { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
13042  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
13043  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
13044  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
13045  { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
13046
13047  { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
13048  { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
13049  { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
13050  { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
13051
13052  { MASK_SSE2, CODE_FOR_sse2_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
13053  { MASK_SSE2, CODE_FOR_sse2_nandv2df3,  "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
13054  { MASK_SSE2, CODE_FOR_sse2_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
13055  { MASK_SSE2, CODE_FOR_sse2_xorv2df3,  "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
13056
13057  { MASK_SSE2, CODE_FOR_sse2_movsd,  "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
13058  { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
13059  { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
13060
13061  /* SSE2 MMX */
13062  { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
13063  { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
13064  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
13065  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
13066  { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
13067  { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
13068  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
13069  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
13070
13071  { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
13072  { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
13073  { MASK_MMX, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
13074  { MASK_MMX, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
13075  { MASK_MMX, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
13076  { MASK_MMX, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
13077  { MASK_MMX, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
13078  { MASK_MMX, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
13079
13080  { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
13081  { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
13082  { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
13083  { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
13084
13085  { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
13086  { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
13087  { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
13088  { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
13089
13090  { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
13091  { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
13092
13093  { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
13094  { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
13095  { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
13096  { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
13097  { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
13098  { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
13099
13100  { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
13101  { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
13102  { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
13103  { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
13104
13105  { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
13106  { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
13107  { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
13108  { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
13109  { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
13110  { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
13111  { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
13112  { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
13113
13114  { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
13115  { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
13116  { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
13117
13118  { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
13119  { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
13120
13121  { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
13122  { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
13123  { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
13124  { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
13125  { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
13126  { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
13127
13128  { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
13129  { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
13130  { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
13131  { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
13132  { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
13133  { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
13134
13135  { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
13136  { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
13137  { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
13138  { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
13139
13140  { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
13141
13142  { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
13143  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
13144  { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
13145  { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
13146
13147  /* SSE3 MMX */
13148  { MASK_SSE3, CODE_FOR_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
13149  { MASK_SSE3, CODE_FOR_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
13150  { MASK_SSE3, CODE_FOR_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
13151  { MASK_SSE3, CODE_FOR_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
13152  { MASK_SSE3, CODE_FOR_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
13153  { MASK_SSE3, CODE_FOR_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 }
13154};
13155
13156static const struct builtin_description bdesc_1arg[] =
13157{
13158  { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
13159  { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
13160
13161  { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
13162  { MASK_SSE, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
13163  { MASK_SSE, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
13164
13165  { MASK_SSE, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
13166  { MASK_SSE, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
13167  { MASK_SSE | MASK_64BIT, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
13168  { MASK_SSE, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
13169  { MASK_SSE, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
13170  { MASK_SSE | MASK_64BIT, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
13171
13172  { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
13173  { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
13174  { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
13175  { MASK_SSE2, CODE_FOR_sse2_movdq2q, 0, IX86_BUILTIN_MOVDQ2Q, 0, 0 },
13176
13177  { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
13178
13179  { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
13180  { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
13181
13182  { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
13183  { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
13184  { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
13185  { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
13186  { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
13187
13188  { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
13189
13190  { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
13191  { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
13192  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
13193  { MASK_SSE2 | MASK_64BIT, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
13194
13195  { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
13196  { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
13197  { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
13198
13199  { MASK_SSE2, CODE_FOR_sse2_movq, 0, IX86_BUILTIN_MOVQ, 0, 0 },
13200
13201  /* SSE3 */
13202  { MASK_SSE3, CODE_FOR_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
13203  { MASK_SSE3, CODE_FOR_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
13204  { MASK_SSE3, CODE_FOR_movddup,  0, IX86_BUILTIN_MOVDDUP, 0, 0 }
13205};
13206
13207void
13208ix86_init_builtins (void)
13209{
13210  if (TARGET_MMX)
13211    ix86_init_mmx_sse_builtins ();
13212}
13213
13214/* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
13215   is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
13216   builtins.  */
13217static void
13218ix86_init_mmx_sse_builtins (void)
13219{
13220  const struct builtin_description * d;
13221  size_t i;
13222
13223  tree pchar_type_node = build_pointer_type (char_type_node);
13224  tree pcchar_type_node = build_pointer_type (
13225			     build_type_variant (char_type_node, 1, 0));
13226  tree pfloat_type_node = build_pointer_type (float_type_node);
13227  tree pcfloat_type_node = build_pointer_type (
13228			     build_type_variant (float_type_node, 1, 0));
13229  tree pv2si_type_node = build_pointer_type (V2SI_type_node);
13230  tree pv2di_type_node = build_pointer_type (V2DI_type_node);
13231  tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
13232
13233  /* Comparisons.  */
13234  tree int_ftype_v4sf_v4sf
13235    = build_function_type_list (integer_type_node,
13236				V4SF_type_node, V4SF_type_node, NULL_TREE);
13237  tree v4si_ftype_v4sf_v4sf
13238    = build_function_type_list (V4SI_type_node,
13239				V4SF_type_node, V4SF_type_node, NULL_TREE);
13240  /* MMX/SSE/integer conversions.  */
13241  tree int_ftype_v4sf
13242    = build_function_type_list (integer_type_node,
13243				V4SF_type_node, NULL_TREE);
13244  tree int64_ftype_v4sf
13245    = build_function_type_list (long_long_integer_type_node,
13246				V4SF_type_node, NULL_TREE);
13247  tree int_ftype_v8qi
13248    = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
13249  tree v4sf_ftype_v4sf_int
13250    = build_function_type_list (V4SF_type_node,
13251				V4SF_type_node, integer_type_node, NULL_TREE);
13252  tree v4sf_ftype_v4sf_int64
13253    = build_function_type_list (V4SF_type_node,
13254				V4SF_type_node, long_long_integer_type_node,
13255				NULL_TREE);
13256  tree v4sf_ftype_v4sf_v2si
13257    = build_function_type_list (V4SF_type_node,
13258				V4SF_type_node, V2SI_type_node, NULL_TREE);
13259  tree int_ftype_v4hi_int
13260    = build_function_type_list (integer_type_node,
13261				V4HI_type_node, integer_type_node, NULL_TREE);
13262  tree v4hi_ftype_v4hi_int_int
13263    = build_function_type_list (V4HI_type_node, V4HI_type_node,
13264				integer_type_node, integer_type_node,
13265				NULL_TREE);
13266  /* Miscellaneous.  */
13267  tree v8qi_ftype_v4hi_v4hi
13268    = build_function_type_list (V8QI_type_node,
13269				V4HI_type_node, V4HI_type_node, NULL_TREE);
13270  tree v4hi_ftype_v2si_v2si
13271    = build_function_type_list (V4HI_type_node,
13272				V2SI_type_node, V2SI_type_node, NULL_TREE);
13273  tree v4sf_ftype_v4sf_v4sf_int
13274    = build_function_type_list (V4SF_type_node,
13275				V4SF_type_node, V4SF_type_node,
13276				integer_type_node, NULL_TREE);
13277  tree v2si_ftype_v4hi_v4hi
13278    = build_function_type_list (V2SI_type_node,
13279				V4HI_type_node, V4HI_type_node, NULL_TREE);
13280  tree v4hi_ftype_v4hi_int
13281    = build_function_type_list (V4HI_type_node,
13282				V4HI_type_node, integer_type_node, NULL_TREE);
13283  tree v4hi_ftype_v4hi_di
13284    = build_function_type_list (V4HI_type_node,
13285				V4HI_type_node, long_long_unsigned_type_node,
13286				NULL_TREE);
13287  tree v2si_ftype_v2si_di
13288    = build_function_type_list (V2SI_type_node,
13289				V2SI_type_node, long_long_unsigned_type_node,
13290				NULL_TREE);
13291  tree void_ftype_void
13292    = build_function_type (void_type_node, void_list_node);
13293  tree void_ftype_unsigned
13294    = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
13295  tree void_ftype_unsigned_unsigned
13296    = build_function_type_list (void_type_node, unsigned_type_node,
13297				unsigned_type_node, NULL_TREE);
13298  tree void_ftype_pcvoid_unsigned_unsigned
13299    = build_function_type_list (void_type_node, const_ptr_type_node,
13300				unsigned_type_node, unsigned_type_node,
13301				NULL_TREE);
13302  tree unsigned_ftype_void
13303    = build_function_type (unsigned_type_node, void_list_node);
13304  tree di_ftype_void
13305    = build_function_type (long_long_unsigned_type_node, void_list_node);
13306  tree v4sf_ftype_void
13307    = build_function_type (V4SF_type_node, void_list_node);
13308  tree v2si_ftype_v4sf
13309    = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
13310  /* Loads/stores.  */
13311  tree void_ftype_v8qi_v8qi_pchar
13312    = build_function_type_list (void_type_node,
13313				V8QI_type_node, V8QI_type_node,
13314				pchar_type_node, NULL_TREE);
13315  tree v4sf_ftype_pcfloat
13316    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
13317  /* @@@ the type is bogus */
13318  tree v4sf_ftype_v4sf_pv2si
13319    = build_function_type_list (V4SF_type_node,
13320				V4SF_type_node, pv2si_type_node, NULL_TREE);
13321  tree void_ftype_pv2si_v4sf
13322    = build_function_type_list (void_type_node,
13323				pv2si_type_node, V4SF_type_node, NULL_TREE);
13324  tree void_ftype_pfloat_v4sf
13325    = build_function_type_list (void_type_node,
13326				pfloat_type_node, V4SF_type_node, NULL_TREE);
13327  tree void_ftype_pdi_di
13328    = build_function_type_list (void_type_node,
13329				pdi_type_node, long_long_unsigned_type_node,
13330				NULL_TREE);
13331  tree void_ftype_pv2di_v2di
13332    = build_function_type_list (void_type_node,
13333				pv2di_type_node, V2DI_type_node, NULL_TREE);
13334  /* Normal vector unops.  */
13335  tree v4sf_ftype_v4sf
13336    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13337
13338  /* Normal vector binops.  */
13339  tree v4sf_ftype_v4sf_v4sf
13340    = build_function_type_list (V4SF_type_node,
13341				V4SF_type_node, V4SF_type_node, NULL_TREE);
13342  tree v8qi_ftype_v8qi_v8qi
13343    = build_function_type_list (V8QI_type_node,
13344				V8QI_type_node, V8QI_type_node, NULL_TREE);
13345  tree v4hi_ftype_v4hi_v4hi
13346    = build_function_type_list (V4HI_type_node,
13347				V4HI_type_node, V4HI_type_node, NULL_TREE);
13348  tree v2si_ftype_v2si_v2si
13349    = build_function_type_list (V2SI_type_node,
13350				V2SI_type_node, V2SI_type_node, NULL_TREE);
13351  tree di_ftype_di_di
13352    = build_function_type_list (long_long_unsigned_type_node,
13353				long_long_unsigned_type_node,
13354				long_long_unsigned_type_node, NULL_TREE);
13355
13356  tree v2si_ftype_v2sf
13357    = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
13358  tree v2sf_ftype_v2si
13359    = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
13360  tree v2si_ftype_v2si
13361    = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
13362  tree v2sf_ftype_v2sf
13363    = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
13364  tree v2sf_ftype_v2sf_v2sf
13365    = build_function_type_list (V2SF_type_node,
13366				V2SF_type_node, V2SF_type_node, NULL_TREE);
13367  tree v2si_ftype_v2sf_v2sf
13368    = build_function_type_list (V2SI_type_node,
13369				V2SF_type_node, V2SF_type_node, NULL_TREE);
13370  tree pint_type_node    = build_pointer_type (integer_type_node);
13371  tree pcint_type_node = build_pointer_type (
13372			     build_type_variant (integer_type_node, 1, 0));
13373  tree pdouble_type_node = build_pointer_type (double_type_node);
13374  tree pcdouble_type_node = build_pointer_type (
13375				build_type_variant (double_type_node, 1, 0));
13376  tree int_ftype_v2df_v2df
13377    = build_function_type_list (integer_type_node,
13378				V2DF_type_node, V2DF_type_node, NULL_TREE);
13379
13380  tree ti_ftype_void
13381    = build_function_type (intTI_type_node, void_list_node);
13382  tree v2di_ftype_void
13383    = build_function_type (V2DI_type_node, void_list_node);
13384  tree ti_ftype_ti_ti
13385    = build_function_type_list (intTI_type_node,
13386				intTI_type_node, intTI_type_node, NULL_TREE);
13387  tree void_ftype_pcvoid
13388    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
13389  tree v2di_ftype_di
13390    = build_function_type_list (V2DI_type_node,
13391				long_long_unsigned_type_node, NULL_TREE);
13392  tree di_ftype_v2di
13393    = build_function_type_list (long_long_unsigned_type_node,
13394				V2DI_type_node, NULL_TREE);
13395  tree v4sf_ftype_v4si
13396    = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
13397  tree v4si_ftype_v4sf
13398    = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
13399  tree v2df_ftype_v4si
13400    = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
13401  tree v4si_ftype_v2df
13402    = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
13403  tree v2si_ftype_v2df
13404    = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
13405  tree v4sf_ftype_v2df
13406    = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
13407  tree v2df_ftype_v2si
13408    = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
13409  tree v2df_ftype_v4sf
13410    = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
13411  tree int_ftype_v2df
13412    = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
13413  tree int64_ftype_v2df
13414    = build_function_type_list (long_long_integer_type_node,
13415				V2DF_type_node, NULL_TREE);
13416  tree v2df_ftype_v2df_int
13417    = build_function_type_list (V2DF_type_node,
13418				V2DF_type_node, integer_type_node, NULL_TREE);
13419  tree v2df_ftype_v2df_int64
13420    = build_function_type_list (V2DF_type_node,
13421				V2DF_type_node, long_long_integer_type_node,
13422				NULL_TREE);
13423  tree v4sf_ftype_v4sf_v2df
13424    = build_function_type_list (V4SF_type_node,
13425				V4SF_type_node, V2DF_type_node, NULL_TREE);
13426  tree v2df_ftype_v2df_v4sf
13427    = build_function_type_list (V2DF_type_node,
13428				V2DF_type_node, V4SF_type_node, NULL_TREE);
13429  tree v2df_ftype_v2df_v2df_int
13430    = build_function_type_list (V2DF_type_node,
13431				V2DF_type_node, V2DF_type_node,
13432				integer_type_node,
13433				NULL_TREE);
13434  tree v2df_ftype_v2df_pv2si
13435    = build_function_type_list (V2DF_type_node,
13436				V2DF_type_node, pv2si_type_node, NULL_TREE);
13437  tree void_ftype_pv2si_v2df
13438    = build_function_type_list (void_type_node,
13439				pv2si_type_node, V2DF_type_node, NULL_TREE);
13440  tree void_ftype_pdouble_v2df
13441    = build_function_type_list (void_type_node,
13442				pdouble_type_node, V2DF_type_node, NULL_TREE);
13443  tree void_ftype_pint_int
13444    = build_function_type_list (void_type_node,
13445				pint_type_node, integer_type_node, NULL_TREE);
13446  tree void_ftype_v16qi_v16qi_pchar
13447    = build_function_type_list (void_type_node,
13448				V16QI_type_node, V16QI_type_node,
13449				pchar_type_node, NULL_TREE);
13450  tree v2df_ftype_pcdouble
13451    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
13452  tree v2df_ftype_v2df_v2df
13453    = build_function_type_list (V2DF_type_node,
13454				V2DF_type_node, V2DF_type_node, NULL_TREE);
13455  tree v16qi_ftype_v16qi_v16qi
13456    = build_function_type_list (V16QI_type_node,
13457				V16QI_type_node, V16QI_type_node, NULL_TREE);
13458  tree v8hi_ftype_v8hi_v8hi
13459    = build_function_type_list (V8HI_type_node,
13460				V8HI_type_node, V8HI_type_node, NULL_TREE);
13461  tree v4si_ftype_v4si_v4si
13462    = build_function_type_list (V4SI_type_node,
13463				V4SI_type_node, V4SI_type_node, NULL_TREE);
13464  tree v2di_ftype_v2di_v2di
13465    = build_function_type_list (V2DI_type_node,
13466				V2DI_type_node, V2DI_type_node, NULL_TREE);
13467  tree v2di_ftype_v2df_v2df
13468    = build_function_type_list (V2DI_type_node,
13469				V2DF_type_node, V2DF_type_node, NULL_TREE);
13470  tree v2df_ftype_v2df
13471    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13472  tree v2df_ftype_double
13473    = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
13474  tree v2df_ftype_double_double
13475    = build_function_type_list (V2DF_type_node,
13476				double_type_node, double_type_node, NULL_TREE);
13477  tree int_ftype_v8hi_int
13478    = build_function_type_list (integer_type_node,
13479				V8HI_type_node, integer_type_node, NULL_TREE);
13480  tree v8hi_ftype_v8hi_int_int
13481    = build_function_type_list (V8HI_type_node,
13482				V8HI_type_node, integer_type_node,
13483				integer_type_node, NULL_TREE);
13484  tree v2di_ftype_v2di_int
13485    = build_function_type_list (V2DI_type_node,
13486				V2DI_type_node, integer_type_node, NULL_TREE);
13487  tree v4si_ftype_v4si_int
13488    = build_function_type_list (V4SI_type_node,
13489				V4SI_type_node, integer_type_node, NULL_TREE);
13490  tree v8hi_ftype_v8hi_int
13491    = build_function_type_list (V8HI_type_node,
13492				V8HI_type_node, integer_type_node, NULL_TREE);
13493  tree v8hi_ftype_v8hi_v2di
13494    = build_function_type_list (V8HI_type_node,
13495				V8HI_type_node, V2DI_type_node, NULL_TREE);
13496  tree v4si_ftype_v4si_v2di
13497    = build_function_type_list (V4SI_type_node,
13498				V4SI_type_node, V2DI_type_node, NULL_TREE);
13499  tree v4si_ftype_v8hi_v8hi
13500    = build_function_type_list (V4SI_type_node,
13501				V8HI_type_node, V8HI_type_node, NULL_TREE);
13502  tree di_ftype_v8qi_v8qi
13503    = build_function_type_list (long_long_unsigned_type_node,
13504				V8QI_type_node, V8QI_type_node, NULL_TREE);
13505  tree v2di_ftype_v16qi_v16qi
13506    = build_function_type_list (V2DI_type_node,
13507				V16QI_type_node, V16QI_type_node, NULL_TREE);
13508  tree int_ftype_v16qi
13509    = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
13510  tree v16qi_ftype_pcchar
13511    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
13512  tree void_ftype_pchar_v16qi
13513    = build_function_type_list (void_type_node,
13514			        pchar_type_node, V16QI_type_node, NULL_TREE);
13515  tree v4si_ftype_pcint
13516    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
13517  tree void_ftype_pcint_v4si
13518    = build_function_type_list (void_type_node,
13519			        pcint_type_node, V4SI_type_node, NULL_TREE);
13520  tree v2di_ftype_v2di
13521    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13522
13523  tree float80_type;
13524  tree float128_type;
13525
13526  /* The __float80 type.  */
13527  if (TYPE_MODE (long_double_type_node) == XFmode)
13528    (*lang_hooks.types.register_builtin_type) (long_double_type_node,
13529					       "__float80");
13530  else
13531    {
13532      /* long double is not XFmode here, so build a distinct 96-bit __float80 type.  */
13533      float80_type = make_node (REAL_TYPE);
13534      TYPE_PRECISION (float80_type) = 96;
13535      layout_type (float80_type);
13536      (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
13537    }
13538
13539  float128_type = make_node (REAL_TYPE);
13540  TYPE_PRECISION (float128_type) = 128;
13541  layout_type (float128_type);
13542  (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
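
  /* Illustrative sketch, added for exposition only (not part of the original
     sources): once registered above, the names behave as ordinary type
     specifiers in ia32 C code.  The function name below is made up.  */
#if 0
static __float80
example_float80_square (__float80 x)
{
  /* Arithmetic is carried out in 80-bit extended precision (XFmode),
     padded to 96 bits in memory on ia32.  */
  return x * x;
}
#endif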
13543
13544  /* Add all builtins that are more or less simple operations on two
13545     operands.  */
13546  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13547    {
13548      /* Use one of the operands; the target can have a different mode for
13549	 mask-generating compares.  */
13550      enum machine_mode mode;
13551      tree type;
13552
13553      if (d->name == 0)
13554	continue;
13555      mode = insn_data[d->icode].operand[1].mode;
13556
13557      switch (mode)
13558	{
13559	case V16QImode:
13560	  type = v16qi_ftype_v16qi_v16qi;
13561	  break;
13562	case V8HImode:
13563	  type = v8hi_ftype_v8hi_v8hi;
13564	  break;
13565	case V4SImode:
13566	  type = v4si_ftype_v4si_v4si;
13567	  break;
13568	case V2DImode:
13569	  type = v2di_ftype_v2di_v2di;
13570	  break;
13571	case V2DFmode:
13572	  type = v2df_ftype_v2df_v2df;
13573	  break;
13574	case TImode:
13575	  type = ti_ftype_ti_ti;
13576	  break;
13577	case V4SFmode:
13578	  type = v4sf_ftype_v4sf_v4sf;
13579	  break;
13580	case V8QImode:
13581	  type = v8qi_ftype_v8qi_v8qi;
13582	  break;
13583	case V4HImode:
13584	  type = v4hi_ftype_v4hi_v4hi;
13585	  break;
13586	case V2SImode:
13587	  type = v2si_ftype_v2si_v2si;
13588	  break;
13589	case DImode:
13590	  type = di_ftype_di_di;
13591	  break;
13592
13593	default:
13594	  abort ();
13595	}
13596
13597      /* Override for comparisons.  */
13598      if (d->icode == CODE_FOR_maskcmpv4sf3
13599	  || d->icode == CODE_FOR_maskncmpv4sf3
13600	  || d->icode == CODE_FOR_vmmaskcmpv4sf3
13601	  || d->icode == CODE_FOR_vmmaskncmpv4sf3)
13602	type = v4si_ftype_v4sf_v4sf;
13603
13604      if (d->icode == CODE_FOR_maskcmpv2df3
13605	  || d->icode == CODE_FOR_maskncmpv2df3
13606	  || d->icode == CODE_FOR_vmmaskcmpv2df3
13607	  || d->icode == CODE_FOR_vmmaskncmpv2df3)
13608	type = v2di_ftype_v2df_v2df;
13609
13610      def_builtin (d->mask, d->name, type, d->code);
13611    }
13612
13613  /* Add the remaining MMX insns with somewhat more complicated types.  */
13614  def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
13615  def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
13616  def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
13617  def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
13618  def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
13619
13620  def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
13621  def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
13622  def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
13623
13624  def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
13625  def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
13626
13627  def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
13628  def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
13629
13630  /* comi/ucomi insns.  */
13631  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
13632    if (d->mask == MASK_SSE2)
13633      def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
13634    else
13635      def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
13636
13637  def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
13638  def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
13639  def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
13640
13641  def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
13642  def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
13643  def_builtin (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
13644  def_builtin (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
13645  def_builtin (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
13646  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
13647  def_builtin (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
13648  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
13649  def_builtin (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
13650  def_builtin (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
13651  def_builtin (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
13652
13653  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
13654  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
13655
13656  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
13657
13658  def_builtin (MASK_SSE, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
13659  def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
13660  def_builtin (MASK_SSE, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
13661  def_builtin (MASK_SSE, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
13662  def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
13663  def_builtin (MASK_SSE, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
13664
13665  def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
13666  def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
13667  def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
13668  def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
13669
13670  def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
13671  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
13672  def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
13673  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
13674
13675  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
13676
13677  def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
13678
13679  def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
13680  def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
13681  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
13682  def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
13683  def_builtin (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
13684  def_builtin (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
13685
13686  def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
13687
13688  /* Original 3DNow!  */
13689  def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
13690  def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
13691  def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
13692  def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
13693  def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
13694  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
13695  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
13696  def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
13697  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
13698  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
13699  def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
13700  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
13701  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
13702  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
13703  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
13704  def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
13705  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
13706  def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
13707  def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
13708  def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
13709
13710  /* 3DNow! extension as used in the Athlon CPU.  */
13711  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
13712  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
13713  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
13714  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
13715  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
13716  def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
13717
13718  def_builtin (MASK_SSE, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
13719
13720  /* SSE2 */
13721  def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
13722  def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
13723
13724  def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
13725  def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
13726  def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
13727
13728  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
13729  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
13730  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
13731  def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
13732  def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
13733  def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
13734
13735  def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
13736  def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
13737  def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
13738  def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
13739
13740  def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
13741  def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
13742  def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
13743  def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
13744  def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
13745
13746  def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
13747  def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
13748  def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
13749  def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
13750
13751  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
13752  def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
13753
13754  def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
13755
13756  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
13757  def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
13758
13759  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
13760  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
13761  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
13762  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
13763  def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
13764
13765  def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
13766
13767  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
13768  def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
13769  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
13770  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
13771
13772  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
13773  def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
13774  def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
13775
13776  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
13777  def_builtin (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
13778  def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
13779  def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
13780
13781  def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
13782  def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
13783  def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
13784  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
13785  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
13786  def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
13787  def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
13788
13789  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
13790  def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
13791  def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
13792
13793  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
13794  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
13795  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
13796  def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
13797  def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
13798  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
13799  def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
13800
13801  def_builtin (MASK_SSE, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
13802
13803  def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
13804  def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
13805  def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
13806
13807  def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
13808  def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
13809  def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
13810
13811  def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
13812  def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
13813
13814  def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
13815  def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
13816  def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
13817  def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
13818
13819  def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
13820  def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
13821  def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
13822  def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
13823
13824  def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
13825  def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
13826
13827  def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
13828
13829  /* Prescott New Instructions (SSE3).  */
13830  def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
13831	       void_ftype_pcvoid_unsigned_unsigned,
13832	       IX86_BUILTIN_MONITOR);
13833  def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
13834	       void_ftype_unsigned_unsigned,
13835	       IX86_BUILTIN_MWAIT);
13836  def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
13837	       v4sf_ftype_v4sf,
13838	       IX86_BUILTIN_MOVSHDUP);
13839  def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
13840	       v4sf_ftype_v4sf,
13841	       IX86_BUILTIN_MOVSLDUP);
13842  def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
13843	       v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
13844  def_builtin (MASK_SSE3, "__builtin_ia32_loadddup",
13845	       v2df_ftype_pcdouble, IX86_BUILTIN_LOADDDUP);
13846  def_builtin (MASK_SSE3, "__builtin_ia32_movddup",
13847	       v2df_ftype_v2df, IX86_BUILTIN_MOVDDUP);
13848}
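
/* Illustrative sketch, added for exposition (not part of the original
   sources): each def_builtin call above registers its name only when the
   corresponding mask bits are enabled, so code like the following is
   accepted only when compiled with -mmmx.  The typedef and function names
   are made up.  */
#if 0
typedef short __example_v4hi __attribute__ ((vector_size (8)));

static void
example_mmx_add (__example_v4hi *dst, __example_v4hi a, __example_v4hi b)
{
  *dst = __builtin_ia32_paddw (a, b);	/* a two-operand MMX builtin */
  __builtin_ia32_emms ();		/* IX86_BUILTIN_EMMS: leave MMX state */
}
#endif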
13849
13850/* Errors in the source file can cause expand_expr to return const0_rtx
13851   where we expect a vector.  To avoid crashing, use one of the vector
13852   clear instructions.  */
13853static rtx
13854safe_vector_operand (rtx x, enum machine_mode mode)
13855{
13856  if (x != const0_rtx)
13857    return x;
13858  x = gen_reg_rtx (mode);
13859
13860  if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
13861    emit_insn (gen_mmx_clrdi (mode == DImode ? x
13862			      : gen_rtx_SUBREG (DImode, x, 0)));
13863  else
13864    emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
13865				: gen_rtx_SUBREG (V4SFmode, x, 0),
13866				CONST0_RTX (V4SFmode)));
13867  return x;
13868}
13869
13870/* Subroutine of ix86_expand_builtin to take care of binop insns.  */
13871
13872static rtx
13873ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
13874{
13875  rtx pat;
13876  tree arg0 = TREE_VALUE (arglist);
13877  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13878  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13879  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13880  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13881  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13882  enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13883
13884  if (VECTOR_MODE_P (mode0))
13885    op0 = safe_vector_operand (op0, mode0);
13886  if (VECTOR_MODE_P (mode1))
13887    op1 = safe_vector_operand (op1, mode1);
13888
13889  if (! target
13890      || GET_MODE (target) != tmode
13891      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13892    target = gen_reg_rtx (tmode);
13893
13894  if (GET_MODE (op1) == SImode && mode1 == TImode)
13895    {
13896      rtx x = gen_reg_rtx (V4SImode);
13897      emit_insn (gen_sse2_loadd (x, op1));
13898      op1 = gen_lowpart (TImode, x);
13899    }
13900
13901  /* In case the insn wants input operands in modes different from
13902     the result, abort.  */
13903  if ((GET_MODE (op0) != mode0 && GET_MODE (op0) != VOIDmode)
13904      || (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode))
13905    abort ();
13906
13907  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13908    op0 = copy_to_mode_reg (mode0, op0);
13909  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13910    op1 = copy_to_mode_reg (mode1, op1);
13911
13912  /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
13913     yet one of the two must not be a memory.  This is normally enforced
13914     by expanders, but we didn't bother to create one here.  */
13915  if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
13916    op0 = copy_to_mode_reg (mode0, op0);
13917
13918  pat = GEN_FCN (icode) (target, op0, op1);
13919  if (! pat)
13920    return 0;
13921  emit_insn (pat);
13922  return target;
13923}
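
/* Illustrative sketch, added for exposition (not part of the original
   sources): a typical user-level call that is expanded by
   ix86_expand_binop_builtin via the bdesc_2arg table.  The typedef and
   function name are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));

static __example_v4sf
example_addps (__example_v4sf a, __example_v4sf b)
{
  /* Expands to a single addps; at most one operand may be a memory
     reference, which the MEM/MEM check above enforces.  */
  return __builtin_ia32_addps (a, b);
}
#endif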
13924
13925/* Subroutine of ix86_expand_builtin to take care of stores.  */
13926
13927static rtx
13928ix86_expand_store_builtin (enum insn_code icode, tree arglist)
13929{
13930  rtx pat;
13931  tree arg0 = TREE_VALUE (arglist);
13932  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13933  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13934  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13935  enum machine_mode mode0 = insn_data[icode].operand[0].mode;
13936  enum machine_mode mode1 = insn_data[icode].operand[1].mode;
13937
13938  if (VECTOR_MODE_P (mode1))
13939    op1 = safe_vector_operand (op1, mode1);
13940
13941  op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13942  op1 = copy_to_mode_reg (mode1, op1);
13943
13944  pat = GEN_FCN (icode) (op0, op1);
13945  if (pat)
13946    emit_insn (pat);
13947  return 0;
13948}
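
/* Illustrative sketch, added for exposition (not part of the original
   sources): store builtins take the pointer as their first argument and
   produce no value; the helper above wraps the pointer in a MEM and always
   returns 0.  Names are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));

static void
example_storeups (float *p, __example_v4sf v)
{
  __builtin_ia32_storeups (p, v);	/* IX86_BUILTIN_STOREUPS: unaligned store */
}
#endif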
13949
13950/* Subroutine of ix86_expand_builtin to take care of unop insns.  */
13951
13952static rtx
13953ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
13954			  rtx target, int do_load)
13955{
13956  rtx pat;
13957  tree arg0 = TREE_VALUE (arglist);
13958  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13959  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13960  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13961
13962  if (! target
13963      || GET_MODE (target) != tmode
13964      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13965    target = gen_reg_rtx (tmode);
13966  if (do_load)
13967    op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13968  else
13969    {
13970      if (VECTOR_MODE_P (mode0))
13971	op0 = safe_vector_operand (op0, mode0);
13972
13973      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13974	op0 = copy_to_mode_reg (mode0, op0);
13975    }
13976
13977  pat = GEN_FCN (icode) (target, op0);
13978  if (! pat)
13979    return 0;
13980  emit_insn (pat);
13981  return target;
13982}
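
/* Illustrative sketch, added for exposition (not part of the original
   sources): the same helper serves plain one-operand builtins (DO_LOAD == 0)
   and load builtins whose single argument is a pointer (DO_LOAD != 0).
   Names are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));

static __example_v4sf
example_sqrt_from_memory (const float *p)
{
  __example_v4sf v = __builtin_ia32_loadups (p);	/* expanded with do_load = 1 */
  return __builtin_ia32_sqrtps (v);			/* expanded with do_load = 0 */
}
#endif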
13983
13984/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
13985   sqrtss, rsqrtss, rcpss.  */
13986
13987static rtx
13988ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
13989{
13990  rtx pat;
13991  tree arg0 = TREE_VALUE (arglist);
13992  rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13993  enum machine_mode tmode = insn_data[icode].operand[0].mode;
13994  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13995
13996  if (! target
13997      || GET_MODE (target) != tmode
13998      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13999    target = gen_reg_rtx (tmode);
14000
14001  if (VECTOR_MODE_P (mode0))
14002    op0 = safe_vector_operand (op0, mode0);
14003
14004  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14005    op0 = copy_to_mode_reg (mode0, op0);
14006
14007  op1 = op0;
14008  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
14009    op1 = copy_to_mode_reg (mode0, op1);
14010
14011  pat = GEN_FCN (icode) (target, op0, op1);
14012  if (! pat)
14013    return 0;
14014  emit_insn (pat);
14015  return target;
14016}
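
/* Illustrative sketch, added for exposition (not part of the original
   sources): the scalar variants operate on element 0 only and copy the
   remaining elements from the source, which is why op0 is passed twice
   above.  Names are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));

static __example_v4sf
example_sqrtss (__example_v4sf v)
{
  /* Element 0 becomes the square root of v[0]; elements 1..3 are copied
     unchanged from v.  */
  return __builtin_ia32_sqrtss (v);
}
#endif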
14017
14018/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
14019
14020static rtx
14021ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
14022			 rtx target)
14023{
14024  rtx pat;
14025  tree arg0 = TREE_VALUE (arglist);
14026  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14027  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14028  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14029  rtx op2;
14030  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
14031  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
14032  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
14033  enum rtx_code comparison = d->comparison;
14034
14035  if (VECTOR_MODE_P (mode0))
14036    op0 = safe_vector_operand (op0, mode0);
14037  if (VECTOR_MODE_P (mode1))
14038    op1 = safe_vector_operand (op1, mode1);
14039
14040  /* Swap operands if we have a comparison that isn't available in
14041     hardware.  */
14042  if (d->flag)
14043    {
14044      rtx tmp = gen_reg_rtx (mode1);
14045      emit_move_insn (tmp, op1);
14046      op1 = op0;
14047      op0 = tmp;
14048    }
14049
14050  if (! target
14051      || GET_MODE (target) != tmode
14052      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
14053    target = gen_reg_rtx (tmode);
14054
14055  if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
14056    op0 = copy_to_mode_reg (mode0, op0);
14057  if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
14058    op1 = copy_to_mode_reg (mode1, op1);
14059
14060  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14061  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
14062  if (! pat)
14063    return 0;
14064  emit_insn (pat);
14065  return target;
14066}
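
/* Illustrative sketch, added for exposition (not part of the original
   sources): mask compares yield an all-ones/all-zeros mask per element, and
   predicates the hardware lacks (e.g. GT) are obtained by the operand swap
   above.  The typedefs and names are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));
typedef int   __example_v4si __attribute__ ((vector_size (16)));

static __example_v4si
example_cmpgtps (__example_v4sf a, __example_v4sf b)
{
  /* cmpps has no GT predicate; the builtin is marked with d->flag set and
     is emitted as cmpltps with a and b exchanged.  */
  return __builtin_ia32_cmpgtps (a, b);
}
#endif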
14067
14068/* Subroutine of ix86_expand_builtin to take care of comi insns.  */
14069
14070static rtx
14071ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
14072		      rtx target)
14073{
14074  rtx pat;
14075  tree arg0 = TREE_VALUE (arglist);
14076  tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14077  rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14078  rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14079  rtx op2;
14080  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
14081  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
14082  enum rtx_code comparison = d->comparison;
14083
14084  if (VECTOR_MODE_P (mode0))
14085    op0 = safe_vector_operand (op0, mode0);
14086  if (VECTOR_MODE_P (mode1))
14087    op1 = safe_vector_operand (op1, mode1);
14088
14089  /* Swap operands if we have a comparison that isn't available in
14090     hardware.  */
14091  if (d->flag)
14092    {
14093      rtx tmp = op1;
14094      op1 = op0;
14095      op0 = tmp;
14096    }
14097
14098  target = gen_reg_rtx (SImode);
14099  emit_move_insn (target, const0_rtx);
14100  target = gen_rtx_SUBREG (QImode, target, 0);
14101
14102  if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
14103    op0 = copy_to_mode_reg (mode0, op0);
14104  if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
14105    op1 = copy_to_mode_reg (mode1, op1);
14106
14107  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
14108  pat = GEN_FCN (d->icode) (op0, op1);
14109  if (! pat)
14110    return 0;
14111  emit_insn (pat);
14112  emit_insn (gen_rtx_SET (VOIDmode,
14113			  gen_rtx_STRICT_LOW_PART (VOIDmode, target),
14114			  gen_rtx_fmt_ee (comparison, QImode,
14115					  SET_DEST (pat),
14116					  const0_rtx)));
14117
14118  return SUBREG_REG (target);
14119}
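
/* Illustrative sketch, added for exposition (not part of the original
   sources): comi/ucomi builtins compare element 0 of each operand and
   return an int, materialized above by a setcc into the low byte of a
   fresh SImode register.  Names are made up; compile with -msse.  */
#if 0
typedef float __example_v4sf __attribute__ ((vector_size (16)));

static int
example_comilt (__example_v4sf a, __example_v4sf b)
{
  return __builtin_ia32_comilt (a, b);	/* comiss followed by a setcc */
}
#endif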
14120
14121/* Expand an expression EXP that calls a built-in function,
14122   with result going to TARGET if that's convenient
14123   (and in mode MODE if that's convenient).
14124   SUBTARGET may be used as the target for computing one of EXP's operands.
14125   IGNORE is nonzero if the value is to be ignored.  */
14126
14127rtx
14128ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14129		     enum machine_mode mode ATTRIBUTE_UNUSED,
14130		     int ignore ATTRIBUTE_UNUSED)
14131{
14132  const struct builtin_description *d;
14133  size_t i;
14134  enum insn_code icode;
14135  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
14136  tree arglist = TREE_OPERAND (exp, 1);
14137  tree arg0, arg1, arg2;
14138  rtx op0, op1, op2, pat;
14139  enum machine_mode tmode, mode0, mode1, mode2;
14140  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
14141
14142  switch (fcode)
14143    {
14144    case IX86_BUILTIN_EMMS:
14145      emit_insn (gen_emms ());
14146      return 0;
14147
14148    case IX86_BUILTIN_SFENCE:
14149      emit_insn (gen_sfence ());
14150      return 0;
14151
14152    case IX86_BUILTIN_PEXTRW:
14153    case IX86_BUILTIN_PEXTRW128:
14154      icode = (fcode == IX86_BUILTIN_PEXTRW
14155	       ? CODE_FOR_mmx_pextrw
14156	       : CODE_FOR_sse2_pextrw);
14157      arg0 = TREE_VALUE (arglist);
14158      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14159      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14160      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14161      tmode = insn_data[icode].operand[0].mode;
14162      mode0 = insn_data[icode].operand[1].mode;
14163      mode1 = insn_data[icode].operand[2].mode;
14164
14165      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14166	op0 = copy_to_mode_reg (mode0, op0);
14167      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14168	{
14169	  error ("selector must be an integer constant in the range 0..%i",
14170		  fcode == IX86_BUILTIN_PEXTRW ? 3:7);
14171	  return gen_reg_rtx (tmode);
14172	}
14173      if (target == 0
14174	  || GET_MODE (target) != tmode
14175	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14176	target = gen_reg_rtx (tmode);
14177      pat = GEN_FCN (icode) (target, op0, op1);
14178      if (! pat)
14179	return 0;
14180      emit_insn (pat);
14181      return target;
14182
14183    case IX86_BUILTIN_PINSRW:
14184    case IX86_BUILTIN_PINSRW128:
14185      icode = (fcode == IX86_BUILTIN_PINSRW
14186	       ? CODE_FOR_mmx_pinsrw
14187	       : CODE_FOR_sse2_pinsrw);
14188      arg0 = TREE_VALUE (arglist);
14189      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14190      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14191      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14192      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14193      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14194      tmode = insn_data[icode].operand[0].mode;
14195      mode0 = insn_data[icode].operand[1].mode;
14196      mode1 = insn_data[icode].operand[2].mode;
14197      mode2 = insn_data[icode].operand[3].mode;
14198
14199      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14200	op0 = copy_to_mode_reg (mode0, op0);
14201      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14202	op1 = copy_to_mode_reg (mode1, op1);
14203      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14204	{
14205	  error ("selector must be an integer constant in the range 0..%i",
14206		  fcode == IX86_BUILTIN_PINSRW ? 15:255);
14207	  return const0_rtx;
14208	}
14209      if (target == 0
14210	  || GET_MODE (target) != tmode
14211	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14212	target = gen_reg_rtx (tmode);
14213      pat = GEN_FCN (icode) (target, op0, op1, op2);
14214      if (! pat)
14215	return 0;
14216      emit_insn (pat);
14217      return target;
14218
14219    case IX86_BUILTIN_MASKMOVQ:
14220    case IX86_BUILTIN_MASKMOVDQU:
14221      icode = (fcode == IX86_BUILTIN_MASKMOVQ
14222	       ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
14223	       : (TARGET_64BIT ? CODE_FOR_sse2_maskmovdqu_rex64
14224		  : CODE_FOR_sse2_maskmovdqu));
14225      /* Note the arg order is different from the operand order.  */
14226      arg1 = TREE_VALUE (arglist);
14227      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
14228      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14229      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14230      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14231      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14232      mode0 = insn_data[icode].operand[0].mode;
14233      mode1 = insn_data[icode].operand[1].mode;
14234      mode2 = insn_data[icode].operand[2].mode;
14235
14236      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
14237	op0 = copy_to_mode_reg (mode0, op0);
14238      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14239	op1 = copy_to_mode_reg (mode1, op1);
14240      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
14241	op2 = copy_to_mode_reg (mode2, op2);
14242      pat = GEN_FCN (icode) (op0, op1, op2);
14243      if (! pat)
14244	return 0;
14245      emit_insn (pat);
14246      return 0;
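
      /* Illustrative sketch, added for exposition (not part of the original
         sources): as noted above, the C-level argument order is
         (data, mask, address) even though operand 0 of the insn is the
         memory destination.  Names are made up; compile with -msse (or the
         Athlon 3DNow! extensions).  */
#if 0
typedef char __example_v8qi __attribute__ ((vector_size (8)));

static void
example_maskmovq (__example_v8qi data, __example_v8qi mask, char *p)
{
  /* Bytes of DATA whose MASK byte has the top bit set are stored to *P;
     the remaining bytes of *P are left untouched.  */
  __builtin_ia32_maskmovq (data, mask, p);
}
#endif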
14247
14248    case IX86_BUILTIN_SQRTSS:
14249      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
14250    case IX86_BUILTIN_RSQRTSS:
14251      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
14252    case IX86_BUILTIN_RCPSS:
14253      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);
14254
14255    case IX86_BUILTIN_LOADAPS:
14256      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);
14257
14258    case IX86_BUILTIN_LOADUPS:
14259      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
14260
14261    case IX86_BUILTIN_STOREAPS:
14262      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
14263
14264    case IX86_BUILTIN_STOREUPS:
14265      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
14266
14267    case IX86_BUILTIN_LOADSS:
14268      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);
14269
14270    case IX86_BUILTIN_STORESS:
14271      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);
14272
14273    case IX86_BUILTIN_LOADHPS:
14274    case IX86_BUILTIN_LOADLPS:
14275    case IX86_BUILTIN_LOADHPD:
14276    case IX86_BUILTIN_LOADLPD:
14277      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
14278	       : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
14279	       : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
14280	       : CODE_FOR_sse2_movsd);
14281      arg0 = TREE_VALUE (arglist);
14282      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14283      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14284      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14285      tmode = insn_data[icode].operand[0].mode;
14286      mode0 = insn_data[icode].operand[1].mode;
14287      mode1 = insn_data[icode].operand[2].mode;
14288
14289      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14290	op0 = copy_to_mode_reg (mode0, op0);
14291      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
14292      if (target == 0
14293	  || GET_MODE (target) != tmode
14294	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14295	target = gen_reg_rtx (tmode);
14296      pat = GEN_FCN (icode) (target, op0, op1);
14297      if (! pat)
14298	return 0;
14299      emit_insn (pat);
14300      return target;
14301
14302    case IX86_BUILTIN_STOREHPS:
14303    case IX86_BUILTIN_STORELPS:
14304    case IX86_BUILTIN_STOREHPD:
14305    case IX86_BUILTIN_STORELPD:
14306      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
14307	       : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
14308	       : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
14309	       : CODE_FOR_sse2_movsd);
14310      arg0 = TREE_VALUE (arglist);
14311      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14312      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14313      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14314      mode0 = insn_data[icode].operand[1].mode;
14315      mode1 = insn_data[icode].operand[2].mode;
14316
14317      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
14318      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14319	op1 = copy_to_mode_reg (mode1, op1);
14320
14321      pat = GEN_FCN (icode) (op0, op0, op1);
14322      if (! pat)
14323	return 0;
14324      emit_insn (pat);
14325      return 0;
14326
14327    case IX86_BUILTIN_MOVNTPS:
14328      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
14329    case IX86_BUILTIN_MOVNTQ:
14330      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
14331
14332    case IX86_BUILTIN_LDMXCSR:
14333      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
14334      target = assign_386_stack_local (SImode, 0);
14335      emit_move_insn (target, op0);
14336      emit_insn (gen_ldmxcsr (target));
14337      return 0;
14338
14339    case IX86_BUILTIN_STMXCSR:
14340      target = assign_386_stack_local (SImode, 0);
14341      emit_insn (gen_stmxcsr (target));
14342      return copy_to_mode_reg (SImode, target);
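
      /* Illustrative sketch, added for exposition (not part of the original
         sources): ldmxcsr/stmxcsr accept only memory operands, which is why
         both cases bounce the value through a stack slot.  At the C level
         the builtins simply take and return an unsigned int; the function
         name is made up, compile with -msse.  */
#if 0
static unsigned int
example_enable_flush_to_zero (void)
{
  unsigned int mxcsr = __builtin_ia32_stmxcsr ();
  __builtin_ia32_ldmxcsr (mxcsr | 0x8000);	/* set the FTZ bit */
  return mxcsr;					/* previous control/status word */
}
#endif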
14343
14344    case IX86_BUILTIN_SHUFPS:
14345    case IX86_BUILTIN_SHUFPD:
14346      icode = (fcode == IX86_BUILTIN_SHUFPS
14347	       ? CODE_FOR_sse_shufps
14348	       : CODE_FOR_sse2_shufpd);
14349      arg0 = TREE_VALUE (arglist);
14350      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14351      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14352      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14353      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14354      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14355      tmode = insn_data[icode].operand[0].mode;
14356      mode0 = insn_data[icode].operand[1].mode;
14357      mode1 = insn_data[icode].operand[2].mode;
14358      mode2 = insn_data[icode].operand[3].mode;
14359
14360      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14361	op0 = copy_to_mode_reg (mode0, op0);
14362      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14363	op1 = copy_to_mode_reg (mode1, op1);
14364      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
14365	{
14366	  /* @@@ better error message */
14367	  error ("mask must be an immediate");
14368	  return gen_reg_rtx (tmode);
14369	}
14370      if (target == 0
14371	  || GET_MODE (target) != tmode
14372	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14373	target = gen_reg_rtx (tmode);
14374      pat = GEN_FCN (icode) (target, op0, op1, op2);
14375      if (! pat)
14376	return 0;
14377      emit_insn (pat);
14378      return target;
14379
14380    case IX86_BUILTIN_PSHUFW:
14381    case IX86_BUILTIN_PSHUFD:
14382    case IX86_BUILTIN_PSHUFHW:
14383    case IX86_BUILTIN_PSHUFLW:
14384      icode = (  fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
14385	       : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
14386	       : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
14387	       : CODE_FOR_mmx_pshufw);
14388      arg0 = TREE_VALUE (arglist);
14389      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14390      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14391      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14392      tmode = insn_data[icode].operand[0].mode;
14393      mode1 = insn_data[icode].operand[1].mode;
14394      mode2 = insn_data[icode].operand[2].mode;
14395
14396      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14397	op0 = copy_to_mode_reg (mode1, op0);
14398      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14399	{
14400	  /* @@@ better error message */
14401	  error ("mask must be an immediate");
14402	  return const0_rtx;
14403	}
14404      if (target == 0
14405	  || GET_MODE (target) != tmode
14406	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14407	target = gen_reg_rtx (tmode);
14408      pat = GEN_FCN (icode) (target, op0, op1);
14409      if (! pat)
14410	return 0;
14411      emit_insn (pat);
14412      return target;
14413
14414    case IX86_BUILTIN_PSLLDQI128:
14415    case IX86_BUILTIN_PSRLDQI128:
14416      icode = (  fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
14417	       : CODE_FOR_sse2_lshrti3);
14418      arg0 = TREE_VALUE (arglist);
14419      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14420      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14421      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14422      tmode = insn_data[icode].operand[0].mode;
14423      mode1 = insn_data[icode].operand[1].mode;
14424      mode2 = insn_data[icode].operand[2].mode;
14425
14426      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
14427	{
14428	  op0 = copy_to_reg (op0);
14429	  op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
14430	}
14431      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
14432	{
14433	  error ("shift must be an immediate");
14434	  return const0_rtx;
14435	}
14436      target = gen_reg_rtx (V2DImode);
14437      pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
14438      if (! pat)
14439	return 0;
14440      emit_insn (pat);
14441      return target;
14442
14443    case IX86_BUILTIN_FEMMS:
14444      emit_insn (gen_femms ());
14445      return NULL_RTX;
14446
14447    case IX86_BUILTIN_PAVGUSB:
14448      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);
14449
14450    case IX86_BUILTIN_PF2ID:
14451      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);
14452
14453    case IX86_BUILTIN_PFACC:
14454      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);
14455
14456    case IX86_BUILTIN_PFADD:
14457     return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);
14458
14459    case IX86_BUILTIN_PFCMPEQ:
14460      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);
14461
14462    case IX86_BUILTIN_PFCMPGE:
14463      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);
14464
14465    case IX86_BUILTIN_PFCMPGT:
14466      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);
14467
14468    case IX86_BUILTIN_PFMAX:
14469      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);
14470
14471    case IX86_BUILTIN_PFMIN:
14472      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);
14473
14474    case IX86_BUILTIN_PFMUL:
14475      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);
14476
14477    case IX86_BUILTIN_PFRCP:
14478      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);
14479
14480    case IX86_BUILTIN_PFRCPIT1:
14481      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);
14482
14483    case IX86_BUILTIN_PFRCPIT2:
14484      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);
14485
14486    case IX86_BUILTIN_PFRSQIT1:
14487      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);
14488
14489    case IX86_BUILTIN_PFRSQRT:
14490      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);
14491
14492    case IX86_BUILTIN_PFSUB:
14493      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);
14494
14495    case IX86_BUILTIN_PFSUBR:
14496      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);
14497
14498    case IX86_BUILTIN_PI2FD:
14499      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);
14500
14501    case IX86_BUILTIN_PMULHRW:
14502      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);
14503
14504    case IX86_BUILTIN_PF2IW:
14505      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);
14506
14507    case IX86_BUILTIN_PFNACC:
14508      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);
14509
14510    case IX86_BUILTIN_PFPNACC:
14511      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);
14512
14513    case IX86_BUILTIN_PI2FW:
14514      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);
14515
14516    case IX86_BUILTIN_PSWAPDSI:
14517      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);
14518
14519    case IX86_BUILTIN_PSWAPDSF:
14520      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);
14521
14522    case IX86_BUILTIN_SSE_ZERO:
14523      target = gen_reg_rtx (V4SFmode);
14524      emit_insn (gen_sse_clrv4sf (target, CONST0_RTX (V4SFmode)));
14525      return target;
14526
14527    case IX86_BUILTIN_MMX_ZERO:
14528      target = gen_reg_rtx (DImode);
14529      emit_insn (gen_mmx_clrdi (target));
14530      return target;
14531
14532    case IX86_BUILTIN_CLRTI:
14533      target = gen_reg_rtx (V2DImode);
14534      emit_insn (gen_sse2_clrti (simplify_gen_subreg (TImode, target, V2DImode, 0)));
14535      return target;
14536
14537
14538    case IX86_BUILTIN_SQRTSD:
14539      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
14540    case IX86_BUILTIN_LOADAPD:
14541      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
14542    case IX86_BUILTIN_LOADUPD:
14543      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
14544
14545    case IX86_BUILTIN_STOREAPD:
14546      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14547    case IX86_BUILTIN_STOREUPD:
14548      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
14549
14550    case IX86_BUILTIN_LOADSD:
14551      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);
14552
14553    case IX86_BUILTIN_STORESD:
14554      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);
14555
14556    case IX86_BUILTIN_SETPD1:
14557      target = assign_386_stack_local (DFmode, 0);
14558      arg0 = TREE_VALUE (arglist);
14559      emit_move_insn (adjust_address (target, DFmode, 0),
14560		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14561      op0 = gen_reg_rtx (V2DFmode);
14562      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
14563      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
14564      return op0;
14565
14566    case IX86_BUILTIN_SETPD:
14567      target = assign_386_stack_local (V2DFmode, 0);
14568      arg0 = TREE_VALUE (arglist);
14569      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14570      emit_move_insn (adjust_address (target, DFmode, 0),
14571		      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
14572      emit_move_insn (adjust_address (target, DFmode, 8),
14573		      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
14574      op0 = gen_reg_rtx (V2DFmode);
14575      emit_insn (gen_sse2_movapd (op0, target));
14576      return op0;
14577
14578    case IX86_BUILTIN_LOADRPD:
14579      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
14580					 gen_reg_rtx (V2DFmode), 1);
14581      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
14582      return target;
14583
14584    case IX86_BUILTIN_LOADPD1:
14585      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
14586					 gen_reg_rtx (V2DFmode), 1);
14587      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
14588      return target;
14589
14590    case IX86_BUILTIN_STOREPD1:
14591      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14592    case IX86_BUILTIN_STORERPD:
14593      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
14594
14595    case IX86_BUILTIN_CLRPD:
14596      target = gen_reg_rtx (V2DFmode);
14597      emit_insn (gen_sse_clrv2df (target));
14598      return target;
14599
14600    case IX86_BUILTIN_MFENCE:
14601	emit_insn (gen_sse2_mfence ());
14602	return 0;
14603    case IX86_BUILTIN_LFENCE:
14604	emit_insn (gen_sse2_lfence ());
14605	return 0;
14606
14607    case IX86_BUILTIN_CLFLUSH:
14608	arg0 = TREE_VALUE (arglist);
14609	op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14610	icode = CODE_FOR_sse2_clflush;
14611	if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
14612	    op0 = copy_to_mode_reg (Pmode, op0);
14613
14614	emit_insn (gen_sse2_clflush (op0));
14615	return 0;
14616
14617    case IX86_BUILTIN_MOVNTPD:
14618      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
14619    case IX86_BUILTIN_MOVNTDQ:
14620      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
14621    case IX86_BUILTIN_MOVNTI:
14622      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
14623
14624    case IX86_BUILTIN_LOADDQA:
14625      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqa, arglist, target, 1);
14626    case IX86_BUILTIN_LOADDQU:
14627      return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
14628    case IX86_BUILTIN_LOADD:
14629      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadd, arglist, target, 1);
14630
14631    case IX86_BUILTIN_STOREDQA:
14632      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqa, arglist);
14633    case IX86_BUILTIN_STOREDQU:
14634      return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
14635    case IX86_BUILTIN_STORED:
14636      return ix86_expand_store_builtin (CODE_FOR_sse2_stored, arglist);
14637
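    /* SSE3 MONITOR/MWAIT take their operands in registers (the hardware
       instructions read fixed registers; the insn patterns want register
       operands), so force each argument into a register first.  */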
14638    case IX86_BUILTIN_MONITOR:
14639      arg0 = TREE_VALUE (arglist);
14640      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14641      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
14642      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14643      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14644      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
14645      if (!REG_P (op0))
14646	op0 = copy_to_mode_reg (SImode, op0);
14647      if (!REG_P (op1))
14648	op1 = copy_to_mode_reg (SImode, op1);
14649      if (!REG_P (op2))
14650	op2 = copy_to_mode_reg (SImode, op2);
14651      emit_insn (gen_monitor (op0, op1, op2));
14652      return 0;
14653
14654    case IX86_BUILTIN_MWAIT:
14655      arg0 = TREE_VALUE (arglist);
14656      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
14657      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
14658      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
14659      if (!REG_P (op0))
14660	op0 = copy_to_mode_reg (SImode, op0);
14661      if (!REG_P (op1))
14662	op1 = copy_to_mode_reg (SImode, op1);
14663      emit_insn (gen_mwait (op0, op1));
14664      return 0;
14665
14666    case IX86_BUILTIN_LOADDDUP:
14667      return ix86_expand_unop_builtin (CODE_FOR_loadddup, arglist, target, 1);
14668
14669    case IX86_BUILTIN_LDDQU:
14670      return ix86_expand_unop_builtin (CODE_FOR_lddqu, arglist, target,
14671				       1);
14672
14673    default:
14674      break;
14675    }
14676
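  /* Anything not handled explicitly above is looked up in the generic
     two-operand, one-operand and COMI builtin description tables.  */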
14677  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14678    if (d->code == fcode)
14679      {
14680	/* Compares are treated specially.  */
14681	if (d->icode == CODE_FOR_maskcmpv4sf3
14682	    || d->icode == CODE_FOR_vmmaskcmpv4sf3
14683	    || d->icode == CODE_FOR_maskncmpv4sf3
14684	    || d->icode == CODE_FOR_vmmaskncmpv4sf3
14685	    || d->icode == CODE_FOR_maskcmpv2df3
14686	    || d->icode == CODE_FOR_vmmaskcmpv2df3
14687	    || d->icode == CODE_FOR_maskncmpv2df3
14688	    || d->icode == CODE_FOR_vmmaskncmpv2df3)
14689	  return ix86_expand_sse_compare (d, arglist, target);
14690
14691	return ix86_expand_binop_builtin (d->icode, arglist, target);
14692      }
14693
14694  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14695    if (d->code == fcode)
14696      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
14697
14698  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
14699    if (d->code == fcode)
14700      return ix86_expand_sse_comi (d, arglist, target);
14701
14702  /* @@@ Should really do something sensible here.  */
14703  return 0;
14704}
14705
14706/* Store OPERAND to the memory after reload is completed.  This means
14707   that we can't easily use assign_stack_local.  */
14708rtx
14709ix86_force_to_memory (enum machine_mode mode, rtx operand)
14710{
14711  rtx result;
14712  if (!reload_completed)
14713    abort ();
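  /* With a red zone (x86-64 ABI) we may store below the stack pointer
     without adjusting it; otherwise push the value onto the stack and
     let ix86_free_from_memory deallocate the slot later.  */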
14714  if (TARGET_RED_ZONE)
14715    {
14716      result = gen_rtx_MEM (mode,
14717			    gen_rtx_PLUS (Pmode,
14718					  stack_pointer_rtx,
14719					  GEN_INT (-RED_ZONE_SIZE)));
14720      emit_move_insn (result, operand);
14721    }
14722  else if (!TARGET_RED_ZONE && TARGET_64BIT)
14723    {
14724      switch (mode)
14725	{
14726	case HImode:
14727	case SImode:
14728	  operand = gen_lowpart (DImode, operand);
14729	  /* FALLTHRU */
14730	case DImode:
14731	  emit_insn (
14732		      gen_rtx_SET (VOIDmode,
14733				   gen_rtx_MEM (DImode,
14734						gen_rtx_PRE_DEC (DImode,
14735							stack_pointer_rtx)),
14736				   operand));
14737	  break;
14738	default:
14739	  abort ();
14740	}
14741      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14742    }
14743  else
14744    {
14745      switch (mode)
14746	{
14747	case DImode:
14748	  {
14749	    rtx operands[2];
14750	    split_di (&operand, 1, operands, operands + 1);
14751	    emit_insn (
14752			gen_rtx_SET (VOIDmode,
14753				     gen_rtx_MEM (SImode,
14754						  gen_rtx_PRE_DEC (Pmode,
14755							stack_pointer_rtx)),
14756				     operands[1]));
14757	    emit_insn (
14758			gen_rtx_SET (VOIDmode,
14759				     gen_rtx_MEM (SImode,
14760						  gen_rtx_PRE_DEC (Pmode,
14761							stack_pointer_rtx)),
14762				     operands[0]));
14763	  }
14764	  break;
14765	case HImode:
14766	  /* It is better to store HImodes as SImodes.  */
14767	  if (!TARGET_PARTIAL_REG_STALL)
14768	    operand = gen_lowpart (SImode, operand);
14769	  /* FALLTHRU */
14770	case SImode:
14771	  emit_insn (
14772		      gen_rtx_SET (VOIDmode,
14773				   gen_rtx_MEM (GET_MODE (operand),
14774						gen_rtx_PRE_DEC (SImode,
14775							stack_pointer_rtx)),
14776				   operand));
14777	  break;
14778	default:
14779	  abort ();
14780	}
14781      result = gen_rtx_MEM (mode, stack_pointer_rtx);
14782    }
14783  return result;
14784}
14785
14786/* Free operand from the memory.  */
14787void
14788ix86_free_from_memory (enum machine_mode mode)
14789{
14790  if (!TARGET_RED_ZONE)
14791    {
14792      int size;
14793
14794      if (mode == DImode || TARGET_64BIT)
14795	size = 8;
14796      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
14797	size = 2;
14798      else
14799	size = 4;
14800      /* Use LEA to deallocate stack space.  In peephole2 it will be converted
14801         to pop or add instruction if registers are available.  */
14802      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
14803			      gen_rtx_PLUS (Pmode, stack_pointer_rtx,
14804					    GEN_INT (size))));
14805    }
14806}
14807
14808/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
14809   QImode must go into class Q_REGS.
14810   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
14811   movdf to do mem-to-mem moves through integer regs.  */
14812enum reg_class
14813ix86_preferred_reload_class (rtx x, enum reg_class class)
14814{
14815  if (GET_CODE (x) == CONST_VECTOR && x != CONST0_RTX (GET_MODE (x)))
14816    return NO_REGS;
14817  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
14818    {
14819      /* SSE can't load any constant directly yet.  */
14820      if (SSE_CLASS_P (class))
14821	return NO_REGS;
14822      /* Floats can load 0 and 1.  */
14823      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
14824	{
14825	  /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
14826	  if (MAYBE_SSE_CLASS_P (class))
14827	    return (reg_class_subset_p (class, GENERAL_REGS)
14828		    ? GENERAL_REGS : FLOAT_REGS);
14829	  else
14830	    return class;
14831	}
14832      /* General regs can load everything.  */
14833      if (reg_class_subset_p (class, GENERAL_REGS))
14834	return GENERAL_REGS;
14835      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
14836      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
14837	return NO_REGS;
14838    }
14839  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
14840    return NO_REGS;
14841  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
14842    return Q_REGS;
14843  return class;
14844}
14845
14846/* If we are copying between general and FP registers, we need a memory
14847   location. The same is true for SSE and MMX registers.
14848
14849   The macro can't work reliably when one of the CLASSES is a class containing
14850   registers from multiple units (SSE, MMX, integer).  We avoid this by never
14851   combining those units in a single alternative in the machine description.
14852   Ensure that this constraint holds to avoid unexpected surprises.
14853
14854   When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
14855   enforce these sanity checks.  */
14856int
14857ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
14858			      enum machine_mode mode, int strict)
14859{
14860  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
14861      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
14862      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
14863      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
14864      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
14865      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
14866    {
14867      if (strict)
14868	abort ();
14869      else
14870	return 1;
14871    }
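  /* Memory is needed between x87 and any other unit, and between SSE/MMX
     and the integer unit unless the mode fits a single general register
     (SImode, or DImode in 64-bit mode) and direct inter-unit moves are
     enabled or we are optimizing for size.  */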
14872  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
14873	  || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
14874	       || MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
14875	      && ((mode != SImode && (mode != DImode || !TARGET_64BIT))
14876		  || (!TARGET_INTER_UNIT_MOVES && !optimize_size))));
14877}
14878/* Return the cost of moving data from a register in class CLASS1 to
14879   one in class CLASS2.
14880
14881   It is not required that the cost always equal 2 when FROM is the same as TO;
14882   on some machines it is expensive to move between registers if they are not
14883   general registers.  */
14884int
14885ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
14886			 enum reg_class class2)
14887{
14888  /* In case we require secondary memory, compute cost of the store followed
14889     by load.  In order to avoid bad register allocation choices, we need
14890     for this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
14891
14892  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
14893    {
14894      int cost = 1;
14895
14896      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
14897		   MEMORY_MOVE_COST (mode, class1, 1));
14898      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
14899		   MEMORY_MOVE_COST (mode, class2, 1));
14900
14901      /* In case of copying from a general purpose register we may emit
14902         multiple stores followed by a single load, causing a memory size
14903         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
14904      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
14905	cost += 20;
14906
14907      /* In the case of FP/MMX moves, the registers actually overlap, and we
14908	 have to switch modes in order to treat them differently.  */
14909      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
14910          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
14911	cost += 20;
14912
14913      return cost;
14914    }
14915
14916  /* Moves between SSE/MMX and integer unit are expensive.  */
14917  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
14918      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
14919    return ix86_cost->mmxsse_to_integer;
14920  if (MAYBE_FLOAT_CLASS_P (class1))
14921    return ix86_cost->fp_move;
14922  if (MAYBE_SSE_CLASS_P (class1))
14923    return ix86_cost->sse_move;
14924  if (MAYBE_MMX_CLASS_P (class1))
14925    return ix86_cost->mmx_move;
14926  return 2;
14927}
14928
14929/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
14930int
14931ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
14932{
14933  /* Flags and only flags can only hold CCmode values.  */
14934  if (CC_REGNO_P (regno))
14935    return GET_MODE_CLASS (mode) == MODE_CC;
14936  if (GET_MODE_CLASS (mode) == MODE_CC
14937      || GET_MODE_CLASS (mode) == MODE_RANDOM
14938      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
14939    return 0;
14940  if (FP_REGNO_P (regno))
14941    return VALID_FP_MODE_P (mode);
14942  if (SSE_REGNO_P (regno))
14943    {
14944      /* HACK!  We didn't change all of the constraints for SSE1 for the
14945	 scalar modes on the branch.  Fortunately, they're not required
14946	 for ABI compatibility.  */
14947      if (!TARGET_SSE2 && !VECTOR_MODE_P (mode))
14948	return VALID_SSE_REG_MODE (mode);
14949
14950      /* We implement the move patterns for all vector modes into and
14951         out of SSE registers, even when no operation instructions
14952         are available.  */
14953      return (VALID_SSE_REG_MODE (mode)
14954	      || VALID_SSE2_REG_MODE (mode)
14955	      || VALID_MMX_REG_MODE (mode)
14956	      || VALID_MMX_REG_MODE_3DNOW (mode));
14957    }
14958  if (MMX_REGNO_P (regno))
14959    {
14960      /* We implement the move patterns for 3DNOW modes even in MMX mode,
14961         so if the register is available at all, then we can move data of
14962         the given mode into or out of it.  */
14963      return (VALID_MMX_REG_MODE (mode)
14964	      || VALID_MMX_REG_MODE_3DNOW (mode));
14965    }
14966  /* We handle both integer and floats in the general purpose registers.
14967     In future we should be able to handle vector modes as well.  */
14968  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
14969    return 0;
14970  /* Take care for QImode values - they can be in non-QI regs, but then
14971  /* Take care with QImode values - they can be in non-QI regs, but then
14972  if (regno < 4 || mode != QImode || TARGET_64BIT)
14973    return 1;
14974  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
14975}
14976
14977/* Return the cost of moving data of mode M between a
14978   register and memory.  A value of 2 is the default; this cost is
14979   relative to those in `REGISTER_MOVE_COST'.
14980
14981   If moving between registers and memory is more expensive than
14982   between two registers, you should define this macro to express the
14983   relative cost.
14984
14985   Also model the increased cost of moving QImode registers in
14986   non-Q_REGS classes.
14987 */
14988int
14989ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
14990{
14991  if (FLOAT_CLASS_P (class))
14992    {
14993      int index;
14994      switch (mode)
14995	{
14996	  case SFmode:
14997	    index = 0;
14998	    break;
14999	  case DFmode:
15000	    index = 1;
15001	    break;
15002	  case XFmode:
15003	    index = 2;
15004	    break;
15005	  default:
15006	    return 100;
15007	}
15008      return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
15009    }
15010  if (SSE_CLASS_P (class))
15011    {
15012      int index;
15013      switch (GET_MODE_SIZE (mode))
15014	{
15015	  case 4:
15016	    index = 0;
15017	    break;
15018	  case 8:
15019	    index = 1;
15020	    break;
15021	  case 16:
15022	    index = 2;
15023	    break;
15024	  default:
15025	    return 100;
15026	}
15027      return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
15028    }
15029  if (MMX_CLASS_P (class))
15030    {
15031      int index;
15032      switch (GET_MODE_SIZE (mode))
15033	{
15034	  case 4:
15035	    index = 0;
15036	    break;
15037	  case 8:
15038	    index = 1;
15039	    break;
15040	  default:
15041	    return 100;
15042	}
15043      return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
15044    }
15045  switch (GET_MODE_SIZE (mode))
15046    {
15047      case 1:
15048	if (in)
15049	  return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
15050		  : ix86_cost->movzbl_load);
15051	else
15052	  return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
15053		  : ix86_cost->int_store[0] + 4);
15054	break;
15055      case 2:
15056	return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
15057      default:
15058	/* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
15059	if (mode == TFmode)
15060	  mode = XFmode;
15061	return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
15062		* (((int) GET_MODE_SIZE (mode)
15063		    + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
15064    }
15065}
15066
15067/* Compute a (partial) cost for rtx X.  Return true if the complete
15068   cost has been computed, and false if subexpressions should be
15069   scanned.  In either case, *TOTAL contains the cost result.  */
15070
15071static bool
15072ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
15073{
15074  enum machine_mode mode = GET_MODE (x);
15075
15076  switch (code)
15077    {
15078    case CONST_INT:
15079    case CONST:
15080    case LABEL_REF:
15081    case SYMBOL_REF:
15082      if (TARGET_64BIT && !x86_64_sign_extended_value (x))
15083	*total = 3;
15084      else if (TARGET_64BIT && !x86_64_zero_extended_value (x))
15085	*total = 2;
15086      else if (flag_pic && SYMBOLIC_CONST (x)
15087	       && (!TARGET_64BIT
15088		   || (GET_CODE (x) != LABEL_REF
15089		       && (GET_CODE (x) != SYMBOL_REF
15090		           || !SYMBOL_REF_LOCAL_P (x)))))
15091	*total = 1;
15092      else
15093	*total = 0;
15094      return true;
15095
15096    case CONST_DOUBLE:
15097      if (mode == VOIDmode)
15098	*total = 0;
15099      else
15100	switch (standard_80387_constant_p (x))
15101	  {
15102	  case 1: /* 0.0 */
15103	    *total = 1;
15104	    break;
15105	  default: /* Other constants */
15106	    *total = 2;
15107	    break;
15108	  case 0:
15109	  case -1:
15110	    /* Start with (MEM (SYMBOL_REF)), since that's where
15111	       it'll probably end up.  Add a penalty for size.  */
15112	    *total = (COSTS_N_INSNS (1)
15113		      + (flag_pic != 0 && !TARGET_64BIT)
15114		      + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
15115	    break;
15116	  }
15117      return true;
15118
15119    case ZERO_EXTEND:
15120      /* The zero extension is often completely free on x86_64, so make
15121	 it as cheap as possible.  */
15122      if (TARGET_64BIT && mode == DImode
15123	  && GET_MODE (XEXP (x, 0)) == SImode)
15124	*total = 1;
15125      else if (TARGET_ZERO_EXTEND_WITH_AND)
15126	*total = COSTS_N_INSNS (ix86_cost->add);
15127      else
15128	*total = COSTS_N_INSNS (ix86_cost->movzx);
15129      return false;
15130
15131    case SIGN_EXTEND:
15132      *total = COSTS_N_INSNS (ix86_cost->movsx);
15133      return false;
15134
15135    case ASHIFT:
15136      if (GET_CODE (XEXP (x, 1)) == CONST_INT
15137	  && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
15138	{
15139	  HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15140	  if (value == 1)
15141	    {
15142	      *total = COSTS_N_INSNS (ix86_cost->add);
15143	      return false;
15144	    }
15145	  if ((value == 2 || value == 3)
15146	      && !TARGET_DECOMPOSE_LEA
15147	      && ix86_cost->lea <= ix86_cost->shift_const)
15148	    {
15149	      *total = COSTS_N_INSNS (ix86_cost->lea);
15150	      return false;
15151	    }
15152	}
15153      /* FALLTHRU */
15154
15155    case ROTATE:
15156    case ASHIFTRT:
15157    case LSHIFTRT:
15158    case ROTATERT:
15159      if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
15160	{
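	  /* On 32-bit targets a DImode shift must be split into a pair of
	     SImode shifts plus fixup code, so it is charged roughly twice
	     the SImode cost; variable counts are considerably worse.  */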
15161	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15162	    {
15163	      if (INTVAL (XEXP (x, 1)) > 32)
15164		*total = COSTS_N_INSNS(ix86_cost->shift_const + 2);
15165	      else
15166		*total = COSTS_N_INSNS(ix86_cost->shift_const * 2);
15167	    }
15168	  else
15169	    {
15170	      if (GET_CODE (XEXP (x, 1)) == AND)
15171		*total = COSTS_N_INSNS(ix86_cost->shift_var * 2);
15172	      else
15173		*total = COSTS_N_INSNS(ix86_cost->shift_var * 6 + 2);
15174	    }
15175	}
15176      else
15177	{
15178	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15179	    *total = COSTS_N_INSNS (ix86_cost->shift_const);
15180	  else
15181	    *total = COSTS_N_INSNS (ix86_cost->shift_var);
15182	}
15183      return false;
15184
15185    case MULT:
15186      if (FLOAT_MODE_P (mode))
15187	*total = COSTS_N_INSNS (ix86_cost->fmul);
15188      else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
15189	{
15190	  unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
15191	  int nbits;
15192
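	  /* NBITS becomes the bit-length of the constant, i.e. the position
	     of its most significant set bit plus one.  */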
15193	  for (nbits = 0; value != 0; value >>= 1)
15194	    nbits++;
15195
15196	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15197			          + nbits * ix86_cost->mult_bit);
15198	}
15199      else
15200	{
15201	  /* This is arbitrary */
15202	  *total = COSTS_N_INSNS (ix86_cost->mult_init[MODE_INDEX (mode)]
15203			          + 7 * ix86_cost->mult_bit);
15204	}
15205      return false;
15206
15207    case DIV:
15208    case UDIV:
15209    case MOD:
15210    case UMOD:
15211      if (FLOAT_MODE_P (mode))
15212	*total = COSTS_N_INSNS (ix86_cost->fdiv);
15213      else
15214	*total = COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)]);
15215      return false;
15216
15217    case PLUS:
15218      if (FLOAT_MODE_P (mode))
15219	*total = COSTS_N_INSNS (ix86_cost->fadd);
15220      else if (!TARGET_DECOMPOSE_LEA
15221	       && GET_MODE_CLASS (mode) == MODE_INT
15222	       && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
15223	{
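	  /* Addresses of the form base + index * {2,4,8} + displacement can
	     be computed by a single LEA, so cost them as one LEA plus the
	     cost of the operands rather than as separate shifts and adds.  */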
15224	  if (GET_CODE (XEXP (x, 0)) == PLUS
15225	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
15226	      && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
15227	      && CONSTANT_P (XEXP (x, 1)))
15228	    {
15229	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
15230	      if (val == 2 || val == 4 || val == 8)
15231		{
15232		  *total = COSTS_N_INSNS (ix86_cost->lea);
15233		  *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15234		  *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
15235				      outer_code);
15236		  *total += rtx_cost (XEXP (x, 1), outer_code);
15237		  return true;
15238		}
15239	    }
15240	  else if (GET_CODE (XEXP (x, 0)) == MULT
15241		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
15242	    {
15243	      HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
15244	      if (val == 2 || val == 4 || val == 8)
15245		{
15246		  *total = COSTS_N_INSNS (ix86_cost->lea);
15247		  *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15248		  *total += rtx_cost (XEXP (x, 1), outer_code);
15249		  return true;
15250		}
15251	    }
15252	  else if (GET_CODE (XEXP (x, 0)) == PLUS)
15253	    {
15254	      *total = COSTS_N_INSNS (ix86_cost->lea);
15255	      *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
15256	      *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
15257	      *total += rtx_cost (XEXP (x, 1), outer_code);
15258	      return true;
15259	    }
15260	}
15261      /* FALLTHRU */
15262
15263    case MINUS:
15264      if (FLOAT_MODE_P (mode))
15265	{
15266	  *total = COSTS_N_INSNS (ix86_cost->fadd);
15267	  return false;
15268	}
15269      /* FALLTHRU */
15270
15271    case AND:
15272    case IOR:
15273    case XOR:
15274      if (!TARGET_64BIT && mode == DImode)
15275	{
15276	  *total = (COSTS_N_INSNS (ix86_cost->add) * 2
15277		    + (rtx_cost (XEXP (x, 0), outer_code)
15278		       << (GET_MODE (XEXP (x, 0)) != DImode))
15279		    + (rtx_cost (XEXP (x, 1), outer_code)
15280	               << (GET_MODE (XEXP (x, 1)) != DImode)));
15281	  return true;
15282	}
15283      /* FALLTHRU */
15284
15285    case NEG:
15286      if (FLOAT_MODE_P (mode))
15287	{
15288	  *total = COSTS_N_INSNS (ix86_cost->fchs);
15289	  return false;
15290	}
15291      /* FALLTHRU */
15292
15293    case NOT:
15294      if (!TARGET_64BIT && mode == DImode)
15295	*total = COSTS_N_INSNS (ix86_cost->add * 2);
15296      else
15297	*total = COSTS_N_INSNS (ix86_cost->add);
15298      return false;
15299
15300    case FLOAT_EXTEND:
15301      if (!TARGET_SSE_MATH
15302	  || mode == XFmode
15303	  || (mode == DFmode && !TARGET_SSE2))
15304	*total = 0;
15305      return false;
15306
15307    case ABS:
15308      if (FLOAT_MODE_P (mode))
15309	*total = COSTS_N_INSNS (ix86_cost->fabs);
15310      return false;
15311
15312    case SQRT:
15313      if (FLOAT_MODE_P (mode))
15314	*total = COSTS_N_INSNS (ix86_cost->fsqrt);
15315      return false;
15316
15317    case UNSPEC:
15318      if (XINT (x, 1) == UNSPEC_TP)
15319	*total = 0;
15320      return false;
15321
15322    default:
15323      return false;
15324    }
15325}
15326
15327#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
15328static void
15329ix86_svr3_asm_out_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
15330{
15331  init_section ();
15332  fputs ("\tpushl $", asm_out_file);
15333  assemble_name (asm_out_file, XSTR (symbol, 0));
15334  fputc ('\n', asm_out_file);
15335}
15336#endif
15337
15338#if TARGET_MACHO
15339
15340static int current_machopic_label_num;
15341
15342/* Given a symbol name and its associated stub, write out the
15343   definition of the stub.  */
15344
15345void
15346machopic_output_stub (FILE *file, const char *symb, const char *stub)
15347{
15348  unsigned int length;
15349  char *binder_name, *symbol_name, lazy_ptr_name[32];
15350  int label = ++current_machopic_label_num;
15351
15352  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
15353  symb = (*targetm.strip_name_encoding) (symb);
15354
15355  length = strlen (stub);
15356  binder_name = alloca (length + 32);
15357  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
15358
15359  length = strlen (symb);
15360  symbol_name = alloca (length + 32);
15361  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
15362
15363  sprintf (lazy_ptr_name, "L%d$lz", label);
15364
15365  if (MACHOPIC_PURE)
15366    machopic_picsymbol_stub_section ();
15367  else
15368    machopic_symbol_stub_section ();
15369
15370  fprintf (file, "%s:\n", stub);
15371  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15372
15373  if (MACHOPIC_PURE)
15374    {
15375      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
15376      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
15377      fprintf (file, "\tjmp %%edx\n");
15378    }
15379  else
15380    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);
15381
15382  fprintf (file, "%s:\n", binder_name);
15383
15384  if (MACHOPIC_PURE)
15385    {
15386      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
15387      fprintf (file, "\tpushl %%eax\n");
15388    }
15389  else
15390    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);
15391
15392  fprintf (file, "\tjmp dyld_stub_binding_helper\n");
15393
15394  machopic_lazy_symbol_ptr_section ();
15395  fprintf (file, "%s:\n", lazy_ptr_name);
15396  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
15397  fprintf (file, "\t.long %s\n", binder_name);
15398}
15399#endif /* TARGET_MACHO */
15400
15401/* Order the registers for register allocator.  */
15402
15403void
15404x86_order_regs_for_local_alloc (void)
15405{
15406   int pos = 0;
15407   int i;
15408
15409   /* First allocate the local general purpose registers.  */
15410   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15411     if (GENERAL_REGNO_P (i) && call_used_regs[i])
15412	reg_alloc_order [pos++] = i;
15413
15414   /* Global general purpose registers.  */
15415   for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
15416     if (GENERAL_REGNO_P (i) && !call_used_regs[i])
15417	reg_alloc_order [pos++] = i;
15418
15419   /* x87 registers come first in case we are doing FP math
15420      using them.  */
15421   if (!TARGET_SSE_MATH)
15422     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15423       reg_alloc_order [pos++] = i;
15424
15425   /* SSE registers.  */
15426   for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
15427     reg_alloc_order [pos++] = i;
15428   for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
15429     reg_alloc_order [pos++] = i;
15430
15431   /* x87 registers.  */
15432   if (TARGET_SSE_MATH)
15433     for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
15434       reg_alloc_order [pos++] = i;
15435
15436   for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
15437     reg_alloc_order [pos++] = i;
15438
15439   /* Initialize the rest of the array, as we do not allocate some registers
15440      at all.  */
15441   while (pos < FIRST_PSEUDO_REGISTER)
15442     reg_alloc_order [pos++] = 0;
15443}
15444
15445#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
15446#define TARGET_USE_MS_BITFIELD_LAYOUT 0
15447#endif
15448
15449/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
15450   struct attribute_spec.handler.  */
15451static tree
15452ix86_handle_struct_attribute (tree *node, tree name,
15453			      tree args ATTRIBUTE_UNUSED,
15454			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
15455{
15456  tree *type = NULL;
15457  if (DECL_P (*node))
15458    {
15459      if (TREE_CODE (*node) == TYPE_DECL)
15460	type = &TREE_TYPE (*node);
15461    }
15462  else
15463    type = node;
15464
15465  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
15466		 || TREE_CODE (*type) == UNION_TYPE)))
15467    {
15468      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
15469      *no_add_attrs = true;
15470    }
15471
15472  else if ((is_attribute_p ("ms_struct", name)
15473	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
15474	   || ((is_attribute_p ("gcc_struct", name)
15475		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
15476    {
15477      warning ("`%s' incompatible attribute ignored",
15478               IDENTIFIER_POINTER (name));
15479      *no_add_attrs = true;
15480    }
15481
15482  return NULL_TREE;
15483}
15484
15485static bool
15486ix86_ms_bitfield_layout_p (tree record_type)
15487{
15488  return (TARGET_USE_MS_BITFIELD_LAYOUT &&
15489	  !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
15490    || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
15491}
15492
15493/* Returns an expression indicating where the this parameter is
15494   located on entry to the FUNCTION.  */
15495
15496static rtx
15497x86_this_parameter (tree function)
15498{
15499  tree type = TREE_TYPE (function);
15500
15501  if (TARGET_64BIT)
15502    {
15503      int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
15504      return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
15505    }
15506
15507  if (ix86_function_regparm (type, function) > 0)
15508    {
15509      tree parm;
15510
15511      parm = TYPE_ARG_TYPES (type);
15512      /* Figure out whether or not the function has a variable number of
15513	 arguments.  */
15514      for (; parm; parm = TREE_CHAIN (parm))
15515	if (TREE_VALUE (parm) == void_type_node)
15516	  break;
15517      /* If not, the this parameter is in the first argument.  */
15518      if (parm)
15519	{
15520	  int regno = 0;
15521	  if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
15522	    regno = 2;
15523	  return gen_rtx_REG (SImode, regno);
15524	}
15525    }
15526
15527  if (aggregate_value_p (TREE_TYPE (type), type))
15528    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
15529  else
15530    return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
15531}
15532
15533/* Determine whether x86_output_mi_thunk can succeed.  */
15534
15535static bool
15536x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
15537			 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
15538			 HOST_WIDE_INT vcall_offset, tree function)
15539{
15540  /* 64-bit can handle anything.  */
15541  if (TARGET_64BIT)
15542    return true;
15543
15544  /* For 32-bit, everything's fine if we have one free register.  */
15545  if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
15546    return true;
15547
15548  /* Need a free register for vcall_offset.  */
15549  if (vcall_offset)
15550    return false;
15551
15552  /* Need a free register for GOT references.  */
15553  if (flag_pic && !(*targetm.binds_local_p) (function))
15554    return false;
15555
15556  /* Otherwise ok.  */
15557  return true;
15558}
15559
15560/* Output the assembler code for a thunk function.  THUNK_DECL is the
15561   declaration for the thunk function itself, FUNCTION is the decl for
15562   the target function.  DELTA is an immediate constant offset to be
15563   added to THIS.  If VCALL_OFFSET is nonzero, the word at
15564   *(*this + vcall_offset) should be added to THIS.  */
15565
15566static void
15567x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
15568		     tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
15569		     HOST_WIDE_INT vcall_offset, tree function)
15570{
15571  rtx xops[3];
15572  rtx this = x86_this_parameter (function);
15573  rtx this_reg, tmp;
15574
15575  /* If VCALL_OFFSET, we'll need THIS in a register.  Might as well
15576     pull it in now and let DELTA benefit.  */
15577  if (REG_P (this))
15578    this_reg = this;
15579  else if (vcall_offset)
15580    {
15581      /* Put the this parameter into %eax.  */
15582      xops[0] = this;
15583      xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
15584      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15585    }
15586  else
15587    this_reg = NULL_RTX;
15588
15589  /* Adjust the this parameter by a fixed constant.  */
15590  if (delta)
15591    {
15592      xops[0] = GEN_INT (delta);
15593      xops[1] = this_reg ? this_reg : this;
15594      if (TARGET_64BIT)
15595	{
15596	  if (!x86_64_general_operand (xops[0], DImode))
15597	    {
15598	      tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15599	      xops[1] = tmp;
15600	      output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
15601	      xops[0] = tmp;
15602	      xops[1] = this;
15603	    }
15604	  output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15605	}
15606      else
15607	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15608    }
15609
15610  /* Adjust the this parameter by a value stored in the vtable.  */
15611  if (vcall_offset)
15612    {
15613      if (TARGET_64BIT)
15614	tmp = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 2 /* R10 */);
15615      else
15616	{
15617	  int tmp_regno = 2 /* ECX */;
15618	  if (lookup_attribute ("fastcall",
15619	      TYPE_ATTRIBUTES (TREE_TYPE (function))))
15620	    tmp_regno = 0 /* EAX */;
15621	  tmp = gen_rtx_REG (SImode, tmp_regno);
15622	}
15623
15624      xops[0] = gen_rtx_MEM (Pmode, this_reg);
15625      xops[1] = tmp;
15626      if (TARGET_64BIT)
15627	output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15628      else
15629	output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15630
15631      /* Adjust the this parameter.  */
15632      xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
15633      if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
15634	{
15635	  rtx tmp2 = gen_rtx_REG (DImode, FIRST_REX_INT_REG + 3 /* R11 */);
15636	  xops[0] = GEN_INT (vcall_offset);
15637	  xops[1] = tmp2;
15638	  output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
15639	  xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
15640	}
15641      xops[1] = this_reg;
15642      if (TARGET_64BIT)
15643	output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
15644      else
15645	output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
15646    }
15647
15648  /* If necessary, drop THIS back to its stack slot.  */
15649  if (this_reg && this_reg != this)
15650    {
15651      xops[0] = this_reg;
15652      xops[1] = this;
15653      output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
15654    }
15655
15656  xops[0] = XEXP (DECL_RTL (function), 0);
15657  if (TARGET_64BIT)
15658    {
15659      if (!flag_pic || (*targetm.binds_local_p) (function))
15660	output_asm_insn ("jmp\t%P0", xops);
15661      else
15662	{
15663	  tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
15664	  tmp = gen_rtx_CONST (Pmode, tmp);
15665	  tmp = gen_rtx_MEM (QImode, tmp);
15666	  xops[0] = tmp;
15667	  output_asm_insn ("jmp\t%A0", xops);
15668	}
15669    }
15670  else
15671    {
15672      if (!flag_pic || (*targetm.binds_local_p) (function))
15673	output_asm_insn ("jmp\t%P0", xops);
15674      else
15675#if TARGET_MACHO
15676	if (TARGET_MACHO)
15677	  {
15678	    const char *ip = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (function));
15679	    tmp = gen_rtx_SYMBOL_REF (Pmode, machopic_stub_name (ip));
15680	    tmp = gen_rtx_MEM (QImode, tmp);
15681	    xops[0] = tmp;
15682	    output_asm_insn ("jmp\t%0", xops);
15683	  }
15684	else
15685#endif /* TARGET_MACHO */
15686	{
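	  /* Load the GOT base into %ecx, fetch FUNCTION's address from the
	     GOT, and tail-jump through it.  */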
15687	  tmp = gen_rtx_REG (SImode, 2 /* ECX */);
15688	  output_set_got (tmp);
15689
15690	  xops[1] = tmp;
15691	  output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
15692	  output_asm_insn ("jmp\t{*}%1", xops);
15693	}
15694    }
15695}
15696
15697static void
15698x86_file_start (void)
15699{
15700  default_file_start ();
15701  if (X86_FILE_START_VERSION_DIRECTIVE)
15702    fputs ("\t.version\t\"01.01\"\n", asm_out_file);
15703  if (X86_FILE_START_FLTUSED)
15704    fputs ("\t.global\t__fltused\n", asm_out_file);
15705  if (ix86_asm_dialect == ASM_INTEL)
15706    fputs ("\t.intel_syntax\n", asm_out_file);
15707}
15708
15709int
15710x86_field_alignment (tree field, int computed)
15711{
15712  enum machine_mode mode;
15713  tree type = TREE_TYPE (field);
15714
15715  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
15716    return computed;
15717  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
15718		    ? get_inner_array_type (type) : type);
15719  if (mode == DFmode || mode == DCmode
15720      || GET_MODE_CLASS (mode) == MODE_INT
15721      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
15722    return MIN (32, computed);
15723  return computed;
15724}
15725
15726/* Output assembler code to FILE to increment profiler label # LABELNO
15727   for profiling a function entry.  */
15728void
15729x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
15730{
15731  if (TARGET_64BIT)
15732    if (flag_pic)
15733      {
15734#ifndef NO_PROFILE_COUNTERS
15735	fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
15736#endif
15737	fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
15738      }
15739    else
15740      {
15741#ifndef NO_PROFILE_COUNTERS
15742	fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
15743#endif
15744	fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15745      }
15746  else if (flag_pic)
15747    {
15748#ifndef NO_PROFILE_COUNTERS
15749      fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
15750	       LPREFIX, labelno, PROFILE_COUNT_REGISTER);
15751#endif
15752      fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
15753    }
15754  else
15755    {
15756#ifndef NO_PROFILE_COUNTERS
15757      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
15758	       PROFILE_COUNT_REGISTER);
15759#endif
15760      fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
15761    }
15762}
15763
15764/* We don't have exact information about the insn sizes, but we may assume
15765   quite safely that we are informed about all 1 byte insns and memory
15766   address sizes.  This is enough to eliminate unnecessary padding in
15767   99% of cases.  */
15768
15769static int
15770min_insn_size (rtx insn)
15771{
15772  int l = 0;
15773
15774  if (!INSN_P (insn) || !active_insn_p (insn))
15775    return 0;
15776
15777  /* Discard alignments we've emitted, as well as jump tables.  */
15778  if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
15779      && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
15780    return 0;
15781  if (GET_CODE (insn) == JUMP_INSN
15782      && (GET_CODE (PATTERN (insn)) == ADDR_VEC
15783	  || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
15784    return 0;
15785
15786  /* Important case - calls are always 5 bytes.
15787     It is common to have many calls in a row.  */
15788  if (GET_CODE (insn) == CALL_INSN
15789      && symbolic_reference_mentioned_p (PATTERN (insn))
15790      && !SIBLING_CALL_P (insn))
15791    return 5;
15792  if (get_attr_length (insn) <= 1)
15793    return 1;
15794
15795  /* For normal instructions we may rely on the sizes of addresses
15796     and the presence of a symbol to require 4 bytes of encoding.
15797     This is not the case for jumps, where references are PC relative.  */
15798  if (GET_CODE (insn) != JUMP_INSN)
15799    {
15800      l = get_attr_length_address (insn);
15801      if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
15802	l = 4;
15803    }
15804  if (l)
15805    return 1+l;
15806  else
15807    return 2;
15808}
15809
15810/* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
15811   window.  */
15812
15813static void
15814k8_avoid_jump_misspredicts (void)
15815{
15816  rtx insn, start = get_insns ();
15817  int nbytes = 0, njumps = 0;
15818  int isjump = 0;
15819
15820  /* Look for all minimal intervals of instructions containing 4 jumps.
15821     The intervals are bounded by START and INSN.  NBYTES is the total
15822     size of instructions in the interval including INSN and not including
15823     START.  When NBYTES is smaller than 16, it is possible that the end
15824     of START and INSN end up in the same 16-byte page.
15825
15826     The smallest offset in the page INSN can start is the case where START
15827     ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
15828     We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
15829     */
15830  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
15831    {
15832
15833      nbytes += min_insn_size (insn);
15834      if (rtl_dump_file)
15835        fprintf(rtl_dump_file, "Insn %i estimated to %i bytes\n",
15836		INSN_UID (insn), min_insn_size (insn));
15837      if ((GET_CODE (insn) == JUMP_INSN
15838	   && GET_CODE (PATTERN (insn)) != ADDR_VEC
15839	   && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
15840	  || GET_CODE (insn) == CALL_INSN)
15841	njumps++;
15842      else
15843	continue;
15844
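      /* Slide START forward until at most three jumps remain in the window,
	 remembering whether the insn just dropped from it was a jump.  */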
15845      while (njumps > 3)
15846	{
15847	  start = NEXT_INSN (start);
15848	  if ((GET_CODE (start) == JUMP_INSN
15849	       && GET_CODE (PATTERN (start)) != ADDR_VEC
15850	       && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
15851	      || GET_CODE (start) == CALL_INSN)
15852	    njumps--, isjump = 1;
15853	  else
15854	    isjump = 0;
15855	  nbytes -= min_insn_size (start);
15856	}
15857      if (njumps < 0)
15858	abort ();
15859      if (rtl_dump_file)
15860        fprintf(rtl_dump_file, "Interval %i to %i has %i bytes\n",
15861		INSN_UID (start), INSN_UID (insn), nbytes);
15862
15863      if (njumps == 3 && isjump && nbytes < 16)
15864	{
15865	  int padsize = 15 - nbytes + min_insn_size (insn);
15866
15867	  if (rtl_dump_file)
15868	    fprintf (rtl_dump_file, "Padding insn %i by %i bytes!\n", INSN_UID (insn), padsize);
15869          emit_insn_before (gen_align (GEN_INT (padsize)), insn);
15870	}
15871    }
15872}
15873
15874/* Implement machine specific optimizations.
15875   At the moment we implement a single transformation: AMD Athlon works faster
15876   when RET is not the destination of a conditional jump or directly preceded
15877   by another jump instruction.  We avoid the penalty by inserting a NOP just
15878   before the RET instruction in such cases.  */
15879static void
15880ix86_reorg (void)
15881{
15882  edge e;
15883
15884  if (!TARGET_ATHLON_K8 || !optimize || optimize_size)
15885    return;
15886  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
15887  {
15888    basic_block bb = e->src;
15889    rtx ret = BB_END (bb);
15890    rtx prev;
15891    bool replace = false;
15892
15893    if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
15894	|| !maybe_hot_bb_p (bb))
15895      continue;
15896    for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
15897      if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
15898	break;
15899    if (prev && GET_CODE (prev) == CODE_LABEL)
15900      {
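	/* The RET is preceded only by a label; if any predecessor reaches
	   that label via a jump (a non-fallthru edge), the RET is itself a
	   branch target and is liable to be mispredicted.  */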
15901	edge e;
15902	for (e = bb->pred; e; e = e->pred_next)
15903	  if (EDGE_FREQUENCY (e) && e->src->index >= 0
15904	      && !(e->flags & EDGE_FALLTHRU))
15905	    replace = true;
15906      }
15907    if (!replace)
15908      {
15909	prev = prev_active_insn (ret);
15910	if (prev
15911	    && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
15912		|| GET_CODE (prev) == CALL_INSN))
15913	  replace = true;
15914	/* Empty functions get a branch mispredict even when the jump destination
15915	   is not visible to us.  */
15916	if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
15917	  replace = true;
15918      }
15919    if (replace)
15920      {
15921        emit_insn_before (gen_return_internal_long (), ret);
15922	delete_insn (ret);
15923      }
15924  }
15925  k8_avoid_jump_misspredicts ();
15926}
15927
15928/* Return nonzero when a QImode register that must be represented via a REX
15929   prefix is used.  */
15930bool
15931x86_extended_QIreg_mentioned_p (rtx insn)
15932{
15933  int i;
15934  extract_insn_cached (insn);
15935  for (i = 0; i < recog_data.n_operands; i++)
15936    if (REG_P (recog_data.operand[i])
15937	&& REGNO (recog_data.operand[i]) >= 4)
15938       return true;
15939  return false;
15940}
15941
15942/* Return nonzero when P points to a register encoded via a REX prefix.
15943   Called via for_each_rtx.  */
15944static int
15945extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
15946{
15947   unsigned int regno;
15948   if (!REG_P (*p))
15949     return 0;
15950   regno = REGNO (*p);
15951   return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
15952}
15953
15954/* Return true when INSN mentions a register that must be encoded using a
15955   REX prefix.  */
15956bool
15957x86_extended_reg_mentioned_p (rtx insn)
15958{
15959  return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
15960}
15961
15962/* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
15963   optabs would emit if we didn't have TFmode patterns.  */
15964
15965void
15966x86_emit_floatuns (rtx operands[2])
15967{
15968  rtx neglab, donelab, i0, i1, f0, in, out;
15969  enum machine_mode mode, inmode;
15970
15971  inmode = GET_MODE (operands[1]);
15972  if (inmode != SImode
15973      && inmode != DImode)
15974    abort ();
15975
15976  out = operands[0];
15977  in = force_reg (inmode, operands[1]);
15978  mode = GET_MODE (out);
15979  neglab = gen_label_rtx ();
15980  donelab = gen_label_rtx ();
15981  i1 = gen_reg_rtx (Pmode);
15982  f0 = gen_reg_rtx (mode);
15983
15984  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
15985
15986  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
15987  emit_jump_insn (gen_jump (donelab));
15988  emit_barrier ();
15989
15990  emit_label (neglab);
15991
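  /* IN does not fit the signed conversion, so halve it (folding the low
     bit back in so rounding still comes out right), convert, and double:
     out = (FP) (in >> 1 | (in & 1)) * 2.  */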
15992  i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15993  i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
15994  i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
15995  expand_float (f0, i0, 0);
15996  emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
15997
15998  emit_label (donelab);
15999}
16000
16001/* Return true if we do not know how to pass TYPE solely in registers.  */
16002bool
16003ix86_must_pass_in_stack (enum machine_mode mode, tree type)
16004{
16005   if (default_must_pass_in_stack (mode, type))
16006     return true;
16007   return (!TARGET_64BIT && type && mode == TImode);
16008}
16009
16010/* Initialize vector TARGET via VALS.  */
16011void
16012ix86_expand_vector_init (rtx target, rtx vals)
16013{
16014  enum machine_mode mode = GET_MODE (target);
16015  int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode));
16016  int n_elts = (GET_MODE_SIZE (mode) / elt_size);
16017  int i;
16018
16019  for (i = n_elts - 1; i >= 0; i--)
16020    if (GET_CODE (XVECEXP (vals, 0, i)) != CONST_INT
16021	&& GET_CODE (XVECEXP (vals, 0, i)) != CONST_DOUBLE)
16022      break;
16023
16024  /* A few special cases first...
16025     ... constants are best loaded from the constant pool.  */
16026  if (i < 0)
16027    {
16028      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16029      return;
16030    }
16031
16032  /* ... values where only the first field is non-constant are best loaded
16033     from the pool and overwritten via a move later.  */
16034  if (!i)
16035    {
16036      rtx op = simplify_gen_subreg (mode, XVECEXP (vals, 0, 0),
16037				    GET_MODE_INNER (mode), 0);
16038
16039      op = force_reg (mode, op);
16040      XVECEXP (vals, 0, 0) = CONST0_RTX (GET_MODE_INNER (mode));
16041      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
16042      switch (GET_MODE (target))
16043	{
16044	  case V2DFmode:
16045	    emit_insn (gen_sse2_movsd (target, target, op));
16046	    break;
16047	  case V4SFmode:
16048	    emit_insn (gen_sse_movss (target, target, op));
16049	    break;
16050	  default:
16051	    break;
16052	}
16053      return;
16054    }
16055
16056  /* And the general case: build the vector with a sequence of interleaves.  */
16057  switch (GET_MODE (target))
16058    {
16059      case V2DFmode:
16060	{
16061	  rtx vecop0 =
16062	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 0), DFmode, 0);
16063	  rtx vecop1 =
16064	    simplify_gen_subreg (V2DFmode, XVECEXP (vals, 0, 1), DFmode, 0);
16065
16066	  vecop0 = force_reg (V2DFmode, vecop0);
16067	  vecop1 = force_reg (V2DFmode, vecop1);
16068	  emit_insn (gen_sse2_unpcklpd (target, vecop0, vecop1));
16069	}
16070	break;
16071      case V4SFmode:
16072	{
16073	  rtx vecop0 =
16074	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 0), SFmode, 0);
16075	  rtx vecop1 =
16076	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 1), SFmode, 0);
16077	  rtx vecop2 =
16078	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 2), SFmode, 0);
16079	  rtx vecop3 =
16080	    simplify_gen_subreg (V4SFmode, XVECEXP (vals, 0, 3), SFmode, 0);
16081	  rtx tmp1 = gen_reg_rtx (V4SFmode);
16082	  rtx tmp2 = gen_reg_rtx (V4SFmode);
16083
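	  /* Interleave elements {1,3} and {0,2} with UNPCKLPS, then
	     interleave those two results to obtain <0,1,2,3> in order.  */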
16084	  vecop0 = force_reg (V4SFmode, vecop0);
16085	  vecop1 = force_reg (V4SFmode, vecop1);
16086	  vecop2 = force_reg (V4SFmode, vecop2);
16087	  vecop3 = force_reg (V4SFmode, vecop3);
16088	  emit_insn (gen_sse_unpcklps (tmp1, vecop1, vecop3));
16089	  emit_insn (gen_sse_unpcklps (tmp2, vecop0, vecop2));
16090	  emit_insn (gen_sse_unpcklps (target, tmp2, tmp1));
16091	}
16092	break;
16093      default:
16094	abort ();
16095    }
16096}
16097
16098#include "gt-i386.h"
16099